Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ea85540

Browse files
Optimize String write (#1651)
- Remove redundant bounds checking. - Add ASCII fast-path. - Add tests for UTF-8 encoding across buffer boundaries and malformed surrogate pairs. - Fix ByteBuf leak in pipe() method - two buffers were not released. JAVA-5816
1 parent 04b9f85 commit ea85540

File tree

8 files changed

+1039
-151
lines changed

8 files changed

+1039
-151
lines changed

‎bson/src/main/org/bson/ByteBuf.java‎

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,26 @@ public interface ByteBuf {
184184
*/
185185
byte[] array();
186186

187+
/**
188+
* <p>States whether this buffer is backed by an accessible byte array.</p>
189+
*
190+
* <p>If this method returns {@code true} then the {@link #array()} and {@link #arrayOffset()} methods may safely be invoked.</p>
191+
*
192+
* @return {@code true} if, and only if, this buffer is backed by an array and is not read-only
193+
* @since 5.5
194+
*/
195+
boolean hasArray();
196+
197+
/**
198+
* Returns the offset of the first byte within the backing byte array of
199+
* this buffer.
200+
*
201+
* @throws java.nio.ReadOnlyBufferException If this buffer is backed by an array but is read-only
202+
* @throws UnsupportedOperationException if this buffer is not backed by an accessible array
203+
* @since 5.5
204+
*/
205+
int arrayOffset();
206+
187207
/**
188208
* Returns this buffer's limit.
189209
*

‎bson/src/main/org/bson/ByteBufNIO.java‎

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,16 @@ public byte[] array() {
132132
return buf.array();
133133
}
134134

135+
@Override
136+
public boolean hasArray() {
137+
return buf.hasArray();
138+
}
139+
140+
@Override
141+
public int arrayOffset() {
142+
return buf.arrayOffset();
143+
}
144+
135145
@Override
136146
public int limit() {
137147
return buf.limit();

‎bson/src/main/org/bson/io/OutputBuffer.java‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ public void writeLong(final long value) {
197197
writeInt64(value);
198198
}
199199

200-
private int writeCharacters(final String str, final boolean checkForNullCharacters) {
200+
protected int writeCharacters(final String str, final boolean checkForNullCharacters) {
201201
int len = str.length();
202202
int total = 0;
203203

‎driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java‎

Lines changed: 194 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
package com.mongodb.internal.connection;
1818

19+
import org.bson.BsonSerializationException;
1920
import org.bson.ByteBuf;
2021
import org.bson.io.OutputBuffer;
2122

@@ -25,8 +26,10 @@
2526
import java.util.ArrayList;
2627
import java.util.List;
2728

29+
import static com.mongodb.assertions.Assertions.assertFalse;
2830
import static com.mongodb.assertions.Assertions.assertTrue;
2931
import static com.mongodb.assertions.Assertions.notNull;
32+
import static java.lang.String.format;
3033

3134
/**
3235
* <p>This class is not part of the public API and may be removed or changed at any time</p>
@@ -182,11 +185,17 @@ private ByteBuf getCurrentByteBuffer() {
182185
return currentByteBuffer;
183186
}
184187

188+
private ByteBuf getNextByteBuffer() {
189+
assertFalse(bufferList.get(curBufferIndex).hasRemaining());
190+
return getByteBufferAtIndex(++curBufferIndex);
191+
}
192+
185193
private ByteBuf getByteBufferAtIndex(final int index) {
186194
if (bufferList.size() < index + 1) {
187-
bufferList.add(bufferProvider.getBuffer(index >= (MAX_SHIFT - INITIAL_SHIFT)
188-
? MAX_BUFFER_SIZE
189-
: Math.min(INITIAL_BUFFER_SIZE << index, MAX_BUFFER_SIZE)));
195+
ByteBuf buffer = bufferProvider.getBuffer(index >= (MAX_SHIFT - INITIAL_SHIFT)
196+
? MAX_BUFFER_SIZE
197+
: Math.min(INITIAL_BUFFER_SIZE << index, MAX_BUFFER_SIZE));
198+
bufferList.add(buffer);
190199
}
191200
return bufferList.get(index);
192201
}
@@ -229,6 +238,16 @@ public List<ByteBuf> getByteBuffers() {
229238
return buffers;
230239
}
231240

241+
public List<ByteBuf> getDuplicateByteBuffers() {
242+
ensureOpen();
243+
244+
List<ByteBuf> buffers = new ArrayList<>(bufferList.size());
245+
for (final ByteBuf cur : bufferList) {
246+
buffers.add(cur.duplicate().order(ByteOrder.LITTLE_ENDIAN));
247+
}
248+
return buffers;
249+
}
250+
232251

233252
@Override
234253
public int pipe(final OutputStream out) throws IOException {
@@ -237,14 +256,18 @@ public int pipe(final OutputStream out) throws IOException {
237256
byte[] tmp = new byte[INITIAL_BUFFER_SIZE];
238257

239258
int total = 0;
240-
for (final ByteBuf cur : getByteBuffers()) {
241-
ByteBuf dup = cur.duplicate();
242-
while (dup.hasRemaining()) {
243-
int numBytesToCopy = Math.min(dup.remaining(), tmp.length);
244-
dup.get(tmp, 0, numBytesToCopy);
245-
out.write(tmp, 0, numBytesToCopy);
259+
List<ByteBuf> byteBuffers = getByteBuffers();
260+
try {
261+
for (final ByteBuf cur : byteBuffers) {
262+
while (cur.hasRemaining()) {
263+
int numBytesToCopy = Math.min(cur.remaining(), tmp.length);
264+
cur.get(tmp, 0, numBytesToCopy);
265+
out.write(tmp, 0, numBytesToCopy);
266+
}
267+
total += cur.limit();
246268
}
247-
total += dup.limit();
269+
} finally {
270+
byteBuffers.forEach(ByteBuf::release);
248271
}
249272
return total;
250273
}
@@ -370,4 +393,165 @@ private static final class BufferPositionPair {
370393
this.position = position;
371394
}
372395
}
396+
397+
protected int writeCharacters(final String str, final boolean checkNullTermination) {
398+
int stringLength = str.length();
399+
int sp = 0;
400+
int prevPos = position;
401+
402+
ByteBuf curBuffer = getCurrentByteBuffer();
403+
int curBufferPos = curBuffer.position();
404+
int curBufferLimit = curBuffer.limit();
405+
int remaining = curBufferLimit - curBufferPos;
406+
407+
if (curBuffer.hasArray()) {
408+
byte[] dst = curBuffer.array();
409+
int arrayOffset = curBuffer.arrayOffset();
410+
if (remaining >= str.length() + 1) {
411+
// Write ASCII characters directly to the array until we hit a non-ASCII character.
412+
sp = writeOnArrayAscii(str, dst, arrayOffset + curBufferPos, checkNullTermination);
413+
curBufferPos += sp;
414+
// If the whole string was written as ASCII, append the null terminator.
415+
if (sp == stringLength) {
416+
dst[arrayOffset + curBufferPos++] = 0;
417+
position += sp + 1;
418+
curBuffer.position(curBufferPos);
419+
return sp + 1;
420+
}
421+
// Otherwise, update the position to reflect the partial write.
422+
position += sp;
423+
curBuffer.position(curBufferPos);
424+
}
425+
}
426+
427+
// We get here, when the buffer is not backed by an array, or when the string contains at least one non-ASCII characters.
428+
return writeOnBuffers(str,
429+
checkNullTermination,
430+
sp,
431+
stringLength,
432+
curBufferLimit,
433+
curBufferPos,
434+
curBuffer,
435+
prevPos);
436+
}
437+
438+
private int writeOnBuffers(final String str,
439+
final boolean checkNullTermination,
440+
final int stringPointer,
441+
final int stringLength,
442+
final int bufferLimit,
443+
final int bufferPos,
444+
final ByteBuf buffer,
445+
final int prevPos) {
446+
int remaining;
447+
int sp = stringPointer;
448+
int curBufferPos = bufferPos;
449+
int curBufferLimit = bufferLimit;
450+
ByteBuf curBuffer = buffer;
451+
while (sp < stringLength) {
452+
remaining = curBufferLimit - curBufferPos;
453+
int c = str.charAt(sp);
454+
455+
if (checkNullTermination && c == 0x0) {
456+
throw new BsonSerializationException(
457+
format("BSON cstring '%s' is not valid because it contains a null character " + "at index %d", str, sp));
458+
}
459+
460+
if (c < 0x80) {
461+
if (remaining == 0) {
462+
curBuffer = getNextByteBuffer();
463+
curBufferPos = 0;
464+
curBufferLimit = curBuffer.limit();
465+
}
466+
curBuffer.put((byte) c);
467+
curBufferPos++;
468+
position++;
469+
} else if (c < 0x800) {
470+
if (remaining < 2) {
471+
// Not enough space: use write() to handle buffer boundary
472+
write((byte) (0xc0 + (c >> 6)));
473+
write((byte) (0x80 + (c & 0x3f)));
474+
475+
curBuffer = getCurrentByteBuffer();
476+
curBufferPos = curBuffer.position();
477+
curBufferLimit = curBuffer.limit();
478+
} else {
479+
curBuffer.put((byte) (0xc0 + (c >> 6)));
480+
curBuffer.put((byte) (0x80 + (c & 0x3f)));
481+
curBufferPos += 2;
482+
position += 2;
483+
}
484+
} else {
485+
// Handle multibyte characters (may involve surrogate pairs).
486+
c = Character.codePointAt(str, sp);
487+
/*
488+
Malformed surrogate pairs are encoded as-is (3 byte code unit) without substituting any code point.
489+
This known deviation from the spec and current functionality remains for backward compatibility.
490+
Ticket: JAVA-5575
491+
*/
492+
if (c < 0x10000) {
493+
if (remaining < 3) {
494+
write((byte) (0xe0 + (c >> 12)));
495+
write((byte) (0x80 + ((c >> 6) & 0x3f)));
496+
write((byte) (0x80 + (c & 0x3f)));
497+
498+
curBuffer = getCurrentByteBuffer();
499+
curBufferPos = curBuffer.position();
500+
curBufferLimit = curBuffer.limit();
501+
} else {
502+
curBuffer.put((byte) (0xe0 + (c >> 12)));
503+
curBuffer.put((byte) (0x80 + ((c >> 6) & 0x3f)));
504+
curBuffer.put((byte) (0x80 + (c & 0x3f)));
505+
curBufferPos += 3;
506+
position += 3;
507+
}
508+
} else {
509+
if (remaining < 4) {
510+
write((byte) (0xf0 + (c >> 18)));
511+
write((byte) (0x80 + ((c >> 12) & 0x3f)));
512+
write((byte) (0x80 + ((c >> 6) & 0x3f)));
513+
write((byte) (0x80 + (c & 0x3f)));
514+
515+
curBuffer = getCurrentByteBuffer();
516+
curBufferPos = curBuffer.position();
517+
curBufferLimit = curBuffer.limit();
518+
} else {
519+
curBuffer.put((byte) (0xf0 + (c >> 18)));
520+
curBuffer.put((byte) (0x80 + ((c >> 12) & 0x3f)));
521+
curBuffer.put((byte) (0x80 + ((c >> 6) & 0x3f)));
522+
curBuffer.put((byte) (0x80 + (c & 0x3f)));
523+
curBufferPos += 4;
524+
position += 4;
525+
}
526+
}
527+
}
528+
sp += Character.charCount(c);
529+
}
530+
531+
getCurrentByteBuffer().put((byte) 0);
532+
position++;
533+
return position - prevPos;
534+
}
535+
536+
private static int writeOnArrayAscii(final String str,
537+
final byte[] dst,
538+
final int arrayPosition,
539+
final boolean checkNullTermination) {
540+
int pos = arrayPosition;
541+
int sp = 0;
542+
// Fast common path: This tight loop is JIT-friendly (simple, no calls, few branches),
543+
// It might be unrolled for performance.
544+
for (; sp < str.length(); sp++, pos++) {
545+
char c = str.charAt(sp);
546+
if (checkNullTermination && c == 0) {
547+
throw new BsonSerializationException(
548+
format("BSON cstring '%s' is not valid because it contains a null character " + "at index %d", str, sp));
549+
}
550+
if (c >= 0x80) {
551+
break;
552+
}
553+
dst[pos] = (byte) c;
554+
}
555+
return sp;
556+
}
373557
}

‎driver-core/src/main/com/mongodb/internal/connection/CompositeByteBuf.java‎

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,16 @@ public byte[] array() {
213213
throw new UnsupportedOperationException("Not implemented yet!");
214214
}
215215

216+
@Override
217+
public boolean hasArray() {
218+
return false;
219+
}
220+
221+
@Override
222+
public int arrayOffset() {
223+
throw new UnsupportedOperationException("Not implemented yet!");
224+
}
225+
216226
@Override
217227
public ByteBuf limit(final int newLimit) {
218228
if (newLimit < 0 || newLimit > capacity()) {

‎driver-core/src/main/com/mongodb/internal/connection/netty/NettyByteBuf.java‎

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,16 @@ public byte[] array() {
124124
return proxied.array();
125125
}
126126

127+
@Override
128+
public boolean hasArray() {
129+
return proxied.hasArray();
130+
}
131+
132+
@Override
133+
public int arrayOffset() {
134+
return proxied.arrayOffset();
135+
}
136+
127137
@Override
128138
public int limit() {
129139
if (isWriting) {

‎driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufSpecification.groovy‎

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -249,11 +249,7 @@ class ByteBufSpecification extends Specification {
249249
@Override
250250
ByteBuf getBuffer(final int size) {
251251
io.netty.buffer.ByteBuf buffer = allocator.directBuffer(size, size)
252-
try {
253-
new NettyByteBuf(buffer.retain())
254-
} finally {
255-
buffer.release();
256-
}
252+
new NettyByteBuf(buffer)
257253
}
258254
}
259255
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /