Subsequence versions of ByteBufUtil#writeUtf8(...) methods (#9224)
Motivation: It would be useful to be able to write a UTF-8 encoded subsequence of a CharSequence's characters to a ByteBuf without needing to create a temporary object via CharSequence#subSequence(). Modification: Add overloads of the ByteBufUtil writeUtf8, reserveAndWriteUtf8 and utf8Bytes methods which take explicit subsequence bounds. Result: More efficient writing of substrings to byte buffers is possible.
This commit is contained in:
parent
9dd1aab482
commit
2af769f6dc
@ -21,6 +21,7 @@ import io.netty.util.CharsetUtil;
|
|||||||
import io.netty.util.Recycler;
|
import io.netty.util.Recycler;
|
||||||
import io.netty.util.Recycler.Handle;
|
import io.netty.util.Recycler.Handle;
|
||||||
import io.netty.util.concurrent.FastThreadLocal;
|
import io.netty.util.concurrent.FastThreadLocal;
|
||||||
|
import io.netty.util.internal.MathUtil;
|
||||||
import io.netty.util.internal.PlatformDependent;
|
import io.netty.util.internal.PlatformDependent;
|
||||||
import io.netty.util.internal.StringUtil;
|
import io.netty.util.internal.StringUtil;
|
||||||
import io.netty.util.internal.SystemPropertyUtil;
|
import io.netty.util.internal.SystemPropertyUtil;
|
||||||
@ -472,6 +473,14 @@ public final class ByteBufUtil {
|
|||||||
return buffer.forEachByteDesc(toIndex, fromIndex - toIndex, new ByteProcessor.IndexOfProcessor(value));
|
return buffer.forEachByteDesc(toIndex, fromIndex - toIndex, new ByteProcessor.IndexOfProcessor(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static CharSequence checkCharSequenceBounds(CharSequence seq, int start, int end) {
|
||||||
|
if (MathUtil.isOutOfBounds(start, end - start, seq.length())) {
|
||||||
|
throw new IndexOutOfBoundsException("expected: 0 <= start(" + start + ") <= end (" + end
|
||||||
|
+ ") <= seq.length(" + seq.length() + ')');
|
||||||
|
}
|
||||||
|
return seq;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Encode a {@link CharSequence} in <a href="http://en.wikipedia.org/wiki/UTF-8">UTF-8</a> and write
|
* Encode a {@link CharSequence} in <a href="http://en.wikipedia.org/wiki/UTF-8">UTF-8</a> and write
|
||||||
* it to a {@link ByteBuf} allocated with {@code alloc}.
|
* it to a {@link ByteBuf} allocated with {@code alloc}.
|
||||||
@ -496,7 +505,17 @@ public final class ByteBufUtil {
|
|||||||
* This method returns the actual number of bytes written.
|
* This method returns the actual number of bytes written.
|
||||||
*/
|
*/
|
||||||
public static int writeUtf8(ByteBuf buf, CharSequence seq) {
|
public static int writeUtf8(ByteBuf buf, CharSequence seq) {
|
||||||
return reserveAndWriteUtf8(buf, seq, utf8MaxBytes(seq));
|
int seqLength = seq.length();
|
||||||
|
return reserveAndWriteUtf8Seq(buf, seq, 0, seqLength, utf8MaxBytes(seqLength));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Equivalent to <code>{@link #writeUtf8(ByteBuf, CharSequence) writeUtf8(buf, seq.subSequence(start, end))}</code>
|
||||||
|
* but avoids subsequence object allocation.
|
||||||
|
*/
|
||||||
|
public static int writeUtf8(ByteBuf buf, CharSequence seq, int start, int end) {
|
||||||
|
checkCharSequenceBounds(seq, start, end);
|
||||||
|
return reserveAndWriteUtf8Seq(buf, seq, start, end, utf8MaxBytes(end - start));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -509,6 +528,21 @@ public final class ByteBufUtil {
|
|||||||
* This method returns the actual number of bytes written.
|
* This method returns the actual number of bytes written.
|
||||||
*/
|
*/
|
||||||
public static int reserveAndWriteUtf8(ByteBuf buf, CharSequence seq, int reserveBytes) {
|
public static int reserveAndWriteUtf8(ByteBuf buf, CharSequence seq, int reserveBytes) {
|
||||||
|
return reserveAndWriteUtf8Seq(buf, seq, 0, seq.length(), reserveBytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Equivalent to <code>{@link #reserveAndWriteUtf8(ByteBuf, CharSequence, int)
|
||||||
|
* reserveAndWriteUtf8(buf, seq.subSequence(start, end), reserveBytes)}</code> but avoids
|
||||||
|
* subsequence object allocation if possible.
|
||||||
|
*
|
||||||
|
* @return actual number of bytes written
|
||||||
|
*/
|
||||||
|
public static int reserveAndWriteUtf8(ByteBuf buf, CharSequence seq, int start, int end, int reserveBytes) {
|
||||||
|
return reserveAndWriteUtf8Seq(buf, checkCharSequenceBounds(seq, start, end), start, end, reserveBytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int reserveAndWriteUtf8Seq(ByteBuf buf, CharSequence seq, int start, int end, int reserveBytes) {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (buf instanceof WrappedCompositeByteBuf) {
|
if (buf instanceof WrappedCompositeByteBuf) {
|
||||||
// WrappedCompositeByteBuf is a sub-class of AbstractByteBuf so it needs special handling.
|
// WrappedCompositeByteBuf is a sub-class of AbstractByteBuf so it needs special handling.
|
||||||
@ -516,27 +550,31 @@ public final class ByteBufUtil {
|
|||||||
} else if (buf instanceof AbstractByteBuf) {
|
} else if (buf instanceof AbstractByteBuf) {
|
||||||
AbstractByteBuf byteBuf = (AbstractByteBuf) buf;
|
AbstractByteBuf byteBuf = (AbstractByteBuf) buf;
|
||||||
byteBuf.ensureWritable0(reserveBytes);
|
byteBuf.ensureWritable0(reserveBytes);
|
||||||
int written = writeUtf8(byteBuf, byteBuf.writerIndex, seq, seq.length());
|
int written = writeUtf8(byteBuf, byteBuf.writerIndex, seq, start, end);
|
||||||
byteBuf.writerIndex += written;
|
byteBuf.writerIndex += written;
|
||||||
return written;
|
return written;
|
||||||
} else if (buf instanceof WrappedByteBuf) {
|
} else if (buf instanceof WrappedByteBuf) {
|
||||||
// Unwrap as the wrapped buffer may be an AbstractByteBuf and so we can use fast-path.
|
// Unwrap as the wrapped buffer may be an AbstractByteBuf and so we can use fast-path.
|
||||||
buf = buf.unwrap();
|
buf = buf.unwrap();
|
||||||
} else {
|
} else {
|
||||||
byte[] bytes = seq.toString().getBytes(CharsetUtil.UTF_8);
|
byte[] bytes = seq.subSequence(start, end).toString().getBytes(CharsetUtil.UTF_8);
|
||||||
buf.writeBytes(bytes);
|
buf.writeBytes(bytes);
|
||||||
return bytes.length;
|
return bytes.length;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fast-Path implementation
|
|
||||||
static int writeUtf8(AbstractByteBuf buffer, int writerIndex, CharSequence seq, int len) {
|
static int writeUtf8(AbstractByteBuf buffer, int writerIndex, CharSequence seq, int len) {
|
||||||
|
return writeUtf8(buffer, writerIndex, seq, 0, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fast-Path implementation
|
||||||
|
static int writeUtf8(AbstractByteBuf buffer, int writerIndex, CharSequence seq, int start, int end) {
|
||||||
int oldWriterIndex = writerIndex;
|
int oldWriterIndex = writerIndex;
|
||||||
|
|
||||||
// We can use the _set methods as these do not need to do any index checks and reference checks.
|
// We can use the _set methods as these do not need to do any index checks and reference checks.
|
||||||
// This is possible as we called ensureWritable(...) before.
|
// This is possible as we called ensureWritable(...) before.
|
||||||
for (int i = 0; i < len; i++) {
|
for (int i = start; i < end; i++) {
|
||||||
char c = seq.charAt(i);
|
char c = seq.charAt(i);
|
||||||
if (c < 0x80) {
|
if (c < 0x80) {
|
||||||
buffer._setByte(writerIndex++, (byte) c);
|
buffer._setByte(writerIndex++, (byte) c);
|
||||||
@ -606,22 +644,35 @@ public final class ByteBufUtil {
|
|||||||
* This method is producing the exact length according to {@link #writeUtf8(ByteBuf, CharSequence)}.
|
* This method is producing the exact length according to {@link #writeUtf8(ByteBuf, CharSequence)}.
|
||||||
*/
|
*/
|
||||||
public static int utf8Bytes(final CharSequence seq) {
|
public static int utf8Bytes(final CharSequence seq) {
|
||||||
|
return utf8ByteCount(seq, 0, seq.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Equivalent to <code>{@link #utf8Bytes(CharSequence) utf8Bytes(seq.subSequence(start, end))}</code>
|
||||||
|
* but avoids subsequence object allocation.
|
||||||
|
* <p>
|
||||||
|
* This method is producing the exact length according to {@link #writeUtf8(ByteBuf, CharSequence, int, int)}.
|
||||||
|
*/
|
||||||
|
public static int utf8Bytes(final CharSequence seq, int start, int end) {
|
||||||
|
return utf8ByteCount(checkCharSequenceBounds(seq, start, end), start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int utf8ByteCount(final CharSequence seq, int start, int end) {
|
||||||
if (seq instanceof AsciiString) {
|
if (seq instanceof AsciiString) {
|
||||||
return seq.length();
|
return end - start;
|
||||||
}
|
}
|
||||||
int seqLength = seq.length();
|
int i = start;
|
||||||
int i = 0;
|
|
||||||
// ASCII fast path
|
// ASCII fast path
|
||||||
while (i < seqLength && seq.charAt(i) < 0x80) {
|
while (i < end && seq.charAt(i) < 0x80) {
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
// !ASCII is packed in a separate method to let the ASCII case be smaller
|
// !ASCII is packed in a separate method to let the ASCII case be smaller
|
||||||
return i < seqLength ? i + utf8Bytes(seq, i, seqLength) : i;
|
return i < end ? (i - start) + utf8BytesNonAscii(seq, i, end) : i - start;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int utf8Bytes(final CharSequence seq, final int start, final int length) {
|
private static int utf8BytesNonAscii(final CharSequence seq, final int start, final int end) {
|
||||||
int encodedLength = 0;
|
int encodedLength = 0;
|
||||||
for (int i = start; i < length; i++) {
|
for (int i = start; i < end; i++) {
|
||||||
final char c = seq.charAt(i);
|
final char c = seq.charAt(i);
|
||||||
// making it 100% branchless isn't rewarding due to the many bit operations necessary!
|
// making it 100% branchless isn't rewarding due to the many bit operations necessary!
|
||||||
if (c < 0x800) {
|
if (c < 0x800) {
|
||||||
|
@ -510,6 +510,97 @@ public class ByteBufUtilTest {
|
|||||||
assertTrue(buf instanceof WrappedByteBuf);
|
assertTrue(buf instanceof WrappedByteBuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWriteUtf8Subsequence() {
|
||||||
|
String usAscii = "Some UTF-8 like äÄ∏ŒŒ";
|
||||||
|
ByteBuf buf = Unpooled.buffer(16);
|
||||||
|
buf.writeBytes(usAscii.substring(5, 18).getBytes(CharsetUtil.UTF_8));
|
||||||
|
ByteBuf buf2 = Unpooled.buffer(16);
|
||||||
|
ByteBufUtil.writeUtf8(buf2, usAscii, 5, 18);
|
||||||
|
|
||||||
|
assertEquals(buf, buf2);
|
||||||
|
|
||||||
|
buf.release();
|
||||||
|
buf2.release();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReserveAndWriteUtf8Subsequence() {
|
||||||
|
String usAscii = "Some UTF-8 like äÄ∏ŒŒ";
|
||||||
|
ByteBuf buf = Unpooled.buffer(16);
|
||||||
|
buf.writeBytes(usAscii.substring(5, 18).getBytes(CharsetUtil.UTF_8));
|
||||||
|
ByteBuf buf2 = Unpooled.buffer(16);
|
||||||
|
int count = ByteBufUtil.reserveAndWriteUtf8(buf2, usAscii, 5, 18, 16);
|
||||||
|
|
||||||
|
assertEquals(buf, buf2);
|
||||||
|
assertEquals(buf.readableBytes(), count);
|
||||||
|
|
||||||
|
buf.release();
|
||||||
|
buf2.release();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUtf8BytesSubsequence() {
|
||||||
|
String usAscii = "Some UTF-8 like äÄ∏ŒŒ";
|
||||||
|
assertEquals(usAscii.substring(5, 18).getBytes(CharsetUtil.UTF_8).length,
|
||||||
|
ByteBufUtil.utf8Bytes(usAscii, 5, 18));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int[][] INVALID_RANGES = new int[][] {
|
||||||
|
{ -1, 5 }, { 5, 30 }, { 10, 5 }
|
||||||
|
};
|
||||||
|
|
||||||
|
interface TestMethod {
|
||||||
|
int invoke(Object... args);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testInvalidSubsequences(TestMethod method) {
|
||||||
|
for (int [] range : INVALID_RANGES) {
|
||||||
|
ByteBuf buf = Unpooled.buffer(16);
|
||||||
|
try {
|
||||||
|
method.invoke(buf, "Some UTF-8 like äÄ∏ŒŒ", range[0], range[1]);
|
||||||
|
fail("Did not throw IndexOutOfBoundsException for range (" + range[0] + ", " + range[1] + ")");
|
||||||
|
} catch (IndexOutOfBoundsException iiobe) {
|
||||||
|
// expected
|
||||||
|
} finally {
|
||||||
|
assertFalse(buf.isReadable());
|
||||||
|
buf.release();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWriteUtf8InvalidSubsequences() {
|
||||||
|
testInvalidSubsequences(new TestMethod() {
|
||||||
|
@Override
|
||||||
|
public int invoke(Object... args) {
|
||||||
|
return ByteBufUtil.writeUtf8((ByteBuf) args[0], (String) args[1],
|
||||||
|
(Integer) args[2], (Integer) args[3]);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReserveAndWriteUtf8InvalidSubsequences() {
|
||||||
|
testInvalidSubsequences(new TestMethod() {
|
||||||
|
@Override
|
||||||
|
public int invoke(Object... args) {
|
||||||
|
return ByteBufUtil.reserveAndWriteUtf8((ByteBuf) args[0], (String) args[1],
|
||||||
|
(Integer) args[2], (Integer) args[3], 32);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUtf8BytesInvalidSubsequences() {
|
||||||
|
testInvalidSubsequences(new TestMethod() {
|
||||||
|
@Override
|
||||||
|
public int invoke(Object... args) {
|
||||||
|
return ByteBufUtil.utf8Bytes((String) args[1], (Integer) args[2], (Integer) args[3]);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDecodeUsAscii() {
|
public void testDecodeUsAscii() {
|
||||||
testDecodeString("This is a test", CharsetUtil.US_ASCII);
|
testDecodeString("This is a test", CharsetUtil.US_ASCII);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user