diff --git a/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java b/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java
index f2ce52256d..e1a53607f6 100644
--- a/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java
+++ b/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java
@@ -335,6 +335,127 @@ public final class ByteBufUtil {
         return -1;
     }
 
+    /**
+     * Encode a {@link CharSequence} in UTF-8 and write it to a {@link ByteBuf}.
+     * <p>
+     * A surrogate pair is encoded as one 4-byte sequence and an unpaired surrogate is replaced by
+     * {@code '?'}, which is the same output that {@code String.getBytes(UTF_8)} produces.
+     *
+     * @param buf the buffer to write to; its {@code writerIndex} is advanced by the number of bytes written
+     * @param seq the characters to encode
+     * @return the actual number of bytes written
+     * @throws NullPointerException if {@code buf} or {@code seq} is {@code null}
+     * @throws IllegalArgumentException if {@code seq} is so long that its worst-case size overflows an int
+     */
+    public static int writeUtf8(ByteBuf buf, CharSequence seq) {
+        if (buf == null) {
+            throw new NullPointerException("buf");
+        }
+        if (seq == null) {
+            throw new NullPointerException("seq");
+        }
+        final int len = seq.length();
+        // The _setByte(...) calls below are unchecked, so the reservation must never wrap around.
+        if (len > Integer.MAX_VALUE / 3) {
+            throw new IllegalArgumentException("seq is too long to be encoded: " + len + " chars");
+        }
+        // UTF-8 uses max. 3 bytes per char (a surrogate pair is 2 chars but only 4 bytes),
+        // so calculate the worst case.
+        buf.ensureWritable(len * 3);
+        if (buf instanceof AbstractByteBuf) {
+            // Fast-Path
+            AbstractByteBuf buffer = (AbstractByteBuf) buf;
+            final int oldWriterIndex = buffer.writerIndex;
+            int writerIndex = oldWriterIndex;
+
+            // We can use the _set methods as these do not need to do any index checks and reference checks.
+            // This is possible as we called ensureWritable(...) before.
+            for (int i = 0; i < len; i++) {
+                char c = seq.charAt(i);
+                if (c < 0x80) {
+                    buffer._setByte(writerIndex++, (byte) c);
+                } else if (c < 0x800) {
+                    buffer._setByte(writerIndex++, (byte) (0xc0 | (c >> 6)));
+                    buffer._setByte(writerIndex++, (byte) (0x80 | (c & 0x3f)));
+                } else if (c < Character.MIN_SURROGATE || c > Character.MAX_SURROGATE) {
+                    buffer._setByte(writerIndex++, (byte) (0xe0 | (c >> 12)));
+                    buffer._setByte(writerIndex++, (byte) (0x80 | ((c >> 6) & 0x3f)));
+                    buffer._setByte(writerIndex++, (byte) (0x80 | (c & 0x3f)));
+                } else if (Character.isHighSurrogate(c) && i + 1 < len
+                        && Character.isLowSurrogate(seq.charAt(i + 1))) {
+                    // Valid pair: consume both chars and encode the supplementary code point in 4 bytes.
+                    int codePoint = Character.toCodePoint(c, seq.charAt(++i));
+                    buffer._setByte(writerIndex++, (byte) (0xf0 | (codePoint >> 18)));
+                    buffer._setByte(writerIndex++, (byte) (0x80 | ((codePoint >> 12) & 0x3f)));
+                    buffer._setByte(writerIndex++, (byte) (0x80 | ((codePoint >> 6) & 0x3f)));
+                    buffer._setByte(writerIndex++, (byte) (0x80 | (codePoint & 0x3f)));
+                } else {
+                    // Unpaired surrogate: it has no UTF-8 form, so replace it like String.getBytes(...) does.
+                    buffer._setByte(writerIndex++, (byte) '?');
+                }
+            }
+            // update the writerIndex without any extra checks for performance reasons
+            buffer.writerIndex = writerIndex;
+            return writerIndex - oldWriterIndex;
+        } else {
+            // Maybe we could also check if we can unwrap() to access the wrapped buffer which
+            // may be an AbstractByteBuf. But this may be overkill so let us keep it simple for now.
+            byte[] bytes = seq.toString().getBytes(CharsetUtil.UTF_8);
+            buf.writeBytes(bytes);
+            return bytes.length;
+        }
+    }
+
+    /**
+     * Encode a {@link CharSequence} in ASCII and write it to a {@link ByteBuf}.
+     * <p>
+     * Every char outside of the ASCII range is written as {@code '?'}, no matter which {@link ByteBuf}
+     * implementation is used.
+     *
+     * @param buf the buffer to write to; its {@code writerIndex} is advanced by the number of bytes written
+     * @param seq the characters to encode
+     * @return the actual number of bytes written, which is always {@code seq.length()}
+     * @throws NullPointerException if {@code buf} or {@code seq} is {@code null}
+     */
+    public static int writeAscii(ByteBuf buf, CharSequence seq) {
+        if (buf == null) {
+            throw new NullPointerException("buf");
+        }
+        if (seq == null) {
+            throw new NullPointerException("seq");
+        }
+        // ASCII uses 1 byte per char
+        final int len = seq.length();
+        buf.ensureWritable(len);
+        if (buf instanceof AbstractByteBuf) {
+            // Fast-Path
+            AbstractByteBuf buffer = (AbstractByteBuf) buf;
+            int writerIndex = buffer.writerIndex;
+
+            // We can use the _set methods as these do not need to do any index checks and reference checks.
+            // This is possible as we called ensureWritable(...) before.
+            for (int i = 0; i < len; i++) {
+                char c = seq.charAt(i);
+                // Replace rather than truncate, so non-ASCII input never turns into arbitrary bytes.
+                buffer._setByte(writerIndex++, c < 0x80 ? (byte) c : (byte) '?');
+            }
+            // update the writerIndex without any extra checks for performance reasons
+            buffer.writerIndex = writerIndex;
+        } else {
+            // Maybe we could also check if we can unwrap() to access the wrapped buffer which
+            // may be an AbstractByteBuf. But this may be overkill so let us keep it simple for now.
+            // Encode char by char (String.getBytes(...) does not guarantee one byte per char for
+            // surrogate pairs) so that exactly len bytes are written, as in the fast path.
+            byte[] bytes = new byte[len];
+            for (int i = 0; i < len; i++) {
+                char c = seq.charAt(i);
+                bytes[i] = c < 0x80 ? (byte) c : (byte) '?';
+            }
+            buf.writeBytes(bytes);
+        }
+        return len;
+    }
+
     /**
      * Encode the given {@link CharBuffer} using the given {@link Charset} into a new {@link ByteBuf} which
      * is allocated via the {@link ByteBufAllocator}.
diff --git a/buffer/src/test/java/io/netty/buffer/ByteBufUtilTest.java b/buffer/src/test/java/io/netty/buffer/ByteBufUtilTest.java
new file mode 100644
index 0000000000..fb04c78ce7
--- /dev/null
+++ b/buffer/src/test/java/io/netty/buffer/ByteBufUtilTest.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2014 The Netty Project
+ *
+ * The Netty Project licenses this file to you under the Apache License,
+ * version 2.0 (the "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at:
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package io.netty.buffer;
+
+import io.netty.util.CharsetUtil;
+import io.netty.util.ReferenceCountUtil;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Checks that the {@link ByteBufUtil} text writers produce the same bytes as {@code String.getBytes(Charset)}.
+ */
+public class ByteBufUtilTest {
+
+    /** {@link ByteBufUtil#writeAscii(ByteBuf, CharSequence)} must match the US-ASCII bytes of the JDK. */
+    @Test
+    public void testWriteUsAscii() {
+        final String text = "NettyRocks";
+        final ByteBuf actual = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
+        ByteBufUtil.writeAscii(actual, text);
+
+        final ByteBuf expected = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
+        expected.writeBytes(text.getBytes(CharsetUtil.US_ASCII));
+
+        Assert.assertEquals(expected, actual);
+    }
+
+    /** {@link ByteBufUtil#writeUtf8(ByteBuf, CharSequence)} must match the UTF-8 bytes of the JDK. */
+    @Test
+    public void testWriteUtf8() {
+        final String text = "Some UTF-8 like äÄ∏ŒŒ";
+        final ByteBuf actual = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
+        ByteBufUtil.writeUtf8(actual, text);
+
+        final ByteBuf expected = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
+        expected.writeBytes(text.getBytes(CharsetUtil.UTF_8));
+
+        Assert.assertEquals(expected, actual);
+    }
+}
diff --git a/microbench/src/test/java/io/netty/microbench/buffer/ByteBufUtilBenchmark.java
b/microbench/src/test/java/io/netty/microbench/buffer/ByteBufUtilBenchmark.java
new file mode 100644
index 0000000000..a4b6a3da9f
--- /dev/null
+++ b/microbench/src/test/java/io/netty/microbench/buffer/ByteBufUtilBenchmark.java
@@ -0,0 +1,176 @@
+/*
+* Copyright 2014 The Netty Project
+*
+* The Netty Project licenses this file to you under the Apache License,
+* version 2.0 (the "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at:
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+* License for the specific language governing permissions and limitations
+* under the License.
+*/
+package io.netty.microbench.buffer;
+
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.ByteBufUtil;
+import io.netty.buffer.Unpooled;
+import io.netty.microbench.util.AbstractMicrobenchmark;
+import io.netty.util.CharsetUtil;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+
+/**
+ * Compares {@link ByteBufUtil#writeAscii(ByteBuf, CharSequence)} and
+ * {@link ByteBufUtil#writeUtf8(ByteBuf, CharSequence)} with encoding into a temporary {@code byte[]} first.
+ * <p>
+ * Every variant runs against a plain direct buffer and against a wrapped one, and with a {@link String} as
+ * well as with a {@link StringBuilder} as input.
+ */
+@State(Scope.Benchmark)
+@Warmup(iterations = 10)
+@Measurement(iterations = 25)
+public class ByteBufUtilBenchmark extends AbstractMicrobenchmark {
+    private ByteBuf buffer;
+    private ByteBuf wrapped;
+
+    private StringBuilder asciiSequence;
+    private String ascii;
+
+    private StringBuilder utf8Sequence;
+    private String utf8;
+
+    @Setup
+    public void setup() {
+        // Use buffer sizes that will also allow to write UTF-8 without growing the buffer
+        buffer = Unpooled.directBuffer(512);
+        wrapped = Unpooled.unreleasableBuffer(Unpooled.directBuffer(512));
+
+        asciiSequence = new StringBuilder(128);
+        for (int i = 0; i < 128; i++) {
+            asciiSequence.append('a');
+        }
+        ascii = asciiSequence.toString();
+
+        // Generate some mixed UTF-8 String for benchmark
+        utf8Sequence = new StringBuilder(128);
+        char[] chars = "Some UTF-8 like äÄ∏ŒŒ".toCharArray();
+        for (int i = 0; i < 128; i++) {
+            utf8Sequence.append(chars[i % chars.length]);
+        }
+        utf8 = utf8Sequence.toString();
+        // asciiSequence is intentionally left untouched here so the writeAscii* benchmarks see pure ASCII input.
+    }
+
+    @TearDown
+    public void tearDown() {
+        buffer.release();
+        // The unreleasable wrapper ignores release(), so free the direct buffer behind it explicitly.
+        wrapped.unwrap().release();
+    }
+
+    @Benchmark
+    public void writeAsciiStringViaArray() {
+        buffer.resetWriterIndex();
+        buffer.writeBytes(ascii.getBytes(CharsetUtil.US_ASCII));
+    }
+
+    @Benchmark
+    public void writeAsciiStringViaArrayWrapped() {
+        wrapped.resetWriterIndex();
+        wrapped.writeBytes(ascii.getBytes(CharsetUtil.US_ASCII));
+    }
+
+    @Benchmark
+    public void writeAsciiString() {
+        buffer.resetWriterIndex();
+        ByteBufUtil.writeAscii(buffer, ascii);
+    }
+
+    @Benchmark
+    public void writeAsciiStringWrapped() {
+        wrapped.resetWriterIndex();
+        ByteBufUtil.writeAscii(wrapped, ascii);
+    }
+
+    @Benchmark
+    public void writeAsciiViaArray() {
+        buffer.resetWriterIndex();
+        buffer.writeBytes(asciiSequence.toString().getBytes(CharsetUtil.US_ASCII));
+    }
+
+    @Benchmark
+    public void writeAsciiViaArrayWrapped() {
+        wrapped.resetWriterIndex();
+        wrapped.writeBytes(asciiSequence.toString().getBytes(CharsetUtil.US_ASCII));
+    }
+
+    @Benchmark
+    public void writeAscii() {
+        buffer.resetWriterIndex();
+        ByteBufUtil.writeAscii(buffer, asciiSequence);
+    }
+
+    @Benchmark
+    public void writeAsciiWrapped() {
+        wrapped.resetWriterIndex();
+        ByteBufUtil.writeAscii(wrapped, asciiSequence);
+    }
+
+    @Benchmark
+    public void writeUtf8StringViaArray() {
+        buffer.resetWriterIndex();
+        // The UTF-8 baselines must encode with UTF_8 to do the same work as writeUtf8(...).
+        buffer.writeBytes(utf8.getBytes(CharsetUtil.UTF_8));
+    }
+
+    @Benchmark
+    public void writeUtf8StringViaArrayWrapped() {
+        wrapped.resetWriterIndex();
+        wrapped.writeBytes(utf8.getBytes(CharsetUtil.UTF_8));
+    }
+
+    @Benchmark
+    public void writeUtf8String() {
+        buffer.resetWriterIndex();
+        ByteBufUtil.writeUtf8(buffer, utf8);
+    }
+
+    @Benchmark
+    public void writeUtf8StringWrapped() {
+        wrapped.resetWriterIndex();
+        ByteBufUtil.writeUtf8(wrapped, utf8);
+    }
+
+    @Benchmark
+    public void writeUtf8ViaArray() {
+        buffer.resetWriterIndex();
+        buffer.writeBytes(utf8Sequence.toString().getBytes(CharsetUtil.UTF_8));
+    }
+
+    @Benchmark
+    public void writeUtf8ViaArrayWrapped() {
+        wrapped.resetWriterIndex();
+        wrapped.writeBytes(utf8Sequence.toString().getBytes(CharsetUtil.UTF_8));
+    }
+
+    @Benchmark
+    public void writeUtf8() {
+        buffer.resetWriterIndex();
+        ByteBufUtil.writeUtf8(buffer, utf8Sequence);
+    }
+
+    @Benchmark
+    public void writeUtf8Wrapped() {
+        wrapped.resetWriterIndex();
+        ByteBufUtil.writeUtf8(wrapped, utf8Sequence);
+    }
+}