From 61a5e60513fa54556f617aa95efd5a9df1a47c8d Mon Sep 17 00:00:00 2001 From: Norman Maurer Date: Thu, 4 Sep 2014 16:04:33 +0200 Subject: [PATCH] Provide helper methods in ByteBufUtil to write UTF-8/ASCII CharSequences. Related to [#909] Motivation: We expose no methods in ByteBuf to directly write a CharSequence into it. This forces the user to either convert the CharSequence to a byte array first or use a CharsetEncoder. Both approaches have some overhead, and we can do a lot better for well-known Charsets like UTF-8 and ASCII. Modifications: Add ByteBufUtil.writeAscii(...) and ByteBufUtil.writeUtf8(...), which do the task in an optimized way. This is especially true if the passed-in ByteBuf extends AbstractByteBuf, which is the case for all of our implementations that do not wrap another ByteBuf. Result: Writing an ASCII or UTF-8 CharSequence into an AbstractByteBuf is a lot faster than what users could do by themselves, as we can make use of some package-private methods and so eliminate reference and range checks. When the ByteBuf does not extend AbstractByteBuf we still do a very good job and are on par in most cases with what the user would do. 
The following benchmark shows the improvements: Result: 2456866.966 ±(99.9%) 59066.370 ops/s [Average] Statistics: (min, avg, max) = (2297025.189, 2456866.966, 2586003.225), stdev = 78851.914 Confidence interval (99.9%): [2397800.596, 2515933.336] Benchmark Mode Samples Score Score error Units i.n.m.b.ByteBufUtilBenchmark.writeAscii thrpt 50 9398165.238 131503.098 ops/s i.n.m.b.ByteBufUtilBenchmark.writeAsciiString thrpt 50 9695177.968 176684.821 ops/s i.n.m.b.ByteBufUtilBenchmark.writeAsciiStringViaArray thrpt 50 4788597.415 83181.549 ops/s i.n.m.b.ByteBufUtilBenchmark.writeAsciiStringViaArrayWrapped thrpt 50 4722297.435 98984.491 ops/s i.n.m.b.ByteBufUtilBenchmark.writeAsciiStringWrapped thrpt 50 4028689.762 66192.505 ops/s i.n.m.b.ByteBufUtilBenchmark.writeAsciiViaArray thrpt 50 3234841.565 91308.009 ops/s i.n.m.b.ByteBufUtilBenchmark.writeAsciiViaArrayWrapped thrpt 50 3311387.474 39018.933 ops/s i.n.m.b.ByteBufUtilBenchmark.writeAsciiWrapped thrpt 50 3379764.250 66735.415 ops/s i.n.m.b.ByteBufUtilBenchmark.writeUtf8 thrpt 50 5671116.821 101760.081 ops/s i.n.m.b.ByteBufUtilBenchmark.writeUtf8String thrpt 50 5682733.440 111874.084 ops/s i.n.m.b.ByteBufUtilBenchmark.writeUtf8StringViaArray thrpt 50 3564548.995 55709.512 ops/s i.n.m.b.ByteBufUtilBenchmark.writeUtf8StringViaArrayWrapped thrpt 50 3621053.671 47632.820 ops/s i.n.m.b.ByteBufUtilBenchmark.writeUtf8StringWrapped thrpt 50 2634029.071 52304.876 ops/s i.n.m.b.ByteBufUtilBenchmark.writeUtf8ViaArray thrpt 50 3397049.332 57784.119 ops/s i.n.m.b.ByteBufUtilBenchmark.writeUtf8ViaArrayWrapped thrpt 50 3318685.262 35869.562 ops/s i.n.m.b.ByteBufUtilBenchmark.writeUtf8Wrapped thrpt 50 2473791.249 46423.114 ops/s Tests run: 1, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 1,387.417 sec - in io.netty.microbench.buffer.ByteBufUtilBenchmark Results : Tests run: 1, Failures: 0, Errors: 0, Skipped: 0 Results : Tests run: 1, Failures: 0, Errors: 0, Skipped: 0 The *ViaArray* benchmarks are basically doing a 
toString().getBytes(Charset) while the others are using ByteBufUtil.write*(...).
---
 .../java/io/netty/buffer/ByteBufUtil.java     | 123 +++++++++++++
 .../java/io/netty/buffer/ByteBufUtilTest.java |  74 ++++++++
 .../buffer/ByteBufUtilBenchmark.java          | 171 ++++++++++++++++++
 3 files changed, 368 insertions(+)
 create mode 100644 buffer/src/test/java/io/netty/buffer/ByteBufUtilTest.java
 create mode 100644 microbench/src/test/java/io/netty/microbench/buffer/ByteBufUtilBenchmark.java

diff --git a/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java b/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java
index f2ce52256d..e1a53607f6 100644
--- a/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java
+++ b/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java
@@ -335,6 +335,129 @@ public final class ByteBufUtil {
         return -1;
     }
 
+    /**
+     * Encode a {@link CharSequence} in UTF-8 and write it to a {@link ByteBuf}, expanding the
+     * buffer if necessary.
+     *
+     * A valid surrogate pair is written as a single 4-byte sequence. An unpaired surrogate is
+     * written as {@code '?'}, which is what {@link String#getBytes(Charset)} produces, so the
+     * fast path and the fallback path yield the same bytes.
+     *
+     * @param buf the buffer to write to; its writerIndex is advanced by the returned value
+     * @param seq the characters to encode
+     * @return the actual number of bytes written
+     * @throws NullPointerException if {@code buf} or {@code seq} is {@code null}
+     */
+    public static int writeUtf8(ByteBuf buf, CharSequence seq) {
+        if (buf == null) {
+            throw new NullPointerException("buf");
+        }
+        if (seq == null) {
+            throw new NullPointerException("seq");
+        }
+        // A char needs at most 3 bytes and a surrogate pair (2 chars) needs 4, so len * 3 is the worst case.
+        final int len = seq.length();
+        final int maxSize = len * 3;
+        buf.ensureWritable(maxSize);
+        if (buf instanceof AbstractByteBuf) {
+            // Fast-Path
+            AbstractByteBuf buffer = (AbstractByteBuf) buf;
+            int oldWriterIndex = buffer.writerIndex;
+            int writerIndex = oldWriterIndex;
+
+            // We can use the _set methods as these do not need to do any index checks and reference checks.
+            // This is possible as we called ensureWritable(...) before.
+            for (int i = 0; i < len; i++) {
+                char c = seq.charAt(i);
+                if (c < 0x80) {
+                    buffer._setByte(writerIndex++, (byte) c);
+                } else if (c < 0x800) {
+                    buffer._setByte(writerIndex++, (byte) (0xc0 | (c >> 6)));
+                    buffer._setByte(writerIndex++, (byte) (0x80 | (c & 0x3f)));
+                } else if (c < Character.MIN_SURROGATE || c > Character.MAX_SURROGATE) {
+                    buffer._setByte(writerIndex++, (byte) (0xe0 | (c >> 12)));
+                    buffer._setByte(writerIndex++, (byte) (0x80 | ((c >> 6) & 0x3f)));
+                    buffer._setByte(writerIndex++, (byte) (0x80 | (c & 0x3f)));
+                } else if (Character.isHighSurrogate(c) && i + 1 < len
+                        && Character.isLowSurrogate(seq.charAt(i + 1))) {
+                    // Valid surrogate pair: consume both chars and emit one 4-byte sequence.
+                    int codePoint = Character.toCodePoint(c, seq.charAt(++i));
+                    buffer._setByte(writerIndex++, (byte) (0xf0 | (codePoint >> 18)));
+                    buffer._setByte(writerIndex++, (byte) (0x80 | ((codePoint >> 12) & 0x3f)));
+                    buffer._setByte(writerIndex++, (byte) (0x80 | ((codePoint >> 6) & 0x3f)));
+                    buffer._setByte(writerIndex++, (byte) (0x80 | (codePoint & 0x3f)));
+                } else {
+                    // Unpaired surrogate: not encodable, so substitute like String.getBytes(...) does.
+                    buffer._setByte(writerIndex++, (byte) '?');
+                }
+            }
+            // update the writerIndex without any extra checks for performance reasons
+            buffer.writerIndex = writerIndex;
+            return writerIndex - oldWriterIndex;
+        } else {
+            // Maybe we could also check if we can unwrap() to access the wrapped buffer which
+            // may be an AbstractByteBuf. But this may be overkill so let us keep it simple for now.
+            byte[] bytes = seq.toString().getBytes(CharsetUtil.UTF_8);
+            buf.writeBytes(bytes);
+            return bytes.length;
+        }
+    }
+
+    /**
+     * Encode a {@link CharSequence} in ASCII and write it to a {@link ByteBuf}, expanding the
+     * buffer if necessary.
+     *
+     * Every {@code char} is written as exactly one byte; a {@code char} outside the 7-bit ASCII
+     * range is written as {@code '?'}.
+     *
+     * @param buf the buffer to write to; its writerIndex is advanced by the returned value
+     * @param seq the characters to encode
+     * @return the actual number of bytes written, which is always {@code seq.length()}
+     * @throws NullPointerException if {@code buf} or {@code seq} is {@code null}
+     */
+    public static int writeAscii(ByteBuf buf, CharSequence seq) {
+        if (buf == null) {
+            throw new NullPointerException("buf");
+        }
+        if (seq == null) {
+            throw new NullPointerException("seq");
+        }
+        // ASCII uses 1 byte per char
+        final int len = seq.length();
+        buf.ensureWritable(len);
+        if (buf instanceof AbstractByteBuf) {
+            // Fast-Path
+            AbstractByteBuf buffer = (AbstractByteBuf) buf;
+            int writerIndex = buffer.writerIndex;
+
+            // We can use the _set methods as these do not need to do any index checks and reference checks.
+            // This is possible as we called ensureWritable(...) before.
+            for (int i = 0; i < len; i++) {
+                buffer._setByte(writerIndex++, asciiByte(seq.charAt(i)));
+            }
+            // update the writerIndex without any extra checks for performance reasons
+            buffer.writerIndex = writerIndex;
+        } else {
+            // Maybe we could also check if we can unwrap() to access the wrapped buffer which
+            // may be an AbstractByteBuf. But this may be overkill so let us keep it simple for now.
+            // Encode by hand instead of via String.getBytes(...) so that both paths write the same
+            // bytes and the returned length always matches what was written.
+            byte[] bytes = new byte[len];
+            for (int i = 0; i < len; i++) {
+                bytes[i] = asciiByte(seq.charAt(i));
+            }
+            buf.writeBytes(bytes);
+        }
+        return len;
+    }
+
+    /**
+     * Maps a {@code char} to its ASCII byte, or to {@code '?'} if it is not a 7-bit ASCII character.
+     */
+    private static byte asciiByte(char c) {
+        return c < 0x80 ? (byte) c : (byte) '?';
+    }
+
     /**
      * Encode the given {@link CharBuffer} using the given {@link Charset} into a new {@link ByteBuf} which
      * is allocated via the {@link ByteBufAllocator}.
diff --git a/buffer/src/test/java/io/netty/buffer/ByteBufUtilTest.java b/buffer/src/test/java/io/netty/buffer/ByteBufUtilTest.java
new file mode 100644
index 0000000000..fb04c78ce7
--- /dev/null
+++ b/buffer/src/test/java/io/netty/buffer/ByteBufUtilTest.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2014 The Netty Project
+ *
+ * The Netty Project licenses this file to you under the Apache License,
+ * version 2.0 (the "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at:
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package io.netty.buffer;
+
+import io.netty.util.CharsetUtil;
+import io.netty.util.ReferenceCountUtil;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests for the {@link CharSequence} write helpers of {@link ByteBufUtil}.
+ */
+public class ByteBufUtilTest {
+
+    @Test
+    public void testWriteUsAscii() {
+        String usAscii = "NettyRocks";
+        ByteBuf buf = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
+        buf.writeBytes(usAscii.getBytes(CharsetUtil.US_ASCII));
+        ByteBuf buf2 = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
+        ByteBufUtil.writeAscii(buf2, usAscii);
+
+        Assert.assertEquals(buf, buf2);
+    }
+
+    @Test
+    public void testWriteAsciiReplacesNonAscii() {
+        String mixed = "Netty\u00e4\u220f";
+        ByteBuf buf = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
+        buf.writeBytes(mixed.getBytes(CharsetUtil.US_ASCII));
+        ByteBuf buf2 = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
+        int written = ByteBufUtil.writeAscii(buf2, mixed);
+
+        Assert.assertEquals(mixed.length(), written);
+        Assert.assertEquals(buf, buf2);
+    }
+
+    @Test
+    public void testWriteUtf8() {
+        String utf8 = "Some UTF-8 like äÄ∏ŒŒ";
+        ByteBuf buf = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
+        buf.writeBytes(utf8.getBytes(CharsetUtil.UTF_8));
+        ByteBuf buf2 = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
+        ByteBufUtil.writeUtf8(buf2, utf8);
+
+        Assert.assertEquals(buf, buf2);
+    }
+
+    @Test
+    public void testWriteUtf8Surrogates() {
+        // A valid pair (U+1F600), a lone high surrogate and a lone low surrogate.
+        String surrogates = "a\uD83D\uDE00b\uD83Dc\uDE00";
+        ByteBuf buf = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
+        buf.writeBytes(surrogates.getBytes(CharsetUtil.UTF_8));
+        ByteBuf buf2 = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
+        int written = ByteBufUtil.writeUtf8(buf2, surrogates);
+
+        Assert.assertEquals(buf.readableBytes(), written);
+        Assert.assertEquals(buf, buf2);
+    }
+}
diff --git a/microbench/src/test/java/io/netty/microbench/buffer/ByteBufUtilBenchmark.java b/microbench/src/test/java/io/netty/microbench/buffer/ByteBufUtilBenchmark.java
new file mode 100644
index 0000000000..a4b6a3da9f
--- /dev/null
+++ b/microbench/src/test/java/io/netty/microbench/buffer/ByteBufUtilBenchmark.java
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2014 The Netty Project
+ *
+ * The Netty Project licenses this file to you under the Apache License,
+ * version 2.0 (the "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at:
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package io.netty.microbench.buffer;
+
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.ByteBufUtil;
+import io.netty.buffer.Unpooled;
+import io.netty.microbench.util.AbstractMicrobenchmark;
+import io.netty.util.CharsetUtil;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+
+/**
+ * Compares {@link ByteBufUtil#writeAscii(ByteBuf, CharSequence)} and
+ * {@link ByteBufUtil#writeUtf8(ByteBuf, CharSequence)} with the conventional approach of encoding to a
+ * {@code byte[]} first (the {@code *ViaArray} benchmarks). The {@code *Wrapped} benchmarks write to a
+ * buffer that wraps another {@link ByteBuf} and so exercises the fallback path of the helpers.
+ */
+@State(Scope.Benchmark)
+@Warmup(iterations = 10)
+@Measurement(iterations = 25)
+public class ByteBufUtilBenchmark extends AbstractMicrobenchmark {
+    private ByteBuf buffer;
+    private ByteBuf wrapped;
+
+    private StringBuilder asciiSequence;
+    private String ascii;
+
+    private StringBuilder utf8Sequence;
+    private String utf8;
+
+    @Setup
+    public void setup() {
+        // Use buffer sizes that will also allow to write UTF-8 without growing the buffer
+        buffer = Unpooled.directBuffer(512);
+        wrapped = Unpooled.unreleasableBuffer(Unpooled.directBuffer(512));
+        asciiSequence = new StringBuilder(128);
+        for (int i = 0; i < 128; i++) {
+            asciiSequence.append('a');
+        }
+        ascii = asciiSequence.toString();
+
+        // Generate some mixed UTF-8 String for benchmark
+        utf8Sequence = new StringBuilder(128);
+        char[] chars = "Some UTF-8 like äÄ∏ŒŒ".toCharArray();
+        for (int i = 0; i < 128; i++) {
+            utf8Sequence.append(chars[i % chars.length]);
+        }
+        utf8 = utf8Sequence.toString();
+    }
+
+    @TearDown
+    public void tearDown() {
+        buffer.release();
+        wrapped.release();
+    }
+
+    @Benchmark
+    public void writeAsciiStringViaArray() {
+        buffer.resetWriterIndex();
+        buffer.writeBytes(ascii.getBytes(CharsetUtil.US_ASCII));
+    }
+
+    @Benchmark
+    public void writeAsciiStringViaArrayWrapped() {
+        wrapped.resetWriterIndex();
+        wrapped.writeBytes(ascii.getBytes(CharsetUtil.US_ASCII));
+    }
+
+    @Benchmark
+    public void writeAsciiString() {
+        buffer.resetWriterIndex();
+        ByteBufUtil.writeAscii(buffer, ascii);
+    }
+
+    @Benchmark
+    public void writeAsciiStringWrapped() {
+        wrapped.resetWriterIndex();
+        ByteBufUtil.writeAscii(wrapped, ascii);
+    }
+
+    @Benchmark
+    public void writeAsciiViaArray() {
+        buffer.resetWriterIndex();
+        buffer.writeBytes(asciiSequence.toString().getBytes(CharsetUtil.US_ASCII));
+    }
+
+    @Benchmark
+    public void writeAsciiViaArrayWrapped() {
+        wrapped.resetWriterIndex();
+        wrapped.writeBytes(asciiSequence.toString().getBytes(CharsetUtil.US_ASCII));
+    }
+
+    @Benchmark
+    public void writeAscii() {
+        buffer.resetWriterIndex();
+        ByteBufUtil.writeAscii(buffer, asciiSequence);
+    }
+
+    @Benchmark
+    public void writeAsciiWrapped() {
+        wrapped.resetWriterIndex();
+        ByteBufUtil.writeAscii(wrapped, asciiSequence);
+    }
+
+    @Benchmark
+    public void writeUtf8StringViaArray() {
+        buffer.resetWriterIndex();
+        buffer.writeBytes(utf8.getBytes(CharsetUtil.UTF_8));
+    }
+
+    @Benchmark
+    public void writeUtf8StringViaArrayWrapped() {
+        wrapped.resetWriterIndex();
+        wrapped.writeBytes(utf8.getBytes(CharsetUtil.UTF_8));
+    }
+
+    @Benchmark
+    public void writeUtf8String() {
+        buffer.resetWriterIndex();
+        ByteBufUtil.writeUtf8(buffer, utf8);
+    }
+
+    @Benchmark
+    public void writeUtf8StringWrapped() {
+        wrapped.resetWriterIndex();
+        ByteBufUtil.writeUtf8(wrapped, utf8);
+    }
+
+    @Benchmark
+    public void writeUtf8ViaArray() {
+        buffer.resetWriterIndex();
+        buffer.writeBytes(utf8Sequence.toString().getBytes(CharsetUtil.UTF_8));
+    }
+
+    @Benchmark
+    public void writeUtf8ViaArrayWrapped() {
+        wrapped.resetWriterIndex();
+        wrapped.writeBytes(utf8Sequence.toString().getBytes(CharsetUtil.UTF_8));
+    }
+
+    @Benchmark
+    public void writeUtf8() {
+        buffer.resetWriterIndex();
+        ByteBufUtil.writeUtf8(buffer, utf8Sequence);
+    }
+
+    @Benchmark
+    public void writeUtf8Wrapped() {
+        wrapped.resetWriterIndex();
+        ByteBufUtil.writeUtf8(wrapped, utf8Sequence);
+    }
+}