Provide helper methods in ByteBufUtil to write UTF-8/ASCII CharSequences. Related to [#909]

Motivation:

We expose no methods in ByteBuf to directly write a CharSequence into it. This forces the user to either convert the CharSequence to a byte array first or use a CharsetEncoder. Both approaches have some overhead, and we can do a lot better for well-known Charsets like UTF-8 and ASCII.

Modifications:

Add ByteBufUtil.writeAscii(...) and ByteBufUtil.writeUtf8(...), which can do the task in an optimized way. This is especially true if the passed-in ByteBuf extends AbstractByteBuf, which is the case for all of our implementations that do not wrap another ByteBuf.

Result:

Writing an ASCII or UTF-8 CharSequence into an AbstractByteBuf is a lot faster than what users could do themselves, as we can make use of some package-private methods and so eliminate reference and range checks. When the CharSequence is not ASCII or UTF-8 we can still do a very good job and are on par in most cases with what the user would do.

The following benchmark shows the improvements:

Result: 2456866.966 ±(99.9%) 59066.370 ops/s [Average]
  Statistics: (min, avg, max) = (2297025.189, 2456866.966, 2586003.225), stdev = 78851.914
  Confidence interval (99.9%): [2397800.596, 2515933.336]

Benchmark                                                        Mode   Samples        Score  Score error    Units
i.n.m.b.ByteBufUtilBenchmark.writeAscii                         thrpt        50  9398165.238   131503.098    ops/s
i.n.m.b.ByteBufUtilBenchmark.writeAsciiString                   thrpt        50  9695177.968   176684.821    ops/s
i.n.m.b.ByteBufUtilBenchmark.writeAsciiStringViaArray           thrpt        50  4788597.415    83181.549    ops/s
i.n.m.b.ByteBufUtilBenchmark.writeAsciiStringViaArrayWrapped    thrpt        50  4722297.435    98984.491    ops/s
i.n.m.b.ByteBufUtilBenchmark.writeAsciiStringWrapped            thrpt        50  4028689.762    66192.505    ops/s
i.n.m.b.ByteBufUtilBenchmark.writeAsciiViaArray                 thrpt        50  3234841.565    91308.009    ops/s
i.n.m.b.ByteBufUtilBenchmark.writeAsciiViaArrayWrapped          thrpt        50  3311387.474    39018.933    ops/s
i.n.m.b.ByteBufUtilBenchmark.writeAsciiWrapped                  thrpt        50  3379764.250    66735.415    ops/s
i.n.m.b.ByteBufUtilBenchmark.writeUtf8                          thrpt        50  5671116.821   101760.081    ops/s
i.n.m.b.ByteBufUtilBenchmark.writeUtf8String                    thrpt        50  5682733.440   111874.084    ops/s
i.n.m.b.ByteBufUtilBenchmark.writeUtf8StringViaArray            thrpt        50  3564548.995    55709.512    ops/s
i.n.m.b.ByteBufUtilBenchmark.writeUtf8StringViaArrayWrapped     thrpt        50  3621053.671    47632.820    ops/s
i.n.m.b.ByteBufUtilBenchmark.writeUtf8StringWrapped             thrpt        50  2634029.071    52304.876    ops/s
i.n.m.b.ByteBufUtilBenchmark.writeUtf8ViaArray                  thrpt        50  3397049.332    57784.119    ops/s
i.n.m.b.ByteBufUtilBenchmark.writeUtf8ViaArrayWrapped           thrpt        50  3318685.262    35869.562    ops/s
i.n.m.b.ByteBufUtilBenchmark.writeUtf8Wrapped                   thrpt        50  2473791.249    46423.114    ops/s
Tests run: 1, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 1,387.417 sec - in io.netty.microbench.buffer.ByteBufUtilBenchmark

Results :

Tests run: 1, Failures: 0, Errors: 0, Skipped: 0

Results :

Tests run: 1, Failures: 0, Errors: 0, Skipped: 0

The *ViaArray* benchmarks are basically doing a toString().getBytes(Charset), while the others are using ByteBufUtil.write*(...).
This commit is contained in:
Norman Maurer 2014-09-04 16:04:33 +02:00 committed by Trustin Lee
parent cf6eb70f93
commit fe796fc8ab
3 changed files with 299 additions and 0 deletions

View File

@ -370,6 +370,92 @@ public final class ByteBufUtil {
return -1;
}
/**
 * Encode a {@link CharSequence} in <a href="http://en.wikipedia.org/wiki/UTF-8">UTF-8</a> and write
 * it to a {@link ByteBuf}.
 * <p>
 * A valid surrogate pair is encoded as a single 4-byte sequence. An unpaired surrogate has no UTF-8
 * representation and is written as {@code '?'}, which matches what
 * {@link String#getBytes(java.nio.charset.Charset)} produces in the fallback path.
 *
 * @param buf the buffer to write into; {@code ensureWritable(...)} is called for the worst case of
 *            3 bytes per {@code char} before anything is written
 * @param seq the characters to encode
 * @return the actual number of bytes written
 * @throws NullPointerException if {@code buf} or {@code seq} is {@code null}
 */
public static int writeUtf8(ByteBuf buf, CharSequence seq) {
    if (buf == null) {
        throw new NullPointerException("buf");
    }
    if (seq == null) {
        throw new NullPointerException("seq");
    }
    // UTF-8 uses max. 3 bytes per char, so calculate the worst case. A surrogate pair takes 4 bytes
    // but consumes 2 chars, so it stays within this bound as well.
    final int len = seq.length();
    final int maxSize = len * 3;
    buf.ensureWritable(maxSize);
    if (buf instanceof AbstractByteBuf) {
        // Fast-Path
        AbstractByteBuf buffer = (AbstractByteBuf) buf;
        int oldWriterIndex = buffer.writerIndex;
        int writerIndex = oldWriterIndex;
        // We can use the _set methods as these do not need to do any index checks and reference checks.
        // This is possible as we called ensureWritable(...) before.
        for (int i = 0; i < len; i++) {
            char c = seq.charAt(i);
            if (c < 0x80) {
                buffer._setByte(writerIndex++, (byte) c);
            } else if (c < 0x800) {
                buffer._setByte(writerIndex++, (byte) (0xc0 | (c >> 6)));
                buffer._setByte(writerIndex++, (byte) (0x80 | (c & 0x3f)));
            } else if (c < Character.MIN_SURROGATE || c > Character.MAX_SURROGATE) {
                // Regular BMP character: 3 bytes.
                buffer._setByte(writerIndex++, (byte) (0xe0 | (c >> 12)));
                buffer._setByte(writerIndex++, (byte) (0x80 | ((c >> 6) & 0x3f)));
                buffer._setByte(writerIndex++, (byte) (0x80 | (c & 0x3f)));
            } else if (Character.isHighSurrogate(c) && i + 1 < len
                    && Character.isLowSurrogate(seq.charAt(i + 1))) {
                // Valid surrogate pair: combine into one supplementary code point and emit 4 bytes.
                // Encoding the two halves separately would produce invalid UTF-8.
                int codePoint = Character.toCodePoint(c, seq.charAt(++i));
                buffer._setByte(writerIndex++, (byte) (0xf0 | (codePoint >> 18)));
                buffer._setByte(writerIndex++, (byte) (0x80 | ((codePoint >> 12) & 0x3f)));
                buffer._setByte(writerIndex++, (byte) (0x80 | ((codePoint >> 6) & 0x3f)));
                buffer._setByte(writerIndex++, (byte) (0x80 | (codePoint & 0x3f)));
            } else {
                // Unpaired surrogate: not encodable, substitute it. The following char (if any) is
                // not consumed and gets processed on its own in the next iteration.
                buffer._setByte(writerIndex++, (byte) '?');
            }
        }
        // update the writerIndex without any extra checks for performance reasons
        buffer.writerIndex = writerIndex;
        return writerIndex - oldWriterIndex;
    } else {
        // Maybe we could also check if we can unwrap() to access the wrapped buffer which
        // may be an AbstractByteBuf. But this may be overkill so let us keep it simple for now.
        byte[] bytes = seq.toString().getBytes(CharsetUtil.UTF_8);
        buf.writeBytes(bytes);
        return bytes.length;
    }
}
/**
 * Encode a {@link CharSequence} in <a href="http://en.wikipedia.org/wiki/ASCII">ASCII</a> and write it
 * to a {@link ByteBuf}.
 * <p>
 * Exactly one byte is written per {@code char}. A {@code char} outside of the 7-bit ASCII range is
 * written as {@code '?'} regardless of the concrete {@link ByteBuf} implementation.
 *
 * @param buf the buffer to write into
 * @param seq the characters to encode
 * @return the actual number of bytes written, which is always {@code seq.length()}
 * @throws NullPointerException if {@code buf} or {@code seq} is {@code null}
 */
public static int writeAscii(ByteBuf buf, CharSequence seq) {
    if (buf == null) {
        throw new NullPointerException("buf");
    }
    if (seq == null) {
        throw new NullPointerException("seq");
    }
    // ASCII uses 1 byte per char
    final int len = seq.length();
    buf.ensureWritable(len);
    if (buf instanceof AbstractByteBuf) {
        // Fast-Path
        AbstractByteBuf buffer = (AbstractByteBuf) buf;
        int writerIndex = buffer.writerIndex;
        // We can use the _set methods as these do not need to do any index checks and reference checks.
        // This is possible as we called ensureWritable(...) before.
        for (int i = 0; i < len; i++) {
            char c = seq.charAt(i);
            // A plain (byte) cast would keep the low 8 bits of a non-ASCII char and so emit an
            // unrelated, non-ASCII byte. Substitute such chars instead.
            buffer._setByte(writerIndex++, (byte) (c < 0x80 ? c : '?'));
        }
        // update the writerIndex without any extra checks for performance reasons
        buffer.writerIndex = writerIndex;
    } else {
        // Maybe we could also check if we can unwrap() to access the wrapped buffer which
        // may be an AbstractByteBuf. But this may be overkill so let us keep it simple for now.
        //
        // Use the same per-char mapping as the fast path. Going through String.getBytes(US_ASCII)
        // may emit fewer bytes than chars (e.g. for surrogate pairs), which would make the
        // returned length wrong and the output differ between ByteBuf implementations.
        byte[] bytes = new byte[len];
        for (int i = 0; i < len; i++) {
            char c = seq.charAt(i);
            bytes[i] = (byte) (c < 0x80 ? c : '?');
        }
        buf.writeBytes(bytes);
    }
    return len;
}
/**
* Encode the given {@link CharBuffer} using the given {@link Charset} into a new {@link ByteBuf} which
* is allocated via the {@link ByteBufAllocator}.

View File

@ -0,0 +1,46 @@
/*
* Copyright 2014 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.buffer;
import io.netty.util.CharsetUtil;
import io.netty.util.ReferenceCountUtil;
import org.junit.Assert;
import org.junit.Test;
/**
 * Verifies that the {@link ByteBufUtil} CharSequence writers produce the same bytes as encoding the
 * text via {@link String#getBytes(java.nio.charset.Charset)} and writing the resulting array.
 */
public class ByteBufUtilTest {

    /** {@link ByteBufUtil#writeAscii(ByteBuf, CharSequence)} must match the US-ASCII encoding. */
    @Test
    public void testWriteUsAscii() {
        final String text = "NettyRocks";
        final byte[] encoded = text.getBytes(CharsetUtil.US_ASCII);

        ByteBuf expected = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
        expected.writeBytes(encoded);

        ByteBuf actual = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
        ByteBufUtil.writeAscii(actual, text);

        Assert.assertEquals(expected, actual);
    }

    /** {@link ByteBufUtil#writeUtf8(ByteBuf, CharSequence)} must match the UTF-8 encoding. */
    @Test
    public void testWriteUtf8() {
        final String text = "Some UTF-8 like äÄ∏ŒŒ";
        final byte[] encoded = text.getBytes(CharsetUtil.UTF_8);

        ByteBuf expected = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
        expected.writeBytes(encoded);

        ByteBuf actual = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
        ByteBufUtil.writeUtf8(actual, text);

        Assert.assertEquals(expected, actual);
    }
}

View File

@ -0,0 +1,167 @@
/*
* Copyright 2014 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.microbench.buffer;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufUtil;
import io.netty.buffer.Unpooled;
import io.netty.microbench.util.AbstractMicrobenchmark;
import io.netty.util.CharsetUtil;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
/**
 * Compares {@link ByteBufUtil#writeAscii(ByteBuf, CharSequence)} and
 * {@link ByteBufUtil#writeUtf8(ByteBuf, CharSequence)} against encoding via
 * {@code String.getBytes(Charset)} followed by {@code ByteBuf.writeBytes(byte[])}.
 * <p>
 * Naming scheme of the benchmark methods:
 * <ul>
 * <li>{@code *String*} uses a {@link String} as input, the others a {@link StringBuilder}</li>
 * <li>{@code *ViaArray*} is the baseline that goes through an intermediate {@code byte[]}</li>
 * <li>{@code *Wrapped} writes into the buffer obtained from {@code Unpooled.unreleasableBuffer(...)}
 * instead of the plain direct buffer</li>
 * </ul>
 */
@State(Scope.Benchmark)
@Warmup(iterations = 10)
@Measurement(iterations = 25)
public class ByteBufUtilBenchmark extends AbstractMicrobenchmark {
    private ByteBuf buffer;
    private ByteBuf wrapped;
    private StringBuilder asciiSequence;
    private String ascii;
    private StringBuilder utf8Sequence;
    private String utf8;

    /** Allocates the target buffers and builds the 128 char ASCII and mixed UTF-8 inputs. */
    @Setup
    public void setup() {
        // Use buffer sizes that will also allow to write UTF-8 without growing the buffer
        // (128 chars * 3 bytes worst case = 384 < 512).
        buffer = Unpooled.directBuffer(512);
        wrapped = Unpooled.unreleasableBuffer(Unpooled.directBuffer(512));

        asciiSequence = new StringBuilder(128);
        for (int i = 0; i < 128; i++) {
            asciiSequence.append('a');
        }
        ascii = asciiSequence.toString();

        // Generate some mixed UTF-8 String for benchmark
        utf8Sequence = new StringBuilder(128);
        char[] chars = "Some UTF-8 like äÄ∏ŒŒ".toCharArray();
        for (int i = 0; i < 128; i++) {
            utf8Sequence.append(chars[i % chars.length]);
        }
        utf8 = utf8Sequence.toString();
        // asciiSequence must keep its pure ASCII content here; replacing it with utf8Sequence would
        // make the writeAscii* CharSequence benchmarks measure non-ASCII input.
    }

    /** Releases both direct buffers allocated in {@link #setup()}. */
    @TearDown
    public void tearDown() {
        buffer.release();
        // release() on the unreleasable wrapper does nothing, so release the buffer it wraps
        // to avoid leaking the direct memory.
        wrapped.unwrap().release();
    }

    @Benchmark
    public void writeAsciiStringViaArray() {
        buffer.resetWriterIndex();
        buffer.writeBytes(ascii.getBytes(CharsetUtil.US_ASCII));
    }

    @Benchmark
    public void writeAsciiStringViaArrayWrapped() {
        wrapped.resetWriterIndex();
        wrapped.writeBytes(ascii.getBytes(CharsetUtil.US_ASCII));
    }

    @Benchmark
    public void writeAsciiString() {
        buffer.resetWriterIndex();
        ByteBufUtil.writeAscii(buffer, ascii);
    }

    @Benchmark
    public void writeAsciiStringWrapped() {
        wrapped.resetWriterIndex();
        ByteBufUtil.writeAscii(wrapped, ascii);
    }

    @Benchmark
    public void writeAsciiViaArray() {
        buffer.resetWriterIndex();
        buffer.writeBytes(asciiSequence.toString().getBytes(CharsetUtil.US_ASCII));
    }

    @Benchmark
    public void writeAsciiViaArrayWrapped() {
        wrapped.resetWriterIndex();
        wrapped.writeBytes(asciiSequence.toString().getBytes(CharsetUtil.US_ASCII));
    }

    @Benchmark
    public void writeAscii() {
        buffer.resetWriterIndex();
        ByteBufUtil.writeAscii(buffer, asciiSequence);
    }

    @Benchmark
    public void writeAsciiWrapped() {
        wrapped.resetWriterIndex();
        ByteBufUtil.writeAscii(wrapped, asciiSequence);
    }

    // The UTF-8 baselines must encode with UTF_8; using US_ASCII would replace every non-ASCII char
    // with a single '?' and so compare against a different (and cheaper) amount of work.

    @Benchmark
    public void writeUtf8StringViaArray() {
        buffer.resetWriterIndex();
        buffer.writeBytes(utf8.getBytes(CharsetUtil.UTF_8));
    }

    @Benchmark
    public void writeUtf8StringViaArrayWrapped() {
        wrapped.resetWriterIndex();
        wrapped.writeBytes(utf8.getBytes(CharsetUtil.UTF_8));
    }

    @Benchmark
    public void writeUtf8String() {
        buffer.resetWriterIndex();
        ByteBufUtil.writeUtf8(buffer, utf8);
    }

    @Benchmark
    public void writeUtf8StringWrapped() {
        wrapped.resetWriterIndex();
        ByteBufUtil.writeUtf8(wrapped, utf8);
    }

    @Benchmark
    public void writeUtf8ViaArray() {
        buffer.resetWriterIndex();
        buffer.writeBytes(utf8Sequence.toString().getBytes(CharsetUtil.UTF_8));
    }

    @Benchmark
    public void writeUtf8ViaArrayWrapped() {
        wrapped.resetWriterIndex();
        wrapped.writeBytes(utf8Sequence.toString().getBytes(CharsetUtil.UTF_8));
    }

    @Benchmark
    public void writeUtf8() {
        buffer.resetWriterIndex();
        ByteBufUtil.writeUtf8(buffer, utf8Sequence);
    }

    @Benchmark
    public void writeUtf8Wrapped() {
        wrapped.resetWriterIndex();
        ByteBufUtil.writeUtf8(wrapped, utf8Sequence);
    }
}