Reduce overhead of ByteBufUtil.decodeString(...) which is used by AbstractByteBuf.toString(...)
and AbstractByteBuf.getCharSequence(...)
(#8388)
Motivation: Our current implementation that is used for toString(Charset) operations on AbstractByteBuf implementations is quite slow as it does a lot of unnecessary memory copies. We should just use new String(...) as it has a lot of optimizations to handle these cases.
Modifications: Rewrite ByteBufUtil.decodeString(...) to use new String(...).
Result: Less overhead for toString(Charset) operations.
Benchmark (charsetName) (direct) (size) Mode Cnt Score Error Units
ByteBufUtilDecodeStringBenchmark.decodeString US-ASCII false 8 thrpt 20 22401645.093 ± 4671452.479 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString US-ASCII false 64 thrpt 20 23678483.384 ± 3749164.446 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString US-ASCII true 8 thrpt 20 15731142.651 ± 3782931.591 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString US-ASCII true 64 thrpt 20 16244232.229 ± 1886259.658 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-8 false 8 thrpt 20 25983680.959 ± 5045782.289 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-8 false 64 thrpt 20 26235589.339 ± 2867004.950 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-8 true 8 thrpt 20 18499027.808 ± 4784684.268 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-8 true 64 thrpt 20 16825286.141 ± 1008712.342 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-16 false 8 thrpt 20 5789879.092 ± 1201786.359 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-16 false 64 thrpt 20 2173243.225 ± 417809.341 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-16 true 8 thrpt 20 5035583.011 ± 1001978.854 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-16 true 64 thrpt 20 2162345.301 ± 402410.408 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString ISO-8859-1 false 8 thrpt 20 30039052.376 ± 6539111.622 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString ISO-8859-1 false 64 thrpt 20 31414163.515 ± 2096710.526 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString ISO-8859-1 true 8 thrpt 20 19538587.855 ± 4639115.572 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString ISO-8859-1 true 64 thrpt 20 19467839.722 ± 1672687.213 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld US-ASCII false 8 thrpt 20 10787326.745 ± 1034197.864 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld US-ASCII false 64 thrpt 20 7129801.930 ± 1363019.209 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld US-ASCII true 8 thrpt 20 9002529.605 ± 2017642.445 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld US-ASCII true 64 thrpt 20 3860192.352 ± 826218.738 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-8 false 8 thrpt 20 10532838.027 ± 2151743.968 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-8 false 64 thrpt 20 7185554.597 ± 1387685.785 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-8 true 8 thrpt 20 7352253.316 ± 1333823.850 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-8 true 64 thrpt 20 2825578.707 ± 349701.156 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-16 false 8 thrpt 20 7277446.665 ± 1447034.346 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-16 false 64 thrpt 20 2445929.579 ± 562816.641 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-16 true 8 thrpt 20 6201174.401 ± 1236137.786 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-16 true 64 thrpt 20 2310674.973 ± 525587.959 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld ISO-8859-1 false 8 thrpt 20 11142625.392 ± 1680556.468 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld ISO-8859-1 false 64 thrpt 20 8127116.405 ± 1128513.860 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld ISO-8859-1 true 8 thrpt 20 9405751.952 ± 2193324.806 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld ISO-8859-1 true 64 thrpt 20 3943282.076 ± 737798.070 ops/s
Benchmark result is saved to /home/norman/mainframer/netty/microbench/target/reports/performance/ByteBufUtilDecodeStringBenchmark.json
Tests run: 1, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 1,030.173 sec - in io.netty.buffer.ByteBufUtilDecodeStringBenchmark
[1030.460s][info ][gc,heap,exit ] Heap
[1030.460s][info ][gc,heap,exit ] garbage-first heap total 516096K, used 257918K [0x0000000609a00000, 0x0000000800000000)
[1030.460s][info ][gc,heap,exit ] region size 2048K, 127 young (260096K), 2 survivors (4096K)
[1030.460s][info ][gc,heap,exit ] Metaspace used 17123K, capacity 17438K, committed 17792K, reserved 1064960K
[1030.460s][info ][gc,heap,exit ] class space used 1709K, capacity 1827K, committed 1920K, reserved 1048576K
This commit is contained in:
parent
69545aedc4
commit
87ec2f882a
@ -53,10 +53,10 @@ import static io.netty.util.internal.StringUtil.isSurrogate;
|
||||
public final class ByteBufUtil {
|
||||
|
||||
private static final InternalLogger logger = InternalLoggerFactory.getInstance(ByteBufUtil.class);
|
||||
private static final FastThreadLocal<CharBuffer> CHAR_BUFFERS = new FastThreadLocal<CharBuffer>() {
|
||||
private static final FastThreadLocal<byte[]> BYTE_ARRAYS = new FastThreadLocal<byte[]>() {
|
||||
@Override
|
||||
protected CharBuffer initialValue() throws Exception {
|
||||
return CharBuffer.allocate(1024);
|
||||
protected byte[] initialValue() throws Exception {
|
||||
return PlatformDependent.allocateUninitializedArray(1024);
|
||||
}
|
||||
};
|
||||
|
||||
@ -756,52 +756,31 @@ public final class ByteBufUtil {
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
static String decodeString(ByteBuf src, int readerIndex, int len, Charset charset) {
|
||||
if (len == 0) {
|
||||
return StringUtil.EMPTY_STRING;
|
||||
}
|
||||
final CharsetDecoder decoder = CharsetUtil.decoder(charset);
|
||||
final int maxLength = (int) ((double) len * decoder.maxCharsPerByte());
|
||||
CharBuffer dst = CHAR_BUFFERS.get();
|
||||
if (dst.length() < maxLength) {
|
||||
dst = CharBuffer.allocate(maxLength);
|
||||
if (maxLength <= MAX_CHAR_BUFFER_SIZE) {
|
||||
CHAR_BUFFERS.set(dst);
|
||||
}
|
||||
} else {
|
||||
dst.clear();
|
||||
}
|
||||
if (src.nioBufferCount() == 1) {
|
||||
decodeString(decoder, src.nioBuffer(readerIndex, len), dst);
|
||||
} else {
|
||||
// We use a heap buffer as CharsetDecoder is most likely able to use a fast-path if src and dst buffers
|
||||
// are both backed by a byte array.
|
||||
ByteBuf buffer = src.alloc().heapBuffer(len);
|
||||
try {
|
||||
buffer.writeBytes(src, readerIndex, len);
|
||||
// Use internalNioBuffer(...) to reduce object creation.
|
||||
decodeString(decoder, buffer.internalNioBuffer(buffer.readerIndex(), len), dst);
|
||||
} finally {
|
||||
// Release the temporary buffer again.
|
||||
buffer.release();
|
||||
}
|
||||
}
|
||||
return dst.flip().toString();
|
||||
}
|
||||
final byte[] array;
|
||||
final int offset;
|
||||
|
||||
private static void decodeString(CharsetDecoder decoder, ByteBuffer src, CharBuffer dst) {
|
||||
try {
|
||||
CoderResult cr = decoder.decode(src, dst, true);
|
||||
if (!cr.isUnderflow()) {
|
||||
cr.throwException();
|
||||
if (src.hasArray()) {
|
||||
array = src.array();
|
||||
offset = src.arrayOffset() + readerIndex;
|
||||
} else {
|
||||
if (len <= 1024) {
|
||||
array = BYTE_ARRAYS.get();
|
||||
} else {
|
||||
array = PlatformDependent.allocateUninitializedArray(len);
|
||||
}
|
||||
cr = decoder.flush(dst);
|
||||
if (!cr.isUnderflow()) {
|
||||
cr.throwException();
|
||||
offset = 0;
|
||||
src.getBytes(readerIndex, array, 0, len);
|
||||
}
|
||||
} catch (CharacterCodingException x) {
|
||||
throw new IllegalStateException(x);
|
||||
if (CharsetUtil.US_ASCII.equals(charset)) {
|
||||
// Fast-path for US-ASCII which is used frequently.
|
||||
return new String(array, 0, offset, len);
|
||||
}
|
||||
return new String(array, offset, len, charset);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright 2018 The Netty Project
|
||||
*
|
||||
* The Netty Project licenses this file to you under the Apache License,
|
||||
* version 2.0 (the "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package io.netty.buffer;
|
||||
|
||||
import io.netty.microbench.util.AbstractMicrobenchmark;
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.TearDown;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Arrays;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
|
||||
@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS)
|
||||
public class ByteBufUtilDecodeStringBenchmark extends AbstractMicrobenchmark {
|
||||
|
||||
public enum ByteBufType {
|
||||
DIRECT {
|
||||
@Override
|
||||
ByteBuf newBuffer(byte[] bytes, int length) {
|
||||
ByteBuf buffer = Unpooled.directBuffer(length);
|
||||
buffer.writeBytes(bytes, 0, length);
|
||||
return buffer;
|
||||
}
|
||||
},
|
||||
HEAP_OFFSET {
|
||||
@Override
|
||||
ByteBuf newBuffer(byte[] bytes, int length) {
|
||||
return Unpooled.wrappedBuffer(bytes, 1, length);
|
||||
}
|
||||
},
|
||||
HEAP {
|
||||
@Override
|
||||
ByteBuf newBuffer(byte[] bytes, int length) {
|
||||
return Unpooled.wrappedBuffer(bytes, 0, length);
|
||||
}
|
||||
},
|
||||
COMPOSITE {
|
||||
@Override
|
||||
ByteBuf newBuffer(byte[] bytes, int length) {
|
||||
CompositeByteBuf buffer = Unpooled.compositeBuffer();
|
||||
int offset = 0;
|
||||
// 8 buffers per composite.
|
||||
int capacity = length / 8;
|
||||
|
||||
while (length > 0) {
|
||||
buffer.addComponent(true, Unpooled.wrappedBuffer(bytes, offset, Math.min(length, capacity)));
|
||||
length -= capacity;
|
||||
offset += capacity;
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
};
|
||||
|
||||
abstract ByteBuf newBuffer(byte[] bytes, int length);
|
||||
}
|
||||
|
||||
@Param({ "8", "64", "1024", "10240", "1073741824" })
|
||||
public int size;
|
||||
|
||||
@Param({ "US-ASCII", "UTF-8" })
|
||||
public String charsetName;
|
||||
|
||||
@Param
|
||||
public ByteBufType bufferType;
|
||||
|
||||
private ByteBuf buffer;
|
||||
private Charset charset;
|
||||
|
||||
@Override
|
||||
protected String[] jvmArgs() {
|
||||
// Ensure we minimize the GC overhead by sizing the heap big enough.
|
||||
return new String[] { "-XX:MaxDirectMemorySize=2g", "-Xmx8g", "-Xms8g", "-Xmn6g" };
|
||||
}
|
||||
|
||||
@Setup
|
||||
public void setup() {
|
||||
byte[] bytes = new byte[size + 2];
|
||||
Arrays.fill(bytes, (byte) 'a');
|
||||
|
||||
// Use an offset to not allow any optimizations because we use the exact passed in byte[] for heap buffers.
|
||||
buffer = bufferType.newBuffer(bytes, size);
|
||||
charset = Charset.forName(charsetName);
|
||||
}
|
||||
|
||||
@TearDown
|
||||
public void teardown() {
|
||||
buffer.release();
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String decodeString() {
|
||||
return ByteBufUtil.decodeString(buffer, buffer.readerIndex(), size, charset);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user