Reduce overhead of ByteBufUtil.decodeString(...), which is used by AbstractByteBuf.toString(...)
and AbstractByteBuf.getCharSequence(...)
(#8388)
Motivation:

The current implementation used for toString(Charset) operations on AbstractByteBuf implementations is quite slow because it performs a lot of unnecessary memory copies. We should just use new String(...), as it has a lot of optimizations to handle these cases.

Modifications:

Rewrite ByteBufUtil.decodeString(...) to use new String(...).

Result:

Less overhead for toString(Charset) operations.

Benchmark (charsetName) (direct) (size) Mode Cnt Score Error Units
ByteBufUtilDecodeStringBenchmark.decodeString US-ASCII false 8 thrpt 20 22401645.093 ± 4671452.479 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString US-ASCII false 64 thrpt 20 23678483.384 ± 3749164.446 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString US-ASCII true 8 thrpt 20 15731142.651 ± 3782931.591 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString US-ASCII true 64 thrpt 20 16244232.229 ± 1886259.658 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-8 false 8 thrpt 20 25983680.959 ± 5045782.289 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-8 false 64 thrpt 20 26235589.339 ± 2867004.950 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-8 true 8 thrpt 20 18499027.808 ± 4784684.268 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-8 true 64 thrpt 20 16825286.141 ± 1008712.342 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-16 false 8 thrpt 20 5789879.092 ± 1201786.359 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-16 false 64 thrpt 20 2173243.225 ± 417809.341 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-16 true 8 thrpt 20 5035583.011 ± 1001978.854 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString UTF-16 true 64 thrpt 20 2162345.301 ± 402410.408 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString ISO-8859-1 false 8 thrpt 20 30039052.376 ± 6539111.622 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString ISO-8859-1 false 64 thrpt 20 31414163.515 ± 2096710.526 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString ISO-8859-1 true 8 thrpt 20 19538587.855 ± 4639115.572 ops/s
ByteBufUtilDecodeStringBenchmark.decodeString ISO-8859-1 true 64 thrpt 20 19467839.722 ± 1672687.213 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld US-ASCII false 8 thrpt 20 10787326.745 ± 1034197.864 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld US-ASCII false 64 thrpt 20 7129801.930 ± 1363019.209 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld US-ASCII true 8 thrpt 20 9002529.605 ± 2017642.445 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld US-ASCII true 64 thrpt 20 3860192.352 ± 826218.738 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-8 false 8 thrpt 20 10532838.027 ± 2151743.968 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-8 false 64 thrpt 20 7185554.597 ± 1387685.785 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-8 true 8 thrpt 20 7352253.316 ± 1333823.850 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-8 true 64 thrpt 20 2825578.707 ± 349701.156 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-16 false 8 thrpt 20 7277446.665 ± 1447034.346 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-16 false 64 thrpt 20 2445929.579 ± 562816.641 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-16 true 8 thrpt 20 6201174.401 ± 1236137.786 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld UTF-16 true 64 thrpt 20 2310674.973 ± 525587.959 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld ISO-8859-1 false 8 thrpt 20 11142625.392 ± 1680556.468 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld ISO-8859-1 false 64 thrpt 20 8127116.405 ± 1128513.860 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld ISO-8859-1 true 8 thrpt 20 9405751.952 ± 2193324.806 ops/s
ByteBufUtilDecodeStringBenchmark.decodeStringOld ISO-8859-1 true 64 thrpt 20 3943282.076 ± 737798.070 ops/s

Benchmark result is saved to /home/norman/mainframer/netty/microbench/target/reports/performance/ByteBufUtilDecodeStringBenchmark.json
Tests run: 1, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 1,030.173 sec - in io.netty.buffer.ByteBufUtilDecodeStringBenchmark
[1030.460s][info ][gc,heap,exit ] Heap
[1030.460s][info ][gc,heap,exit ] garbage-first heap total 516096K, used 257918K [0x0000000609a00000, 0x0000000800000000)
[1030.460s][info ][gc,heap,exit ] region size 2048K, 127 young (260096K), 2 survivors (4096K)
[1030.460s][info ][gc,heap,exit ] Metaspace used 17123K, capacity 17438K, committed 17792K, reserved 1064960K
[1030.460s][info ][gc,heap,exit ] class space used 1709K, capacity 1827K, committed 1920K, reserved 1048576K
This commit is contained in:
parent
69545aedc4
commit
87ec2f882a
@ -53,10 +53,10 @@ import static io.netty.util.internal.StringUtil.isSurrogate;
|
|||||||
public final class ByteBufUtil {
|
public final class ByteBufUtil {
|
||||||
|
|
||||||
private static final InternalLogger logger = InternalLoggerFactory.getInstance(ByteBufUtil.class);
|
private static final InternalLogger logger = InternalLoggerFactory.getInstance(ByteBufUtil.class);
|
||||||
private static final FastThreadLocal<CharBuffer> CHAR_BUFFERS = new FastThreadLocal<CharBuffer>() {
|
private static final FastThreadLocal<byte[]> BYTE_ARRAYS = new FastThreadLocal<byte[]>() {
|
||||||
@Override
|
@Override
|
||||||
protected CharBuffer initialValue() throws Exception {
|
protected byte[] initialValue() throws Exception {
|
||||||
return CharBuffer.allocate(1024);
|
return PlatformDependent.allocateUninitializedArray(1024);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -756,52 +756,31 @@ public final class ByteBufUtil {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("deprecation")
|
||||||
static String decodeString(ByteBuf src, int readerIndex, int len, Charset charset) {
|
static String decodeString(ByteBuf src, int readerIndex, int len, Charset charset) {
|
||||||
if (len == 0) {
|
if (len == 0) {
|
||||||
return StringUtil.EMPTY_STRING;
|
return StringUtil.EMPTY_STRING;
|
||||||
}
|
}
|
||||||
final CharsetDecoder decoder = CharsetUtil.decoder(charset);
|
final byte[] array;
|
||||||
final int maxLength = (int) ((double) len * decoder.maxCharsPerByte());
|
final int offset;
|
||||||
CharBuffer dst = CHAR_BUFFERS.get();
|
|
||||||
if (dst.length() < maxLength) {
|
|
||||||
dst = CharBuffer.allocate(maxLength);
|
|
||||||
if (maxLength <= MAX_CHAR_BUFFER_SIZE) {
|
|
||||||
CHAR_BUFFERS.set(dst);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
dst.clear();
|
|
||||||
}
|
|
||||||
if (src.nioBufferCount() == 1) {
|
|
||||||
decodeString(decoder, src.nioBuffer(readerIndex, len), dst);
|
|
||||||
} else {
|
|
||||||
// We use a heap buffer as CharsetDecoder is most likely able to use a fast-path if src and dst buffers
|
|
||||||
// are both backed by a byte array.
|
|
||||||
ByteBuf buffer = src.alloc().heapBuffer(len);
|
|
||||||
try {
|
|
||||||
buffer.writeBytes(src, readerIndex, len);
|
|
||||||
// Use internalNioBuffer(...) to reduce object creation.
|
|
||||||
decodeString(decoder, buffer.internalNioBuffer(buffer.readerIndex(), len), dst);
|
|
||||||
} finally {
|
|
||||||
// Release the temporary buffer again.
|
|
||||||
buffer.release();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return dst.flip().toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void decodeString(CharsetDecoder decoder, ByteBuffer src, CharBuffer dst) {
|
if (src.hasArray()) {
|
||||||
try {
|
array = src.array();
|
||||||
CoderResult cr = decoder.decode(src, dst, true);
|
offset = src.arrayOffset() + readerIndex;
|
||||||
if (!cr.isUnderflow()) {
|
} else {
|
||||||
cr.throwException();
|
if (len <= 1024) {
|
||||||
|
array = BYTE_ARRAYS.get();
|
||||||
|
} else {
|
||||||
|
array = PlatformDependent.allocateUninitializedArray(len);
|
||||||
}
|
}
|
||||||
cr = decoder.flush(dst);
|
offset = 0;
|
||||||
if (!cr.isUnderflow()) {
|
src.getBytes(readerIndex, array, 0, len);
|
||||||
cr.throwException();
|
|
||||||
}
|
|
||||||
} catch (CharacterCodingException x) {
|
|
||||||
throw new IllegalStateException(x);
|
|
||||||
}
|
}
|
||||||
|
if (CharsetUtil.US_ASCII.equals(charset)) {
|
||||||
|
// Fast-path for US-ASCII which is used frequently.
|
||||||
|
return new String(array, 0, offset, len);
|
||||||
|
}
|
||||||
|
return new String(array, offset, len, charset);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -0,0 +1,112 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2018 The Netty Project
|
||||||
|
*
|
||||||
|
* The Netty Project licenses this file to you under the Apache License,
|
||||||
|
* version 2.0 (the "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at:
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package io.netty.buffer;
|
||||||
|
|
||||||
|
import io.netty.microbench.util.AbstractMicrobenchmark;
|
||||||
|
import org.openjdk.jmh.annotations.Benchmark;
|
||||||
|
import org.openjdk.jmh.annotations.Measurement;
|
||||||
|
import org.openjdk.jmh.annotations.Param;
|
||||||
|
import org.openjdk.jmh.annotations.Setup;
|
||||||
|
import org.openjdk.jmh.annotations.TearDown;
|
||||||
|
import org.openjdk.jmh.annotations.Warmup;
|
||||||
|
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
|
||||||
|
@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS)
|
||||||
|
public class ByteBufUtilDecodeStringBenchmark extends AbstractMicrobenchmark {
|
||||||
|
|
||||||
|
public enum ByteBufType {
|
||||||
|
DIRECT {
|
||||||
|
@Override
|
||||||
|
ByteBuf newBuffer(byte[] bytes, int length) {
|
||||||
|
ByteBuf buffer = Unpooled.directBuffer(length);
|
||||||
|
buffer.writeBytes(bytes, 0, length);
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
HEAP_OFFSET {
|
||||||
|
@Override
|
||||||
|
ByteBuf newBuffer(byte[] bytes, int length) {
|
||||||
|
return Unpooled.wrappedBuffer(bytes, 1, length);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
HEAP {
|
||||||
|
@Override
|
||||||
|
ByteBuf newBuffer(byte[] bytes, int length) {
|
||||||
|
return Unpooled.wrappedBuffer(bytes, 0, length);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
COMPOSITE {
|
||||||
|
@Override
|
||||||
|
ByteBuf newBuffer(byte[] bytes, int length) {
|
||||||
|
CompositeByteBuf buffer = Unpooled.compositeBuffer();
|
||||||
|
int offset = 0;
|
||||||
|
// 8 buffers per composite.
|
||||||
|
int capacity = length / 8;
|
||||||
|
|
||||||
|
while (length > 0) {
|
||||||
|
buffer.addComponent(true, Unpooled.wrappedBuffer(bytes, offset, Math.min(length, capacity)));
|
||||||
|
length -= capacity;
|
||||||
|
offset += capacity;
|
||||||
|
}
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
abstract ByteBuf newBuffer(byte[] bytes, int length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Param({ "8", "64", "1024", "10240", "1073741824" })
|
||||||
|
public int size;
|
||||||
|
|
||||||
|
@Param({ "US-ASCII", "UTF-8" })
|
||||||
|
public String charsetName;
|
||||||
|
|
||||||
|
@Param
|
||||||
|
public ByteBufType bufferType;
|
||||||
|
|
||||||
|
private ByteBuf buffer;
|
||||||
|
private Charset charset;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String[] jvmArgs() {
|
||||||
|
// Ensure we minimize the GC overhead by sizing the heap big enough.
|
||||||
|
return new String[] { "-XX:MaxDirectMemorySize=2g", "-Xmx8g", "-Xms8g", "-Xmn6g" };
|
||||||
|
}
|
||||||
|
|
||||||
|
@Setup
|
||||||
|
public void setup() {
|
||||||
|
byte[] bytes = new byte[size + 2];
|
||||||
|
Arrays.fill(bytes, (byte) 'a');
|
||||||
|
|
||||||
|
// Use an offset to not allow any optimizations because we use the exact passed in byte[] for heap buffers.
|
||||||
|
buffer = bufferType.newBuffer(bytes, size);
|
||||||
|
charset = Charset.forName(charsetName);
|
||||||
|
}
|
||||||
|
|
||||||
|
@TearDown
|
||||||
|
public void teardown() {
|
||||||
|
buffer.release();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public String decodeString() {
|
||||||
|
return ByteBufUtil.decodeString(buffer, buffer.readerIndex(), size, charset);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user