From 583d838f7c02c864c4ab4bc095c01e9b45963fcf Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Fri, 26 Oct 2018 15:32:38 -0700 Subject: [PATCH] Optimize AbstractByteBuf.getCharSequence() in US_ASCII case (#8392) * Optimize AbstractByteBuf.getCharSequence() in US_ASCII case Motivation: Inspired by https://github.com/netty/netty/pull/8388, I noticed this simple optimization to avoid char[] allocation (also suggested in a TODO here). Modifications: Return an AsciiString from AbstractByteBuf.getCharSequence() if requested charset is US_ASCII or ISO_8859_1 (latter thanks to @Scottmitch's suggestion). Also tweak unit tests not to require Strings and include a new benchmark to demonstrate the speedup. Result: Speed-up of AbstractByteBuf.getCharSequence() in ascii and iso 8859/1 cases --- .../java/io/netty/buffer/AbstractByteBuf.java | 6 +- .../io/netty/buffer/AbstractByteBufTest.java | 26 +++- .../codec/socks/SocksCmdRequestTest.java | 6 +- .../codec/socks/SocksCmdResponseTest.java | 6 +- .../handler/codec/compression/SnappyTest.java | 5 +- ...stractByteBufGetCharSequenceBenchmark.java | 124 ++++++++++++++++++ 6 files changed, 161 insertions(+), 12 deletions(-) create mode 100644 microbench/src/main/java/io/netty/buffer/AbstractByteBufGetCharSequenceBenchmark.java diff --git a/buffer/src/main/java/io/netty/buffer/AbstractByteBuf.java b/buffer/src/main/java/io/netty/buffer/AbstractByteBuf.java index 1fe1dbe272..16b313a8ed 100644 --- a/buffer/src/main/java/io/netty/buffer/AbstractByteBuf.java +++ b/buffer/src/main/java/io/netty/buffer/AbstractByteBuf.java @@ -15,6 +15,7 @@ */ package io.netty.buffer; +import io.netty.util.AsciiString; import io.netty.util.ByteProcessor; import io.netty.util.CharsetUtil; import io.netty.util.IllegalReferenceCountException; @@ -503,7 +504,10 @@ public abstract class AbstractByteBuf extends ByteBuf { @Override public CharSequence getCharSequence(int index, int length, Charset charset) { - // TODO: We could optimize this for UTF8 and US_ASCII + if (CharsetUtil.US_ASCII.equals(charset) || CharsetUtil.ISO_8859_1.equals(charset)) { + // ByteBufUtil.getBytes(...) will return a new copy which the AsciiString uses directly + return new AsciiString(ByteBufUtil.getBytes(this, index, length, true), false); + } return toString(index, length, charset); } diff --git a/buffer/src/test/java/io/netty/buffer/AbstractByteBufTest.java b/buffer/src/test/java/io/netty/buffer/AbstractByteBufTest.java index 1af64a7392..59194ab374 100644 --- a/buffer/src/test/java/io/netty/buffer/AbstractByteBufTest.java +++ b/buffer/src/test/java/io/netty/buffer/AbstractByteBufTest.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.CharBuffer; import java.nio.ReadOnlyBufferException; import java.nio.channels.Channels; import java.nio.channels.FileChannel; @@ -3648,11 +3649,23 @@ public abstract class AbstractByteBufTest { testSetGetCharSequence(CharsetUtil.UTF_16); } + private static final CharBuffer EXTENDED_ASCII_CHARS, ASCII_CHARS; + + static { + char[] chars = new char[256]; + for (char c = 0; c < chars.length; c++) { + chars[c] = c; + } + EXTENDED_ASCII_CHARS = CharBuffer.wrap(chars); + ASCII_CHARS = CharBuffer.wrap(chars, 0, 128); + } + private void testSetGetCharSequence(Charset charset) { - ByteBuf buf = newBuffer(16); - String sequence = "AB"; + ByteBuf buf = newBuffer(1024); + CharBuffer sequence = CharsetUtil.US_ASCII.equals(charset) + ? ASCII_CHARS : EXTENDED_ASCII_CHARS; int bytes = buf.setCharSequence(1, sequence, charset); - assertEquals(sequence, buf.getCharSequence(1, bytes, charset)); + assertEquals(sequence, CharBuffer.wrap(buf.getCharSequence(1, bytes, charset))); buf.release(); } @@ -3677,12 +3690,13 @@ public abstract class AbstractByteBufTest { } private void testWriteReadCharSequence(Charset charset) { - ByteBuf buf = newBuffer(16); - String sequence = "AB"; + ByteBuf buf = newBuffer(1024); + CharBuffer sequence = CharsetUtil.US_ASCII.equals(charset) + ? ASCII_CHARS : EXTENDED_ASCII_CHARS; buf.writerIndex(1); int bytes = buf.writeCharSequence(sequence, charset); buf.readerIndex(1); - assertEquals(sequence, buf.readCharSequence(bytes, charset)); + assertEquals(sequence, CharBuffer.wrap(buf.readCharSequence(bytes, charset))); buf.release(); } diff --git a/codec-socks/src/test/java/io/netty/handler/codec/socks/SocksCmdRequestTest.java b/codec-socks/src/test/java/io/netty/handler/codec/socks/SocksCmdRequestTest.java index 37439a7e6a..4013c8b929 100644 --- a/codec-socks/src/test/java/io/netty/handler/codec/socks/SocksCmdRequestTest.java +++ b/codec-socks/src/test/java/io/netty/handler/codec/socks/SocksCmdRequestTest.java @@ -21,6 +21,7 @@ import io.netty.util.CharsetUtil; import org.junit.Test; import java.net.IDN; +import java.nio.CharBuffer; import static org.junit.Assert.*; @@ -101,7 +102,7 @@ public class SocksCmdRequestTest { @Test public void testIDNEncodeToAsciiForDomain() { String host = "тест.рф"; - String asciiHost = IDN.toASCII(host); + CharBuffer asciiHost = CharBuffer.wrap(IDN.toASCII(host)); short port = 10000; SocksCmdRequest rq = new SocksCmdRequest(SocksCmdType.BIND, SocksAddressType.DOMAIN, host, port); @@ -116,7 +117,8 @@ public class SocksCmdRequestTest { assertEquals((byte) 0x00, buffer.readByte()); assertEquals(SocksAddressType.DOMAIN.byteValue(), buffer.readByte()); assertEquals((byte) asciiHost.length(), buffer.readUnsignedByte()); - assertEquals(asciiHost, buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII)); + assertEquals(asciiHost, + CharBuffer.wrap(buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII))); assertEquals(port, buffer.readUnsignedShort()); buffer.release(); diff --git a/codec-socks/src/test/java/io/netty/handler/codec/socks/SocksCmdResponseTest.java b/codec-socks/src/test/java/io/netty/handler/codec/socks/SocksCmdResponseTest.java index 6861f761de..3ca64252d5 100644 --- a/codec-socks/src/test/java/io/netty/handler/codec/socks/SocksCmdResponseTest.java +++ b/codec-socks/src/test/java/io/netty/handler/codec/socks/SocksCmdResponseTest.java @@ -21,6 +21,7 @@ import io.netty.util.CharsetUtil; import org.junit.Test; import java.net.IDN; +import java.nio.CharBuffer; import static org.junit.Assert.*; @@ -135,7 +136,7 @@ public class SocksCmdResponseTest { @Test public void testIDNEncodeToAsciiForDomain() { String host = "тест.рф"; - String asciiHost = IDN.toASCII(host); + CharBuffer asciiHost = CharBuffer.wrap(IDN.toASCII(host)); short port = 10000; SocksCmdResponse rs = new SocksCmdResponse(SocksCmdStatus.SUCCESS, SocksAddressType.DOMAIN, host, port); @@ -150,7 +151,8 @@ public class SocksCmdResponseTest { assertEquals((byte) 0x00, buffer.readByte()); assertEquals(SocksAddressType.DOMAIN.byteValue(), buffer.readByte()); assertEquals((byte) asciiHost.length(), buffer.readUnsignedByte()); - assertEquals(asciiHost, buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII)); + assertEquals(asciiHost, + CharBuffer.wrap(buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII))); assertEquals(port, buffer.readUnsignedShort()); buffer.release(); diff --git a/codec/src/test/java/io/netty/handler/codec/compression/SnappyTest.java b/codec/src/test/java/io/netty/handler/codec/compression/SnappyTest.java index 2328047093..115deef158 100644 --- a/codec/src/test/java/io/netty/handler/codec/compression/SnappyTest.java +++ b/codec/src/test/java/io/netty/handler/codec/compression/SnappyTest.java @@ -24,6 +24,8 @@ import org.junit.Test; import static io.netty.handler.codec.compression.Snappy.*; import static org.junit.Assert.*; +import java.nio.CharBuffer; + public class SnappyTest { private final Snappy snappy = new Snappy(); @@ -219,7 +221,8 @@ public class SnappyTest { // Decode ByteBuf outDecoded = Unpooled.buffer(); snappy.decode(out, outDecoded); - assertEquals(srcStr, outDecoded.getCharSequence(0, outDecoded.writerIndex(), CharsetUtil.US_ASCII)); + assertEquals(CharBuffer.wrap(srcStr), + CharBuffer.wrap(outDecoded.getCharSequence(0, outDecoded.writerIndex(), CharsetUtil.US_ASCII))); in.release(); out.release(); diff --git a/microbench/src/main/java/io/netty/buffer/AbstractByteBufGetCharSequenceBenchmark.java b/microbench/src/main/java/io/netty/buffer/AbstractByteBufGetCharSequenceBenchmark.java new file mode 100644 index 0000000000..6fcdb94cc8 --- /dev/null +++ b/microbench/src/main/java/io/netty/buffer/AbstractByteBufGetCharSequenceBenchmark.java @@ -0,0 +1,124 @@ +/* + * Copyright 2018 The Netty Project + * + * The Netty Project licenses this file to you under the Apache License, + * version 2.0 (the "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package io.netty.buffer; + +import io.netty.microbench.util.AbstractMicrobenchmark; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; + +import java.nio.charset.Charset; +import java.util.Arrays; +import java.util.concurrent.TimeUnit; + +@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS) +public class AbstractByteBufGetCharSequenceBenchmark extends AbstractMicrobenchmark { + + public enum ByteBufType { + DIRECT { + @Override + ByteBuf newBuffer(byte[] bytes, int length) { + ByteBuf buffer = Unpooled.directBuffer(length); + buffer.writeBytes(bytes, 0, length); + return buffer; + } + }, + HEAP_OFFSET { + @Override + ByteBuf newBuffer(byte[] bytes, int length) { + return Unpooled.wrappedBuffer(bytes, 1, length); + } + }, + HEAP { + @Override + ByteBuf newBuffer(byte[] bytes, int length) { + return Unpooled.wrappedBuffer(bytes, 0, length); + } + }, + COMPOSITE { + @Override + ByteBuf newBuffer(byte[] bytes, int length) { + CompositeByteBuf buffer = Unpooled.compositeBuffer(); + int offset = 0; + // 8 buffers per composite. + int capacity = length / 8; + + while (length > 0) { + buffer.addComponent(true, Unpooled.wrappedBuffer(bytes, offset, Math.min(length, capacity))); + length -= capacity; + offset += capacity; + } + return buffer; + } + }; + abstract ByteBuf newBuffer(byte[] bytes, int length); + } + + @Param({ "8", "64", "1024", "10240", "1073741824" }) + public int size; + + @Param({ "US-ASCII", "ISO_8859_1" }) + public String charsetName; + + @Param + public ByteBufType bufferType; + + private ByteBuf buffer; + private Charset charset; + + @Override + protected String[] jvmArgs() { + // Ensure we minimize the GC overhead by sizing the heap big enough. + return new String[] { "-XX:MaxDirectMemorySize=2g", "-Xmx8g", "-Xms8g", "-Xmn6g" }; + } + + @Setup + public void setup() { + byte[] bytes = new byte[size + 2]; + Arrays.fill(bytes, (byte) 'a'); + + // Use an offset to not allow any optimizations because we use the exact passed in byte[] for heap buffers. + buffer = bufferType.newBuffer(bytes, size); + charset = Charset.forName(charsetName); + } + + @TearDown + public void teardown() { + buffer.release(); + } + + @Benchmark + public int getCharSequence() { + return traverse(buffer.getCharSequence(buffer.readerIndex(), size, charset)); + } + + @Benchmark + public int getCharSequenceOld() { + return traverse(buffer.toString(buffer.readerIndex(), size, charset)); + } + + private static int traverse(CharSequence cs) { + int i = 0, len = cs.length(); + while (i < len && cs.charAt(i++) != 0) { + // ensure result is "used" + } + return i; + } +}