Optimize AbstractByteBuf.getCharSequence() in US_ASCII case (#8392)

* Optimize AbstractByteBuf.getCharSequence() in US_ASCII case

Motivation:

Inspired by https://github.com/netty/netty/pull/8388, I noticed this
simple optimization to avoid char[] allocation (also suggested in a TODO
here).

Modifications:

Return an AsciiString from AbstractByteBuf.getCharSequence() if
requested charset is US_ASCII or ISO_8859_1 (latter thanks to
@Scottmitch's suggestion). Also tweak unit tests not to require Strings
and include a new benchmark to demonstrate the speedup.

Result:

Speed-up of AbstractByteBuf.getCharSequence() in ascii and iso 8859/1
cases
This commit is contained in:
Nick Hill 2018-10-26 15:32:38 -07:00 committed by Norman Maurer
parent ce39773e04
commit 583d838f7c
6 changed files with 161 additions and 12 deletions

View File

@ -15,6 +15,7 @@
*/ */
package io.netty.buffer; package io.netty.buffer;
import io.netty.util.AsciiString;
import io.netty.util.ByteProcessor; import io.netty.util.ByteProcessor;
import io.netty.util.CharsetUtil; import io.netty.util.CharsetUtil;
import io.netty.util.IllegalReferenceCountException; import io.netty.util.IllegalReferenceCountException;
@ -503,7 +504,10 @@ public abstract class AbstractByteBuf extends ByteBuf {
@Override @Override
public CharSequence getCharSequence(int index, int length, Charset charset) { public CharSequence getCharSequence(int index, int length, Charset charset) {
// TODO: We could optimize this for UTF8 and US_ASCII if (CharsetUtil.US_ASCII.equals(charset) || CharsetUtil.ISO_8859_1.equals(charset)) {
// ByteBufUtil.getBytes(...) will return a new copy which the AsciiString uses directly
return new AsciiString(ByteBufUtil.getBytes(this, index, length, true), false);
}
return toString(index, length, charset); return toString(index, length, charset);
} }

View File

@ -30,6 +30,7 @@ import java.io.IOException;
import java.io.RandomAccessFile; import java.io.RandomAccessFile;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.ByteOrder; import java.nio.ByteOrder;
import java.nio.CharBuffer;
import java.nio.ReadOnlyBufferException; import java.nio.ReadOnlyBufferException;
import java.nio.channels.Channels; import java.nio.channels.Channels;
import java.nio.channels.FileChannel; import java.nio.channels.FileChannel;
@ -3648,11 +3649,23 @@ public abstract class AbstractByteBufTest {
testSetGetCharSequence(CharsetUtil.UTF_16); testSetGetCharSequence(CharsetUtil.UTF_16);
} }
private static final CharBuffer EXTENDED_ASCII_CHARS, ASCII_CHARS;
static {
char[] chars = new char[256];
for (char c = 0; c < chars.length; c++) {
chars[c] = c;
}
EXTENDED_ASCII_CHARS = CharBuffer.wrap(chars);
ASCII_CHARS = CharBuffer.wrap(chars, 0, 128);
}
private void testSetGetCharSequence(Charset charset) { private void testSetGetCharSequence(Charset charset) {
ByteBuf buf = newBuffer(16); ByteBuf buf = newBuffer(1024);
String sequence = "AB"; CharBuffer sequence = CharsetUtil.US_ASCII.equals(charset)
? ASCII_CHARS : EXTENDED_ASCII_CHARS;
int bytes = buf.setCharSequence(1, sequence, charset); int bytes = buf.setCharSequence(1, sequence, charset);
assertEquals(sequence, buf.getCharSequence(1, bytes, charset)); assertEquals(sequence, CharBuffer.wrap(buf.getCharSequence(1, bytes, charset)));
buf.release(); buf.release();
} }
@ -3677,12 +3690,13 @@ public abstract class AbstractByteBufTest {
} }
private void testWriteReadCharSequence(Charset charset) { private void testWriteReadCharSequence(Charset charset) {
ByteBuf buf = newBuffer(16); ByteBuf buf = newBuffer(1024);
String sequence = "AB"; CharBuffer sequence = CharsetUtil.US_ASCII.equals(charset)
? ASCII_CHARS : EXTENDED_ASCII_CHARS;
buf.writerIndex(1); buf.writerIndex(1);
int bytes = buf.writeCharSequence(sequence, charset); int bytes = buf.writeCharSequence(sequence, charset);
buf.readerIndex(1); buf.readerIndex(1);
assertEquals(sequence, buf.readCharSequence(bytes, charset)); assertEquals(sequence, CharBuffer.wrap(buf.readCharSequence(bytes, charset)));
buf.release(); buf.release();
} }

View File

@ -21,6 +21,7 @@ import io.netty.util.CharsetUtil;
import org.junit.Test; import org.junit.Test;
import java.net.IDN; import java.net.IDN;
import java.nio.CharBuffer;
import static org.junit.Assert.*; import static org.junit.Assert.*;
@ -101,7 +102,7 @@ public class SocksCmdRequestTest {
@Test @Test
public void testIDNEncodeToAsciiForDomain() { public void testIDNEncodeToAsciiForDomain() {
String host = "тест.рф"; String host = "тест.рф";
String asciiHost = IDN.toASCII(host); CharBuffer asciiHost = CharBuffer.wrap(IDN.toASCII(host));
short port = 10000; short port = 10000;
SocksCmdRequest rq = new SocksCmdRequest(SocksCmdType.BIND, SocksAddressType.DOMAIN, host, port); SocksCmdRequest rq = new SocksCmdRequest(SocksCmdType.BIND, SocksAddressType.DOMAIN, host, port);
@ -116,7 +117,8 @@ public class SocksCmdRequestTest {
assertEquals((byte) 0x00, buffer.readByte()); assertEquals((byte) 0x00, buffer.readByte());
assertEquals(SocksAddressType.DOMAIN.byteValue(), buffer.readByte()); assertEquals(SocksAddressType.DOMAIN.byteValue(), buffer.readByte());
assertEquals((byte) asciiHost.length(), buffer.readUnsignedByte()); assertEquals((byte) asciiHost.length(), buffer.readUnsignedByte());
assertEquals(asciiHost, buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII)); assertEquals(asciiHost,
CharBuffer.wrap(buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII)));
assertEquals(port, buffer.readUnsignedShort()); assertEquals(port, buffer.readUnsignedShort());
buffer.release(); buffer.release();

View File

@ -21,6 +21,7 @@ import io.netty.util.CharsetUtil;
import org.junit.Test; import org.junit.Test;
import java.net.IDN; import java.net.IDN;
import java.nio.CharBuffer;
import static org.junit.Assert.*; import static org.junit.Assert.*;
@ -135,7 +136,7 @@ public class SocksCmdResponseTest {
@Test @Test
public void testIDNEncodeToAsciiForDomain() { public void testIDNEncodeToAsciiForDomain() {
String host = "тест.рф"; String host = "тест.рф";
String asciiHost = IDN.toASCII(host); CharBuffer asciiHost = CharBuffer.wrap(IDN.toASCII(host));
short port = 10000; short port = 10000;
SocksCmdResponse rs = new SocksCmdResponse(SocksCmdStatus.SUCCESS, SocksAddressType.DOMAIN, host, port); SocksCmdResponse rs = new SocksCmdResponse(SocksCmdStatus.SUCCESS, SocksAddressType.DOMAIN, host, port);
@ -150,7 +151,8 @@ public class SocksCmdResponseTest {
assertEquals((byte) 0x00, buffer.readByte()); assertEquals((byte) 0x00, buffer.readByte());
assertEquals(SocksAddressType.DOMAIN.byteValue(), buffer.readByte()); assertEquals(SocksAddressType.DOMAIN.byteValue(), buffer.readByte());
assertEquals((byte) asciiHost.length(), buffer.readUnsignedByte()); assertEquals((byte) asciiHost.length(), buffer.readUnsignedByte());
assertEquals(asciiHost, buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII)); assertEquals(asciiHost,
CharBuffer.wrap(buffer.readCharSequence(asciiHost.length(), CharsetUtil.US_ASCII)));
assertEquals(port, buffer.readUnsignedShort()); assertEquals(port, buffer.readUnsignedShort());
buffer.release(); buffer.release();

View File

@ -24,6 +24,8 @@ import org.junit.Test;
import static io.netty.handler.codec.compression.Snappy.*; import static io.netty.handler.codec.compression.Snappy.*;
import static org.junit.Assert.*; import static org.junit.Assert.*;
import java.nio.CharBuffer;
public class SnappyTest { public class SnappyTest {
private final Snappy snappy = new Snappy(); private final Snappy snappy = new Snappy();
@ -219,7 +221,8 @@ public class SnappyTest {
// Decode // Decode
ByteBuf outDecoded = Unpooled.buffer(); ByteBuf outDecoded = Unpooled.buffer();
snappy.decode(out, outDecoded); snappy.decode(out, outDecoded);
assertEquals(srcStr, outDecoded.getCharSequence(0, outDecoded.writerIndex(), CharsetUtil.US_ASCII)); assertEquals(CharBuffer.wrap(srcStr),
CharBuffer.wrap(outDecoded.getCharSequence(0, outDecoded.writerIndex(), CharsetUtil.US_ASCII)));
in.release(); in.release();
out.release(); out.release();

View File

@ -0,0 +1,124 @@
/*
* Copyright 2018 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.buffer;
import io.netty.microbench.util.AbstractMicrobenchmark;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.concurrent.TimeUnit;
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS)
public class AbstractByteBufGetCharSequenceBenchmark extends AbstractMicrobenchmark {
public enum ByteBufType {
DIRECT {
@Override
ByteBuf newBuffer(byte[] bytes, int length) {
ByteBuf buffer = Unpooled.directBuffer(length);
buffer.writeBytes(bytes, 0, length);
return buffer;
}
},
HEAP_OFFSET {
@Override
ByteBuf newBuffer(byte[] bytes, int length) {
return Unpooled.wrappedBuffer(bytes, 1, length);
}
},
HEAP {
@Override
ByteBuf newBuffer(byte[] bytes, int length) {
return Unpooled.wrappedBuffer(bytes, 0, length);
}
},
COMPOSITE {
@Override
ByteBuf newBuffer(byte[] bytes, int length) {
CompositeByteBuf buffer = Unpooled.compositeBuffer();
int offset = 0;
// 8 buffers per composite.
int capacity = length / 8;
while (length > 0) {
buffer.addComponent(true, Unpooled.wrappedBuffer(bytes, offset, Math.min(length, capacity)));
length -= capacity;
offset += capacity;
}
return buffer;
}
};
abstract ByteBuf newBuffer(byte[] bytes, int length);
}
@Param({ "8", "64", "1024", "10240", "1073741824" })
public int size;
@Param({ "US-ASCII", "ISO_8859_1" })
public String charsetName;
@Param
public ByteBufType bufferType;
private ByteBuf buffer;
private Charset charset;
@Override
protected String[] jvmArgs() {
// Ensure we minimize the GC overhead by sizing the heap big enough.
return new String[] { "-XX:MaxDirectMemorySize=2g", "-Xmx8g", "-Xms8g", "-Xmn6g" };
}
@Setup
public void setup() {
byte[] bytes = new byte[size + 2];
Arrays.fill(bytes, (byte) 'a');
// Use an offset to not allow any optimizations because we use the exact passed in byte[] for heap buffers.
buffer = bufferType.newBuffer(bytes, size);
charset = Charset.forName(charsetName);
}
@TearDown
public void teardown() {
buffer.release();
}
@Benchmark
public int getCharSequence() {
return traverse(buffer.getCharSequence(buffer.readerIndex(), size, charset));
}
@Benchmark
public int getCharSequenceOld() {
return traverse(buffer.toString(buffer.readerIndex(), size, charset));
}
private static int traverse(CharSequence cs) {
int i = 0, len = cs.length();
while (i < len && cs.charAt(i++) != 0) {
// ensure result is "used"
}
return i;
}
}