From ee206b6ba8c24413adc55924d796a42b9d4a7c26 Mon Sep 17 00:00:00 2001 From: Anuraag Agrawal Date: Fri, 20 Dec 2019 16:51:18 +0900 Subject: [PATCH] Separate out query string encoding for non-encoded strings. (#9887) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Motivation: Currently, characters are appended to the encoded string char-by-char even when no encoding is needed. We can instead separate out a code path that appends the entire string in one go for better `StringBuilder` allocation performance. Modification: Only enter the char-by-char loop once a character that requires encoding is found. Result: The results are somewhat noisy on my hot laptop, but the biggest impact is on long strings, both to reduce resizes of the buffer and also to reduce complexity of the loop. I don't think there's a significant downside, though, for the cases that hit the slow path. After ``` Benchmark Mode Cnt Score Error Units QueryStringEncoderBenchmark.longAscii thrpt 6 1.406 ± 0.069 ops/us QueryStringEncoderBenchmark.longAsciiFirst thrpt 6 0.046 ± 0.001 ops/us QueryStringEncoderBenchmark.longUtf8 thrpt 6 0.046 ± 0.001 ops/us QueryStringEncoderBenchmark.shortAscii thrpt 6 15.781 ± 0.949 ops/us QueryStringEncoderBenchmark.shortAsciiFirst thrpt 6 3.171 ± 0.232 ops/us QueryStringEncoderBenchmark.shortUtf8 thrpt 6 3.900 ± 0.667 ops/us ``` Before ``` Benchmark Mode Cnt Score Error Units QueryStringEncoderBenchmark.longAscii thrpt 6 0.444 ± 0.072 ops/us QueryStringEncoderBenchmark.longAsciiFirst thrpt 6 0.043 ± 0.002 ops/us QueryStringEncoderBenchmark.longUtf8 thrpt 6 0.047 ± 0.001 ops/us QueryStringEncoderBenchmark.shortAscii thrpt 6 16.503 ± 1.015 ops/us QueryStringEncoderBenchmark.shortAsciiFirst thrpt 6 3.316 ± 0.154 ops/us QueryStringEncoderBenchmark.shortUtf8 thrpt 6 3.776 ± 0.956 ops/us ``` --- .../codec/http/QueryStringEncoder.java | 19 ++++ .../codec/http/QueryStringEncoderTest.java | 11 ++- .../http/QueryStringEncoderBenchmark.java | 
95 +++++++++++++++++++ 3 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 microbench/src/main/java/io/netty/handler/codec/http/QueryStringEncoderBenchmark.java diff --git a/codec-http/src/main/java/io/netty/handler/codec/http/QueryStringEncoder.java b/codec-http/src/main/java/io/netty/handler/codec/http/QueryStringEncoder.java index dd2682ea23..3d0c393e3a 100644 --- a/codec-http/src/main/java/io/netty/handler/codec/http/QueryStringEncoder.java +++ b/codec-http/src/main/java/io/netty/handler/codec/http/QueryStringEncoder.java @@ -156,6 +156,25 @@ public class QueryStringEncoder { */ private void encodeUtf8Component(CharSequence s) { for (int i = 0, len = s.length(); i < len; i++) { + char c = s.charAt(i); + if (!dontNeedEncoding(c)) { + encodeUtf8Component(s, i, len); + return; + } + } + uriBuilder.append(s); + } + + private void encodeUtf8Component(CharSequence s, int encodingStart, int len) { + if (encodingStart > 0) { + // Append non-encoded characters directly first. 
+ uriBuilder.append(s, 0, encodingStart); + } + encodeUtf8ComponentSlow(s, encodingStart, len); + } + + private void encodeUtf8ComponentSlow(CharSequence s, int start, int len) { + for (int i = start; i < len; i++) { char c = s.charAt(i); if (c < 0x80) { if (dontNeedEncoding(c)) { diff --git a/codec-http/src/test/java/io/netty/handler/codec/http/QueryStringEncoderTest.java b/codec-http/src/test/java/io/netty/handler/codec/http/QueryStringEncoderTest.java index a9f6f90155..ba4d2b64e6 100644 --- a/codec-http/src/test/java/io/netty/handler/codec/http/QueryStringEncoderTest.java +++ b/codec-http/src/test/java/io/netty/handler/codec/http/QueryStringEncoderTest.java @@ -15,12 +15,12 @@ */ package io.netty.handler.codec.http; -import java.net.URI; -import java.nio.charset.Charset; - import org.junit.Assert; import org.junit.Test; +import java.net.URI; +import java.nio.charset.Charset; + public class QueryStringEncoderTest { @Test @@ -37,6 +37,11 @@ public class QueryStringEncoderTest { Assert.assertEquals("/foo/\u00A5?a=%C2%A5", e.toString()); Assert.assertEquals(new URI("/foo/\u00A5?a=%C2%A5"), e.toUri()); + e = new QueryStringEncoder("/foo/\u00A5"); + e.addParam("a", "abc\u00A5"); + Assert.assertEquals("/foo/\u00A5?a=abc%C2%A5", e.toString()); + Assert.assertEquals(new URI("/foo/\u00A5?a=abc%C2%A5"), e.toUri()); + e = new QueryStringEncoder("/foo"); e.addParam("a", "1"); e.addParam("b", "2"); diff --git a/microbench/src/main/java/io/netty/handler/codec/http/QueryStringEncoderBenchmark.java b/microbench/src/main/java/io/netty/handler/codec/http/QueryStringEncoderBenchmark.java new file mode 100644 index 0000000000..88013ebd8f --- /dev/null +++ b/microbench/src/main/java/io/netty/handler/codec/http/QueryStringEncoderBenchmark.java @@ -0,0 +1,95 @@ +/* + * Copyright 2019 The Netty Project + * + * The Netty Project licenses this file to you under the Apache License, + * version 2.0 (the "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package io.netty.handler.codec.http; + +import io.netty.microbench.util.AbstractMicrobenchmark; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +import java.util.concurrent.TimeUnit; + +@Threads(1) +@Warmup(iterations = 3) +@Measurement(iterations = 3) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +public class QueryStringEncoderBenchmark extends AbstractMicrobenchmark { + private String shortAscii; + private String shortUtf8; + private String shortAsciiFirst; + + private String longAscii; + private String longUtf8; + private String longAsciiFirst; + + @Setup + public void setUp() { + // Avoid constant pool for strings since it's common for at least values to not be constant. 
+ shortAscii = new String("foo".toCharArray()); + shortUtf8 = new String("ほげほげ".toCharArray()); + shortAsciiFirst = shortAscii + shortUtf8; + longAscii = repeat(shortAscii, 100); + longUtf8 = repeat(shortUtf8, 100); + longAsciiFirst = longAscii + longUtf8; + } + + @Benchmark + public String shortAscii() { + return encode(shortAscii); + } + + @Benchmark + public String shortUtf8() { + return encode(shortUtf8); + } + + @Benchmark + public String shortAsciiFirst() { + return encode(shortAsciiFirst); + } + + @Benchmark + public String longAscii() { + return encode(longAscii); + } + + @Benchmark + public String longUtf8() { + return encode(longUtf8); + } + + @Benchmark + public String longAsciiFirst() { + return encode(longAsciiFirst); + } + + private static String encode(String s) { + QueryStringEncoder encoder = new QueryStringEncoder(""); + encoder.addParam(s, s); + return encoder.toString(); + } + + private static String repeat(String s, int num) { + StringBuilder sb = new StringBuilder(num * s.length()); + for (int i = 0; i < num; i++) { + sb.append(s); + } + return sb.toString(); + } +}