Separate out query string encoding for non-encoded strings. (#9887)

Motivation:

Currently, characters are appended to the encoded string char-by-char even when no encoding is needed. We can instead separate out a code path that appends the entire string in one go for better `StringBuilder` allocation performance.

Modification:

Only enter the char-by-char loop once a character that requires encoding is found.

Result:

The results aren't so clear given the noise on my hot laptop - the biggest impact is on long strings, both from fewer resizes of the buffer and from the reduced complexity of the loop. I don't think there's a significant downside, though, for the cases that hit the slow path.

After
```
Benchmark                                     Mode  Cnt   Score   Error   Units
QueryStringEncoderBenchmark.longAscii        thrpt    6   1.406 ± 0.069  ops/us
QueryStringEncoderBenchmark.longAsciiFirst   thrpt    6   0.046 ± 0.001  ops/us
QueryStringEncoderBenchmark.longUtf8         thrpt    6   0.046 ± 0.001  ops/us
QueryStringEncoderBenchmark.shortAscii       thrpt    6  15.781 ± 0.949  ops/us
QueryStringEncoderBenchmark.shortAsciiFirst  thrpt    6   3.171 ± 0.232  ops/us
QueryStringEncoderBenchmark.shortUtf8        thrpt    6   3.900 ± 0.667  ops/us
```

Before
```
Benchmark                                     Mode  Cnt   Score   Error   Units
QueryStringEncoderBenchmark.longAscii        thrpt    6   0.444 ± 0.072  ops/us
QueryStringEncoderBenchmark.longAsciiFirst   thrpt    6   0.043 ± 0.002  ops/us
QueryStringEncoderBenchmark.longUtf8         thrpt    6   0.047 ± 0.001  ops/us
QueryStringEncoderBenchmark.shortAscii       thrpt    6  16.503 ± 1.015  ops/us
QueryStringEncoderBenchmark.shortAsciiFirst  thrpt    6   3.316 ± 0.154  ops/us
QueryStringEncoderBenchmark.shortUtf8        thrpt    6   3.776 ± 0.956  ops/us
```
2019-12-20 16:51:18 +09:00 · 2019-12-20 16:51:18 +09:00 · 687308b4de
commit 687308b4de
parent b615495762
3 changed files with 122 additions and 3 deletions
--- a/codec-http/src/main/java/io/netty/handler/codec/http/QueryStringEncoder.java
+++ b/codec-http/src/main/java/io/netty/handler/codec/http/QueryStringEncoder.java
@ -154,6 +154,25 @@ public class QueryStringEncoder {
     */
    private void encodeUtf8Component(CharSequence s) {
        for (int i = 0, len = s.length(); i < len; i++) {
+            char c = s.charAt(i);
+            if (!dontNeedEncoding(c)) {
+                encodeUtf8Component(s, i, len); // first char needing encoding: hand off from index i
+                return;
+            }
+        }
+        uriBuilder.append(s); // no char needed encoding: append the whole sequence in one call
+    }
+
+    private void encodeUtf8Component(CharSequence s, int encodingStart, int len) {
+        if (encodingStart > 0) {
+            // Bulk-append the leading chars [0, encodingStart), which the caller found need no encoding.
+            uriBuilder.append(s, 0, encodingStart);
+        }
+        encodeUtf8ComponentSlow(s, encodingStart, len); // remaining chars are handled one at a time
+    }
+
+    private void encodeUtf8ComponentSlow(CharSequence s, int start, int len) {
+        for (int i = start; i < len; i++) {
            char c = s.charAt(i);
            if (c < 0x80) {
                if (dontNeedEncoding(c)) {
--- a/codec-http/src/test/java/io/netty/handler/codec/http/QueryStringEncoderTest.java
+++ b/codec-http/src/test/java/io/netty/handler/codec/http/QueryStringEncoderTest.java
@ -15,12 +15,12 @@
 */
 package io.netty.handler.codec.http;

-import java.net.URI;
-import java.nio.charset.Charset;
-
 import org.junit.Assert;
 import org.junit.Test;

+import java.net.URI;
+import java.nio.charset.Charset;
+
 public class QueryStringEncoderTest {

    @Test
@ -37,6 +37,11 @@ public class QueryStringEncoderTest {
        Assert.assertEquals("/foo/\u00A5?a=%C2%A5", e.toString());
        Assert.assertEquals(new URI("/foo/\u00A5?a=%C2%A5"), e.toUri());

+        e = new QueryStringEncoder("/foo/\u00A5");
+        e.addParam("a", "abc\u00A5");
+        Assert.assertEquals("/foo/\u00A5?a=abc%C2%A5", e.toString());
+        Assert.assertEquals(new URI("/foo/\u00A5?a=abc%C2%A5"), e.toUri());
+
        e = new QueryStringEncoder("/foo");
        e.addParam("a", "1");
        e.addParam("b", "2");
--- a/microbench/src/main/java/io/netty/handler/codec/http/QueryStringEncoderBenchmark.java
+++ b/microbench/src/main/java/io/netty/handler/codec/http/QueryStringEncoderBenchmark.java
@ -0,0 +1,95 @@
+/*
+ * Copyright 2019 The Netty Project
+ *
+ * The Netty Project licenses this file to you under the Apache License,
+ * version 2.0 (the "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at:
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package io.netty.handler.codec.http;
+
+import io.netty.microbench.util.AbstractMicrobenchmark;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.util.concurrent.TimeUnit;
+
+@Threads(1)
+@Warmup(iterations = 3)
+@Measurement(iterations = 3)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+public class QueryStringEncoderBenchmark extends AbstractMicrobenchmark { // ASCII vs. non-ASCII encoding cost
+    private String shortAscii; // "foo"
+    private String shortUtf8; // four non-ASCII (hiragana) chars
+    private String shortAsciiFirst; // shortAscii followed by shortUtf8
+
+    private String longAscii; // shortAscii repeated 100 times
+    private String longUtf8; // shortUtf8 repeated 100 times
+    private String longAsciiFirst; // longAscii followed by longUtf8
+
+    @Setup
+    public void setUp() {
+        // Copy via char[] so the inputs are not constant-pool literals; real values are rarely constants.
+        shortAscii = new String("foo".toCharArray());
+        shortUtf8 = new String("ほげほげ".toCharArray());
+        shortAsciiFirst = shortAscii + shortUtf8;
+        longAscii = repeat(shortAscii, 100);
+        longUtf8 = repeat(shortUtf8, 100);
+        longAsciiFirst = longAscii + longUtf8;
+    }
+
+    @Benchmark
+    public String shortAscii() {
+        return encode(shortAscii);
+    }
+
+    @Benchmark
+    public String shortUtf8() {
+        return encode(shortUtf8);
+    }
+
+    @Benchmark
+    public String shortAsciiFirst() {
+        return encode(shortAsciiFirst);
+    }
+
+    @Benchmark
+    public String longAscii() {
+        return encode(longAscii);
+    }
+
+    @Benchmark
+    public String longUtf8() {
+        return encode(longUtf8);
+    }
+
+    @Benchmark
+    public String longAsciiFirst() {
+        return encode(longAsciiFirst);
+    }
+
+    private static String encode(String s) {
+        QueryStringEncoder encoder = new QueryStringEncoder(""); // empty base URI
+        encoder.addParam(s, s); // the same string is used as both name and value
+        return encoder.toString();
+    }
+
+    private static String repeat(String s, int num) {
+        StringBuilder sb = new StringBuilder(num * s.length()); // presized to the exact result length
+        for (int i = 0; i < num; i++) {
+            sb.append(s);
+        }
+        return sb.toString();
+    }
+}