Separate out query string encoding for non-encoded strings. (#9887)

Motivation:

Currently, characters are appended to the encoded string char-by-char even when no encoding is needed. We can instead separate out a code path that appends the entire string in one go, for better `StringBuilder` allocation performance.

Modification:

Only enter the char-by-char loop once a character that requires encoding is found.

Result:

The results are somewhat noisy on my hot laptop, but the biggest impact is on long strings, where the change both reduces resizes of the buffer and reduces the complexity of the loop. I don't think there's a significant downside for the cases that hit the slow path, though.

After
```
Benchmark                                     Mode  Cnt   Score   Error   Units
QueryStringEncoderBenchmark.longAscii        thrpt    6   1.406 ± 0.069  ops/us
QueryStringEncoderBenchmark.longAsciiFirst   thrpt    6   0.046 ± 0.001  ops/us
QueryStringEncoderBenchmark.longUtf8         thrpt    6   0.046 ± 0.001  ops/us
QueryStringEncoderBenchmark.shortAscii       thrpt    6  15.781 ± 0.949  ops/us
QueryStringEncoderBenchmark.shortAsciiFirst  thrpt    6   3.171 ± 0.232  ops/us
QueryStringEncoderBenchmark.shortUtf8        thrpt    6   3.900 ± 0.667  ops/us
```

Before
```
Benchmark                                     Mode  Cnt   Score    Error   Units
QueryStringEncoderBenchmark.longAscii        thrpt    6   0.444 ±  0.072  ops/us
QueryStringEncoderBenchmark.longAsciiFirst   thrpt    6   0.043 ±  0.002  ops/us
QueryStringEncoderBenchmark.longUtf8         thrpt    6   0.047 ±  0.001  ops/us
QueryStringEncoderBenchmark.shortAscii       thrpt    6  16.503 ±  1.015  ops/us
QueryStringEncoderBenchmark.shortAsciiFirst  thrpt    6   3.316 ±  0.154  ops/us
QueryStringEncoderBenchmark.shortUtf8        thrpt    6   3.776 ±  0.956  ops/us
```
This commit is contained in:
Anuraag Agrawal 2019-12-20 16:51:18 +09:00 committed by Norman Maurer
parent bc32efe396
commit ee206b6ba8
3 changed files with 122 additions and 3 deletions

View File

@ -156,6 +156,25 @@ public class QueryStringEncoder {
*/
private void encodeUtf8Component(CharSequence s) {
    final int len = s.length();
    int i = 0;
    // Scan for the first character for which dontNeedEncoding(...) is false.
    while (i < len) {
        if (!dontNeedEncoding(s.charAt(i))) {
            // Found one: hand off to the three-argument overload, passing the
            // index of the offending character, and stop scanning.
            encodeUtf8Component(s, i, len);
            return;
        }
        i++;
    }
    // Every character passed the check, so append the whole sequence in one call.
    uriBuilder.append(s);
}
/**
 * Appends the leading part of {@code s} (indices {@code [0, encodingStart)}) to
 * {@code uriBuilder} unchanged, then passes the remainder to
 * {@link #encodeUtf8ComponentSlow(CharSequence, int, int)}.
 *
 * @param s             the component being written
 * @param encodingStart index from which {@code encodeUtf8ComponentSlow} takes over
 * @param len           end index (exclusive) handed to {@code encodeUtf8ComponentSlow}
 */
private void encodeUtf8Component(CharSequence s, int encodingStart, int len) {
    final boolean hasPlainPrefix = encodingStart > 0;
    if (hasPlainPrefix) {
        // Copy the prefix with a single ranged append rather than char-by-char.
        uriBuilder.append(s, 0, encodingStart);
    }
    encodeUtf8ComponentSlow(s, encodingStart, len);
}
private void encodeUtf8ComponentSlow(CharSequence s, int start, int len) {
for (int i = start; i < len; i++) {
char c = s.charAt(i);
if (c < 0x80) {
if (dontNeedEncoding(c)) {

View File

@ -15,12 +15,12 @@
*/
package io.netty.handler.codec.http;
import java.net.URI;
import java.nio.charset.Charset;
import org.junit.Assert;
import org.junit.Test;
import java.net.URI;
import java.nio.charset.Charset;
public class QueryStringEncoderTest {
@Test
@ -37,6 +37,11 @@ public class QueryStringEncoderTest {
Assert.assertEquals("/foo/\u00A5?a=%C2%A5", e.toString());
Assert.assertEquals(new URI("/foo/\u00A5?a=%C2%A5"), e.toUri());
e = new QueryStringEncoder("/foo/\u00A5");
e.addParam("a", "abc\u00A5");
Assert.assertEquals("/foo/\u00A5?a=abc%C2%A5", e.toString());
Assert.assertEquals(new URI("/foo/\u00A5?a=abc%C2%A5"), e.toUri());
e = new QueryStringEncoder("/foo");
e.addParam("a", "1");
e.addParam("b", "2");

View File

@ -0,0 +1,95 @@
/*
* Copyright 2019 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.handler.codec.http;
import io.netty.microbench.util.AbstractMicrobenchmark;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;
import java.util.concurrent.TimeUnit;
/**
 * JMH benchmark that runs {@link QueryStringEncoder} over six inputs: a short and a
 * long variant each of an ASCII-only string, a non-ASCII string, and an ASCII string
 * followed by a non-ASCII one.
 */
@Threads(1)
@Warmup(iterations = 3)
@Measurement(iterations = 3)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public class QueryStringEncoderBenchmark extends AbstractMicrobenchmark {

    // Short inputs.
    private String shortAscii;
    private String shortUtf8;
    private String shortAsciiFirst;

    // Long inputs: the short ones repeated 100 times.
    private String longAscii;
    private String longUtf8;
    private String longAsciiFirst;

    /** Builds all benchmark inputs. */
    @Setup
    public void setUp() {
        // Construct from char arrays so the inputs are not the constant-pool literals;
        // in practice at least parameter values are commonly not constants.
        final String ascii = new String("foo".toCharArray());
        final String utf8 = new String("ほげほげ".toCharArray());
        final String asciiRepeated = repeat(ascii, 100);
        final String utf8Repeated = repeat(utf8, 100);

        shortAscii = ascii;
        shortUtf8 = utf8;
        shortAsciiFirst = ascii + utf8;

        longAscii = asciiRepeated;
        longUtf8 = utf8Repeated;
        longAsciiFirst = asciiRepeated + utf8Repeated;
    }

    @Benchmark
    public String shortAscii() {
        return encode(shortAscii);
    }

    @Benchmark
    public String shortUtf8() {
        return encode(shortUtf8);
    }

    @Benchmark
    public String shortAsciiFirst() {
        return encode(shortAsciiFirst);
    }

    @Benchmark
    public String longAscii() {
        return encode(longAscii);
    }

    @Benchmark
    public String longUtf8() {
        return encode(longUtf8);
    }

    @Benchmark
    public String longAsciiFirst() {
        return encode(longAsciiFirst);
    }

    /**
     * Creates an encoder with an empty base URI, adds one parameter that uses
     * {@code s} as both its name and its value, and returns the encoder's string form.
     */
    private static String encode(String s) {
        final QueryStringEncoder queryEncoder = new QueryStringEncoder("");
        queryEncoder.addParam(s, s);
        return queryEncoder.toString();
    }

    /** Returns {@code s} concatenated with itself {@code num} times. */
    private static String repeat(String s, int num) {
        // Pre-size the builder to the exact final length.
        final StringBuilder joined = new StringBuilder(num * s.length());
        for (int remaining = num; remaining > 0; remaining--) {
            joined.append(s);
        }
        return joined.toString();
    }
}