From 5c19c3e6cef8d620b195c00b83ac931c0d1aca0e Mon Sep 17 00:00:00 2001 From: liyixin <601947961@qq.com> Date: Sat, 21 Sep 2019 03:07:13 +0800 Subject: [PATCH] Optimize the QueryStringEncoder performance (#9568) Motivation: Optimize the QueryStringEncoder for lower memory overhead and higher encoding speed. Modification: Encode the space to %20 directly, and reuse the uriBuilder rather than creating a new one. Result: Improved performance --- .../codec/http/QueryStringEncoder.java | 163 +++++++++++++++--- 1 file changed, 137 insertions(+), 26 deletions(-) diff --git a/codec-http/src/main/java/io/netty/handler/codec/http/QueryStringEncoder.java b/codec-http/src/main/java/io/netty/handler/codec/http/QueryStringEncoder.java index 1d192b91aa..1a2fe3dd97 100644 --- a/codec-http/src/main/java/io/netty/handler/codec/http/QueryStringEncoder.java +++ b/codec-http/src/main/java/io/netty/handler/codec/http/QueryStringEncoder.java @@ -17,12 +17,15 @@ package io.netty.handler.codec.http; import static java.util.Objects.requireNonNull; -import java.io.UnsupportedEncodingException; +import io.netty.buffer.ByteBufUtil; +import io.netty.util.CharsetUtil; +import io.netty.util.internal.StringUtil; + import java.net.URI; import java.net.URISyntaxException; import java.net.URLEncoder; import java.nio.charset.Charset; -import java.nio.charset.UnsupportedCharsetException; +import java.util.Objects; /** * Creates an URL-encoded URI from a path string and key-value parameter pairs. 
@@ -33,13 +36,16 @@ import java.nio.charset.UnsupportedCharsetException; * encoder.addParam("recipient", "world"); * assert encoder.toString().equals("/hello?recipient=world"); * </pre> + * * @see QueryStringDecoder */ public class QueryStringEncoder { - private final String charsetName; + private final Charset charset; private final StringBuilder uriBuilder; private boolean hasParams; + private static final byte WRITE_UTF_UNKNOWN = (byte) '?'; + private static final char[] CHAR_MAP = "0123456789ABCDEF".toCharArray(); /** * Creates a new encoder that encodes a URI that starts with the specified @@ -54,8 +60,9 @@ public class QueryStringEncoder { * path string in the specified charset. */ public QueryStringEncoder(String uri, Charset charset) { + Objects.requireNonNull(charset, "charset"); uriBuilder = new StringBuilder(uri); - charsetName = charset.name(); + this.charset = CharsetUtil.UTF_8.equals(charset) ? null : charset; } /** @@ -69,10 +76,19 @@ public class QueryStringEncoder { uriBuilder.append('?'); hasParams = true; } - appendComponent(name, charsetName, uriBuilder); + + encodeComponent(name); if (value != null) { uriBuilder.append('='); - appendComponent(value, charsetName, uriBuilder); + encodeComponent(value); + } + } + + private void encodeComponent(CharSequence s) { + if (charset == null) { + encodeUtf8Component(s); + } else { + encodeNonUtf8Component(s); } } @@ -95,28 +111,123 @@ public class QueryStringEncoder { return uriBuilder.toString(); } - private static void appendComponent(String s, String charset, StringBuilder sb) { - try { - s = URLEncoder.encode(s, charset); - } catch (UnsupportedEncodingException ignored) { - throw new UnsupportedCharsetException(charset); - } - // replace all '+' with "%20" - int idx = s.indexOf('+'); - if (idx == -1) { - sb.append(s); - return; - } - sb.append(s, 0, idx).append("%20"); - int size = s.length(); - idx++; - for (; idx < size; idx++) { - char c = s.charAt(idx); - if (c != '+') { - sb.append(c); + /** + * Encode 
the String as per RFC 3986, Section 2. + * <p>

+ * This differs slightly from the JDK's encode method, {@link URLEncoder#encode(String, String)}: + * the JDK's encoder encodes a space as {@code +}, whereas this method encodes it directly as {@code %20}. + * Beyond that, this method reuses the {@link #uriBuilder} of this class rather than creating a new one, + * and thus generates less garbage for the GC. + * + * @param s The String to encode + */ + private void encodeNonUtf8Component(CharSequence s) { + //Don't allocate memory until needed + char[] buf = null; + + for (int i = 0, len = s.length(); i < len;) { + char c = s.charAt(i); + if (dontNeedEncoding(c)) { + uriBuilder.append(c); + i++; } else { - sb.append("%20"); + int index = 0; + if (buf == null) { + buf = new char[s.length() - i]; + } + + do { + buf[index] = c; + index++; + i++; + } while (i < s.length() && !dontNeedEncoding(c = s.charAt(i))); + + byte[] bytes = new String(buf, 0, index).getBytes(charset); + + for (byte b : bytes) { + appendEncoded(b); + } } } } + + /** + * @see ByteBufUtil#writeUtf8(io.netty.buffer.ByteBuf, CharSequence, int, int) + */ + private void encodeUtf8Component(CharSequence s) { + for (int i = 0, len = s.length(); i < len; i++) { + char c = s.charAt(i); + if (c < 0x80) { + if (dontNeedEncoding(c)) { + uriBuilder.append(c); + } else { + appendEncoded(c); + } + } else if (c < 0x800) { + appendEncoded(0xc0 | (c >> 6)); + appendEncoded(0x80 | (c & 0x3f)); + } else if (StringUtil.isSurrogate(c)) { + if (!Character.isHighSurrogate(c)) { + appendEncoded(WRITE_UTF_UNKNOWN); + continue; + } + // Surrogate Pair consumes 2 characters. + if (++i == s.length()) { + appendEncoded(WRITE_UTF_UNKNOWN); + break; + } + // Extra method to allow inlining the rest of writeUtf8 which is the most likely code path. 
+ writeUtf8Surrogate(c, s.charAt(i)); + } else { + appendEncoded(0xe0 | (c >> 12)); + appendEncoded(0x80 | ((c >> 6) & 0x3f)); + appendEncoded(0x80 | (c & 0x3f)); + } + } + } + + private void writeUtf8Surrogate(char c, char c2) { + if (!Character.isLowSurrogate(c2)) { + appendEncoded(WRITE_UTF_UNKNOWN); + appendEncoded(Character.isHighSurrogate(c2) ? WRITE_UTF_UNKNOWN : c2); + return; + } + int codePoint = Character.toCodePoint(c, c2); + // See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630. + appendEncoded(0xf0 | (codePoint >> 18)); + appendEncoded(0x80 | ((codePoint >> 12) & 0x3f)); + appendEncoded(0x80 | ((codePoint >> 6) & 0x3f)); + appendEncoded(0x80 | (codePoint & 0x3f)); + } + + private void appendEncoded(int b) { + uriBuilder.append('%').append(forDigit(b >> 4)).append(forDigit(b)); + } + + /** + * Converts the low four bits of the given value to an uppercase hexadecimal character. + * + * @param digit the number to convert to a character; only its low four bits are used. + * @return the {@code char} representation of the specified digit + * in hexadecimal. + */ + private static char forDigit(int digit) { + return CHAR_MAP[digit & 0xF]; + } + + /** + * Determines whether the given character is an unreserved character. + * <p>

+ * Unreserved characters do not need to be encoded, and include uppercase and lowercase + * letters, decimal digits, hyphen, period, underscore, and tilde. + * <p>

+ * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + * + * @param ch the character to test + * @return {@code true} if {@code ch} needs no encoding; the check accepts {@code *} but not {@code ~} + */ + private static boolean dontNeedEncoding(char ch) { + return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9' + || ch == '-' || ch == '_' || ch == '.' || ch == '*'; + } }