Optimize the QueryStringEncoder performance (#9568)
Motivation: Optimize the QueryStringEncoder for lower memory overhead and higher encoding speed. Modification: Encode the space to %20 directly (instead of encoding it to + and rewriting it afterwards), and reuse the uriBuilder rather than creating a new StringBuilder. Result: Improved performance
This commit is contained in:
parent
39cafcb05c
commit
07fe1a299a
@ -15,14 +15,15 @@
|
|||||||
*/
|
*/
|
||||||
package io.netty.handler.codec.http;
|
package io.netty.handler.codec.http;
|
||||||
|
|
||||||
|
import io.netty.buffer.ByteBufUtil;
|
||||||
|
import io.netty.util.CharsetUtil;
|
||||||
import io.netty.util.internal.ObjectUtil;
|
import io.netty.util.internal.ObjectUtil;
|
||||||
|
import io.netty.util.internal.StringUtil;
|
||||||
|
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
import java.net.URLEncoder;
|
import java.net.URLEncoder;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.nio.charset.UnsupportedCharsetException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates an URL-encoded URI from a path string and key-value parameter pairs.
|
* Creates an URL-encoded URI from a path string and key-value parameter pairs.
|
||||||
@ -33,13 +34,16 @@ import java.nio.charset.UnsupportedCharsetException;
|
|||||||
* encoder.addParam("recipient", "world");
|
* encoder.addParam("recipient", "world");
|
||||||
* assert encoder.toString().equals("/hello?recipient=world");
|
* assert encoder.toString().equals("/hello?recipient=world");
|
||||||
* </pre>
|
* </pre>
|
||||||
|
*
|
||||||
* @see QueryStringDecoder
|
* @see QueryStringDecoder
|
||||||
*/
|
*/
|
||||||
public class QueryStringEncoder {
|
public class QueryStringEncoder {
|
||||||
|
|
||||||
private final String charsetName;
|
private final Charset charset;
|
||||||
private final StringBuilder uriBuilder;
|
private final StringBuilder uriBuilder;
|
||||||
private boolean hasParams;
|
private boolean hasParams;
|
||||||
|
private static final byte WRITE_UTF_UNKNOWN = (byte) '?';
|
||||||
|
private static final char[] CHAR_MAP = "0123456789ABCDEF".toCharArray();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new encoder that encodes a URI that starts with the specified
|
* Creates a new encoder that encodes a URI that starts with the specified
|
||||||
@ -54,8 +58,9 @@ public class QueryStringEncoder {
|
|||||||
* path string in the specified charset.
|
* path string in the specified charset.
|
||||||
*/
|
*/
|
||||||
public QueryStringEncoder(String uri, Charset charset) {
|
public QueryStringEncoder(String uri, Charset charset) {
|
||||||
|
ObjectUtil.checkNotNull(charset, "charset");
|
||||||
uriBuilder = new StringBuilder(uri);
|
uriBuilder = new StringBuilder(uri);
|
||||||
charsetName = charset.name();
|
this.charset = CharsetUtil.UTF_8.equals(charset) ? null : charset;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -69,10 +74,19 @@ public class QueryStringEncoder {
|
|||||||
uriBuilder.append('?');
|
uriBuilder.append('?');
|
||||||
hasParams = true;
|
hasParams = true;
|
||||||
}
|
}
|
||||||
appendComponent(name, charsetName, uriBuilder);
|
|
||||||
|
encodeComponent(name);
|
||||||
if (value != null) {
|
if (value != null) {
|
||||||
uriBuilder.append('=');
|
uriBuilder.append('=');
|
||||||
appendComponent(value, charsetName, uriBuilder);
|
encodeComponent(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void encodeComponent(CharSequence s) {
|
||||||
|
if (charset == null) {
|
||||||
|
encodeUtf8Component(s);
|
||||||
|
} else {
|
||||||
|
encodeNonUtf8Component(s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -95,28 +109,123 @@ public class QueryStringEncoder {
|
|||||||
return uriBuilder.toString();
|
return uriBuilder.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void appendComponent(String s, String charset, StringBuilder sb) {
|
/**
|
||||||
try {
|
* Encode the String as per RFC 3986, Section 2.
|
||||||
s = URLEncoder.encode(s, charset);
|
* <p>
|
||||||
} catch (UnsupportedEncodingException ignored) {
|
* This differs slightly from the JDK's encode method, {@link URLEncoder#encode(String, String)}.
|
||||||
throw new UnsupportedCharsetException(charset);
|
* The JDK's encoder encodes a space as {@code +}, whereas this method encodes it directly as {@code %20}.
|
||||||
}
|
* Beyond that, this method reuses the {@link #uriBuilder} of this class rather than creating a new one,
|
||||||
// replace all '+' with "%20"
|
* thus generating less garbage for the GC.
|
||||||
int idx = s.indexOf('+');
|
*
|
||||||
if (idx == -1) {
|
* @param s The String to encode
|
||||||
sb.append(s);
|
*/
|
||||||
return;
|
private void encodeNonUtf8Component(CharSequence s) {
|
||||||
}
|
//Don't allocate memory until needed
|
||||||
sb.append(s, 0, idx).append("%20");
|
char[] buf = null;
|
||||||
int size = s.length();
|
|
||||||
idx++;
|
for (int i = 0, len = s.length(); i < len;) {
|
||||||
for (; idx < size; idx++) {
|
char c = s.charAt(i);
|
||||||
char c = s.charAt(idx);
|
if (dontNeedEncoding(c)) {
|
||||||
if (c != '+') {
|
uriBuilder.append(c);
|
||||||
sb.append(c);
|
i++;
|
||||||
} else {
|
} else {
|
||||||
sb.append("%20");
|
int index = 0;
|
||||||
|
if (buf == null) {
|
||||||
|
buf = new char[s.length() - i];
|
||||||
|
}
|
||||||
|
|
||||||
|
do {
|
||||||
|
buf[index] = c;
|
||||||
|
index++;
|
||||||
|
i++;
|
||||||
|
} while (i < s.length() && !dontNeedEncoding(c = s.charAt(i)));
|
||||||
|
|
||||||
|
byte[] bytes = new String(buf, 0, index).getBytes(charset);
|
||||||
|
|
||||||
|
for (byte b : bytes) {
|
||||||
|
appendEncoded(b);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see ByteBufUtil#writeUtf8(io.netty.buffer.ByteBuf, CharSequence, int, int)
|
||||||
|
*/
|
||||||
|
private void encodeUtf8Component(CharSequence s) {
|
||||||
|
for (int i = 0, len = s.length(); i < len; i++) {
|
||||||
|
char c = s.charAt(i);
|
||||||
|
if (c < 0x80) {
|
||||||
|
if (dontNeedEncoding(c)) {
|
||||||
|
uriBuilder.append(c);
|
||||||
|
} else {
|
||||||
|
appendEncoded(c);
|
||||||
|
}
|
||||||
|
} else if (c < 0x800) {
|
||||||
|
appendEncoded(0xc0 | (c >> 6));
|
||||||
|
appendEncoded(0x80 | (c & 0x3f));
|
||||||
|
} else if (StringUtil.isSurrogate(c)) {
|
||||||
|
if (!Character.isHighSurrogate(c)) {
|
||||||
|
appendEncoded(WRITE_UTF_UNKNOWN);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Surrogate Pair consumes 2 characters.
|
||||||
|
if (++i == s.length()) {
|
||||||
|
appendEncoded(WRITE_UTF_UNKNOWN);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Extra method to allow inlining the rest of writeUtf8 which is the most likely code path.
|
||||||
|
writeUtf8Surrogate(c, s.charAt(i));
|
||||||
|
} else {
|
||||||
|
appendEncoded(0xe0 | (c >> 12));
|
||||||
|
appendEncoded(0x80 | ((c >> 6) & 0x3f));
|
||||||
|
appendEncoded(0x80 | (c & 0x3f));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void writeUtf8Surrogate(char c, char c2) {
|
||||||
|
if (!Character.isLowSurrogate(c2)) {
|
||||||
|
appendEncoded(WRITE_UTF_UNKNOWN);
|
||||||
|
appendEncoded(Character.isHighSurrogate(c2) ? WRITE_UTF_UNKNOWN : c2);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int codePoint = Character.toCodePoint(c, c2);
|
||||||
|
// See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630.
|
||||||
|
appendEncoded(0xf0 | (codePoint >> 18));
|
||||||
|
appendEncoded(0x80 | ((codePoint >> 12) & 0x3f));
|
||||||
|
appendEncoded(0x80 | ((codePoint >> 6) & 0x3f));
|
||||||
|
appendEncoded(0x80 | (codePoint & 0x3f));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void appendEncoded(int b) {
|
||||||
|
uriBuilder.append('%').append(forDigit(b >> 4)).append(forDigit(b));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts the low four bits of the given value to an uppercase hexadecimal character.
|
||||||
|
*
|
||||||
|
* @param digit the number to convert to a character.
|
||||||
|
* @return the {@code char} representation of the specified digit
|
||||||
|
* in hexadecimal.
|
||||||
|
*/
|
||||||
|
private static char forDigit(int digit) {
|
||||||
|
return CHAR_MAP[digit & 0xF];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines whether the given character is an unreserved character.
|
||||||
|
* <p>
|
||||||
|
* unreserved characters do not need to be encoded, and include uppercase and lowercase
|
||||||
|
* letters, decimal digits, hyphen, period, underscore, and tilde.
|
||||||
|
* <p>
|
||||||
|
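* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" (NOTE: the check below accepts "*" and does not accept "~", which matches what {@link URLEncoder} leaves unencoded rather than this RFC 3986 set)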
|
||||||
|
*
|
||||||
|
* @param ch the character to check
|
||||||
|
* @return {@code true} if {@code ch} can be appended as-is, {@code false} if it must be percent-encoded
|
||||||
|
*/
|
||||||
|
private static boolean dontNeedEncoding(char ch) {
|
||||||
|
return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9'
|
||||||
|
|| ch == '-' || ch == '_' || ch == '.' || ch == '*';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user