Fixes in QueryStringDecoder
Motivation: QueryStringDecoder has several problems: - doesn't decode correctly path part with `+` (plus) sign in it, - doesn't cut a `fragment` (after `#`) from query string (see RFC 3986), - doesn't work correctly with encoding, - treat `%%` as a percent character escaping (it's don't described in RFC). Modifications: - leave `+` chars in a `path` part of uri string, - ignore `fragment` part (after `#`), - correctly work with encoding. - don't treat `%%` as escaping for the `%`. Result: Fixed issues from #6745.
This commit is contained in:
parent
4aa8002596
commit
270e9d66c5
|
@ -19,16 +19,20 @@ import io.netty.util.CharsetUtil;
|
|||
|
||||
import java.net.URI;
|
||||
import java.net.URLDecoder;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.CharacterCodingException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static io.netty.util.internal.ObjectUtil.checkNotNull;
|
||||
import static io.netty.util.internal.ObjectUtil.checkPositive;
|
||||
import static io.netty.util.internal.StringUtil.EMPTY_STRING;
|
||||
import static io.netty.util.internal.ObjectUtil.*;
|
||||
import static io.netty.util.internal.StringUtil.*;
|
||||
|
||||
/**
|
||||
* Splits an HTTP query string into a path string and key-value parameter pairs.
|
||||
|
@ -63,11 +67,10 @@ public class QueryStringDecoder {
|
|||
|
||||
private final Charset charset;
|
||||
private final String uri;
|
||||
private final boolean hasPath;
|
||||
private final int maxParams;
|
||||
private int pathEndIdx;
|
||||
private String path;
|
||||
private Map<String, List<String>> params;
|
||||
private int nParams;
|
||||
|
||||
/**
|
||||
* Creates a new decoder that decodes the specified URI. The decoder will
|
||||
|
@ -106,14 +109,12 @@ public class QueryStringDecoder {
|
|||
* specified charset.
|
||||
*/
|
||||
public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
|
||||
checkNotNull(uri, "uri");
|
||||
checkNotNull(charset, "charset");
|
||||
checkPositive(maxParams, "maxParams");
|
||||
this.uri = checkNotNull(uri, "uri");
|
||||
this.charset = checkNotNull(charset, "charset");
|
||||
this.maxParams = checkPositive(maxParams, "maxParams");
|
||||
|
||||
this.uri = uri;
|
||||
this.charset = charset;
|
||||
this.maxParams = maxParams;
|
||||
this.hasPath = hasPath;
|
||||
// `-1` means that path end index will be initialized lazily
|
||||
pathEndIdx = hasPath ? -1 : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -137,22 +138,21 @@ public class QueryStringDecoder {
|
|||
* specified charset.
|
||||
*/
|
||||
public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
|
||||
checkNotNull(uri, "uri");
|
||||
checkNotNull(charset, "charset");
|
||||
checkPositive(maxParams, "maxParams");
|
||||
|
||||
String rawPath = uri.getRawPath();
|
||||
if (rawPath != null) {
|
||||
hasPath = true;
|
||||
} else {
|
||||
if (rawPath == null) {
|
||||
rawPath = EMPTY_STRING;
|
||||
hasPath = false;
|
||||
}
|
||||
String rawQuery = uri.getRawQuery();
|
||||
// Also take care of cut of things like "http://localhost"
|
||||
this.uri = uri.getRawQuery() == null? rawPath : rawPath + '?' + uri.getRawQuery();
|
||||
this.uri = rawQuery == null? rawPath : rawPath + '?' + rawQuery;
|
||||
this.charset = checkNotNull(charset, "charset");
|
||||
this.maxParams = checkPositive(maxParams, "maxParams");
|
||||
pathEndIdx = rawPath.length();
|
||||
}
|
||||
|
||||
this.charset = charset;
|
||||
this.maxParams = maxParams;
|
||||
@Override
|
||||
public String toString() {
|
||||
return uri();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -167,12 +167,7 @@ public class QueryStringDecoder {
|
|||
*/
|
||||
public String path() {
|
||||
if (path == null) {
|
||||
if (!hasPath) {
|
||||
path = EMPTY_STRING;
|
||||
} else {
|
||||
int pathEndPos = uri.indexOf('?');
|
||||
path = decodeComponent(pathEndPos < 0 ? uri : uri.substring(0, pathEndPos), charset);
|
||||
}
|
||||
path = decodeComponent(uri, 0, pathEndIdx(), charset, true);
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
@ -182,80 +177,76 @@ public class QueryStringDecoder {
|
|||
*/
|
||||
public Map<String, List<String>> parameters() {
|
||||
if (params == null) {
|
||||
if (hasPath) {
|
||||
int pathEndPos = uri.indexOf('?');
|
||||
if (pathEndPos >= 0 && pathEndPos < uri.length() - 1) {
|
||||
decodeParams(uri.substring(pathEndPos + 1));
|
||||
} else {
|
||||
params = Collections.emptyMap();
|
||||
}
|
||||
} else {
|
||||
if (uri.isEmpty()) {
|
||||
params = Collections.emptyMap();
|
||||
} else {
|
||||
decodeParams(uri);
|
||||
}
|
||||
}
|
||||
params = decodeParams(uri, pathEndIdx(), charset, maxParams);
|
||||
}
|
||||
return params;
|
||||
}
|
||||
|
||||
private void decodeParams(String s) {
|
||||
Map<String, List<String>> params = this.params = new LinkedHashMap<String, List<String>>();
|
||||
nParams = 0;
|
||||
String name = null;
|
||||
int pos = 0; // Beginning of the unprocessed region
|
||||
int i; // End of the unprocessed region
|
||||
char c; // Current character
|
||||
for (i = 0; i < s.length(); i++) {
|
||||
c = s.charAt(i);
|
||||
if (c == '=' && name == null) {
|
||||
if (pos != i) {
|
||||
name = decodeComponent(s.substring(pos, i), charset);
|
||||
}
|
||||
pos = i + 1;
|
||||
// http://www.w3.org/TR/html401/appendix/notes.html#h-B.2.2
|
||||
} else if (c == '&' || c == ';') {
|
||||
if (name == null && pos != i) {
|
||||
// We haven't seen an `=' so far but moved forward.
|
||||
// Must be a param of the form '&a&' so add it with
|
||||
// an empty value.
|
||||
if (!addParam(params, decodeComponent(s.substring(pos, i), charset), EMPTY_STRING)) {
|
||||
return;
|
||||
}
|
||||
} else if (name != null) {
|
||||
if (!addParam(params, name, decodeComponent(s.substring(pos, i), charset))) {
|
||||
return;
|
||||
}
|
||||
name = null;
|
||||
}
|
||||
pos = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (pos != i) { // Are there characters we haven't dealt with?
|
||||
if (name == null) { // Yes and we haven't seen any `='.
|
||||
addParam(params, decodeComponent(s.substring(pos, i), charset), EMPTY_STRING);
|
||||
} else { // Yes and this must be the last value.
|
||||
addParam(params, name, decodeComponent(s.substring(pos, i), charset));
|
||||
}
|
||||
} else if (name != null) { // Have we seen a name without value?
|
||||
addParam(params, name, "");
|
||||
private int pathEndIdx() {
|
||||
if (pathEndIdx == -1) {
|
||||
pathEndIdx = findPathEndIndex(uri);
|
||||
}
|
||||
return pathEndIdx;
|
||||
}
|
||||
|
||||
private boolean addParam(Map<String, List<String>> params, String name, String value) {
|
||||
if (nParams >= maxParams) {
|
||||
private static Map<String, List<String>> decodeParams(String s, int from, Charset charset, int paramsLimit) {
|
||||
int len = s.length();
|
||||
if (from >= len) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
if (s.charAt(from) == '?') {
|
||||
from++;
|
||||
}
|
||||
Map<String, List<String>> params = new LinkedHashMap<String, List<String>>();
|
||||
int nameStart = from;
|
||||
int valueStart = -1;
|
||||
int i;
|
||||
loop:
|
||||
for (i = from; i < len; i++) {
|
||||
switch (s.charAt(i)) {
|
||||
case '=':
|
||||
if (nameStart == i) {
|
||||
nameStart = i + 1;
|
||||
} else if (valueStart < nameStart) {
|
||||
valueStart = i + 1;
|
||||
}
|
||||
break;
|
||||
case '&':
|
||||
case ';':
|
||||
if (addParam(s, nameStart, valueStart, i, params, charset)) {
|
||||
paramsLimit--;
|
||||
if (paramsLimit == 0) {
|
||||
return params;
|
||||
}
|
||||
}
|
||||
nameStart = i + 1;
|
||||
break;
|
||||
case '#':
|
||||
break loop;
|
||||
default:
|
||||
// continue
|
||||
}
|
||||
}
|
||||
addParam(s, nameStart, valueStart, i, params, charset);
|
||||
return params;
|
||||
}
|
||||
|
||||
private static boolean addParam(String s, int nameStart, int valueStart, int valueEnd,
|
||||
Map<String, List<String>> params, Charset charset) {
|
||||
if (nameStart >= valueEnd) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (valueStart <= nameStart) {
|
||||
valueStart = valueEnd + 1;
|
||||
}
|
||||
String name = decodeComponent(s, nameStart, valueStart - 1, charset, false);
|
||||
String value = decodeComponent(s, valueStart, valueEnd, charset, false);
|
||||
List<String> values = params.get(name);
|
||||
if (values == null) {
|
||||
values = new ArrayList<String>(1); // Often there's only 1 value.
|
||||
params.put(name, values);
|
||||
}
|
||||
values.add(value);
|
||||
nParams ++;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -284,7 +275,7 @@ public class QueryStringDecoder {
|
|||
* {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
|
||||
* <p>
|
||||
* This is essentially equivalent to calling
|
||||
* {@link URLDecoder#decode(String, String) URLDecoder.decode(s, charset.name())}
|
||||
* {@link URLDecoder#decode(String, String)}
|
||||
* except that it's over 2x faster and generates less garbage for the GC.
|
||||
* Actually this function doesn't allocate any memory if there's nothing
|
||||
* to decode, the argument itself is returned.
|
||||
|
@ -300,56 +291,80 @@ public class QueryStringDecoder {
|
|||
if (s == null) {
|
||||
return EMPTY_STRING;
|
||||
}
|
||||
final int size = s.length();
|
||||
boolean modified = false;
|
||||
for (int i = 0; i < size; i++) {
|
||||
final char c = s.charAt(i);
|
||||
if (c == '%' || c == '+') {
|
||||
modified = true;
|
||||
return decodeComponent(s, 0, s.length(), charset, false);
|
||||
}
|
||||
|
||||
private static String decodeComponent(String s, int from, int toExcluded, Charset charset, boolean isPath) {
|
||||
int len = toExcluded - from;
|
||||
if (len <= 0) {
|
||||
return EMPTY_STRING;
|
||||
}
|
||||
int firstEscaped = -1;
|
||||
for (int i = from; i < toExcluded; i++) {
|
||||
char c = s.charAt(i);
|
||||
if (c == '%' || c == '+' && !isPath) {
|
||||
firstEscaped = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!modified) {
|
||||
return s;
|
||||
if (firstEscaped == -1) {
|
||||
return s.substring(from, toExcluded);
|
||||
}
|
||||
final byte[] buf = new byte[size];
|
||||
int pos = 0; // position in `buf'.
|
||||
for (int i = 0; i < size; i++) {
|
||||
|
||||
CharsetDecoder decoder = CharsetUtil.decoder(charset);
|
||||
|
||||
// Each encoded byte takes 3 characters (e.g. "%20")
|
||||
int decodedCapacity = (toExcluded - firstEscaped) / 3;
|
||||
ByteBuffer byteBuf = ByteBuffer.allocate(decodedCapacity);
|
||||
CharBuffer charBuf = CharBuffer.allocate(decodedCapacity);
|
||||
|
||||
StringBuilder strBuf = new StringBuilder(len);
|
||||
strBuf.append(s, from, firstEscaped);
|
||||
|
||||
for (int i = firstEscaped; i < toExcluded; i++) {
|
||||
char c = s.charAt(i);
|
||||
switch (c) {
|
||||
case '+':
|
||||
buf[pos++] = ' '; // "+" -> " "
|
||||
break;
|
||||
case '%':
|
||||
if (i == size - 1) {
|
||||
throw new IllegalArgumentException("unterminated escape"
|
||||
+ " sequence at end of string: " + s);
|
||||
}
|
||||
c = s.charAt(++i);
|
||||
if (c == '%') {
|
||||
buf[pos++] = '%'; // "%%" -> "%"
|
||||
break;
|
||||
}
|
||||
if (i == size - 1) {
|
||||
throw new IllegalArgumentException("partial escape"
|
||||
+ " sequence at end of string: " + s);
|
||||
}
|
||||
c = decodeHexNibble(c);
|
||||
final char c2 = decodeHexNibble(s.charAt(++i));
|
||||
if (c == Character.MAX_VALUE || c2 == Character.MAX_VALUE) {
|
||||
throw new IllegalArgumentException(
|
||||
"invalid escape sequence `%" + s.charAt(i - 1)
|
||||
+ s.charAt(i) + "' at index " + (i - 2)
|
||||
+ " of: " + s);
|
||||
}
|
||||
c = (char) (c * 16 + c2);
|
||||
// Fall through.
|
||||
default:
|
||||
buf[pos++] = (byte) c;
|
||||
break;
|
||||
if (c != '%') {
|
||||
strBuf.append(c != '+' || isPath? c : SPACE);
|
||||
continue;
|
||||
}
|
||||
|
||||
byteBuf.clear();
|
||||
do {
|
||||
byteBuf.put(decodeHexByte(s, i, toExcluded));
|
||||
i += 3;
|
||||
} while (i < toExcluded && s.charAt(i) == '%');
|
||||
i--;
|
||||
|
||||
byteBuf.flip();
|
||||
charBuf.clear();
|
||||
CoderResult result = decoder.reset().decode(byteBuf, charBuf, true);
|
||||
try {
|
||||
if (!result.isUnderflow()) {
|
||||
result.throwException();
|
||||
}
|
||||
result = decoder.flush(charBuf);
|
||||
if (!result.isUnderflow()) {
|
||||
result.throwException();
|
||||
}
|
||||
} catch (CharacterCodingException ex) {
|
||||
throw new IllegalStateException(ex);
|
||||
}
|
||||
strBuf.append(charBuf.flip());
|
||||
}
|
||||
return new String(buf, 0, pos, charset);
|
||||
return strBuf.toString();
|
||||
}
|
||||
|
||||
private static byte decodeHexByte(String s, int pos, int len) {
|
||||
if (pos + 2 >= len) {
|
||||
throw new IllegalArgumentException("unterminated escape sequence at index " + pos + " of: " + s);
|
||||
}
|
||||
int hi = decodeHexNibble(s.charAt(pos + 1));
|
||||
int lo = decodeHexNibble(s.charAt(pos + 2));
|
||||
if (hi == -1 || lo == -1) {
|
||||
throw new IllegalArgumentException(
|
||||
"invalid escape sequence '" + s.substring(pos, pos + 3) + "' at index " + pos + " of: " + s);
|
||||
}
|
||||
return (byte) ((hi << 4) + lo);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -357,17 +372,29 @@ public class QueryStringDecoder {
|
|||
* @param c The ASCII character of the hexadecimal number to decode.
|
||||
* Must be in the range {@code [0-9a-fA-F]}.
|
||||
* @return The hexadecimal value represented in the ASCII character
|
||||
* given, or {@link Character#MAX_VALUE} if the character is invalid.
|
||||
* given, or {@code -1} if the character is invalid.
|
||||
*/
|
||||
private static char decodeHexNibble(final char c) {
|
||||
private static int decodeHexNibble(final char c) {
|
||||
if ('0' <= c && c <= '9') {
|
||||
return (char) (c - '0');
|
||||
} else if ('a' <= c && c <= 'f') {
|
||||
return (char) (c - 'a' + 10);
|
||||
} else if ('A' <= c && c <= 'F') {
|
||||
return (char) (c - 'A' + 10);
|
||||
} else {
|
||||
return Character.MAX_VALUE;
|
||||
return c - '0';
|
||||
}
|
||||
if ('a' <= c && c <= 'f') {
|
||||
return c - 'a' + 0xA;
|
||||
}
|
||||
if ('A' <= c && c <= 'F') {
|
||||
return c - 'A' + 0xA;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
private static int findPathEndIndex(String uri) {
|
||||
int len = uri.length();
|
||||
for (int i = 0; i < len; i++) {
|
||||
char c = uri.charAt(i);
|
||||
if (c == '?' || c == '#') {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ public class QueryStringDecoderTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testBasic() throws Exception {
|
||||
public void testBasic() {
|
||||
QueryStringDecoder d;
|
||||
|
||||
d = new QueryStringDecoder("/foo");
|
||||
|
@ -94,10 +94,17 @@ public class QueryStringDecoderTest {
|
|||
Assert.assertEquals(2, d.parameters().get("a").size());
|
||||
Assert.assertEquals("1=", d.parameters().get("a").get(0));
|
||||
Assert.assertEquals("=2", d.parameters().get("a").get(1));
|
||||
|
||||
d = new QueryStringDecoder("/foo?abc=1%2023&abc=124%20");
|
||||
Assert.assertEquals("/foo", d.path());
|
||||
Assert.assertEquals(1, d.parameters().size());
|
||||
Assert.assertEquals(2, d.parameters().get("abc").size());
|
||||
Assert.assertEquals("1 23", d.parameters().get("abc").get(0));
|
||||
Assert.assertEquals("124 ", d.parameters().get("abc").get(1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExotic() throws Exception {
|
||||
public void testExotic() {
|
||||
assertQueryString("", "");
|
||||
assertQueryString("foo", "foo");
|
||||
assertQueryString("foo", "foo?");
|
||||
|
@ -123,7 +130,38 @@ public class QueryStringDecoderTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testHashDos() throws Exception {
|
||||
public void testPathSpecific() {
|
||||
// decode escaped characters
|
||||
Assert.assertEquals("/foo bar/", new QueryStringDecoder("/foo%20bar/?").path());
|
||||
Assert.assertEquals("/foo\r\n\\bar/", new QueryStringDecoder("/foo%0D%0A\\bar/?").path());
|
||||
|
||||
// a 'fragment' after '#' should be cuted (see RFC 3986)
|
||||
Assert.assertEquals("", new QueryStringDecoder("#123").path());
|
||||
Assert.assertEquals("foo", new QueryStringDecoder("foo?bar#anchor").path());
|
||||
Assert.assertEquals("/foo-bar", new QueryStringDecoder("/foo-bar#anchor").path());
|
||||
Assert.assertEquals("/foo-bar", new QueryStringDecoder("/foo-bar#a#b?c=d").path());
|
||||
|
||||
// '+' is not escape ' ' for the path
|
||||
Assert.assertEquals("+", new QueryStringDecoder("+").path());
|
||||
Assert.assertEquals("/foo+bar/", new QueryStringDecoder("/foo+bar/?").path());
|
||||
Assert.assertEquals("/foo++", new QueryStringDecoder("/foo++?index.php").path());
|
||||
Assert.assertEquals("/foo +", new QueryStringDecoder("/foo%20+?index.php").path());
|
||||
Assert.assertEquals("/foo+ ", new QueryStringDecoder("/foo+%20").path());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExcludeFragment() {
|
||||
// a 'fragment' after '#' should be cuted (see RFC 3986)
|
||||
Assert.assertEquals("a", new QueryStringDecoder("?a#anchor").parameters().keySet().iterator().next());
|
||||
Assert.assertEquals("b", new QueryStringDecoder("?a=b#anchor").parameters().get("a").get(0));
|
||||
Assert.assertTrue(new QueryStringDecoder("?#").parameters().isEmpty());
|
||||
Assert.assertTrue(new QueryStringDecoder("?#anchor").parameters().isEmpty());
|
||||
Assert.assertTrue(new QueryStringDecoder("#?a=b#anchor").parameters().isEmpty());
|
||||
Assert.assertTrue(new QueryStringDecoder("?#a=b#anchor").parameters().isEmpty());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHashDos() {
|
||||
StringBuilder buf = new StringBuilder();
|
||||
buf.append('?');
|
||||
for (int i = 0; i < 65536; i ++) {
|
||||
|
@ -137,7 +175,7 @@ public class QueryStringDecoderTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testHasPath() throws Exception {
|
||||
public void testHasPath() {
|
||||
QueryStringDecoder decoder = new QueryStringDecoder("1=2", false);
|
||||
Assert.assertEquals("", decoder.path());
|
||||
Map<String, List<String>> params = decoder.parameters();
|
||||
|
@ -161,16 +199,18 @@ public class QueryStringDecoderTest {
|
|||
// Encoded -> Decoded or error message substring
|
||||
"", "",
|
||||
"foo", "foo",
|
||||
"f%%b", "f%b",
|
||||
"f+o", "f o",
|
||||
"f++", "f ",
|
||||
"fo%", "unterminated escape sequence",
|
||||
"fo%", "unterminated escape sequence at index 2 of: fo%",
|
||||
"%42", "B",
|
||||
"%5f", "_",
|
||||
"f%4", "partial escape sequence",
|
||||
"%x2", "invalid escape sequence `%x2' at index 0 of: %x2",
|
||||
"%4x", "invalid escape sequence `%4x' at index 0 of: %4x",
|
||||
"f%4", "unterminated escape sequence at index 1 of: f%4",
|
||||
"%x2", "invalid escape sequence '%x2' at index 0 of: %x2",
|
||||
"%4x", "invalid escape sequence '%4x' at index 0 of: %4x",
|
||||
"Caff%C3%A9", caffe,
|
||||
"случайный праздник", "случайный праздник",
|
||||
"случайный%20праздник", "случайный праздник",
|
||||
"случайный%20праздник%20%E2%98%BA", "случайный праздник ☺",
|
||||
};
|
||||
for (int i = 0; i < tests.length; i += 2) {
|
||||
final String encoded = tests[i];
|
||||
|
@ -179,9 +219,7 @@ public class QueryStringDecoderTest {
|
|||
final String decoded = QueryStringDecoder.decodeComponent(encoded);
|
||||
Assert.assertEquals(expected, decoded);
|
||||
} catch (IllegalArgumentException e) {
|
||||
Assert.assertTrue("String \"" + e.getMessage() + "\" does"
|
||||
+ " not contain \"" + expected + '"',
|
||||
e.getMessage().contains(expected));
|
||||
Assert.assertEquals(expected, e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user