Fixes in QueryStringDecoder

Motivation: QueryStringDecoder has several problems: - doesn't decode correctly path part with `+` (plus) sign in it, - doesn't cut a `fragment` (after `#`) from query string (see RFC 3986), - doesn't work correctly with encoding, - treat `%%` as a percent character escaping (it's don't described in RFC). Modifications: - leave `+` chars in a `path` part of uri string, - ignore `fragment` part (after `#`), - correctly work with encoding. - don't treat `%%` as escaping for the `%`. Result: Fixed issues from #6745.
2017-05-31 10:04:55 +05:00 · 2017-05-31 10:04:55 +05:00 · 270e9d66c5
commit 270e9d66c5
parent 4aa8002596
2 changed files with 218 additions and 153 deletions
--- a/codec-http/src/main/java/io/netty/handler/codec/http/QueryStringDecoder.java
+++ b/codec-http/src/main/java/io/netty/handler/codec/http/QueryStringDecoder.java
@ -19,16 +19,20 @@ import io.netty.util.CharsetUtil;

 import java.net.URI;
 import java.net.URLDecoder;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
 import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;

-import static io.netty.util.internal.ObjectUtil.checkNotNull;
-import static io.netty.util.internal.ObjectUtil.checkPositive;
-import static io.netty.util.internal.StringUtil.EMPTY_STRING;
+import static io.netty.util.internal.ObjectUtil.*;
+import static io.netty.util.internal.StringUtil.*;

 /**
 * Splits an HTTP query string into a path string and key-value parameter pairs.
@ -63,11 +67,10 @@ public class QueryStringDecoder {

    private final Charset charset;
    private final String uri;
-    private final boolean hasPath;
    private final int maxParams;
+    private int pathEndIdx;
    private String path;
    private Map<String, List<String>> params;
-    private int nParams;

    /**
     * Creates a new decoder that decodes the specified URI. The decoder will
@ -106,14 +109,12 @@ public class QueryStringDecoder {
     * specified charset.
     */
    public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
-        checkNotNull(uri, "uri");
-        checkNotNull(charset, "charset");
-        checkPositive(maxParams, "maxParams");
+        this.uri = checkNotNull(uri, "uri");
+        this.charset = checkNotNull(charset, "charset");
+        this.maxParams = checkPositive(maxParams, "maxParams");

-        this.uri = uri;
-        this.charset = charset;
-        this.maxParams = maxParams;
-        this.hasPath = hasPath;
+        // `-1` means that path end index will be initialized lazily
+        pathEndIdx = hasPath ? -1 : 0;
    }

    /**
@ -137,22 +138,21 @@ public class QueryStringDecoder {
     * specified charset.
     */
    public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
-        checkNotNull(uri, "uri");
-        checkNotNull(charset, "charset");
-        checkPositive(maxParams, "maxParams");
-
        String rawPath = uri.getRawPath();
-        if (rawPath != null) {
-            hasPath = true;
-        } else {
+        if (rawPath == null) {
            rawPath = EMPTY_STRING;
-            hasPath = false;
        }
+        String rawQuery = uri.getRawQuery();
        // Also take care of cut of things like "http://localhost"
-        this.uri = uri.getRawQuery() == null? rawPath : rawPath + '?' + uri.getRawQuery();
+        this.uri = rawQuery == null? rawPath : rawPath + '?' + rawQuery;
+        this.charset = checkNotNull(charset, "charset");
+        this.maxParams = checkPositive(maxParams, "maxParams");
+        pathEndIdx = rawPath.length();
+    }

-        this.charset = charset;
-        this.maxParams = maxParams;
+    @Override
+    public String toString() {
+        return uri();
    }

    /**
@ -167,12 +167,7 @@ public class QueryStringDecoder {
     */
    public String path() {
        if (path == null) {
-            if (!hasPath) {
-                path = EMPTY_STRING;
-            } else {
-                int pathEndPos = uri.indexOf('?');
-                path = decodeComponent(pathEndPos < 0 ? uri : uri.substring(0, pathEndPos), charset);
-            }
+            path = decodeComponent(uri, 0, pathEndIdx(), charset, true);
        }
        return path;
    }
@ -182,80 +177,76 @@ public class QueryStringDecoder {
     */
    public Map<String, List<String>> parameters() {
        if (params == null) {
-            if (hasPath) {
-                int pathEndPos = uri.indexOf('?');
-                if (pathEndPos >= 0 && pathEndPos < uri.length() - 1) {
-                    decodeParams(uri.substring(pathEndPos + 1));
-                } else {
-                    params = Collections.emptyMap();
-                }
-            } else {
-                if (uri.isEmpty()) {
-                    params = Collections.emptyMap();
-                } else {
-                    decodeParams(uri);
-                }
-            }
+            params = decodeParams(uri, pathEndIdx(), charset, maxParams);
        }
        return params;
    }

-    private void decodeParams(String s) {
-        Map<String, List<String>> params = this.params = new LinkedHashMap<String, List<String>>();
-        nParams = 0;
-        String name = null;
-        int pos = 0; // Beginning of the unprocessed region
-        int i;       // End of the unprocessed region
-        char c;  // Current character
-        for (i = 0; i < s.length(); i++) {
-            c = s.charAt(i);
-            if (c == '=' && name == null) {
-                if (pos != i) {
-                    name = decodeComponent(s.substring(pos, i), charset);
-                }
-                pos = i + 1;
-            // http://www.w3.org/TR/html401/appendix/notes.html#h-B.2.2
-            } else if (c == '&' || c == ';') {
-                if (name == null && pos != i) {
-                    // We haven't seen an `=' so far but moved forward.
-                    // Must be a param of the form '&a&' so add it with
-                    // an empty value.
-                    if (!addParam(params, decodeComponent(s.substring(pos, i), charset), EMPTY_STRING)) {
-                        return;
-                    }
-                } else if (name != null) {
-                    if (!addParam(params, name, decodeComponent(s.substring(pos, i), charset))) {
-                        return;
-                    }
-                    name = null;
-                }
-                pos = i + 1;
-            }
-        }
-
-        if (pos != i) {  // Are there characters we haven't dealt with?
-            if (name == null) {     // Yes and we haven't seen any `='.
-                addParam(params, decodeComponent(s.substring(pos, i), charset), EMPTY_STRING);
-            } else {                // Yes and this must be the last value.
-                addParam(params, name, decodeComponent(s.substring(pos, i), charset));
-            }
-        } else if (name != null) {  // Have we seen a name without value?
-            addParam(params, name, "");
+    private int pathEndIdx() {
+        if (pathEndIdx == -1) {
+            pathEndIdx = findPathEndIndex(uri);
        }
+        return pathEndIdx;
    }

-    private boolean addParam(Map<String, List<String>> params, String name, String value) {
-        if (nParams >= maxParams) {
+    private static Map<String, List<String>> decodeParams(String s, int from, Charset charset, int paramsLimit) {
+        int len = s.length();
+        if (from >= len) {
+            return Collections.emptyMap();
+        }
+        if (s.charAt(from) == '?') {
+            from++;
+        }
+        Map<String, List<String>> params = new LinkedHashMap<String, List<String>>();
+        int nameStart = from;
+        int valueStart = -1;
+        int i;
+        loop:
+        for (i = from; i < len; i++) {
+            switch (s.charAt(i)) {
+            case '=':
+                if (nameStart == i) {
+                    nameStart = i + 1;
+                } else if (valueStart < nameStart) {
+                    valueStart = i + 1;
+                }
+                break;
+            case '&':
+            case ';':
+                if (addParam(s, nameStart, valueStart, i, params, charset)) {
+                    paramsLimit--;
+                    if (paramsLimit == 0) {
+                        return params;
+                    }
+                }
+                nameStart = i + 1;
+                break;
+            case '#':
+                break loop;
+            default:
+                // continue
+            }
+        }
+        addParam(s, nameStart, valueStart, i, params, charset);
+        return params;
+    }
+
+    private static boolean addParam(String s, int nameStart, int valueStart, int valueEnd,
+                                    Map<String, List<String>> params, Charset charset) {
+        if (nameStart >= valueEnd) {
            return false;
        }
-
+        if (valueStart <= nameStart) {
+            valueStart = valueEnd + 1;
+        }
+        String name = decodeComponent(s, nameStart, valueStart - 1, charset, false);
+        String value = decodeComponent(s, valueStart, valueEnd, charset, false);
        List<String> values = params.get(name);
        if (values == null) {
            values = new ArrayList<String>(1);  // Often there's only 1 value.
            params.put(name, values);
        }
        values.add(value);
-        nParams ++;
        return true;
    }

@ -284,7 +275,7 @@ public class QueryStringDecoder {
     * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
     * <p>
     * This is essentially equivalent to calling
-     *   {@link URLDecoder#decode(String, String) URLDecoder.decode(s, charset.name())}
+     *   {@link URLDecoder#decode(String, String)}
     * except that it's over 2x faster and generates less garbage for the GC.
     * Actually this function doesn't allocate any memory if there's nothing
     * to decode, the argument itself is returned.
@ -300,56 +291,80 @@ public class QueryStringDecoder {
        if (s == null) {
            return EMPTY_STRING;
        }
-        final int size = s.length();
-        boolean modified = false;
-        for (int i = 0; i < size; i++) {
-            final char c = s.charAt(i);
-            if (c == '%' || c == '+') {
-                modified = true;
+        return decodeComponent(s, 0, s.length(), charset, false);
+    }
+
+    private static String decodeComponent(String s, int from, int toExcluded, Charset charset, boolean isPath) {
+        int len = toExcluded - from;
+        if (len <= 0) {
+            return EMPTY_STRING;
+        }
+        int firstEscaped = -1;
+        for (int i = from; i < toExcluded; i++) {
+            char c = s.charAt(i);
+            if (c == '%' || c == '+' && !isPath) {
+                firstEscaped = i;
                break;
            }
        }
-        if (!modified) {
-            return s;
+        if (firstEscaped == -1) {
+            return s.substring(from, toExcluded);
        }
-        final byte[] buf = new byte[size];
-        int pos = 0;  // position in `buf'.
-        for (int i = 0; i < size; i++) {
+
+        CharsetDecoder decoder = CharsetUtil.decoder(charset);
+
+        // Each encoded byte takes 3 characters (e.g. "%20")
+        int decodedCapacity = (toExcluded - firstEscaped) / 3;
+        ByteBuffer byteBuf = ByteBuffer.allocate(decodedCapacity);
+        CharBuffer charBuf = CharBuffer.allocate(decodedCapacity);
+
+        StringBuilder strBuf = new StringBuilder(len);
+        strBuf.append(s, from, firstEscaped);
+
+        for (int i = firstEscaped; i < toExcluded; i++) {
            char c = s.charAt(i);
-            switch (c) {
-                case '+':
-                    buf[pos++] = ' ';  // "+" -> " "
-                    break;
-                case '%':
-                    if (i == size - 1) {
-                        throw new IllegalArgumentException("unterminated escape"
-                                + " sequence at end of string: " + s);
-                    }
-                    c = s.charAt(++i);
-                    if (c == '%') {
-                        buf[pos++] = '%';  // "%%" -> "%"
-                        break;
-                    }
-                    if (i == size - 1) {
-                        throw new IllegalArgumentException("partial escape"
-                                + " sequence at end of string: " + s);
-                    }
-                    c = decodeHexNibble(c);
-                    final char c2 = decodeHexNibble(s.charAt(++i));
-                    if (c == Character.MAX_VALUE || c2 == Character.MAX_VALUE) {
-                        throw new IllegalArgumentException(
-                                "invalid escape sequence `%" + s.charAt(i - 1)
-                                + s.charAt(i) + "' at index " + (i - 2)
-                                + " of: " + s);
-                    }
-                    c = (char) (c * 16 + c2);
-                    // Fall through.
-                default:
-                    buf[pos++] = (byte) c;
-                    break;
+            if (c != '%') {
+                strBuf.append(c != '+' || isPath? c : SPACE);
+                continue;
            }
+
+            byteBuf.clear();
+            do {
+                byteBuf.put(decodeHexByte(s, i, toExcluded));
+                i += 3;
+            } while (i < toExcluded && s.charAt(i) == '%');
+            i--;
+
+            byteBuf.flip();
+            charBuf.clear();
+            CoderResult result = decoder.reset().decode(byteBuf, charBuf, true);
+            try {
+                if (!result.isUnderflow()) {
+                    result.throwException();
+                }
+                result = decoder.flush(charBuf);
+                if (!result.isUnderflow()) {
+                    result.throwException();
+                }
+            } catch (CharacterCodingException ex) {
+                throw new IllegalStateException(ex);
+            }
+            strBuf.append(charBuf.flip());
        }
-        return new String(buf, 0, pos, charset);
+        return strBuf.toString();
+    }
+
+    private static byte decodeHexByte(String s, int pos, int len) {
+        if (pos + 2 >= len) {
+            throw new IllegalArgumentException("unterminated escape sequence at index " + pos + " of: " + s);
+        }
+        int hi = decodeHexNibble(s.charAt(pos + 1));
+        int lo = decodeHexNibble(s.charAt(pos + 2));
+        if (hi == -1 || lo == -1) {
+            throw new IllegalArgumentException(
+                    "invalid escape sequence '" + s.substring(pos, pos + 3) + "' at index " + pos + " of: " + s);
+        }
+        return (byte) ((hi << 4) + lo);
    }

    /**
@ -357,17 +372,29 @@ public class QueryStringDecoder {
     * @param c The ASCII character of the hexadecimal number to decode.
     * Must be in the range {@code [0-9a-fA-F]}.
     * @return The hexadecimal value represented in the ASCII character
-     * given, or {@link Character#MAX_VALUE} if the character is invalid.
+     * given, or {@code -1} if the character is invalid.
     */
-    private static char decodeHexNibble(final char c) {
+    private static int decodeHexNibble(final char c) {
        if ('0' <= c && c <= '9') {
-            return (char) (c - '0');
-        } else if ('a' <= c && c <= 'f') {
-            return (char) (c - 'a' + 10);
-        } else if ('A' <= c && c <= 'F') {
-            return (char) (c - 'A' + 10);
-        } else {
-            return Character.MAX_VALUE;
+            return c - '0';
        }
+        if ('a' <= c && c <= 'f') {
+            return c - 'a' + 0xA;
+        }
+        if ('A' <= c && c <= 'F') {
+            return c - 'A' + 0xA;
+        }
+        return -1;
+    }
+
+    private static int findPathEndIndex(String uri) {
+        int len = uri.length();
+        for (int i = 0; i < len; i++) {
+            char c = uri.charAt(i);
+            if (c == '?' || c == '#') {
+                return i;
+            }
+        }
+        return len;
    }
 }
--- a/codec-http/src/test/java/io/netty/handler/codec/http/QueryStringDecoderTest.java
+++ b/codec-http/src/test/java/io/netty/handler/codec/http/QueryStringDecoderTest.java
@ -35,7 +35,7 @@ public class QueryStringDecoderTest {
    }

    @Test
-    public void testBasic() throws Exception {
+    public void testBasic() {
        QueryStringDecoder d;

        d = new QueryStringDecoder("/foo");
@ -94,10 +94,17 @@ public class QueryStringDecoderTest {
        Assert.assertEquals(2, d.parameters().get("a").size());
        Assert.assertEquals("1=", d.parameters().get("a").get(0));
        Assert.assertEquals("=2", d.parameters().get("a").get(1));
+
+        d = new QueryStringDecoder("/foo?abc=1%2023&abc=124%20");
+        Assert.assertEquals("/foo", d.path());
+        Assert.assertEquals(1, d.parameters().size());
+        Assert.assertEquals(2, d.parameters().get("abc").size());
+        Assert.assertEquals("1 23", d.parameters().get("abc").get(0));
+        Assert.assertEquals("124 ", d.parameters().get("abc").get(1));
    }

    @Test
-    public void testExotic() throws Exception {
+    public void testExotic() {
        assertQueryString("", "");
        assertQueryString("foo", "foo");
        assertQueryString("foo", "foo?");
@ -123,7 +130,38 @@ public class QueryStringDecoderTest {
    }

    @Test
-    public void testHashDos() throws Exception {
+    public void testPathSpecific() {
+        // decode escaped characters
+        Assert.assertEquals("/foo bar/", new QueryStringDecoder("/foo%20bar/?").path());
+        Assert.assertEquals("/foo\r\n\\bar/", new QueryStringDecoder("/foo%0D%0A\\bar/?").path());
+
+        // a 'fragment' after '#' should be cuted (see RFC 3986)
+        Assert.assertEquals("", new QueryStringDecoder("#123").path());
+        Assert.assertEquals("foo", new QueryStringDecoder("foo?bar#anchor").path());
+        Assert.assertEquals("/foo-bar", new QueryStringDecoder("/foo-bar#anchor").path());
+        Assert.assertEquals("/foo-bar", new QueryStringDecoder("/foo-bar#a#b?c=d").path());
+
+        // '+' is not escape ' ' for the path
+        Assert.assertEquals("+", new QueryStringDecoder("+").path());
+        Assert.assertEquals("/foo+bar/", new QueryStringDecoder("/foo+bar/?").path());
+        Assert.assertEquals("/foo++", new QueryStringDecoder("/foo++?index.php").path());
+        Assert.assertEquals("/foo +", new QueryStringDecoder("/foo%20+?index.php").path());
+        Assert.assertEquals("/foo+ ", new QueryStringDecoder("/foo+%20").path());
+    }
+
+    @Test
+    public void testExcludeFragment() {
+        // a 'fragment' after '#' should be cuted (see RFC 3986)
+        Assert.assertEquals("a", new QueryStringDecoder("?a#anchor").parameters().keySet().iterator().next());
+        Assert.assertEquals("b", new QueryStringDecoder("?a=b#anchor").parameters().get("a").get(0));
+        Assert.assertTrue(new QueryStringDecoder("?#").parameters().isEmpty());
+        Assert.assertTrue(new QueryStringDecoder("?#anchor").parameters().isEmpty());
+        Assert.assertTrue(new QueryStringDecoder("#?a=b#anchor").parameters().isEmpty());
+        Assert.assertTrue(new QueryStringDecoder("?#a=b#anchor").parameters().isEmpty());
+    }
+
+    @Test
+    public void testHashDos() {
        StringBuilder buf = new StringBuilder();
        buf.append('?');
        for (int i = 0; i < 65536; i ++) {
@ -137,7 +175,7 @@ public class QueryStringDecoderTest {
    }

    @Test
-    public void testHasPath() throws Exception {
+    public void testHasPath() {
        QueryStringDecoder decoder = new QueryStringDecoder("1=2", false);
        Assert.assertEquals("", decoder.path());
        Map<String, List<String>> params = decoder.parameters();
@ -161,16 +199,18 @@ public class QueryStringDecoderTest {
            // Encoded   ->   Decoded or error message substring
            "",               "",
            "foo",            "foo",
-            "f%%b",           "f%b",
            "f+o",            "f o",
            "f++",            "f  ",
-            "fo%",            "unterminated escape sequence",
+            "fo%",            "unterminated escape sequence at index 2 of: fo%",
            "%42",            "B",
            "%5f",            "_",
-            "f%4",            "partial escape sequence",
-            "%x2",            "invalid escape sequence `%x2' at index 0 of: %x2",
-            "%4x",            "invalid escape sequence `%4x' at index 0 of: %4x",
+            "f%4",            "unterminated escape sequence at index 1 of: f%4",
+            "%x2",            "invalid escape sequence '%x2' at index 0 of: %x2",
+            "%4x",            "invalid escape sequence '%4x' at index 0 of: %4x",
            "Caff%C3%A9",     caffe,
+            "случайный праздник",               "случайный праздник",
+            "случайный%20праздник",             "случайный праздник",
+            "случайный%20праздник%20%E2%98%BA", "случайный праздник ☺",
        };
        for (int i = 0; i < tests.length; i += 2) {
            final String encoded = tests[i];
@ -179,9 +219,7 @@ public class QueryStringDecoderTest {
                final String decoded = QueryStringDecoder.decodeComponent(encoded);
                Assert.assertEquals(expected, decoded);
            } catch (IllegalArgumentException e) {
-                Assert.assertTrue("String \"" + e.getMessage() + "\" does"
-                                  + " not contain \"" + expected + '"',
-                                  e.getMessage().contains(expected));
+                Assert.assertEquals(expected, e.getMessage());
            }
        }
    }