2008-11-19 08:22:15 +01:00
|
|
|
/*
|
2012-06-04 22:31:44 +02:00
|
|
|
* Copyright 2012 The Netty Project
|
2009-06-19 19:48:17 +02:00
|
|
|
*
|
2011-12-09 06:18:34 +01:00
|
|
|
* The Netty Project licenses this file to you under the Apache License,
|
|
|
|
* version 2.0 (the "License"); you may not use this file except in compliance
|
|
|
|
* with the License. You may obtain a copy of the License at:
|
2008-11-19 08:22:15 +01:00
|
|
|
*
|
2012-06-04 22:31:44 +02:00
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
2008-11-19 08:22:15 +01:00
|
|
|
*
|
2009-08-28 09:15:49 +02:00
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
2011-12-09 06:18:34 +01:00
|
|
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
2009-08-28 09:15:49 +02:00
|
|
|
* License for the specific language governing permissions and limitations
|
|
|
|
* under the License.
|
2008-11-19 08:22:15 +01:00
|
|
|
*/
|
2011-12-09 04:38:59 +01:00
|
|
|
package io.netty.handler.codec.http;
|
2008-11-19 08:22:15 +01:00
|
|
|
|
2012-05-31 20:32:42 +02:00
|
|
|
import io.netty.util.CharsetUtil;
|
|
|
|
|
2008-11-19 08:22:15 +01:00
|
|
|
import java.net.URI;
|
2011-01-12 11:11:32 +01:00
|
|
|
import java.net.URLDecoder;
|
2009-12-29 06:52:00 +01:00
|
|
|
import java.nio.charset.Charset;
|
2008-11-19 08:22:15 +01:00
|
|
|
import java.util.ArrayList;
|
2010-06-14 12:57:48 +02:00
|
|
|
import java.util.Collections;
|
|
|
|
import java.util.LinkedHashMap;
|
2008-11-19 08:22:15 +01:00
|
|
|
import java.util.List;
|
|
|
|
import java.util.Map;
|
|
|
|
|
|
|
|
/**
|
2009-06-19 16:49:26 +02:00
|
|
|
* Splits an HTTP query string into a path string and key-value parameter pairs.
|
2009-06-19 17:05:47 +02:00
|
|
|
* This decoder is for one time use only. Create a new instance for each URI:
|
|
|
|
* <pre>
|
2011-11-22 20:14:10 +01:00
|
|
|
* {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("/hello?recipient=world&x=1;y=2");
|
2009-06-19 17:05:47 +02:00
|
|
|
* assert decoder.getPath().equals("/hello");
|
2011-12-30 09:58:51 +01:00
|
|
|
* assert decoder.getParameters().get("recipient").get(0).equals("world");
|
|
|
|
* assert decoder.getParameters().get("x").get(0).equals("1");
|
|
|
|
* assert decoder.getParameters().get("y").get(0).equals("2");
|
2009-06-19 17:05:47 +02:00
|
|
|
* </pre>
|
2011-12-30 09:58:51 +01:00
|
|
|
*
|
|
|
|
* This decoder can also decode the content of an HTTP POST request whose
|
|
|
|
* content type is <tt>application/x-www-form-urlencoded</tt>:
|
|
|
|
* <pre>
|
|
|
|
* {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("recipient=world&x=1;y=2", false);
|
|
|
|
* ...
|
|
|
|
* </pre>
|
|
|
|
*
|
|
|
|
* <h3>HashDOS vulnerability fix</h3>
|
|
|
|
*
|
2012-06-08 12:28:12 +02:00
|
|
|
* As a workaround to the <a href="http://goo.gl/I4Nky">HashDOS</a> vulnerability, the decoder
|
|
|
|
* limits the maximum number of decoded key-value parameter pairs, up to {@literal 1024} by
|
|
|
|
* default, and you can configure it when you construct the decoder by passing an additional
|
|
|
|
* integer parameter.
|
2011-12-30 09:58:51 +01:00
|
|
|
*
|
2009-06-19 17:05:47 +02:00
|
|
|
* @see QueryStringEncoder
|
2008-11-19 08:22:15 +01:00
|
|
|
*/
|
|
|
|
public class QueryStringDecoder {
|
|
|
|
|
2011-12-30 09:58:51 +01:00
|
|
|
private static final int DEFAULT_MAX_PARAMS = 1024;
|
|
|
|
|
2009-12-29 06:52:00 +01:00
|
|
|
private final Charset charset;
|
2008-11-19 08:22:15 +01:00
|
|
|
private final String uri;
|
2011-12-30 09:58:51 +01:00
|
|
|
private final boolean hasPath;
|
|
|
|
private final int maxParams;
|
2008-11-19 08:22:15 +01:00
|
|
|
private String path;
|
2010-06-14 12:57:48 +02:00
|
|
|
private Map<String, List<String>> params;
|
2011-12-30 09:58:51 +01:00
|
|
|
private int nParams;
|
2008-11-19 08:22:15 +01:00
|
|
|
|
2009-06-19 16:49:26 +02:00
|
|
|
/**
|
|
|
|
* Creates a new decoder that decodes the specified URI. The decoder will
|
|
|
|
* assume that the query string is encoded in UTF-8.
|
|
|
|
*/
|
2008-11-19 08:22:15 +01:00
|
|
|
public QueryStringDecoder(String uri) {
|
2012-05-31 20:32:42 +02:00
|
|
|
this(uri, HttpConstants.DEFAULT_CHARSET);
|
2009-02-26 07:34:07 +01:00
|
|
|
}
|
|
|
|
|
2011-12-30 09:58:51 +01:00
|
|
|
/**
|
|
|
|
* Creates a new decoder that decodes the specified URI encoded in the
|
|
|
|
* specified charset.
|
|
|
|
*/
|
|
|
|
public QueryStringDecoder(String uri, boolean hasPath) {
|
2012-05-31 20:32:42 +02:00
|
|
|
this(uri, HttpConstants.DEFAULT_CHARSET, hasPath);
|
2011-12-30 09:58:51 +01:00
|
|
|
}
|
|
|
|
|
2009-06-19 16:49:26 +02:00
|
|
|
/**
|
|
|
|
* Creates a new decoder that decodes the specified URI encoded in the
|
|
|
|
* specified charset.
|
|
|
|
*/
|
2009-12-29 06:52:00 +01:00
|
|
|
public QueryStringDecoder(String uri, Charset charset) {
|
2011-12-30 09:58:51 +01:00
|
|
|
this(uri, charset, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Creates a new decoder that decodes the specified URI encoded in the
|
|
|
|
* specified charset.
|
|
|
|
*/
|
|
|
|
public QueryStringDecoder(String uri, Charset charset, boolean hasPath) {
|
|
|
|
this(uri, charset, hasPath, DEFAULT_MAX_PARAMS);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Creates a new decoder that decodes the specified URI encoded in the
|
|
|
|
* specified charset.
|
|
|
|
*/
|
|
|
|
public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
|
2009-02-26 07:34:07 +01:00
|
|
|
if (uri == null) {
|
2013-01-30 07:42:18 +01:00
|
|
|
throw new NullPointerException("getUri");
|
2009-02-26 07:34:07 +01:00
|
|
|
}
|
|
|
|
if (charset == null) {
|
|
|
|
throw new NullPointerException("charset");
|
|
|
|
}
|
2011-12-30 09:58:51 +01:00
|
|
|
if (maxParams <= 0) {
|
|
|
|
throw new IllegalArgumentException(
|
|
|
|
"maxParams: " + maxParams + " (expected: a positive integer)");
|
|
|
|
}
|
2009-02-26 07:34:07 +01:00
|
|
|
|
2013-09-12 14:09:41 +02:00
|
|
|
this.uri = uri;
|
2009-02-26 07:34:07 +01:00
|
|
|
this.charset = charset;
|
2011-12-30 09:58:51 +01:00
|
|
|
this.maxParams = maxParams;
|
|
|
|
this.hasPath = hasPath;
|
2008-11-19 08:22:15 +01:00
|
|
|
}
|
|
|
|
|
2009-06-19 16:49:26 +02:00
|
|
|
/**
|
|
|
|
* Creates a new decoder that decodes the specified URI. The decoder will
|
|
|
|
* assume that the query string is encoded in UTF-8.
|
|
|
|
*/
|
2009-02-26 07:34:07 +01:00
|
|
|
public QueryStringDecoder(URI uri) {
|
2012-05-31 20:32:42 +02:00
|
|
|
this(uri, HttpConstants.DEFAULT_CHARSET);
|
2009-02-26 07:34:07 +01:00
|
|
|
}
|
|
|
|
|
2009-06-19 16:49:26 +02:00
|
|
|
/**
|
|
|
|
* Creates a new decoder that decodes the specified URI encoded in the
|
|
|
|
* specified charset.
|
|
|
|
*/
|
2012-01-11 12:16:14 +01:00
|
|
|
public QueryStringDecoder(URI uri, Charset charset) {
|
2011-12-30 09:58:51 +01:00
|
|
|
this(uri, charset, DEFAULT_MAX_PARAMS);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Creates a new decoder that decodes the specified URI encoded in the
|
|
|
|
* specified charset.
|
|
|
|
*/
|
|
|
|
public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
|
2009-02-26 07:34:07 +01:00
|
|
|
if (uri == null) {
|
2013-01-30 07:42:18 +01:00
|
|
|
throw new NullPointerException("getUri");
|
2009-02-26 07:34:07 +01:00
|
|
|
}
|
|
|
|
if (charset == null) {
|
|
|
|
throw new NullPointerException("charset");
|
|
|
|
}
|
2011-12-30 09:58:51 +01:00
|
|
|
if (maxParams <= 0) {
|
|
|
|
throw new IllegalArgumentException(
|
|
|
|
"maxParams: " + maxParams + " (expected: a positive integer)");
|
|
|
|
}
|
2012-05-31 20:32:42 +02:00
|
|
|
|
2012-02-15 08:30:22 +01:00
|
|
|
String rawPath = uri.getRawPath();
|
|
|
|
if (rawPath != null) {
|
|
|
|
hasPath = true;
|
|
|
|
} else {
|
2012-02-18 23:03:13 +01:00
|
|
|
rawPath = "";
|
2012-02-15 08:30:22 +01:00
|
|
|
hasPath = false;
|
|
|
|
}
|
2012-05-31 20:32:42 +02:00
|
|
|
// Also take care of cut of things like "http://localhost"
|
2013-09-12 14:09:41 +02:00
|
|
|
this.uri = rawPath + '?' + uri.getRawQuery();
|
2009-02-26 07:34:07 +01:00
|
|
|
|
|
|
|
this.charset = charset;
|
2011-12-30 09:58:51 +01:00
|
|
|
this.maxParams = maxParams;
|
2008-11-19 08:22:15 +01:00
|
|
|
}
|
|
|
|
|
2009-06-19 16:49:26 +02:00
|
|
|
/**
|
|
|
|
* Returns the decoded path string of the URI.
|
|
|
|
*/
|
2013-01-17 06:48:03 +01:00
|
|
|
public String path() {
|
2010-06-14 12:57:48 +02:00
|
|
|
if (path == null) {
|
2011-12-30 09:58:51 +01:00
|
|
|
if (!hasPath) {
|
|
|
|
return path = "";
|
|
|
|
}
|
|
|
|
|
2010-06-14 12:57:48 +02:00
|
|
|
int pathEndPos = uri.indexOf('?');
|
|
|
|
if (pathEndPos < 0) {
|
|
|
|
path = uri;
|
2012-01-11 12:16:14 +01:00
|
|
|
} else {
|
2010-06-14 12:57:48 +02:00
|
|
|
return path = uri.substring(0, pathEndPos);
|
2008-11-19 08:22:15 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return path;
|
|
|
|
}
|
|
|
|
|
2009-06-19 16:49:26 +02:00
|
|
|
/**
|
|
|
|
* Returns the decoded key-value parameter pairs of the URI.
|
|
|
|
*/
|
2013-01-17 06:48:03 +01:00
|
|
|
public Map<String, List<String>> parameters() {
|
2010-06-14 12:57:48 +02:00
|
|
|
if (params == null) {
|
2011-12-30 09:58:51 +01:00
|
|
|
if (hasPath) {
|
2013-01-17 06:48:03 +01:00
|
|
|
int pathLength = path().length();
|
2011-12-30 09:58:51 +01:00
|
|
|
if (uri.length() == pathLength) {
|
|
|
|
return Collections.emptyMap();
|
|
|
|
}
|
|
|
|
decodeParams(uri.substring(pathLength + 1));
|
|
|
|
} else {
|
|
|
|
if (uri.isEmpty()) {
|
|
|
|
return Collections.emptyMap();
|
|
|
|
}
|
|
|
|
decodeParams(uri);
|
2008-11-19 08:22:15 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return params;
|
|
|
|
}
|
|
|
|
|
2011-12-30 09:58:51 +01:00
|
|
|
private void decodeParams(String s) {
|
|
|
|
Map<String, List<String>> params = this.params = new LinkedHashMap<String, List<String>>();
|
|
|
|
nParams = 0;
|
2010-06-14 12:57:48 +02:00
|
|
|
String name = null;
|
|
|
|
int pos = 0; // Beginning of the unprocessed region
|
|
|
|
int i; // End of the unprocessed region
|
2012-06-11 15:54:28 +02:00
|
|
|
char c; // Current character
|
2010-06-14 12:57:48 +02:00
|
|
|
for (i = 0; i < s.length(); i++) {
|
|
|
|
c = s.charAt(i);
|
|
|
|
if (c == '=' && name == null) {
|
|
|
|
if (pos != i) {
|
|
|
|
name = decodeComponent(s.substring(pos, i), charset);
|
|
|
|
}
|
|
|
|
pos = i + 1;
|
2013-09-12 14:09:41 +02:00
|
|
|
// http://www.w3.org/TR/html401/appendix/notes.html#h-B.2.2
|
|
|
|
} else if (c == '&' || c == ';') {
|
2010-06-14 12:57:48 +02:00
|
|
|
if (name == null && pos != i) {
|
|
|
|
// We haven't seen an `=' so far but moved forward.
|
|
|
|
// Must be a param of the form '&a&' so add it with
|
|
|
|
// an empty value.
|
2011-12-30 09:58:51 +01:00
|
|
|
if (!addParam(params, decodeComponent(s.substring(pos, i), charset), "")) {
|
|
|
|
return;
|
|
|
|
}
|
2010-06-14 12:57:48 +02:00
|
|
|
} else if (name != null) {
|
2011-12-30 09:58:51 +01:00
|
|
|
if (!addParam(params, name, decodeComponent(s.substring(pos, i), charset))) {
|
|
|
|
return;
|
|
|
|
}
|
2010-06-14 12:57:48 +02:00
|
|
|
name = null;
|
|
|
|
}
|
|
|
|
pos = i + 1;
|
|
|
|
}
|
2009-03-14 14:48:01 +01:00
|
|
|
}
|
2008-11-26 09:44:39 +01:00
|
|
|
|
2010-06-14 12:57:48 +02:00
|
|
|
if (pos != i) { // Are there characters we haven't dealt with?
|
|
|
|
if (name == null) { // Yes and we haven't seen any `='.
|
2012-06-11 15:54:28 +02:00
|
|
|
addParam(params, decodeComponent(s.substring(pos, i), charset), "");
|
2010-06-14 12:57:48 +02:00
|
|
|
} else { // Yes and this must be the last value.
|
2012-06-11 15:54:28 +02:00
|
|
|
addParam(params, name, decodeComponent(s.substring(pos, i), charset));
|
2009-02-12 05:39:17 +01:00
|
|
|
}
|
2010-06-14 12:57:48 +02:00
|
|
|
} else if (name != null) { // Have we seen a name without value?
|
2012-06-11 15:54:28 +02:00
|
|
|
addParam(params, name, "");
|
2008-11-19 08:22:15 +01:00
|
|
|
}
|
2011-12-30 09:58:51 +01:00
|
|
|
}
|
2010-06-14 12:57:48 +02:00
|
|
|
|
2011-12-30 09:58:51 +01:00
|
|
|
private boolean addParam(Map<String, List<String>> params, String name, String value) {
|
|
|
|
if (nParams >= maxParams) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
List<String> values = params.get(name);
|
|
|
|
if (values == null) {
|
|
|
|
values = new ArrayList<String>(1); // Often there's only 1 value.
|
|
|
|
params.put(name, values);
|
|
|
|
}
|
|
|
|
values.add(value);
|
|
|
|
nParams ++;
|
|
|
|
return true;
|
2008-11-19 08:22:15 +01:00
|
|
|
}
|
|
|
|
|
2010-12-04 08:12:03 +01:00
|
|
|
/**
|
|
|
|
* Decodes a bit of an URL encoded by a browser.
|
|
|
|
* <p>
|
2011-01-12 11:11:32 +01:00
|
|
|
* This is equivalent to calling {@link #decodeComponent(String, Charset)}
|
2010-12-04 08:12:03 +01:00
|
|
|
* with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
|
|
|
|
* @param s The string to decode (can be empty).
|
|
|
|
* @return The decoded string, or {@code s} if there's nothing to decode.
|
|
|
|
* If the string to decode is {@code null}, returns an empty string.
|
|
|
|
* @throws IllegalArgumentException if the string contains a malformed
|
|
|
|
* escape sequence.
|
|
|
|
*/
|
|
|
|
public static String decodeComponent(final String s) {
|
2012-05-31 20:32:42 +02:00
|
|
|
return decodeComponent(s, HttpConstants.DEFAULT_CHARSET);
|
2010-12-04 08:12:03 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Decodes a bit of an URL encoded by a browser.
|
|
|
|
* <p>
|
|
|
|
* The string is expected to be encoded as per RFC 3986, Section 2.
|
|
|
|
* This is the encoding used by JavaScript functions {@code encodeURI}
|
|
|
|
* and {@code encodeURIComponent}, but not {@code escape}. For example
|
|
|
|
* in this encoding, é (in Unicode {@code U+00E9} or in UTF-8
|
|
|
|
* {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
|
|
|
|
* <p>
|
|
|
|
* This is essentially equivalent to calling
|
2012-11-12 03:51:23 +01:00
|
|
|
* {@link URLDecoder#decode(String, String) URLDecoder.decode(s, charset.name())}
|
2010-12-04 08:12:03 +01:00
|
|
|
* except that it's over 2x faster and generates less garbage for the GC.
|
|
|
|
* Actually this function doesn't allocate any memory if there's nothing
|
|
|
|
* to decode, the argument itself is returned.
|
|
|
|
* @param s The string to decode (can be empty).
|
|
|
|
* @param charset The charset to use to decode the string (should really
|
2011-01-12 11:11:32 +01:00
|
|
|
* be {@link CharsetUtil#UTF_8}.
|
2010-12-04 08:12:03 +01:00
|
|
|
* @return The decoded string, or {@code s} if there's nothing to decode.
|
|
|
|
* If the string to decode is {@code null}, returns an empty string.
|
|
|
|
* @throws IllegalArgumentException if the string contains a malformed
|
|
|
|
* escape sequence.
|
|
|
|
*/
|
2014-06-24 10:39:46 +02:00
|
|
|
public static String decodeComponent(final String s, final Charset charset) {
|
2009-03-14 14:48:01 +01:00
|
|
|
if (s == null) {
|
|
|
|
return "";
|
|
|
|
}
|
2010-12-04 08:12:03 +01:00
|
|
|
final int size = s.length();
|
|
|
|
boolean modified = false;
|
|
|
|
for (int i = 0; i < size; i++) {
|
|
|
|
final char c = s.charAt(i);
|
2014-02-14 22:25:57 +01:00
|
|
|
if (c == '%' || c == '+') {
|
|
|
|
modified = true;
|
|
|
|
break;
|
2010-12-04 08:12:03 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!modified) {
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
final byte[] buf = new byte[size];
|
|
|
|
int pos = 0; // position in `buf'.
|
|
|
|
for (int i = 0; i < size; i++) {
|
|
|
|
char c = s.charAt(i);
|
|
|
|
switch (c) {
|
|
|
|
case '+':
|
|
|
|
buf[pos++] = ' '; // "+" -> " "
|
|
|
|
break;
|
|
|
|
case '%':
|
|
|
|
if (i == size - 1) {
|
|
|
|
throw new IllegalArgumentException("unterminated escape"
|
|
|
|
+ " sequence at end of string: " + s);
|
|
|
|
}
|
|
|
|
c = s.charAt(++i);
|
|
|
|
if (c == '%') {
|
|
|
|
buf[pos++] = '%'; // "%%" -> "%"
|
|
|
|
break;
|
2012-11-12 01:31:40 +01:00
|
|
|
}
|
|
|
|
if (i == size - 1) {
|
2010-12-04 08:12:03 +01:00
|
|
|
throw new IllegalArgumentException("partial escape"
|
|
|
|
+ " sequence at end of string: " + s);
|
|
|
|
}
|
|
|
|
c = decodeHexNibble(c);
|
|
|
|
final char c2 = decodeHexNibble(s.charAt(++i));
|
|
|
|
if (c == Character.MAX_VALUE || c2 == Character.MAX_VALUE) {
|
|
|
|
throw new IllegalArgumentException(
|
|
|
|
"invalid escape sequence `%" + s.charAt(i - 1)
|
|
|
|
+ s.charAt(i) + "' at index " + (i - 2)
|
|
|
|
+ " of: " + s);
|
|
|
|
}
|
|
|
|
c = (char) (c * 16 + c2);
|
|
|
|
// Fall through.
|
|
|
|
default:
|
|
|
|
buf[pos++] = (byte) c;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return new String(buf, 0, pos, charset);
|
|
|
|
}
|
2009-03-14 14:48:01 +01:00
|
|
|
|
2010-12-04 08:12:03 +01:00
|
|
|
/**
|
|
|
|
* Helper to decode half of a hexadecimal number from a string.
|
|
|
|
* @param c The ASCII character of the hexadecimal number to decode.
|
|
|
|
* Must be in the range {@code [0-9a-fA-F]}.
|
|
|
|
* @return The hexadecimal value represented in the ASCII character
|
2011-01-12 11:11:32 +01:00
|
|
|
* given, or {@link Character#MAX_VALUE} if the character is invalid.
|
2010-12-04 08:12:03 +01:00
|
|
|
*/
|
|
|
|
private static char decodeHexNibble(final char c) {
|
|
|
|
if ('0' <= c && c <= '9') {
|
|
|
|
return (char) (c - '0');
|
|
|
|
} else if ('a' <= c && c <= 'f') {
|
|
|
|
return (char) (c - 'a' + 10);
|
|
|
|
} else if ('A' <= c && c <= 'F') {
|
|
|
|
return (char) (c - 'A' + 10);
|
|
|
|
} else {
|
|
|
|
return Character.MAX_VALUE;
|
2009-02-26 07:34:07 +01:00
|
|
|
}
|
2008-11-19 08:22:15 +01:00
|
|
|
}
|
|
|
|
}
|