2009-02-08 17:37:49 +01:00
|
|
|
/*
|
2012-06-04 22:31:44 +02:00
|
|
|
* Copyright 2012 The Netty Project
|
2009-02-08 17:37:49 +01:00
|
|
|
*
|
2011-12-09 06:18:34 +01:00
|
|
|
* The Netty Project licenses this file to you under the Apache License,
|
|
|
|
* version 2.0 (the "License"); you may not use this file except in compliance
|
|
|
|
* with the License. You may obtain a copy of the License at:
|
2009-02-08 17:37:49 +01:00
|
|
|
*
|
2012-06-04 22:31:44 +02:00
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
2009-02-08 17:37:49 +01:00
|
|
|
*
|
2009-08-28 09:15:49 +02:00
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
2011-12-09 06:18:34 +01:00
|
|
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
2009-08-28 09:15:49 +02:00
|
|
|
* License for the specific language governing permissions and limitations
|
|
|
|
* under the License.
|
2009-02-08 17:37:49 +01:00
|
|
|
*/
|
2011-12-09 04:38:59 +01:00
|
|
|
package io.netty.util.internal;
|
2009-02-08 17:37:49 +01:00
|
|
|
|
2014-02-06 23:59:31 +01:00
|
|
|
import java.io.IOException;
|
2016-02-11 05:32:27 +01:00
|
|
|
import java.util.ArrayList;
|
2019-12-23 21:15:56 +01:00
|
|
|
import java.util.Arrays;
|
2019-09-11 08:46:06 +02:00
|
|
|
import java.util.Iterator;
|
2012-11-09 16:41:22 +01:00
|
|
|
import java.util.List;
|
2019-12-10 11:27:32 +01:00
|
|
|
import java.util.Objects;
|
2010-01-26 06:17:52 +01:00
|
|
|
|
2019-02-04 10:32:25 +01:00
|
|
|
import static java.util.Objects.requireNonNull;
|
2014-12-12 13:46:54 +01:00
|
|
|
|
2009-02-08 17:37:49 +01:00
|
|
|
/**
|
2009-06-13 13:30:02 +02:00
|
|
|
* String utility class.
|
2009-02-08 17:37:49 +01:00
|
|
|
*/
|
2012-01-11 12:16:14 +01:00
|
|
|
public final class StringUtil {
|
2009-06-18 09:30:26 +02:00
|
|
|
|
2015-03-16 07:46:14 +01:00
|
|
|
public static final String EMPTY_STRING = "";
|
2017-03-09 12:15:34 +01:00
|
|
|
public static final String NEWLINE = SystemPropertyUtil.get("line.separator", "\n");
|
2015-03-16 07:46:14 +01:00
|
|
|
|
2014-12-12 13:46:54 +01:00
|
|
|
public static final char DOUBLE_QUOTE = '\"';
|
|
|
|
public static final char COMMA = ',';
|
|
|
|
public static final char LINE_FEED = '\n';
|
|
|
|
public static final char CARRIAGE_RETURN = '\r';
|
2015-03-16 07:46:14 +01:00
|
|
|
public static final char TAB = '\t';
|
2017-02-24 21:45:23 +01:00
|
|
|
public static final char SPACE = 0x20;
|
2015-03-16 07:46:14 +01:00
|
|
|
|
2014-02-06 23:59:31 +01:00
|
|
|
private static final String[] BYTE2HEX_PAD = new String[256];
|
|
|
|
private static final String[] BYTE2HEX_NOPAD = new String[256];
|
2019-12-23 21:15:56 +01:00
|
|
|
private static final byte[] HEX2B;
|
2015-03-16 07:46:14 +01:00
|
|
|
|
2014-12-12 13:46:54 +01:00
|
|
|
/**
|
|
|
|
* 2 - Quote character at beginning and end.
|
|
|
|
* 5 - Extra allowance for anticipated escape characters that may be added.
|
|
|
|
*/
|
|
|
|
private static final int CSV_NUMBER_ESCAPE_CHARACTERS = 2 + 5;
|
2015-04-18 21:34:01 +02:00
|
|
|
private static final char PACKAGE_SEPARATOR_CHAR = '.';
|
2014-02-06 23:59:31 +01:00
|
|
|
|
2010-01-26 06:17:52 +01:00
|
|
|
static {
|
2014-02-06 23:59:31 +01:00
|
|
|
// Generate the lookup table that converts a byte into a 2-digit hexadecimal integer.
|
2017-11-27 21:39:13 +01:00
|
|
|
for (int i = 0; i < BYTE2HEX_PAD.length; i++) {
|
2016-08-01 00:36:46 +02:00
|
|
|
String str = Integer.toHexString(i);
|
2017-11-27 21:39:13 +01:00
|
|
|
BYTE2HEX_PAD[i] = i > 0xf ? str : ('0' + str);
|
2014-02-06 23:59:31 +01:00
|
|
|
BYTE2HEX_NOPAD[i] = str;
|
|
|
|
}
|
2019-12-23 21:15:56 +01:00
|
|
|
// Generate the lookup table that converts an hex char into its decimal value:
|
|
|
|
// the size of the table is such that the JVM is capable of save any bounds-check
|
|
|
|
// if a char type is used as an index.
|
|
|
|
HEX2B = new byte[Character.MAX_VALUE + 1];
|
|
|
|
Arrays.fill(HEX2B, (byte) -1);
|
|
|
|
HEX2B['0'] = (byte) 0;
|
|
|
|
HEX2B['1'] = (byte) 1;
|
|
|
|
HEX2B['2'] = (byte) 2;
|
|
|
|
HEX2B['3'] = (byte) 3;
|
|
|
|
HEX2B['4'] = (byte) 4;
|
|
|
|
HEX2B['5'] = (byte) 5;
|
|
|
|
HEX2B['6'] = (byte) 6;
|
|
|
|
HEX2B['7'] = (byte) 7;
|
|
|
|
HEX2B['8'] = (byte) 8;
|
|
|
|
HEX2B['9'] = (byte) 9;
|
|
|
|
HEX2B['A'] = (byte) 10;
|
|
|
|
HEX2B['B'] = (byte) 11;
|
|
|
|
HEX2B['C'] = (byte) 12;
|
|
|
|
HEX2B['D'] = (byte) 13;
|
|
|
|
HEX2B['E'] = (byte) 14;
|
|
|
|
HEX2B['F'] = (byte) 15;
|
|
|
|
HEX2B['a'] = (byte) 10;
|
|
|
|
HEX2B['b'] = (byte) 11;
|
|
|
|
HEX2B['c'] = (byte) 12;
|
|
|
|
HEX2B['d'] = (byte) 13;
|
|
|
|
HEX2B['e'] = (byte) 14;
|
|
|
|
HEX2B['f'] = (byte) 15;
|
2014-02-06 23:59:31 +01:00
|
|
|
}
|
2012-11-09 16:41:22 +01:00
|
|
|
|
2016-08-01 00:36:46 +02:00
|
|
|
private StringUtil() {
    // Static helpers only; this class is not meant to be instantiated.
}
|
|
|
|
|
2014-10-15 23:47:27 +02:00
|
|
|
/**
|
|
|
|
* Get the item after one char delim if the delim is found (else null).
|
|
|
|
* This operation is a simplified and optimized
|
|
|
|
* version of {@link String#split(String, int)}.
|
|
|
|
*/
|
|
|
|
public static String substringAfter(String value, char delim) {
|
|
|
|
int pos = value.indexOf(delim);
|
|
|
|
if (pos >= 0) {
|
|
|
|
return value.substring(pos + 1);
|
|
|
|
}
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
2015-12-16 22:46:25 +01:00
|
|
|
/**
|
|
|
|
* Checks if two strings have the same suffix of specified length
|
|
|
|
*
|
2017-02-24 21:45:23 +01:00
|
|
|
* @param s string
|
|
|
|
* @param p string
|
2015-12-16 22:46:25 +01:00
|
|
|
* @param len length of the common suffix
|
|
|
|
* @return true if both s and p are not null and both have the same suffix. Otherwise - false
|
|
|
|
*/
|
|
|
|
public static boolean commonSuffixOfLength(String s, String p, int len) {
|
|
|
|
return s != null && p != null && len >= 0 && s.regionMatches(s.length() - len, p, p.length() - len, len);
|
|
|
|
}
|
|
|
|
|
2014-02-06 23:59:31 +01:00
|
|
|
/**
|
|
|
|
* Converts the specified byte value into a 2-digit hexadecimal integer.
|
|
|
|
*/
|
|
|
|
public static String byteToHexStringPadded(int value) {
|
|
|
|
return BYTE2HEX_PAD[value & 0xff];
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts the specified byte value into a 2-digit hexadecimal integer and appends it to the specified buffer.
|
|
|
|
*/
|
|
|
|
public static <T extends Appendable> T byteToHexStringPadded(T buf, int value) {
|
|
|
|
try {
|
|
|
|
buf.append(byteToHexStringPadded(value));
|
|
|
|
} catch (IOException e) {
|
|
|
|
PlatformDependent.throwException(e);
|
|
|
|
}
|
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts the specified byte array into a hexadecimal value.
|
|
|
|
*/
|
|
|
|
public static String toHexStringPadded(byte[] src) {
|
|
|
|
return toHexStringPadded(src, 0, src.length);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts the specified byte array into a hexadecimal value.
|
|
|
|
*/
|
|
|
|
public static String toHexStringPadded(byte[] src, int offset, int length) {
|
|
|
|
return toHexStringPadded(new StringBuilder(length << 1), src, offset, length).toString();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
|
|
|
|
*/
|
|
|
|
public static <T extends Appendable> T toHexStringPadded(T dst, byte[] src) {
|
|
|
|
return toHexStringPadded(dst, src, 0, src.length);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
|
|
|
|
*/
|
|
|
|
public static <T extends Appendable> T toHexStringPadded(T dst, byte[] src, int offset, int length) {
|
|
|
|
final int end = offset + length;
|
2017-02-24 21:45:23 +01:00
|
|
|
for (int i = offset; i < end; i++) {
|
2014-02-06 23:59:31 +01:00
|
|
|
byteToHexStringPadded(dst, src[i]);
|
|
|
|
}
|
|
|
|
return dst;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts the specified byte value into a hexadecimal integer.
|
|
|
|
*/
|
|
|
|
public static String byteToHexString(int value) {
|
|
|
|
return BYTE2HEX_NOPAD[value & 0xff];
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts the specified byte value into a hexadecimal integer and appends it to the specified buffer.
|
|
|
|
*/
|
|
|
|
public static <T extends Appendable> T byteToHexString(T buf, int value) {
|
|
|
|
try {
|
|
|
|
buf.append(byteToHexString(value));
|
|
|
|
} catch (IOException e) {
|
|
|
|
PlatformDependent.throwException(e);
|
|
|
|
}
|
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts the specified byte array into a hexadecimal value.
|
|
|
|
*/
|
|
|
|
public static String toHexString(byte[] src) {
|
|
|
|
return toHexString(src, 0, src.length);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts the specified byte array into a hexadecimal value.
|
|
|
|
*/
|
|
|
|
public static String toHexString(byte[] src, int offset, int length) {
|
|
|
|
return toHexString(new StringBuilder(length << 1), src, offset, length).toString();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
|
|
|
|
*/
|
|
|
|
public static <T extends Appendable> T toHexString(T dst, byte[] src) {
|
|
|
|
return toHexString(dst, src, 0, src.length);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
|
|
|
|
*/
|
|
|
|
public static <T extends Appendable> T toHexString(T dst, byte[] src, int offset, int length) {
|
|
|
|
assert length >= 0;
|
|
|
|
if (length == 0) {
|
|
|
|
return dst;
|
|
|
|
}
|
|
|
|
|
|
|
|
final int end = offset + length;
|
|
|
|
final int endMinusOne = end - 1;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
// Skip preceding zeroes.
|
2017-02-24 21:45:23 +01:00
|
|
|
for (i = offset; i < endMinusOne; i++) {
|
2014-02-06 23:59:31 +01:00
|
|
|
if (src[i] != 0) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-02-24 21:45:23 +01:00
|
|
|
byteToHexString(dst, src[i++]);
|
2014-02-06 23:59:31 +01:00
|
|
|
int remaining = end - i;
|
|
|
|
toHexStringPadded(dst, src, i, remaining);
|
|
|
|
|
|
|
|
return dst;
|
|
|
|
}
|
|
|
|
|
2017-06-22 23:44:09 +02:00
|
|
|
/**
|
|
|
|
* Helper to decode half of a hexadecimal number from a string.
|
|
|
|
* @param c The ASCII character of the hexadecimal number to decode.
|
|
|
|
* Must be in the range {@code [0-9a-fA-F]}.
|
|
|
|
* @return The hexadecimal value represented in the ASCII character
|
|
|
|
* given, or {@code -1} if the character is invalid.
|
|
|
|
*/
|
|
|
|
public static int decodeHexNibble(final char c) {
|
2019-12-23 21:15:56 +01:00
|
|
|
assert HEX2B.length == (Character.MAX_VALUE + 1);
|
2017-06-22 23:44:09 +02:00
|
|
|
// Character.digit() is not used here, as it addresses a larger
|
|
|
|
// set of characters (both ASCII and full-width latin letters).
|
2019-12-23 21:15:56 +01:00
|
|
|
final int index = c;
|
|
|
|
return HEX2B[index];
|
2017-06-22 23:44:09 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Decode a 2-digit hex byte from within a string.
|
|
|
|
*/
|
|
|
|
public static byte decodeHexByte(CharSequence s, int pos) {
|
|
|
|
int hi = decodeHexNibble(s.charAt(pos));
|
|
|
|
int lo = decodeHexNibble(s.charAt(pos + 1));
|
|
|
|
if (hi == -1 || lo == -1) {
|
|
|
|
throw new IllegalArgumentException(String.format(
|
|
|
|
"invalid hex byte '%s' at index %d of '%s'", s.subSequence(pos, pos + 2), pos, s));
|
|
|
|
}
|
|
|
|
return (byte) ((hi << 4) + lo);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Decodes part of a string with <a href="http://en.wikipedia.org/wiki/Hex_dump">hex dump</a>
|
|
|
|
*
|
|
|
|
* @param hexDump a {@link CharSequence} which contains the hex dump
|
|
|
|
* @param fromIndex start of hex dump in {@code hexDump}
|
|
|
|
* @param length hex string length
|
|
|
|
*/
|
|
|
|
public static byte[] decodeHexDump(CharSequence hexDump, int fromIndex, int length) {
|
|
|
|
if (length < 0 || (length & 1) != 0) {
|
|
|
|
throw new IllegalArgumentException("length: " + length);
|
|
|
|
}
|
|
|
|
if (length == 0) {
|
|
|
|
return EmptyArrays.EMPTY_BYTES;
|
|
|
|
}
|
|
|
|
byte[] bytes = new byte[length >>> 1];
|
|
|
|
for (int i = 0; i < length; i += 2) {
|
|
|
|
bytes[i >>> 1] = decodeHexByte(hexDump, fromIndex + i);
|
|
|
|
}
|
|
|
|
return bytes;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Decodes a <a href="http://en.wikipedia.org/wiki/Hex_dump">hex dump</a>
|
|
|
|
*/
|
|
|
|
public static byte[] decodeHexDump(CharSequence hexDump) {
|
|
|
|
return decodeHexDump(hexDump, 0, hexDump.length());
|
|
|
|
}
|
|
|
|
|
2013-05-01 10:04:43 +02:00
|
|
|
/**
|
|
|
|
* The shortcut to {@link #simpleClassName(Class) simpleClassName(o.getClass())}.
|
|
|
|
*/
|
|
|
|
public static String simpleClassName(Object o) {
|
2013-12-16 05:54:23 +01:00
|
|
|
if (o == null) {
|
|
|
|
return "null_object";
|
|
|
|
} else {
|
|
|
|
return simpleClassName(o.getClass());
|
|
|
|
}
|
2013-05-01 10:04:43 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Generates a simplified name from a {@link Class}. Similar to {@link Class#getSimpleName()}, but it works fine
|
|
|
|
* with anonymous classes.
|
|
|
|
*/
|
|
|
|
public static String simpleClassName(Class<?> clazz) {
|
2019-02-04 10:32:25 +01:00
|
|
|
String className = requireNonNull(clazz, "clazz").getName();
|
2015-04-18 21:34:01 +02:00
|
|
|
final int lastDotIdx = className.lastIndexOf(PACKAGE_SEPARATOR_CHAR);
|
|
|
|
if (lastDotIdx > -1) {
|
|
|
|
return className.substring(lastDotIdx + 1);
|
2013-05-01 10:04:43 +02:00
|
|
|
}
|
2015-04-18 21:34:01 +02:00
|
|
|
return className;
|
2013-05-01 10:04:43 +02:00
|
|
|
}
|
2014-02-06 23:59:31 +01:00
|
|
|
|
2014-12-12 13:46:54 +01:00
|
|
|
/**
|
|
|
|
* Escapes the specified value, if necessary according to
|
|
|
|
* <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
|
|
|
|
*
|
|
|
|
* @param value The value which will be escaped according to
|
|
|
|
* <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
|
2015-04-30 14:34:24 +02:00
|
|
|
* @return {@link CharSequence} the escaped value if necessary, or the value unchanged
|
2014-12-12 13:46:54 +01:00
|
|
|
*/
|
|
|
|
public static CharSequence escapeCsv(CharSequence value) {
|
2017-02-24 21:45:23 +01:00
|
|
|
return escapeCsv(value, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Escapes the specified value, if necessary according to
|
|
|
|
* <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
|
|
|
|
*
|
|
|
|
* @param value The value which will be escaped according to
|
|
|
|
* <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
|
|
|
|
* @param trimWhiteSpace The value will first be trimmed of its optional white-space characters,
|
|
|
|
* according to <a href="https://tools.ietf.org/html/rfc7230#section-7">RFC-7230</a>
|
|
|
|
* @return {@link CharSequence} the escaped value if necessary, or the value unchanged
|
|
|
|
*/
|
|
|
|
public static CharSequence escapeCsv(CharSequence value, boolean trimWhiteSpace) {
|
2019-02-04 10:32:25 +01:00
|
|
|
int length = requireNonNull(value, "value").length();
|
2017-06-13 19:58:54 +02:00
|
|
|
int start;
|
|
|
|
int last;
|
2017-02-24 21:45:23 +01:00
|
|
|
if (trimWhiteSpace) {
|
|
|
|
start = indexOfFirstNonOwsChar(value, length);
|
|
|
|
last = indexOfLastNonOwsChar(value, start, length);
|
2017-06-13 19:58:54 +02:00
|
|
|
} else {
|
|
|
|
start = 0;
|
|
|
|
last = length - 1;
|
|
|
|
}
|
|
|
|
if (start > last) {
|
|
|
|
return EMPTY_STRING;
|
|
|
|
}
|
|
|
|
|
|
|
|
int firstUnescapedSpecial = -1;
|
|
|
|
boolean quoted = false;
|
|
|
|
if (isDoubleQuote(value.charAt(start))) {
|
|
|
|
quoted = isDoubleQuote(value.charAt(last)) && last > start;
|
|
|
|
if (quoted) {
|
|
|
|
start++;
|
|
|
|
last--;
|
|
|
|
} else {
|
|
|
|
firstUnescapedSpecial = start;
|
2017-02-24 21:45:23 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-13 19:58:54 +02:00
|
|
|
if (firstUnescapedSpecial < 0) {
|
|
|
|
if (quoted) {
|
|
|
|
for (int i = start; i <= last; i++) {
|
|
|
|
if (isDoubleQuote(value.charAt(i))) {
|
|
|
|
if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
|
|
|
|
firstUnescapedSpecial = i;
|
|
|
|
break;
|
2014-12-12 13:46:54 +01:00
|
|
|
}
|
2017-06-13 19:58:54 +02:00
|
|
|
i++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for (int i = start; i <= last; i++) {
|
|
|
|
char c = value.charAt(i);
|
|
|
|
if (c == LINE_FEED || c == CARRIAGE_RETURN || c == COMMA) {
|
|
|
|
firstUnescapedSpecial = i;
|
2014-12-12 13:46:54 +01:00
|
|
|
break;
|
|
|
|
}
|
2017-06-13 19:58:54 +02:00
|
|
|
if (isDoubleQuote(c)) {
|
|
|
|
if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
|
|
|
|
firstUnescapedSpecial = i;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
}
|
2014-12-12 13:46:54 +01:00
|
|
|
}
|
2017-02-24 21:45:23 +01:00
|
|
|
|
2017-06-13 19:58:54 +02:00
|
|
|
if (firstUnescapedSpecial < 0) {
|
|
|
|
// Special characters is not found or all of them already escaped.
|
|
|
|
// In the most cases returns a same string. New string will be instantiated (via StringBuilder)
|
|
|
|
// only if it really needed. It's important to prevent GC extra load.
|
|
|
|
return quoted? value.subSequence(start - 1, last + 2) : value.subSequence(start, last + 1);
|
|
|
|
}
|
2017-02-24 21:45:23 +01:00
|
|
|
}
|
|
|
|
|
2017-06-13 19:58:54 +02:00
|
|
|
StringBuilder result = new StringBuilder(last - start + 1 + CSV_NUMBER_ESCAPE_CHARACTERS);
|
|
|
|
result.append(DOUBLE_QUOTE).append(value, start, firstUnescapedSpecial);
|
|
|
|
for (int i = firstUnescapedSpecial; i <= last; i++) {
|
|
|
|
char c = value.charAt(i);
|
|
|
|
if (isDoubleQuote(c)) {
|
|
|
|
result.append(DOUBLE_QUOTE);
|
|
|
|
if (i < last && isDoubleQuote(value.charAt(i + 1))) {
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
result.append(c);
|
|
|
|
}
|
|
|
|
return result.append(DOUBLE_QUOTE);
|
2014-12-12 13:46:54 +01:00
|
|
|
}
|
|
|
|
|
2016-02-06 23:21:07 +01:00
|
|
|
/**
|
|
|
|
* Unescapes the specified escaped CSV field, if necessary according to
|
|
|
|
* <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
|
|
|
|
*
|
|
|
|
* @param value The escaped CSV field which will be unescaped according to
|
|
|
|
* <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
|
|
|
|
* @return {@link CharSequence} the unescaped value if necessary, or the value unchanged
|
|
|
|
*/
|
|
|
|
public static CharSequence unescapeCsv(CharSequence value) {
|
2019-02-04 10:32:25 +01:00
|
|
|
int length = requireNonNull(value, "value").length();
|
2016-02-06 23:21:07 +01:00
|
|
|
if (length == 0) {
|
|
|
|
return value;
|
|
|
|
}
|
|
|
|
int last = length - 1;
|
|
|
|
boolean quoted = isDoubleQuote(value.charAt(0)) && isDoubleQuote(value.charAt(last)) && length != 1;
|
|
|
|
if (!quoted) {
|
|
|
|
validateCsvFormat(value);
|
|
|
|
return value;
|
|
|
|
}
|
|
|
|
StringBuilder unescaped = InternalThreadLocalMap.get().stringBuilder();
|
|
|
|
for (int i = 1; i < last; i++) {
|
|
|
|
char current = value.charAt(i);
|
|
|
|
if (current == DOUBLE_QUOTE) {
|
|
|
|
if (isDoubleQuote(value.charAt(i + 1)) && (i + 1) != last) {
|
|
|
|
// Followed by a double-quote but not the last character
|
|
|
|
// Just skip the next double-quote
|
|
|
|
i++;
|
|
|
|
} else {
|
|
|
|
// Not followed by a double-quote or the following double-quote is the last character
|
|
|
|
throw newInvalidEscapedCsvFieldException(value, i);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
unescaped.append(current);
|
|
|
|
}
|
|
|
|
return unescaped.toString();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2016-02-11 05:32:27 +01:00
|
|
|
* Unescapes the specified escaped CSV fields according to
|
|
|
|
* <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
|
|
|
|
*
|
|
|
|
* @param value A string with multiple CSV escaped fields which will be unescaped according to
|
|
|
|
* <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
|
|
|
|
* @return {@link List} the list of unescaped fields
|
|
|
|
*/
|
|
|
|
public static List<CharSequence> unescapeCsvFields(CharSequence value) {
|
2019-01-22 16:07:26 +01:00
|
|
|
List<CharSequence> unescaped = new ArrayList<>(2);
|
2016-02-11 05:32:27 +01:00
|
|
|
StringBuilder current = InternalThreadLocalMap.get().stringBuilder();
|
|
|
|
boolean quoted = false;
|
|
|
|
int last = value.length() - 1;
|
|
|
|
for (int i = 0; i <= last; i++) {
|
|
|
|
char c = value.charAt(i);
|
|
|
|
if (quoted) {
|
|
|
|
switch (c) {
|
|
|
|
case DOUBLE_QUOTE:
|
|
|
|
if (i == last) {
|
|
|
|
// Add the last field and return
|
|
|
|
unescaped.add(current.toString());
|
|
|
|
return unescaped;
|
|
|
|
}
|
|
|
|
char next = value.charAt(++i);
|
|
|
|
if (next == DOUBLE_QUOTE) {
|
|
|
|
// 2 double-quotes should be unescaped to one
|
|
|
|
current.append(DOUBLE_QUOTE);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (next == COMMA) {
|
|
|
|
// This is the end of a field. Let's start to parse the next field.
|
|
|
|
quoted = false;
|
|
|
|
unescaped.add(current.toString());
|
|
|
|
current.setLength(0);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// double-quote followed by other character is invalid
|
|
|
|
throw newInvalidEscapedCsvFieldException(value, i - 1);
|
|
|
|
default:
|
|
|
|
current.append(c);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
switch (c) {
|
|
|
|
case COMMA:
|
|
|
|
// Start to parse the next field
|
|
|
|
unescaped.add(current.toString());
|
|
|
|
current.setLength(0);
|
|
|
|
break;
|
|
|
|
case DOUBLE_QUOTE:
|
|
|
|
if (current.length() == 0) {
|
|
|
|
quoted = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// double-quote appears without being enclosed with double-quotes
|
2017-08-16 08:13:29 +02:00
|
|
|
// fall through
|
2016-02-11 05:32:27 +01:00
|
|
|
case LINE_FEED:
|
2017-08-15 21:41:22 +02:00
|
|
|
// fall through
|
2016-02-11 05:32:27 +01:00
|
|
|
case CARRIAGE_RETURN:
|
|
|
|
// special characters appears without being enclosed with double-quotes
|
|
|
|
throw newInvalidEscapedCsvFieldException(value, i);
|
|
|
|
default:
|
|
|
|
current.append(c);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (quoted) {
|
|
|
|
throw newInvalidEscapedCsvFieldException(value, last);
|
|
|
|
}
|
|
|
|
unescaped.add(current.toString());
|
|
|
|
return unescaped;
|
|
|
|
}
|
|
|
|
|
2017-02-24 21:45:23 +01:00
|
|
|
/**
|
2016-02-06 23:21:07 +01:00
|
|
|
* Validate if {@code value} is a valid csv field without double-quotes.
|
|
|
|
*
|
|
|
|
* @throws IllegalArgumentException if {@code value} needs to be encoded with double-quotes.
|
|
|
|
*/
|
|
|
|
private static void validateCsvFormat(CharSequence value) {
|
|
|
|
int length = value.length();
|
|
|
|
for (int i = 0; i < length; i++) {
|
|
|
|
switch (value.charAt(i)) {
|
|
|
|
case DOUBLE_QUOTE:
|
|
|
|
case LINE_FEED:
|
|
|
|
case CARRIAGE_RETURN:
|
|
|
|
case COMMA:
|
|
|
|
// If value contains any special character, it should be enclosed with double-quotes
|
|
|
|
throw newInvalidEscapedCsvFieldException(value, i);
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private static IllegalArgumentException newInvalidEscapedCsvFieldException(CharSequence value, int index) {
|
|
|
|
return new IllegalArgumentException("invalid escaped CSV field: " + value + " index: " + index);
|
|
|
|
}
|
|
|
|
|
2015-08-19 22:52:08 +02:00
|
|
|
/**
|
|
|
|
* Get the length of a string, {@code null} input is considered {@code 0} length.
|
|
|
|
*/
|
|
|
|
public static int length(String s) {
|
|
|
|
return s == null ? 0 : s.length();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Determine if a string is {@code null} or {@link String#isEmpty()} returns {@code true}.
|
|
|
|
*/
|
|
|
|
public static boolean isNullOrEmpty(String s) {
|
|
|
|
return s == null || s.isEmpty();
|
|
|
|
}
|
|
|
|
|
2017-02-02 10:32:33 +01:00
|
|
|
/**
|
|
|
|
* Find the index of the first non-white space character in {@code s} starting at {@code offset}.
|
2017-02-24 21:45:23 +01:00
|
|
|
*
|
|
|
|
* @param seq The string to search.
|
2017-02-02 10:32:33 +01:00
|
|
|
* @param offset The offset to start searching at.
|
2019-12-18 21:07:23 +01:00
|
|
|
* @return the index of the first non-white space character or <{@code -1} if none was found.
|
2017-02-02 10:32:33 +01:00
|
|
|
*/
|
|
|
|
public static int indexOfNonWhiteSpace(CharSequence seq, int offset) {
|
|
|
|
for (; offset < seq.length(); ++offset) {
|
|
|
|
if (!Character.isWhitespace(seq.charAt(offset))) {
|
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2019-12-18 21:07:23 +01:00
|
|
|
/**
|
|
|
|
* Find the index of the first white space character in {@code s} starting at {@code offset}.
|
|
|
|
*
|
|
|
|
* @param seq The string to search.
|
|
|
|
* @param offset The offset to start searching at.
|
|
|
|
* @return the index of the first white space character or <{@code -1} if none was found.
|
|
|
|
*/
|
|
|
|
public static int indexOfWhiteSpace(CharSequence seq, int offset) {
|
|
|
|
for (; offset < seq.length(); ++offset) {
|
|
|
|
if (Character.isWhitespace(seq.charAt(offset))) {
|
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2015-12-18 19:53:54 +01:00
|
|
|
/**
|
|
|
|
* Determine if {@code c} lies within the range of values defined for
|
|
|
|
* <a href="http://unicode.org/glossary/#surrogate_code_point">Surrogate Code Point</a>.
|
2017-02-24 21:45:23 +01:00
|
|
|
*
|
2015-12-18 19:53:54 +01:00
|
|
|
* @param c the character to check.
|
|
|
|
* @return {@code true} if {@code c} lies within the range of values defined for
|
|
|
|
* <a href="http://unicode.org/glossary/#surrogate_code_point">Surrogate Code Point</a>. {@code false} otherwise.
|
|
|
|
*/
|
|
|
|
public static boolean isSurrogate(char c) {
|
|
|
|
return c >= '\uD800' && c <= '\uDFFF';
|
|
|
|
}
|
|
|
|
|
2014-12-12 13:46:54 +01:00
|
|
|
private static boolean isDoubleQuote(char c) {
|
|
|
|
return c == DOUBLE_QUOTE;
|
|
|
|
}
|
|
|
|
|
2016-06-30 23:12:11 +02:00
|
|
|
/**
|
|
|
|
* Determine if the string {@code s} ends with the char {@code c}.
|
|
|
|
*
|
|
|
|
* @param s the string to test
|
|
|
|
* @param c the tested char
|
|
|
|
* @return true if {@code s} ends with the char {@code c}
|
|
|
|
*/
|
|
|
|
public static boolean endsWith(CharSequence s, char c) {
|
|
|
|
int len = s.length();
|
|
|
|
return len > 0 && s.charAt(len - 1) == c;
|
|
|
|
}
|
2017-02-24 21:45:23 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Trim optional white-space characters from the specified value,
|
|
|
|
* according to <a href="https://tools.ietf.org/html/rfc7230#section-7">RFC-7230</a>.
|
|
|
|
*
|
|
|
|
* @param value the value to trim
|
|
|
|
* @return {@link CharSequence} the trimmed value if necessary, or the value unchanged
|
|
|
|
*/
|
|
|
|
public static CharSequence trimOws(CharSequence value) {
|
|
|
|
final int length = value.length();
|
|
|
|
if (length == 0) {
|
|
|
|
return value;
|
|
|
|
}
|
|
|
|
int start = indexOfFirstNonOwsChar(value, length);
|
|
|
|
int end = indexOfLastNonOwsChar(value, start, length);
|
|
|
|
return start == 0 && end == length - 1 ? value : value.subSequence(start, end + 1);
|
|
|
|
}
|
|
|
|
|
2019-09-11 08:46:06 +02:00
|
|
|
/**
|
|
|
|
* Returns a char sequence that contains all {@code elements} joined by a given separator.
|
|
|
|
*
|
|
|
|
* @param separator for each element
|
|
|
|
* @param elements to join together
|
|
|
|
*
|
|
|
|
* @return a char sequence joined by a given separator.
|
|
|
|
*/
|
|
|
|
public static CharSequence join(CharSequence separator, Iterable<? extends CharSequence> elements) {
|
2019-12-10 11:27:32 +01:00
|
|
|
Objects.requireNonNull(separator, "separator");
|
|
|
|
Objects.requireNonNull(elements, "elements");
|
2019-09-11 08:46:06 +02:00
|
|
|
|
|
|
|
Iterator<? extends CharSequence> iterator = elements.iterator();
|
|
|
|
if (!iterator.hasNext()) {
|
|
|
|
return EMPTY_STRING;
|
|
|
|
}
|
|
|
|
|
|
|
|
CharSequence firstElement = iterator.next();
|
|
|
|
if (!iterator.hasNext()) {
|
|
|
|
return firstElement;
|
|
|
|
}
|
|
|
|
|
|
|
|
StringBuilder builder = new StringBuilder(firstElement);
|
|
|
|
do {
|
|
|
|
builder.append(separator).append(iterator.next());
|
|
|
|
} while (iterator.hasNext());
|
|
|
|
|
|
|
|
return builder;
|
|
|
|
}
|
|
|
|
|
2017-02-24 21:45:23 +01:00
|
|
|
/**
|
|
|
|
* @return {@code length} if no OWS is found.
|
|
|
|
*/
|
|
|
|
private static int indexOfFirstNonOwsChar(CharSequence value, int length) {
|
|
|
|
int i = 0;
|
|
|
|
while (i < length && isOws(value.charAt(i))) {
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @return {@code start} if no OWS is found.
|
|
|
|
*/
|
|
|
|
private static int indexOfLastNonOwsChar(CharSequence value, int start, int length) {
|
|
|
|
int i = length - 1;
|
|
|
|
while (i > start && isOws(value.charAt(i))) {
|
|
|
|
i--;
|
|
|
|
}
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
|
|
|
private static boolean isOws(char c) {
|
|
|
|
return c == SPACE || c == TAB;
|
|
|
|
}
|
2019-09-11 08:46:06 +02:00
|
|
|
|
2009-02-08 17:37:49 +01:00
|
|
|
}
|