/*
 * Copyright 2012 The Netty Project
 *
 * The Netty Project licenses this file to you under the Apache License,
 * version 2.0 (the "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at:
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package io.netty.util.internal;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;

import static java.util.Objects.requireNonNull;

/**
 * String utility class.
 * <p>
 * Offers static helpers for hexadecimal encoding/decoding of bytes, CSV field escaping/unescaping
 * (RFC-4180), optional-white-space trimming (RFC-7230) and a few small string predicates.
 * The class is not instantiable.
 */
public final class StringUtil {

    public static final String EMPTY_STRING = "";
    public static final String NEWLINE = SystemPropertyUtil.get("line.separator", "\n");

    public static final char DOUBLE_QUOTE = '\"';
    public static final char COMMA = ',';
    public static final char LINE_FEED = '\n';
    public static final char CARRIAGE_RETURN = '\r';
    public static final char TAB = '\t';
    public static final char SPACE = 0x20;

    /** Lookup table: byte value (0-255) to its zero-padded, 2-digit lower-case hex string. */
    private static final String[] BYTE2HEX_PAD = new String[256];
    /** Lookup table: byte value (0-255) to its lower-case hex string without zero padding. */
    private static final String[] BYTE2HEX_NOPAD = new String[256];
    /** Lookup table: char to the value of the hex digit it represents, or {@code -1} if it is not a hex digit. */
    private static final byte[] HEX2B;

    /**
     * 2 - Quote character at beginning and end.
     * 5 - Extra allowance for anticipated escape characters that may be added.
     */
    private static final int CSV_NUMBER_ESCAPE_CHARACTERS = 2 + 5;
    private static final char PACKAGE_SEPARATOR_CHAR = '.';

    static {
        // Generate the lookup table that converts a byte into a 2-digit hexadecimal integer.
        for (int i = 0; i < BYTE2HEX_PAD.length; i++) {
            String str = Integer.toHexString(i);
            BYTE2HEX_PAD[i] = i > 0xf ? str : ('0' + str);
            BYTE2HEX_NOPAD[i] = str;
        }
        // Generate the lookup table that converts a hex char into its decimal value:
        // the table covers every possible char value so that the JVM is able to elide
        // the bounds-check when a char is used as the index.
        HEX2B = new byte[Character.MAX_VALUE + 1];
        Arrays.fill(HEX2B, (byte) -1);
        HEX2B['0'] = (byte) 0;
        HEX2B['1'] = (byte) 1;
        HEX2B['2'] = (byte) 2;
        HEX2B['3'] = (byte) 3;
        HEX2B['4'] = (byte) 4;
        HEX2B['5'] = (byte) 5;
        HEX2B['6'] = (byte) 6;
        HEX2B['7'] = (byte) 7;
        HEX2B['8'] = (byte) 8;
        HEX2B['9'] = (byte) 9;
        HEX2B['A'] = (byte) 10;
        HEX2B['B'] = (byte) 11;
        HEX2B['C'] = (byte) 12;
        HEX2B['D'] = (byte) 13;
        HEX2B['E'] = (byte) 14;
        HEX2B['F'] = (byte) 15;
        HEX2B['a'] = (byte) 10;
        HEX2B['b'] = (byte) 11;
        HEX2B['c'] = (byte) 12;
        HEX2B['d'] = (byte) 13;
        HEX2B['e'] = (byte) 14;
        HEX2B['f'] = (byte) 15;
    }

    private StringUtil() {
        // Unused.
    }

    /**
     * Get the item after one char delim if the delim is found (else null).
     * This operation is a simplified and optimized
     * version of {@link String#split(String, int)}.
     *
     * @param value the string to search; must not be {@code null}
     * @param delim the delimiter character
     * @return everything after the first occurrence of {@code delim}, or {@code null} if
     *         {@code delim} does not occur in {@code value}
     */
    public static String substringAfter(String value, char delim) {
        int pos = value.indexOf(delim);
        if (pos >= 0) {
            return value.substring(pos + 1);
        }
        return null;
    }

    /**
     * Checks if two strings have the same suffix of specified length
     *
     * @param s   string
     * @param p   string
     * @param len length of the common suffix
     * @return true if both s and p are not null and both have the same suffix. Otherwise - false
     */
    public static boolean commonSuffixOfLength(String s, String p, int len) {
        // regionMatches itself returns false when either offset is negative, i.e. when a string
        // is shorter than len, so no explicit length check is required here.
        return s != null && p != null && len >= 0 && s.regionMatches(s.length() - len, p, p.length() - len, len);
    }

    /**
     * Converts the specified byte value into a 2-digit hexadecimal integer.
     *
     * @param value the value to convert; only the lowest 8 bits are used
     * @return the zero-padded, 2-digit hexadecimal representation
     */
    public static String byteToHexStringPadded(int value) {
        return BYTE2HEX_PAD[value & 0xff];
    }

    /**
     * Converts the specified byte value into a 2-digit hexadecimal integer and appends it to the specified buffer.
     *
     * @param buf   the buffer to append to
     * @param value the value to convert; only the lowest 8 bits are used
     * @param <T>   the concrete {@link Appendable} type, returned unchanged to allow call chaining
     * @return {@code buf}
     */
    public static <T extends Appendable> T byteToHexStringPadded(T buf, int value) {
        try {
            buf.append(byteToHexStringPadded(value));
        } catch (IOException e) {
            // Appendable.append declares IOException; it is handed to PlatformDependent.throwException
            // instead of being declared on this method.
            PlatformDependent.throwException(e);
        }
        return buf;
    }

    /**
     * Converts the specified byte array into a hexadecimal value.
     *
     * @param src the bytes to convert
     * @return two hexadecimal digits for every byte of {@code src}
     */
    public static String toHexStringPadded(byte[] src) {
        return toHexStringPadded(src, 0, src.length);
    }

    /**
     * Converts the specified byte array into a hexadecimal value.
     *
     * @param src    the bytes to convert
     * @param offset index of the first byte to convert
     * @param length number of bytes to convert
     * @return two hexadecimal digits for every converted byte
     */
    public static String toHexStringPadded(byte[] src, int offset, int length) {
        return toHexStringPadded(new StringBuilder(length << 1), src, offset, length).toString();
    }

    /**
     * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
     *
     * @param dst the buffer to append to
     * @param src the bytes to convert
     * @param <T> the concrete {@link Appendable} type
     * @return {@code dst}
     */
    public static <T extends Appendable> T toHexStringPadded(T dst, byte[] src) {
        return toHexStringPadded(dst, src, 0, src.length);
    }

    /**
     * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
     *
     * @param dst    the buffer to append to
     * @param src    the bytes to convert
     * @param offset index of the first byte to convert
     * @param length number of bytes to convert
     * @param <T>    the concrete {@link Appendable} type
     * @return {@code dst}
     */
    public static <T extends Appendable> T toHexStringPadded(T dst, byte[] src, int offset, int length) {
        final int end = offset + length;
        for (int i = offset; i < end; i++) {
            byteToHexStringPadded(dst, src[i]);
        }
        return dst;
    }

    /**
     * Converts the specified byte value into a hexadecimal integer.
     *
     * @param value the value to convert; only the lowest 8 bits are used
     * @return the hexadecimal representation without zero padding (1 or 2 digits)
     */
    public static String byteToHexString(int value) {
        return BYTE2HEX_NOPAD[value & 0xff];
    }

    /**
     * Converts the specified byte value into a hexadecimal integer and appends it to the specified buffer.
     *
     * @param buf   the buffer to append to
     * @param value the value to convert; only the lowest 8 bits are used
     * @param <T>   the concrete {@link Appendable} type, returned unchanged to allow call chaining
     * @return {@code buf}
     */
    public static <T extends Appendable> T byteToHexString(T buf, int value) {
        try {
            buf.append(byteToHexString(value));
        } catch (IOException e) {
            // Appendable.append declares IOException; it is handed to PlatformDependent.throwException
            // instead of being declared on this method.
            PlatformDependent.throwException(e);
        }
        return buf;
    }

    /**
     * Converts the specified byte array into a hexadecimal value.
     *
     * @param src the bytes to convert
     * @return the hexadecimal representation with leading zero bytes (and the leading zero digit) omitted
     */
    public static String toHexString(byte[] src) {
        return toHexString(src, 0, src.length);
    }

    /**
     * Converts the specified byte array into a hexadecimal value.
     *
     * @param src    the bytes to convert
     * @param offset index of the first byte to convert
     * @param length number of bytes to convert
     * @return the hexadecimal representation with leading zero bytes (and the leading zero digit) omitted
     */
    public static String toHexString(byte[] src, int offset, int length) {
        return toHexString(new StringBuilder(length << 1), src, offset, length).toString();
    }

    /**
     * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
     *
     * @param dst the buffer to append to
     * @param src the bytes to convert
     * @param <T> the concrete {@link Appendable} type
     * @return {@code dst}
     */
    public static <T extends Appendable> T toHexString(T dst, byte[] src) {
        return toHexString(dst, src, 0, src.length);
    }

    /**
     * Converts the specified byte array into a hexadecimal value and appends it to the specified buffer.
     * <p>
     * Leading zero bytes are skipped (the last byte of the range is always emitted), the first
     * emitted byte is written without zero padding and every following byte is written as two digits.
     * Nothing is appended when {@code length} is {@code 0}.
     *
     * @param dst    the buffer to append to
     * @param src    the bytes to convert
     * @param offset index of the first byte to convert
     * @param length number of bytes to convert; must not be negative
     * @param <T>    the concrete {@link Appendable} type
     * @return {@code dst}
     */
    public static <T extends Appendable> T toHexString(T dst, byte[] src, int offset, int length) {
        assert length >= 0;
        if (length == 0) {
            return dst;
        }

        final int end = offset + length;
        final int endMinusOne = end - 1;
        int i;

        // Skip preceding zeroes.
        for (i = offset; i < endMinusOne; i++) {
            if (src[i] != 0) {
                break;
            }
        }

        // First significant byte goes out unpadded, the rest are always two digits each.
        byteToHexString(dst, src[i++]);
        int remaining = end - i;
        toHexStringPadded(dst, src, i, remaining);

        return dst;
    }

    /**
     * Helper to decode half of a hexadecimal number from a string.
     * @param c The ASCII character of the hexadecimal number to decode.
     * Must be in the range {@code [0-9a-fA-F]}.
     * @return The hexadecimal value represented in the ASCII character
     * given, or {@code -1} if the character is invalid.
     */
    public static int decodeHexNibble(final char c) {
        assert HEX2B.length == (Character.MAX_VALUE + 1);
        // Character.digit() is not used here, as it addresses a larger
        // set of characters (both ASCII and full-width latin letters).
        final int index = c;
        return HEX2B[index];
    }

    /**
     * Decode a 2-digit hex byte from within a string.
     *
     * @param s   the sequence containing the two hex digits
     * @param pos index of the first (high) hex digit in {@code s}
     * @return the decoded byte
     * @throws IllegalArgumentException if either character is not a hexadecimal digit
     */
    public static byte decodeHexByte(CharSequence s, int pos) {
        int hi = decodeHexNibble(s.charAt(pos));
        int lo = decodeHexNibble(s.charAt(pos + 1));
        if (hi == -1 || lo == -1) {
            throw new IllegalArgumentException(String.format(
                    "invalid hex byte '%s' at index %d of '%s'", s.subSequence(pos, pos + 2), pos, s));
        }
        return (byte) ((hi << 4) + lo);
    }

    /**
     * Decodes part of a string with hex dump
     *
     * @param hexDump   a {@link CharSequence} which contains the hex dump
     * @param fromIndex start of hex dump in {@code hexDump}
     * @param length    hex string length
     * @return the decoded bytes, {@code length / 2} in number
     * @throws IllegalArgumentException if {@code length} is negative or odd, or if a character in
     *                                  the range is not a hexadecimal digit
     */
    public static byte[] decodeHexDump(CharSequence hexDump, int fromIndex, int length) {
        if (length < 0 || (length & 1) != 0) {
            throw new IllegalArgumentException("length: " + length);
        }
        if (length == 0) {
            return EmptyArrays.EMPTY_BYTES;
        }
        byte[] bytes = new byte[length >>> 1];
        for (int i = 0; i < length; i += 2) {
            bytes[i >>> 1] = decodeHexByte(hexDump, fromIndex + i);
        }
        return bytes;
    }

    /**
     * Decodes a hex dump
     *
     * @param hexDump a {@link CharSequence} consisting only of an even number of hex digits
     * @return the decoded bytes
     * @throws IllegalArgumentException if the length is odd or a character is not a hexadecimal digit
     */
    public static byte[] decodeHexDump(CharSequence hexDump) {
        return decodeHexDump(hexDump, 0, hexDump.length());
    }

    /**
     * The shortcut to {@link #simpleClassName(Class) simpleClassName(o.getClass())}.
     *
     * @param o the object whose class name is wanted; may be {@code null}
     * @return the simplified class name, or the literal {@code "null_object"} when {@code o} is {@code null}
     */
    public static String simpleClassName(Object o) {
        if (o == null) {
            return "null_object";
        } else {
            return simpleClassName(o.getClass());
        }
    }

    /**
     * Generates a simplified name from a {@link Class}.  Similar to {@link Class#getSimpleName()}, but it works fine
     * with anonymous classes.
     *
     * @param clazz the class; must not be {@code null}
     * @return the part of {@link Class#getName()} after the last {@code '.'}, or the whole name if it has none
     * @throws NullPointerException if {@code clazz} is {@code null}
     */
    public static String simpleClassName(Class<?> clazz) {
        String className = requireNonNull(clazz, "clazz").getName();
        final int lastDotIdx = className.lastIndexOf(PACKAGE_SEPARATOR_CHAR);
        if (lastDotIdx > -1) {
            return className.substring(lastDotIdx + 1);
        }
        return className;
    }

    /**
     * Escapes the specified value, if necessary according to
     * <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
     *
     * @param value The value which will be escaped according to
     *              <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
     * @return {@link CharSequence} the escaped value if necessary, or the value unchanged
     * @throws NullPointerException if {@code value} is {@code null}
     */
    public static CharSequence escapeCsv(CharSequence value) {
        return escapeCsv(value, false);
    }

    /**
     * Escapes the specified value, if necessary according to
     * <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
     *
     * @param value          The value which will be escaped according to
     *                       <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
     * @param trimWhiteSpace The value will first be trimmed of its optional white-space characters,
     *                       according to <a href="https://tools.ietf.org/html/rfc7230#section-3.2.3">RFC-7230</a>
     * @return {@link CharSequence} the escaped value if necessary, or the value unchanged
     * @throws NullPointerException if {@code value} is {@code null}
     */
    public static CharSequence escapeCsv(CharSequence value, boolean trimWhiteSpace) {
        int length = requireNonNull(value, "value").length();
        int start;
        int last;
        if (trimWhiteSpace) {
            start = indexOfFirstNonOwsChar(value, length);
            last = indexOfLastNonOwsChar(value, start, length);
        } else {
            start = 0;
            last = length - 1;
        }
        if (start > last) {
            // Empty input, or nothing but optional white space.
            return EMPTY_STRING;
        }

        int firstUnescapedSpecial = -1;
        boolean quoted = false;
        if (isDoubleQuote(value.charAt(start))) {
            quoted = isDoubleQuote(value.charAt(last)) && last > start;
            if (quoted) {
                // Already enclosed in quotes: only the content between them needs to be inspected.
                start++;
                last--;
            } else {
                // A leading quote without a matching trailing one must itself be escaped.
                firstUnescapedSpecial = start;
            }
        }

        if (firstUnescapedSpecial < 0) {
            if (quoted) {
                // Inside quotes only a lone (not doubled) double-quote needs escaping.
                for (int i = start; i <= last; i++) {
                    if (isDoubleQuote(value.charAt(i))) {
                        if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
                            firstUnescapedSpecial = i;
                            break;
                        }
                        // Skip the second quote of an already-escaped pair.
                        i++;
                    }
                }
            } else {
                for (int i = start; i <= last; i++) {
                    char c = value.charAt(i);
                    if (c == LINE_FEED || c == CARRIAGE_RETURN || c == COMMA) {
                        firstUnescapedSpecial = i;
                        break;
                    }
                    if (isDoubleQuote(c)) {
                        if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
                            firstUnescapedSpecial = i;
                            break;
                        }
                        // Skip the second quote of an already-escaped pair.
                        i++;
                    }
                }
            }

            if (firstUnescapedSpecial < 0) {
                // Special characters is not found or all of them already escaped.
                // In the most cases returns a same string. New string will be instantiated (via StringBuilder)
                // only if it really needed. It's important to prevent GC extra load.
                return quoted? value.subSequence(start - 1, last + 2) : value.subSequence(start, last + 1);
            }
        }

        StringBuilder result = new StringBuilder(last - start + 1 + CSV_NUMBER_ESCAPE_CHARACTERS);
        // Everything before the first offending character can be copied verbatim.
        result.append(DOUBLE_QUOTE).append(value, start, firstUnescapedSpecial);
        for (int i = firstUnescapedSpecial; i <= last; i++) {
            char c = value.charAt(i);
            if (isDoubleQuote(c)) {
                // Emit every quote as a doubled quote; an existing pair is consumed as a single unit.
                result.append(DOUBLE_QUOTE);
                if (i < last && isDoubleQuote(value.charAt(i + 1))) {
                    i++;
                }
            }
            result.append(c);
        }
        return result.append(DOUBLE_QUOTE);
    }

    /**
     * Unescapes the specified escaped CSV field, if necessary according to
     * <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
     *
     * @param value The escaped CSV field which will be unescaped according to
     *              <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
     * @return {@link CharSequence} the unescaped value if necessary, or the value unchanged
     * @throws NullPointerException     if {@code value} is {@code null}
     * @throws IllegalArgumentException if {@code value} is not a validly escaped CSV field
     */
    public static CharSequence unescapeCsv(CharSequence value) {
        int length = requireNonNull(value, "value").length();
        if (length == 0) {
            return value;
        }
        int last = length - 1;
        boolean quoted = isDoubleQuote(value.charAt(0)) && isDoubleQuote(value.charAt(last)) && length != 1;
        if (!quoted) {
            validateCsvFormat(value);
            return value;
        }
        // NOTE(review): stringBuilder() presumably hands out a reusable, emptied per-thread builder - confirm
        // against InternalThreadLocalMap.
        StringBuilder unescaped = InternalThreadLocalMap.get().stringBuilder();
        for (int i = 1; i < last; i++) {
            char current = value.charAt(i);
            if (current == DOUBLE_QUOTE) {
                if (isDoubleQuote(value.charAt(i + 1)) && (i + 1) != last) {
                    // Followed by a double-quote but not the last character
                    // Just skip the next double-quote
                    i++;
                } else {
                    // Not followed by a double-quote or the following double-quote is the last character
                    throw newInvalidEscapedCsvFieldException(value, i);
                }
            }
            unescaped.append(current);
        }
        return unescaped.toString();
    }

    /**
     * Unescapes the specified escaped CSV fields according to
     * <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>.
     *
     * @param value A string with multiple CSV escaped fields which will be unescaped according to
     *              <a href="https://tools.ietf.org/html/rfc4180#section-2">RFC-4180</a>
     * @return {@link List} the list of unescaped fields
     * @throws IllegalArgumentException if {@code value} contains an invalidly escaped field
     */
    public static List<CharSequence> unescapeCsvFields(CharSequence value) {
        List<CharSequence> unescaped = new ArrayList<>(2);
        // NOTE(review): stringBuilder() presumably hands out a reusable, emptied per-thread builder - confirm
        // against InternalThreadLocalMap.
        StringBuilder current = InternalThreadLocalMap.get().stringBuilder();
        boolean quoted = false;
        int last = value.length() - 1;
        for (int i = 0; i <= last; i++) {
            char c = value.charAt(i);
            if (quoted) {
                switch (c) {
                    case DOUBLE_QUOTE:
                        if (i == last) {
                            // Add the last field and return
                            unescaped.add(current.toString());
                            return unescaped;
                        }
                        char next = value.charAt(++i);
                        if (next == DOUBLE_QUOTE) {
                            // 2 double-quotes should be unescaped to one
                            current.append(DOUBLE_QUOTE);
                            break;
                        }
                        if (next == COMMA) {
                            // This is the end of a field. Let's start to parse the next field.
                            quoted = false;
                            unescaped.add(current.toString());
                            current.setLength(0);
                            break;
                        }
                        // double-quote followed by other character is invalid
                        throw newInvalidEscapedCsvFieldException(value, i - 1);
                    default:
                        current.append(c);
                }
            } else {
                switch (c) {
                    case COMMA:
                        // Start to parse the next field
                        unescaped.add(current.toString());
                        current.setLength(0);
                        break;
                    case DOUBLE_QUOTE:
                        if (current.length() == 0) {
                            quoted = true;
                            break;
                        }
                        // double-quote appears without being enclosed with double-quotes
                        // fall through
                    case LINE_FEED:
                        // fall through
                    case CARRIAGE_RETURN:
                        // special characters appears without being enclosed with double-quotes
                        throw newInvalidEscapedCsvFieldException(value, i);
                    default:
                        current.append(c);
                }
            }
        }
        if (quoted) {
            // Input ended while still inside a quoted field.
            throw newInvalidEscapedCsvFieldException(value, last);
        }
        unescaped.add(current.toString());
        return unescaped;
    }

    /**
     * Validate if {@code value} is a valid csv field without double-quotes.
     *
     * @throws IllegalArgumentException if {@code value} needs to be encoded with double-quotes.
     */
    private static void validateCsvFormat(CharSequence value) {
        int length = value.length();
        for (int i = 0; i < length; i++) {
            switch (value.charAt(i)) {
                case DOUBLE_QUOTE:
                case LINE_FEED:
                case CARRIAGE_RETURN:
                case COMMA:
                    // If value contains any special character, it should be enclosed with double-quotes
                    throw newInvalidEscapedCsvFieldException(value, i);
                default:
            }
        }
    }

    /**
     * Builds (but does not throw) the exception reported for a malformed escaped CSV field.
     *
     * @param value the offending field
     * @param index the index within {@code value} at which the problem was detected
     */
    private static IllegalArgumentException newInvalidEscapedCsvFieldException(CharSequence value, int index) {
        return new IllegalArgumentException("invalid escaped CSV field: " + value + " index: " + index);
    }

    /**
     * Get the length of a string, {@code null} input is considered {@code 0} length.
     *
     * @param s the string, may be {@code null}
     * @return {@code s.length()}, or {@code 0} if {@code s} is {@code null}
     */
    public static int length(String s) {
        return s == null ? 0 : s.length();
    }

    /**
     * Determine if a string is {@code null} or {@link String#isEmpty()} returns {@code true}.
     *
     * @param s the string, may be {@code null}
     * @return {@code true} if {@code s} is {@code null} or empty
     */
    public static boolean isNullOrEmpty(String s) {
        return s == null || s.isEmpty();
    }

    /**
     * Find the index of the first non-white space character in {@code seq} starting at {@code offset}.
     *
     * @param seq    The string to search.
     * @param offset The offset to start searching at.
     * @return the index of the first non-white space character or {@code -1} if none was found.
     */
    public static int indexOfNonWhiteSpace(CharSequence seq, int offset) {
        for (; offset < seq.length(); ++offset) {
            if (!Character.isWhitespace(seq.charAt(offset))) {
                return offset;
            }
        }
        return -1;
    }

    /**
     * Find the index of the first white space character in {@code seq} starting at {@code offset}.
     *
     * @param seq    The string to search.
     * @param offset The offset to start searching at.
     * @return the index of the first white space character or {@code -1} if none was found.
     */
    public static int indexOfWhiteSpace(CharSequence seq, int offset) {
        for (; offset < seq.length(); ++offset) {
            if (Character.isWhitespace(seq.charAt(offset))) {
                return offset;
            }
        }
        return -1;
    }

    /**
     * Determine if {@code c} lies within the range of values defined for
     * <a href="https://unicode.org/glossary/#surrogate_code_point">Surrogate Code Point</a>.
     *
     * @param c the character to check.
     * @return {@code true} if {@code c} lies within the range of values defined for
     * Surrogate Code Point. {@code false} otherwise.
     */
    public static boolean isSurrogate(char c) {
        return c >= '\uD800' && c <= '\uDFFF';
    }

    private static boolean isDoubleQuote(char c) {
        return c == DOUBLE_QUOTE;
    }

    /**
     * Determine if the string {@code s} ends with the char {@code c}.
     *
     * @param s the string to test
     * @param c the tested char
     * @return true if {@code s} ends with the char {@code c}
     */
    public static boolean endsWith(CharSequence s, char c) {
        int len = s.length();
        return len > 0 && s.charAt(len - 1) == c;
    }

    /**
     * Trim optional white-space characters from the specified value,
     * according to <a href="https://tools.ietf.org/html/rfc7230#section-3.2.3">RFC-7230</a>.
     *
     * @param value the value to trim
     * @return {@link CharSequence} the trimmed value if necessary, or the value unchanged
     */
    public static CharSequence trimOws(CharSequence value) {
        final int length = value.length();
        if (length == 0) {
            return value;
        }
        int start = indexOfFirstNonOwsChar(value, length);
        int end = indexOfLastNonOwsChar(value, start, length);
        // Avoid creating a new sequence when there was nothing to trim.
        return start == 0 && end == length - 1 ? value : value.subSequence(start, end + 1);
    }

    /**
     * Returns a char sequence that contains all {@code elements} joined by a given separator.
     * <p>
     * An empty {@code elements} yields {@link #EMPTY_STRING}; a single element is returned as-is
     * without copying.
     *
     * @param separator for each element
     * @param elements  to join together
     *
     * @return a char sequence joined by a given separator.
     * @throws NullPointerException if {@code separator} or {@code elements} is {@code null}
     */
    public static CharSequence join(CharSequence separator, Iterable<? extends CharSequence> elements) {
        Objects.requireNonNull(separator, "separator");
        Objects.requireNonNull(elements, "elements");

        Iterator<? extends CharSequence> iterator = elements.iterator();
        if (!iterator.hasNext()) {
            return EMPTY_STRING;
        }

        CharSequence firstElement = iterator.next();
        if (!iterator.hasNext()) {
            return firstElement;
        }

        StringBuilder builder = new StringBuilder(firstElement);
        do {
            builder.append(separator).append(iterator.next());
        } while (iterator.hasNext());

        return builder;
    }

    /**
     * @return the index of the first character that is not OWS (space or tab), or {@code length}
     *         if every character is OWS.
     */
    private static int indexOfFirstNonOwsChar(CharSequence value, int length) {
        int i = 0;
        while (i < length && isOws(value.charAt(i))) {
            i++;
        }
        return i;
    }

    /**
     * @return the index of the last character after {@code start} that is not OWS (space or tab),
     *         or {@code start} if every character after {@code start} is OWS. When {@code start}
     *         is not smaller than {@code length - 1} the result is {@code length - 1}.
     */
    private static int indexOfLastNonOwsChar(CharSequence value, int start, int length) {
        int i = length - 1;
        while (i > start && isOws(value.charAt(i))) {
            i--;
        }
        return i;
    }

    /** Tells whether {@code c} is an optional-white-space character, i.e. a space or a horizontal tab. */
    private static boolean isOws(char c) {
        return c == SPACE || c == TAB;
    }
}