diff --git a/common/src/main/java/io/netty/util/internal/StringUtil.java b/common/src/main/java/io/netty/util/internal/StringUtil.java
index 5332464f21..f9798605e0 100644
--- a/common/src/main/java/io/netty/util/internal/StringUtil.java
+++ b/common/src/main/java/io/netty/util/internal/StringUtil.java
@@ -228,7 +228,7 @@ public final class StringUtil {
      * with anonymous classes.
      */
     public static String simpleClassName(Class clazz) {
-        String className = ObjectUtil.checkNotNull(clazz, "clazz").getName();
+        String className = checkNotNull(clazz, "clazz").getName();
         final int lastDotIdx = className.lastIndexOf(PACKAGE_SEPARATOR_CHAR);
         if (lastDotIdx > -1) {
             return className.substring(lastDotIdx + 1);
@@ -260,67 +260,87 @@ public final class StringUtil {
      */
     public static CharSequence escapeCsv(CharSequence value, boolean trimWhiteSpace) {
         int length = checkNotNull(value, "value").length();
-        if (length == 0) {
-            return value;
-        }
-
-        int start = 0;
-        int last = length - 1;
-        boolean trimmed = false;
+        int start;
+        int last;
         if (trimWhiteSpace) {
             start = indexOfFirstNonOwsChar(value, length);
-            if (start == length) {
-                return EMPTY_STRING;
-            }
             last = indexOfLastNonOwsChar(value, start, length);
-            trimmed = start > 0 || last < length - 1;
-            if (trimmed) {
-                length = last - start + 1;
+        } else {
+            start = 0;
+            last = length - 1;
+        }
+        if (start > last) {
+            // Nothing to escape: the value is empty, or no character was left to keep when trimming.
+            return EMPTY_STRING;
+        }
+
+        // Index of the first character that forces a rewrite; stays negative while none is known.
+        int firstUnescapedSpecial = -1;
+        boolean quoted = false;
+        if (isDoubleQuote(value.charAt(start))) {
+            quoted = isDoubleQuote(value.charAt(last)) && last > start;
+            if (quoted) {
+                // From here on only the content between the enclosing quotes is inspected.
+                start++;
+                last--;
+            } else {
+                // A leading quote without a matching trailing quote has to be escaped itself.
+                firstUnescapedSpecial = start;
             }
         }
 
-        StringBuilder result = new StringBuilder(length + CSV_NUMBER_ESCAPE_CHARACTERS);
-        boolean quoted = isDoubleQuote(value.charAt(start)) && isDoubleQuote(value.charAt(last)) && length != 1;
-        boolean foundSpecialCharacter = false;
-        boolean escapedDoubleQuote = false;
-        for (int i = start; i <= last; i++) {
-            char current = value.charAt(i);
-            switch (current) {
-                case DOUBLE_QUOTE:
-                    if (i == start || i == last) {
-                        if (!quoted) {
-                            result.append(DOUBLE_QUOTE);
-                        } else {
-                            continue;
-                        }
-                    } else {
-                        boolean isNextCharDoubleQuote = isDoubleQuote(value.charAt(i + 1));
-                        if (!isDoubleQuote(value.charAt(i - 1)) &&
-                                (!isNextCharDoubleQuote || i + 1 == last)) {
-                            result.append(DOUBLE_QUOTE);
-                            escapedDoubleQuote = true;
+        if (firstUnescapedSpecial < 0) {
+            if (quoted) {
+                // Inside quotes only a double quote that is not followed by a second one needs escaping.
+                for (int i = start; i <= last; i++) {
+                    if (isDoubleQuote(value.charAt(i))) {
+                        if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
+                            firstUnescapedSpecial = i;
+                            break;
                         }
+                        i++;
+                    }
+                }
+            } else {
+                // Unquoted: a line feed, carriage return, comma or unpaired double quote forces quoting.
+                for (int i = start; i <= last; i++) {
+                    char c = value.charAt(i);
+                    if (c == LINE_FEED || c == CARRIAGE_RETURN || c == COMMA) {
+                        firstUnescapedSpecial = i;
                         break;
                     }
-                case LINE_FEED:
-                case CARRIAGE_RETURN:
-                case COMMA:
-                    foundSpecialCharacter = true;
+                    if (isDoubleQuote(c)) {
+                        if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
+                            firstUnescapedSpecial = i;
+                            break;
+                        }
+                        i++;
+                    }
+                }
+            }
+
+            if (firstUnescapedSpecial < 0) {
+                // No special character was found, or every one of them is already escaped.
+                // Return a subsequence of the input instead of building a new string (via StringBuilder);
+                // in most cases that is the very same object, which avoids extra GC load.
+                return quoted ? value.subSequence(start - 1, last + 2) : value.subSequence(start, last + 1);
             }
-            result.append(current);
         }
 
-        if (escapedDoubleQuote || foundSpecialCharacter && !quoted) {
-            return quote(result);
+        // Slow path: copy the clean prefix as is, then write every double quote as an escaped pair.
+        StringBuilder result = new StringBuilder(last - start + 1 + CSV_NUMBER_ESCAPE_CHARACTERS);
+        result.append(DOUBLE_QUOTE).append(value, start, firstUnescapedSpecial);
+        for (int i = firstUnescapedSpecial; i <= last; i++) {
+            char c = value.charAt(i);
+            if (isDoubleQuote(c)) {
+                result.append(DOUBLE_QUOTE);
+                if (i < last && isDoubleQuote(value.charAt(i + 1))) {
+                    i++;
+                }
+            }
+            result.append(c);
         }
-        if (trimmed) {
-            return quoted ? quote(result) : result;
-        }
-        return value;
-    }
-
-    private static StringBuilder quote(StringBuilder builder) {
-        return builder.insert(0, DOUBLE_QUOTE).append(DOUBLE_QUOTE);
+        return result.append(DOUBLE_QUOTE);
     }
 
     /**
diff --git a/common/src/test/java/io/netty/util/internal/StringUtilTest.java b/common/src/test/java/io/netty/util/internal/StringUtilTest.java
index c8a13f58c0..574e6c09db 100644
--- a/common/src/test/java/io/netty/util/internal/StringUtilTest.java
+++ b/common/src/test/java/io/netty/util/internal/StringUtilTest.java
@@ -377,6 +377,18 @@ public class StringUtilTest {
         escapeCsvWithTrimming("\ttest,ing ", "\"test,ing\"");
     }
 
+    @Test
+    public void escapeCsvGarbageFree() {
+        // 'StringUtil#escapeCsv()' should return the same string object if the string did not change.
+        assertSame("1", StringUtil.escapeCsv("1", true));
+        assertSame(" 123 ", StringUtil.escapeCsv(" 123 ", false));
+        assertSame("\" 123 \"", StringUtil.escapeCsv("\" 123 \"", true));
+        assertSame("\"\"", StringUtil.escapeCsv("\"\"", true));
+        assertSame("123 \"\"", StringUtil.escapeCsv("123 \"\"", true));
+        assertSame("123\"\"321", StringUtil.escapeCsv("123\"\"321", true));
+        assertSame("\"123\"\"321\"", StringUtil.escapeCsv("\"123\"\"321\"", true));
+    }
+
     @Test
     public void testUnescapeCsv() {
         assertEquals("", unescapeCsv(""));
diff --git a/microbench/src/main/java/io/netty/microbench/internal/EscapeCsvBenchmark.java b/microbench/src/main/java/io/netty/microbench/internal/EscapeCsvBenchmark.java
new file mode 100644
index 0000000000..e6750e8d18
--- /dev/null
+++ b/microbench/src/main/java/io/netty/microbench/internal/EscapeCsvBenchmark.java
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2017 The Netty Project
+ *
+ * The Netty Project licenses this file to you under the Apache License,
+ * version 2.0 (the "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at:
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package io.netty.microbench.internal;
+
+import io.netty.microbench.util.AbstractMicrobenchmark;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
+
+import java.util.concurrent.TimeUnit;
+
+import static io.netty.util.internal.ObjectUtil.*;
+import static io.netty.util.internal.StringUtil.*;
+
+/**
+ * Compares the rewritten {@link io.netty.util.internal.StringUtil#escapeCsv(CharSequence, boolean)}
+ * with a local copy of the implementation it replaces.
+ */
+@Threads(1)
+@Warmup(iterations = 3)
+@Measurement(iterations = 3)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+public class EscapeCsvBenchmark extends AbstractMicrobenchmark {
+
+    private static final String value1024;
+    private static final String value1024commaAtEnd;
+    // Long inputs: 1024 characters cycling through 'A'..'J', without and with a trailing comma.
+    static {
+        StringBuilder s1024 = new StringBuilder(1024);
+        while (s1024.length() < 1024) {
+            // The cast is required: 'A' + int is an int, and append(int) would write decimal digits.
+            s1024.append((char) ('A' + s1024.length() % 10));
+        }
+        value1024 = s1024.toString();
+        value1024commaAtEnd = value1024 + ',';
+    }
+
+    @Param("netty")
+    private String value;
+
+    @Override
+    protected ChainedOptionsBuilder newOptionsBuilder() throws Exception {
+        return super.newOptionsBuilder()
+            .param("value", "netty")
+            .param("value", "\"123\"", "need\"escape", "need,quotes", "  trim-me  ", "short-comma-ended,")
+            .param("value", value1024)
+            .param("value", value1024commaAtEnd);
+    }
+
+    /**
+     * Copy of the implementation that {@code StringUtil.escapeCsv} used before it was rewritten;
+     * kept here only as the baseline for the comparison.
+     */
+    private static CharSequence escapeCsvOld(CharSequence value, boolean trimWhiteSpace) {
+        int length = checkNotNull(value, "value").length();
+        if (length == 0) {
+            return value;
+        }
+
+        int start = 0;
+        int last = length - 1;
+        boolean trimmed = false;
+        if (trimWhiteSpace) {
+            start = indexOfFirstNonOwsChar(value, length);
+            if (start == length) {
+                return EMPTY_STRING;
+            }
+            last = indexOfLastNonOwsChar(value, start, length);
+            trimmed = start > 0 || last < length - 1;
+            if (trimmed) {
+                length = last - start + 1;
+            }
+        }
+
+        StringBuilder result = new StringBuilder(length + 7);
+        boolean quoted = isDoubleQuote(value.charAt(start)) && isDoubleQuote(value.charAt(last)) && length != 1;
+        boolean foundSpecialCharacter = false;
+        boolean escapedDoubleQuote = false;
+        for (int i = start; i <= last; i++) {
+            char current = value.charAt(i);
+            switch (current) {
+                case DOUBLE_QUOTE:
+                    if (i == start || i == last) {
+                        if (!quoted) {
+                            result.append(DOUBLE_QUOTE);
+                        } else {
+                            continue;
+                        }
+                    } else {
+                        boolean isNextCharDoubleQuote = isDoubleQuote(value.charAt(i + 1));
+                        if (!isDoubleQuote(value.charAt(i - 1)) &&
+                                (!isNextCharDoubleQuote || i + 1 == last)) {
+                            result.append(DOUBLE_QUOTE);
+                            escapedDoubleQuote = true;
+                        }
+                        break;
+                    }
+                case LINE_FEED:
+                case CARRIAGE_RETURN:
+                case COMMA:
+                    foundSpecialCharacter = true;
+            }
+            result.append(current);
+        }
+
+        if (escapedDoubleQuote || foundSpecialCharacter && !quoted) {
+            return quote(result);
+        }
+        if (trimmed) {
+            return quoted ? quote(result) : result;
+        }
+        return value;
+    }
+
+    // Helpers backing escapeCsvOld.
+    private static StringBuilder quote(StringBuilder builder) {
+        return builder.insert(0, DOUBLE_QUOTE).append(DOUBLE_QUOTE);
+    }
+
+    private static boolean isDoubleQuote(char c) {
+        return c == DOUBLE_QUOTE;
+    }
+
+    private static int indexOfFirstNonOwsChar(CharSequence value, int length) {
+        int i = 0;
+        while (i < length && isOws(value.charAt(i))) {
+            i++;
+        }
+        return i;
+    }
+
+    private static int indexOfLastNonOwsChar(CharSequence value, int start, int length) {
+        int i = length - 1;
+        while (i > start && isOws(value.charAt(i))) {
+            i--;
+        }
+        return i;
+    }
+
+    private static boolean isOws(char c) {
+        return c == SPACE || c == TAB;
+    }
+
+    @Benchmark
+    public CharSequence escapeCsvOld() {
+        return escapeCsvOld(value, true);
+    }
+
+    @Benchmark
+    public CharSequence escapeCsvNew() {
+        return escapeCsv(value, true);
+    }
+
+}