Prevent unnecessary allocations in StringUtil#escapeCsv

Motivation:

`StringUtil#escapeCsv` creates a new `StringBuilder` for every value, even if the same string is returned in the end.

Modifications:

Create a new `StringBuilder` only if it is really needed. Otherwise, return the original string (or just a trimmed subsequence of it).

Result:

Less GC load. Up to 4x faster for strings that do not need to be changed.
This commit is contained in:
Nikolay Fedorovskikh 2017-06-13 22:58:54 +05:00 committed by Scott Mitchell
parent 94c0ef3c96
commit aa38b6a769
3 changed files with 234 additions and 49 deletions

View File

@ -228,7 +228,7 @@ public final class StringUtil {
* with anonymous classes. * with anonymous classes.
*/ */
public static String simpleClassName(Class<?> clazz) { public static String simpleClassName(Class<?> clazz) {
String className = ObjectUtil.checkNotNull(clazz, "clazz").getName(); String className = checkNotNull(clazz, "clazz").getName();
final int lastDotIdx = className.lastIndexOf(PACKAGE_SEPARATOR_CHAR); final int lastDotIdx = className.lastIndexOf(PACKAGE_SEPARATOR_CHAR);
if (lastDotIdx > -1) { if (lastDotIdx > -1) {
return className.substring(lastDotIdx + 1); return className.substring(lastDotIdx + 1);
@ -260,67 +260,80 @@ public final class StringUtil {
*/ */
public static CharSequence escapeCsv(CharSequence value, boolean trimWhiteSpace) { public static CharSequence escapeCsv(CharSequence value, boolean trimWhiteSpace) {
int length = checkNotNull(value, "value").length(); int length = checkNotNull(value, "value").length();
if (length == 0) { int start;
return value; int last;
}
int start = 0;
int last = length - 1;
boolean trimmed = false;
if (trimWhiteSpace) { if (trimWhiteSpace) {
start = indexOfFirstNonOwsChar(value, length); start = indexOfFirstNonOwsChar(value, length);
if (start == length) { last = indexOfLastNonOwsChar(value, start, length);
} else {
start = 0;
last = length - 1;
}
if (start > last) {
return EMPTY_STRING; return EMPTY_STRING;
} }
last = indexOfLastNonOwsChar(value, start, length);
trimmed = start > 0 || last < length - 1; int firstUnescapedSpecial = -1;
if (trimmed) { boolean quoted = false;
length = last - start + 1; if (isDoubleQuote(value.charAt(start))) {
quoted = isDoubleQuote(value.charAt(last)) && last > start;
if (quoted) {
start++;
last--;
} else {
firstUnescapedSpecial = start;
} }
} }
StringBuilder result = new StringBuilder(length + CSV_NUMBER_ESCAPE_CHARACTERS); if (firstUnescapedSpecial < 0) {
boolean quoted = isDoubleQuote(value.charAt(start)) && isDoubleQuote(value.charAt(last)) && length != 1; if (quoted) {
boolean foundSpecialCharacter = false;
boolean escapedDoubleQuote = false;
for (int i = start; i <= last; i++) { for (int i = start; i <= last; i++) {
char current = value.charAt(i); if (isDoubleQuote(value.charAt(i))) {
switch (current) { if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
case DOUBLE_QUOTE: firstUnescapedSpecial = i;
if (i == start || i == last) {
if (!quoted) {
result.append(DOUBLE_QUOTE);
} else {
continue;
}
} else {
boolean isNextCharDoubleQuote = isDoubleQuote(value.charAt(i + 1));
if (!isDoubleQuote(value.charAt(i - 1)) &&
(!isNextCharDoubleQuote || i + 1 == last)) {
result.append(DOUBLE_QUOTE);
escapedDoubleQuote = true;
}
break; break;
} }
case LINE_FEED: i++;
case CARRIAGE_RETURN: }
case COMMA: }
foundSpecialCharacter = true; } else {
for (int i = start; i <= last; i++) {
char c = value.charAt(i);
if (c == LINE_FEED || c == CARRIAGE_RETURN || c == COMMA) {
firstUnescapedSpecial = i;
break;
}
if (isDoubleQuote(c)) {
if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
firstUnescapedSpecial = i;
break;
}
i++;
}
} }
result.append(current);
} }
if (escapedDoubleQuote || foundSpecialCharacter && !quoted) { if (firstUnescapedSpecial < 0) {
return quote(result); // Special characters is not found or all of them already escaped.
// In the most cases returns a same string. New string will be instantiated (via StringBuilder)
// only if it really needed. It's important to prevent GC extra load.
return quoted? value.subSequence(start - 1, last + 2) : value.subSequence(start, last + 1);
} }
if (trimmed) {
return quoted ? quote(result) : result;
}
return value;
} }
private static StringBuilder quote(StringBuilder builder) { StringBuilder result = new StringBuilder(last - start + 1 + CSV_NUMBER_ESCAPE_CHARACTERS);
return builder.insert(0, DOUBLE_QUOTE).append(DOUBLE_QUOTE); result.append(DOUBLE_QUOTE).append(value, start, firstUnescapedSpecial);
for (int i = firstUnescapedSpecial; i <= last; i++) {
char c = value.charAt(i);
if (isDoubleQuote(c)) {
result.append(DOUBLE_QUOTE);
if (i < last && isDoubleQuote(value.charAt(i + 1))) {
i++;
}
}
result.append(c);
}
return result.append(DOUBLE_QUOTE);
} }
/** /**

View File

@ -377,6 +377,18 @@ public class StringUtilTest {
escapeCsvWithTrimming("\ttest,ing ", "\"test,ing\""); escapeCsvWithTrimming("\ttest,ing ", "\"test,ing\"");
} }
@Test
public void escapeCsvGarbageFree() {
    // When a value needs no escaping, 'StringUtil#escapeCsv()' must return the very same
    // object it was given. Each input is bound to a local so the identity check compares
    // the argument with the result directly.
    final String plain = "1";
    assertSame(plain, StringUtil.escapeCsv(plain, true));

    // Trimming disabled: surrounding whitespace is kept, so nothing changes.
    final String padded = " 123 ";
    assertSame(padded, StringUtil.escapeCsv(padded, false));

    // Already quoted: whitespace inside the quotes is not trimmed.
    final String quotedPadded = "\" 123 \"";
    assertSame(quotedPadded, StringUtil.escapeCsv(quotedPadded, true));

    final String emptyQuoted = "\"\"";
    assertSame(emptyQuoted, StringUtil.escapeCsv(emptyQuoted, true));

    // Double quotes that are already doubled count as escaped.
    final String escapedAtEnd = "123 \"\"";
    assertSame(escapedAtEnd, StringUtil.escapeCsv(escapedAtEnd, true));

    final String escapedInside = "123\"\"321";
    assertSame(escapedInside, StringUtil.escapeCsv(escapedInside, true));

    final String quotedWithEscaped = "\"123\"\"321\"";
    assertSame(quotedWithEscaped, StringUtil.escapeCsv(quotedWithEscaped, true));
}
@Test @Test
public void testUnescapeCsv() { public void testUnescapeCsv() {
assertEquals("", unescapeCsv("")); assertEquals("", unescapeCsv(""));

View File

@ -0,0 +1,160 @@
/*
* Copyright 2017 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.microbench.internal;
import io.netty.microbench.util.AbstractMicrobenchmark;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
import java.util.concurrent.TimeUnit;
import static io.netty.util.internal.ObjectUtil.*;
import static io.netty.util.internal.StringUtil.*;
/**
 * Microbenchmark comparing the current {@code StringUtil.escapeCsv} with a verbatim copy of the
 * implementation it replaced ({@link #escapeCsvOld(CharSequence, boolean)}).
 *
 * <p>Both benchmarks are run with whitespace trimming enabled against every configured
 * {@code value} parameter.
 */
@Threads(1)
@Warmup(iterations = 3)
@Measurement(iterations = 3)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public class EscapeCsvBenchmark extends AbstractMicrobenchmark {

    /** A 1024-character sample made of the letters 'A'..'J'; contains nothing that needs escaping. */
    private static final String value1024;

    /** {@link #value1024} followed by a single comma, so the whole value must be scanned and quoted. */
    private static final String value1024commaAtEnd;

    static {
        StringBuilder s1024 = new StringBuilder(1024);
        while (s1024.length() < 1024) {
            // The cast is required: 'A' + int is an int, and append(int) would write the decimal
            // number ("65", "66", ...) instead of a letter.
            s1024.append((char) ('A' + s1024.length() % 10));
        }
        value1024 = s1024.toString();
        value1024commaAtEnd = value1024 + ',';
    }

    /** The CSV value under test; further values are registered in {@link #newOptionsBuilder()}. */
    @Param("netty")
    private String value;

    /**
     * Registers the values the benchmark is run against: short values (plain, already quoted,
     * needing escape, needing quotes, needing trimming, ending in a comma) and the two long samples.
     *
     * @return the options builder of the superclass with the {@code value} parameters added
     * @throws Exception if the superclass fails to create its options builder
     */
    @Override
    protected ChainedOptionsBuilder newOptionsBuilder() throws Exception {
        return super.newOptionsBuilder()
                .param("value", "netty")
                .param("value", "\"123\"", "need\"escape", "need,quotes", " trim-me ", "short-comma-ended,")
                .param("value", value1024)
                .param("value", value1024commaAtEnd);
    }

    /**
     * Copy of the previous {@code escapeCsv} implementation, kept unchanged as the baseline.
     * It always allocates a {@link StringBuilder} for non-empty input, even when the original
     * value ends up being returned.
     *
     * @param value the value to escape; must not be {@code null}
     * @param trimWhiteSpace whether leading and trailing spaces/tabs are dropped before escaping
     * @return the escaped value, or {@code value} itself when nothing had to change
     */
    private static CharSequence escapeCsvOld(CharSequence value, boolean trimWhiteSpace) {
        int length = checkNotNull(value, "value").length();
        if (length == 0) {
            return value;
        }

        int start = 0;
        int last = length - 1;
        boolean trimmed = false;
        if (trimWhiteSpace) {
            start = indexOfFirstNonOwsChar(value, length);
            if (start == length) {
                // Nothing but optional whitespace.
                return EMPTY_STRING;
            }
            last = indexOfLastNonOwsChar(value, start, length);
            trimmed = start > 0 || last < length - 1;
            if (trimmed) {
                length = last - start + 1;
            }
        }

        // Extra capacity leaves room for the quote characters that may be added.
        StringBuilder result = new StringBuilder(length + 7);
        boolean quoted = isDoubleQuote(value.charAt(start)) && isDoubleQuote(value.charAt(last)) && length != 1;
        boolean foundSpecialCharacter = false;
        boolean escapedDoubleQuote = false;
        for (int i = start; i <= last; i++) {
            char current = value.charAt(i);
            switch (current) {
                case DOUBLE_QUOTE:
                    if (i == start || i == last) {
                        if (!quoted) {
                            // Escape the boundary quote, then deliberately fall through so that it is
                            // also recorded as a special character.
                            result.append(DOUBLE_QUOTE);
                        } else {
                            // Enclosing quotes are dropped here and re-added by quote(...).
                            continue;
                        }
                    } else {
                        boolean isNextCharDoubleQuote = isDoubleQuote(value.charAt(i + 1));
                        if (!isDoubleQuote(value.charAt(i - 1)) &&
                                (!isNextCharDoubleQuote || i + 1 == last)) {
                            result.append(DOUBLE_QUOTE);
                            escapedDoubleQuote = true;
                        }
                        break;
                    }
                    // intentional fall-through
                case LINE_FEED:
                case CARRIAGE_RETURN:
                case COMMA:
                    foundSpecialCharacter = true;
            }
            result.append(current);
        }

        if (escapedDoubleQuote || foundSpecialCharacter && !quoted) {
            return quote(result);
        }
        if (trimmed) {
            return quoted ? quote(result) : result;
        }
        return value;
    }

    /** Wraps the content of {@code builder} in double quotes and returns the same builder. */
    private static StringBuilder quote(StringBuilder builder) {
        return builder.insert(0, DOUBLE_QUOTE).append(DOUBLE_QUOTE);
    }

    /** Returns {@code true} if {@code c} is the double-quote character. */
    private static boolean isDoubleQuote(char c) {
        return c == DOUBLE_QUOTE;
    }

    /**
     * Returns the index of the first character that is neither a space nor a tab,
     * or {@code length} if there is none.
     */
    private static int indexOfFirstNonOwsChar(CharSequence value, int length) {
        int i = 0;
        while (i < length && isOws(value.charAt(i))) {
            i++;
        }
        return i;
    }

    /**
     * Scans backwards from {@code length - 1} and returns the index of the last character that is
     * neither a space nor a tab; the scan never moves below {@code start}.
     */
    private static int indexOfLastNonOwsChar(CharSequence value, int start, int length) {
        int i = length - 1;
        while (i > start && isOws(value.charAt(i))) {
            i--;
        }
        return i;
    }

    /** Returns {@code true} if {@code c} is optional whitespace, i.e. a space or a tab. */
    private static boolean isOws(char c) {
        return c == SPACE || c == TAB;
    }

    /** Baseline: the previous implementation, with trimming enabled. */
    @Benchmark
    public CharSequence escapeCsvOld() {
        return escapeCsvOld(value, true);
    }

    /** Candidate: the current {@code StringUtil.escapeCsv}, with trimming enabled. */
    @Benchmark
    public CharSequence escapeCsvNew() {
        return escapeCsv(value, true);
    }
}