Prevent unnecessary allocations in StringUtil#escapeCsv
Motivation: `StringUtil#escapeCsv` creates a new `StringBuilder` for each value, even if the same string is returned in the end. Modifications: Create a new `StringBuilder` only if it is really needed. Otherwise, return the original string (or just a trimmed substring). Result: Less GC load. Up to 4x faster processing of strings that do not need to be changed.
This commit is contained in:
parent
94c0ef3c96
commit
aa38b6a769
@ -228,7 +228,7 @@ public final class StringUtil {
|
|||||||
* with anonymous classes.
|
* with anonymous classes.
|
||||||
*/
|
*/
|
||||||
public static String simpleClassName(Class<?> clazz) {
|
public static String simpleClassName(Class<?> clazz) {
|
||||||
String className = ObjectUtil.checkNotNull(clazz, "clazz").getName();
|
String className = checkNotNull(clazz, "clazz").getName();
|
||||||
final int lastDotIdx = className.lastIndexOf(PACKAGE_SEPARATOR_CHAR);
|
final int lastDotIdx = className.lastIndexOf(PACKAGE_SEPARATOR_CHAR);
|
||||||
if (lastDotIdx > -1) {
|
if (lastDotIdx > -1) {
|
||||||
return className.substring(lastDotIdx + 1);
|
return className.substring(lastDotIdx + 1);
|
||||||
@ -260,67 +260,80 @@ public final class StringUtil {
|
|||||||
*/
|
*/
|
||||||
public static CharSequence escapeCsv(CharSequence value, boolean trimWhiteSpace) {
|
public static CharSequence escapeCsv(CharSequence value, boolean trimWhiteSpace) {
|
||||||
int length = checkNotNull(value, "value").length();
|
int length = checkNotNull(value, "value").length();
|
||||||
if (length == 0) {
|
int start;
|
||||||
return value;
|
int last;
|
||||||
}
|
|
||||||
|
|
||||||
int start = 0;
|
|
||||||
int last = length - 1;
|
|
||||||
boolean trimmed = false;
|
|
||||||
if (trimWhiteSpace) {
|
if (trimWhiteSpace) {
|
||||||
start = indexOfFirstNonOwsChar(value, length);
|
start = indexOfFirstNonOwsChar(value, length);
|
||||||
if (start == length) {
|
|
||||||
return EMPTY_STRING;
|
|
||||||
}
|
|
||||||
last = indexOfLastNonOwsChar(value, start, length);
|
last = indexOfLastNonOwsChar(value, start, length);
|
||||||
trimmed = start > 0 || last < length - 1;
|
} else {
|
||||||
if (trimmed) {
|
start = 0;
|
||||||
length = last - start + 1;
|
last = length - 1;
|
||||||
|
}
|
||||||
|
if (start > last) {
|
||||||
|
return EMPTY_STRING;
|
||||||
|
}
|
||||||
|
|
||||||
|
int firstUnescapedSpecial = -1;
|
||||||
|
boolean quoted = false;
|
||||||
|
if (isDoubleQuote(value.charAt(start))) {
|
||||||
|
quoted = isDoubleQuote(value.charAt(last)) && last > start;
|
||||||
|
if (quoted) {
|
||||||
|
start++;
|
||||||
|
last--;
|
||||||
|
} else {
|
||||||
|
firstUnescapedSpecial = start;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
StringBuilder result = new StringBuilder(length + CSV_NUMBER_ESCAPE_CHARACTERS);
|
if (firstUnescapedSpecial < 0) {
|
||||||
boolean quoted = isDoubleQuote(value.charAt(start)) && isDoubleQuote(value.charAt(last)) && length != 1;
|
if (quoted) {
|
||||||
boolean foundSpecialCharacter = false;
|
for (int i = start; i <= last; i++) {
|
||||||
boolean escapedDoubleQuote = false;
|
if (isDoubleQuote(value.charAt(i))) {
|
||||||
for (int i = start; i <= last; i++) {
|
if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
|
||||||
char current = value.charAt(i);
|
firstUnescapedSpecial = i;
|
||||||
switch (current) {
|
break;
|
||||||
case DOUBLE_QUOTE:
|
|
||||||
if (i == start || i == last) {
|
|
||||||
if (!quoted) {
|
|
||||||
result.append(DOUBLE_QUOTE);
|
|
||||||
} else {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
boolean isNextCharDoubleQuote = isDoubleQuote(value.charAt(i + 1));
|
|
||||||
if (!isDoubleQuote(value.charAt(i - 1)) &&
|
|
||||||
(!isNextCharDoubleQuote || i + 1 == last)) {
|
|
||||||
result.append(DOUBLE_QUOTE);
|
|
||||||
escapedDoubleQuote = true;
|
|
||||||
}
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int i = start; i <= last; i++) {
|
||||||
|
char c = value.charAt(i);
|
||||||
|
if (c == LINE_FEED || c == CARRIAGE_RETURN || c == COMMA) {
|
||||||
|
firstUnescapedSpecial = i;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case LINE_FEED:
|
if (isDoubleQuote(c)) {
|
||||||
case CARRIAGE_RETURN:
|
if (i == last || !isDoubleQuote(value.charAt(i + 1))) {
|
||||||
case COMMA:
|
firstUnescapedSpecial = i;
|
||||||
foundSpecialCharacter = true;
|
break;
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (firstUnescapedSpecial < 0) {
|
||||||
|
// No special characters were found, or all of them are already escaped.
|
||||||
|
// In most cases the same string is returned. A new string is instantiated (via StringBuilder)
|
||||||
|
// only if it is really needed. This is important to avoid extra GC load.
|
||||||
|
return quoted? value.subSequence(start - 1, last + 2) : value.subSequence(start, last + 1);
|
||||||
}
|
}
|
||||||
result.append(current);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (escapedDoubleQuote || foundSpecialCharacter && !quoted) {
|
StringBuilder result = new StringBuilder(last - start + 1 + CSV_NUMBER_ESCAPE_CHARACTERS);
|
||||||
return quote(result);
|
result.append(DOUBLE_QUOTE).append(value, start, firstUnescapedSpecial);
|
||||||
|
for (int i = firstUnescapedSpecial; i <= last; i++) {
|
||||||
|
char c = value.charAt(i);
|
||||||
|
if (isDoubleQuote(c)) {
|
||||||
|
result.append(DOUBLE_QUOTE);
|
||||||
|
if (i < last && isDoubleQuote(value.charAt(i + 1))) {
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.append(c);
|
||||||
}
|
}
|
||||||
if (trimmed) {
|
return result.append(DOUBLE_QUOTE);
|
||||||
return quoted ? quote(result) : result;
|
|
||||||
}
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static StringBuilder quote(StringBuilder builder) {
|
|
||||||
return builder.insert(0, DOUBLE_QUOTE).append(DOUBLE_QUOTE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -377,6 +377,18 @@ public class StringUtilTest {
|
|||||||
escapeCsvWithTrimming("\ttest,ing ", "\"test,ing\"");
|
escapeCsvWithTrimming("\ttest,ing ", "\"test,ing\"");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void escapeCsvGarbageFree() {
|
||||||
|
// 'StringUtil#escapeCsv()' should return the same string object if the string was not changed.
|
||||||
|
assertSame("1", StringUtil.escapeCsv("1", true));
|
||||||
|
assertSame(" 123 ", StringUtil.escapeCsv(" 123 ", false));
|
||||||
|
assertSame("\" 123 \"", StringUtil.escapeCsv("\" 123 \"", true));
|
||||||
|
assertSame("\"\"", StringUtil.escapeCsv("\"\"", true));
|
||||||
|
assertSame("123 \"\"", StringUtil.escapeCsv("123 \"\"", true));
|
||||||
|
assertSame("123\"\"321", StringUtil.escapeCsv("123\"\"321", true));
|
||||||
|
assertSame("\"123\"\"321\"", StringUtil.escapeCsv("\"123\"\"321\"", true));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testUnescapeCsv() {
|
public void testUnescapeCsv() {
|
||||||
assertEquals("", unescapeCsv(""));
|
assertEquals("", unescapeCsv(""));
|
||||||
|
@ -0,0 +1,160 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2017 The Netty Project
|
||||||
|
*
|
||||||
|
* The Netty Project licenses this file to you under the Apache License,
|
||||||
|
* version 2.0 (the "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at:
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package io.netty.microbench.internal;
|
||||||
|
|
||||||
|
import io.netty.microbench.util.AbstractMicrobenchmark;
|
||||||
|
import org.openjdk.jmh.annotations.Benchmark;
|
||||||
|
import org.openjdk.jmh.annotations.Measurement;
|
||||||
|
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||||
|
import org.openjdk.jmh.annotations.Param;
|
||||||
|
import org.openjdk.jmh.annotations.Threads;
|
||||||
|
import org.openjdk.jmh.annotations.Warmup;
|
||||||
|
import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
|
||||||
|
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import static io.netty.util.internal.ObjectUtil.*;
|
||||||
|
import static io.netty.util.internal.StringUtil.*;
|
||||||
|
|
||||||
|
@Threads(1)
|
||||||
|
@Warmup(iterations = 3)
|
||||||
|
@Measurement(iterations = 3)
|
||||||
|
@OutputTimeUnit(TimeUnit.MICROSECONDS)
|
||||||
|
public class EscapeCsvBenchmark extends AbstractMicrobenchmark {
|
||||||
|
|
||||||
|
private static final String value1024;
|
||||||
|
private static final String value1024commaAtEnd;
|
||||||
|
static {
|
||||||
|
StringBuilder s1024 = new StringBuilder(1024);
|
||||||
|
while (s1024.length() < 1024) {
|
||||||
|
s1024.append('A' + s1024.length() % 10);
|
||||||
|
}
|
||||||
|
value1024 = s1024.toString();
|
||||||
|
value1024commaAtEnd = value1024 + ',';
|
||||||
|
}
|
||||||
|
|
||||||
|
@Param("netty")
|
||||||
|
private String value;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected ChainedOptionsBuilder newOptionsBuilder() throws Exception {
|
||||||
|
return super.newOptionsBuilder()
|
||||||
|
.param("value", "netty")
|
||||||
|
.param("value", "\"123\"", "need\"escape", "need,quotes", " trim-me ", "short-comma-ended,")
|
||||||
|
.param("value", value1024)
|
||||||
|
.param("value", value1024commaAtEnd);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static CharSequence escapeCsvOld(CharSequence value, boolean trimWhiteSpace) {
|
||||||
|
int length = checkNotNull(value, "value").length();
|
||||||
|
if (length == 0) {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
int start = 0;
|
||||||
|
int last = length - 1;
|
||||||
|
boolean trimmed = false;
|
||||||
|
if (trimWhiteSpace) {
|
||||||
|
start = indexOfFirstNonOwsChar(value, length);
|
||||||
|
if (start == length) {
|
||||||
|
return EMPTY_STRING;
|
||||||
|
}
|
||||||
|
last = indexOfLastNonOwsChar(value, start, length);
|
||||||
|
trimmed = start > 0 || last < length - 1;
|
||||||
|
if (trimmed) {
|
||||||
|
length = last - start + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
StringBuilder result = new StringBuilder(length + 7);
|
||||||
|
boolean quoted = isDoubleQuote(value.charAt(start)) && isDoubleQuote(value.charAt(last)) && length != 1;
|
||||||
|
boolean foundSpecialCharacter = false;
|
||||||
|
boolean escapedDoubleQuote = false;
|
||||||
|
for (int i = start; i <= last; i++) {
|
||||||
|
char current = value.charAt(i);
|
||||||
|
switch (current) {
|
||||||
|
case DOUBLE_QUOTE:
|
||||||
|
if (i == start || i == last) {
|
||||||
|
if (!quoted) {
|
||||||
|
result.append(DOUBLE_QUOTE);
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
boolean isNextCharDoubleQuote = isDoubleQuote(value.charAt(i + 1));
|
||||||
|
if (!isDoubleQuote(value.charAt(i - 1)) &&
|
||||||
|
(!isNextCharDoubleQuote || i + 1 == last)) {
|
||||||
|
result.append(DOUBLE_QUOTE);
|
||||||
|
escapedDoubleQuote = true;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case LINE_FEED:
|
||||||
|
case CARRIAGE_RETURN:
|
||||||
|
case COMMA:
|
||||||
|
foundSpecialCharacter = true;
|
||||||
|
}
|
||||||
|
result.append(current);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (escapedDoubleQuote || foundSpecialCharacter && !quoted) {
|
||||||
|
return quote(result);
|
||||||
|
}
|
||||||
|
if (trimmed) {
|
||||||
|
return quoted? quote(result) : result;
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static StringBuilder quote(StringBuilder builder) {
|
||||||
|
return builder.insert(0, DOUBLE_QUOTE).append(DOUBLE_QUOTE);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isDoubleQuote(char c) {
|
||||||
|
return c == DOUBLE_QUOTE;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int indexOfFirstNonOwsChar(CharSequence value, int length) {
|
||||||
|
int i = 0;
|
||||||
|
while (i < length && isOws(value.charAt(i))) {
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int indexOfLastNonOwsChar(CharSequence value, int start, int length) {
|
||||||
|
int i = length - 1;
|
||||||
|
while (i > start && isOws(value.charAt(i))) {
|
||||||
|
i--;
|
||||||
|
}
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isOws(char c) {
|
||||||
|
return c == SPACE || c == TAB;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public CharSequence escapeCsvOld() {
|
||||||
|
return escapeCsvOld(value, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public CharSequence escapeCsvNew() {
|
||||||
|
return escapeCsv(value, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user