Add zero-copy text encoder
This commit is contained in:
parent
70adaaf6cf
commit
915afcbf4d
@ -198,9 +198,19 @@
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
<version>31.1-jre</version>
|
||||
<version>33.0.0-jre</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.openjdk.jmh</groupId>
|
||||
<artifactId>jmh-core</artifactId>
|
||||
<version>1.36</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.openjdk.jmh</groupId>
|
||||
<artifactId>jmh-generator-annprocess</artifactId>
|
||||
<version>1.36</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
@ -232,4 +242,88 @@
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
<profiles>
|
||||
<profile>
|
||||
<id>java9</id>
|
||||
<activation>
|
||||
<activeByDefault>true</activeByDefault>
|
||||
</activation>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>build-helper-maven-plugin</artifactId>
|
||||
<version>3.6.0</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>add-source</id>
|
||||
<phase>generate-sources</phase>
|
||||
<goals>
|
||||
<goal>add-source</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<sources>
|
||||
<source>src/java9/java</source>
|
||||
</sources>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
<profile>
|
||||
<id>benchmark</id>
|
||||
<activation>
|
||||
<activeByDefault>false</activeByDefault>
|
||||
</activation>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.openjdk.jmh</groupId>
|
||||
<artifactId>jmh-core</artifactId>
|
||||
<version>1.36</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.openjdk.jmh</groupId>
|
||||
<artifactId>jmh-generator-annprocess</artifactId>
|
||||
<version>1.36</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>build-helper-maven-plugin</artifactId>
|
||||
<version>3.6.0</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>add-source</id>
|
||||
<phase>generate-sources</phase>
|
||||
<goals>
|
||||
<goal>add-source</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<sources>
|
||||
<source>src/benchmark/java</source>
|
||||
</sources>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
	<groupId>org.apache.maven.plugins</groupId>
	<artifactId>maven-compiler-plugin</artifactId>
	<version>3.8.1</version>
	<configuration>
		<release>21</release>
		<encoding>UTF-8</encoding>
		<excludes>
			<!-- Exclusion filters are matched against paths relative to each
			     compile source root, so the module descriptor has to be named
			     by its root-relative path; a project-relative path such as
			     src/main/java/... never matches any source file. -->
			<exclude>module-info.java</exclude>
		</excludes>
	</configuration>
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
</profiles>
|
||||
</project>
|
@ -0,0 +1,14 @@
|
||||
package it.cavallium.buffer;
|
||||
|
||||
import org.openjdk.jmh.runner.Runner;
|
||||
import org.openjdk.jmh.runner.RunnerException;
|
||||
import org.openjdk.jmh.runner.options.Options;
|
||||
import org.openjdk.jmh.runner.options.OptionsBuilder;
|
||||
|
||||
public class BenchmarkMain {
|
||||
|
||||
public static void main(String[] args) throws RunnerException {
|
||||
Options opt = new OptionsBuilder().include(BufEncoderBench.class.getSimpleName()).build();
|
||||
new Runner(opt).run();
|
||||
}
|
||||
}
|
@ -0,0 +1,124 @@
|
||||
package it.cavallium.buffer;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
|
||||
public class BenchmarkMainManual {
|
||||
|
||||
public static class BenchmarkZeroCopy {
|
||||
public static void main(String[] args) {
|
||||
var s2 = new ZeroAllocationEncoderBenchState();
|
||||
s2.prepare();
|
||||
while (true) {
|
||||
s2.reset();
|
||||
encodeMediumTextZeroCopy(s2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class BenchmarkJava {
|
||||
public static void main(String[] args) {
|
||||
var s2 = new ZeroAllocationEncoderBenchState();
|
||||
s2.prepare();
|
||||
while (true) {
|
||||
s2.reset();
|
||||
encodeMediumTextJava(s2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class ZeroAllocationEncoderBenchState {
|
||||
private static final List<String> WORDS = List.of(
|
||||
"\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF",
|
||||
"hello",
|
||||
"test",
|
||||
"òàòà§òè+=))=732e0",
|
||||
"ل موقع يسمح لزواره الكرام بتحويل الكتابة العربي الى كتابة مفه",
|
||||
"\uD800\uDF3C\uD800\uDF30\uD800\uDF32 \uD800\uDF32\uD800\uDF3B\uD800\uDF34\uD800\uDF43 \uD800\uDF39̈\uD800\uDF44\uD800\uDF30\uD800\uDF3D, \uD800\uDF3D\uD800\uDF39 \uD800\uDF3C\uD800\uDF39\uD800\uDF43 \uD800\uDF45\uD800\uDF3F \uD800\uDF3D\uD800\uDF33\uD800\uDF30\uD800\uDF3D \uD800\uDF31\uD800\uDF42\uD800\uDF39\uD800\uDF32\uD800\uDF32\uD800\uDF39\uD800\uDF38.",
|
||||
"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇",
|
||||
"من left اليمين to الى right اليسار",
|
||||
"a\u202Db\u202Ec\u202Dd\u202Ee\u202Df\u202Eg",
|
||||
"﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽",
|
||||
"\uD83D\uDC71\uD83D\uDC71\uD83C\uDFFB\uD83D\uDC71\uD83C\uDFFC\uD83D\uDC71\uD83C\uDFFD\uD83D\uDC71\uD83C\uDFFE\uD83D\uDC71\uD83C\uDFFF",
|
||||
"\uD83E\uDDDF\u200D♀\uFE0F\uD83E\uDDDF\u200D♂\uFE0F",
|
||||
"\uD83D\uDC68\u200D❤\uFE0F\u200D\uD83D\uDC8B\u200D\uD83D\uDC68\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66\uD83C\uDFF3\uFE0F\u200D⚧\uFE0F\uD83C\uDDF5\uD83C\uDDF7",
|
||||
"田中さんにあげて下さい",
|
||||
"ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็",
|
||||
"\uD801\uDC1C \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC19\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC13/\uD801\uDC1D\uD801\uDC07\uD801\uDC17\uD801\uDC0A\uD801\uDC24\uD801\uDC14 \uD801\uDC12\uD801\uDC0B\uD801\uDC17 \uD801\uDC12\uD801\uDC0C \uD801\uDC1C \uD801\uDC21\uD801\uDC00\uD801\uDC16\uD801\uDC07\uD801\uDC24\uD801\uDC13\uD801\uDC1D \uD801\uDC31\uD801\uDC42 \uD801\uDC44 \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC0F\uD801\uDC06\uD801\uDC05\uD801\uDC24\uD801\uDC06\uD801\uDC1A\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC06\uD801\uDC13\uD801\uDC06",
|
||||
"表ポあA鷗ŒéB逍Üߪąñ丂㐀\uD840\uDC00",
|
||||
"᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜\n" +
|
||||
"᚛ ᚜\n",
|
||||
"Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗\n" +
|
||||
"\uD83C\uDFF30\uD83C\uDF08\uFE0F\n" +
|
||||
"జ్ఞ\u200Cా"
|
||||
);
|
||||
String shortText;
|
||||
byte[] shortTextBytes;
|
||||
BufDataInput shortTextInput;
|
||||
BufDataOutput shortTextOutput;
|
||||
String mediumText;
|
||||
byte[] mediumTextBytes;
|
||||
BufDataInput mediumTextInput;
|
||||
BufDataOutput mediumTextOutput;
|
||||
String longText;
|
||||
byte[] longTextBytes;
|
||||
BufDataInput longTextInput;
|
||||
BufDataOutput longTextOutput;
|
||||
|
||||
public void reset() {
|
||||
longTextInput.reset();
|
||||
longTextOutput.resetUnderlyingBuffer();
|
||||
mediumTextInput.reset();
|
||||
mediumTextOutput.resetUnderlyingBuffer();
|
||||
shortTextInput.reset();
|
||||
shortTextOutput.resetUnderlyingBuffer();
|
||||
}
|
||||
|
||||
public void prepare() {
|
||||
var l = new ArrayList<String>();
|
||||
var maxI = ThreadLocalRandom.current().nextInt(1, 20);
|
||||
for (int i = 0; i < maxI; i++) {
|
||||
l.addAll(WORDS);
|
||||
}
|
||||
Collections.shuffle(l);
|
||||
var fullText = String.join(" ", l);
|
||||
var out = BufDataOutput.create(Integer.BYTES + fullText.getBytes(StandardCharsets.UTF_8).length);
|
||||
|
||||
out.resetUnderlyingBuffer();
|
||||
longText = fullText;
|
||||
longTextBytes = longText.getBytes(StandardCharsets.UTF_8);
|
||||
out.writeMediumText(longText, StandardCharsets.UTF_8);
|
||||
longTextInput = BufDataInput.create(out.toList());
|
||||
longTextOutput = BufDataOutput.create(Integer.BYTES + longTextBytes.length);
|
||||
|
||||
out.resetUnderlyingBuffer();
|
||||
mediumText = fullText.substring(0, 128);
|
||||
mediumTextBytes = mediumText.getBytes(StandardCharsets.UTF_8);
|
||||
out.writeMediumText(mediumText, StandardCharsets.UTF_8);
|
||||
mediumTextInput = BufDataInput.create(out.toList());
|
||||
mediumTextOutput = BufDataOutput.create(Integer.BYTES + mediumTextBytes.length);
|
||||
|
||||
out.resetUnderlyingBuffer();
|
||||
shortText = fullText.substring(0, 15);
|
||||
shortTextBytes = shortText.getBytes(StandardCharsets.UTF_8);
|
||||
out.writeMediumText(shortText, StandardCharsets.UTF_8);
|
||||
shortTextInput = BufDataInput.create(out.toList());
|
||||
shortTextOutput = BufDataOutput.create(Integer.BYTES + shortTextBytes.length);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static Buf encodeMediumTextZeroCopy(ZeroAllocationEncoderBenchState benchState) {
|
||||
var out = benchState.mediumTextOutput;
|
||||
out.writeMediumTextZeroCopy(benchState.mediumText);
|
||||
return out.toList();
|
||||
}
|
||||
public static Buf encodeMediumTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||
var out = benchState.mediumTextOutput;
|
||||
out.writeMediumTextLegacy(benchState.mediumText, StandardCharsets.UTF_8);
|
||||
return out.toList();
|
||||
}
|
||||
}
|
@ -0,0 +1,174 @@
|
||||
package it.cavallium.buffer;
|
||||
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@State(Scope.Benchmark)
|
||||
@OutputTimeUnit(TimeUnit.SECONDS)
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
@Fork(value = 1, warmups = 1)
|
||||
@Warmup(time = 2, iterations = 6)
|
||||
@Measurement(time = 2, iterations = 6)
|
||||
public class BufEncoderBench {
|
||||
|
||||
@State(Scope.Thread)
|
||||
public static class ZeroAllocationEncoderState {
|
||||
ZeroAllocationEncoder encoder;
|
||||
BufDataOutput bufOutput;
|
||||
|
||||
@Setup
|
||||
public void prepare() {
|
||||
encoder = ZeroAllocationEncoder.INSTANCE;
|
||||
bufOutput = BufDataOutput.create(1024);
|
||||
}
|
||||
}
|
||||
|
||||
@State(Scope.Benchmark)
|
||||
public static class ZeroAllocationEncoderBenchState {
|
||||
private static final List<String> WORDS = List.of(
|
||||
"\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF",
|
||||
"hello",
|
||||
"test",
|
||||
"òàòà§òè+=))=732e0",
|
||||
"ل موقع يسمح لزواره الكرام بتحويل الكتابة العربي الى كتابة مفه",
|
||||
"\uD800\uDF3C\uD800\uDF30\uD800\uDF32 \uD800\uDF32\uD800\uDF3B\uD800\uDF34\uD800\uDF43 \uD800\uDF39̈\uD800\uDF44\uD800\uDF30\uD800\uDF3D, \uD800\uDF3D\uD800\uDF39 \uD800\uDF3C\uD800\uDF39\uD800\uDF43 \uD800\uDF45\uD800\uDF3F \uD800\uDF3D\uD800\uDF33\uD800\uDF30\uD800\uDF3D \uD800\uDF31\uD800\uDF42\uD800\uDF39\uD800\uDF32\uD800\uDF32\uD800\uDF39\uD800\uDF38.",
|
||||
"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇",
|
||||
"من left اليمين to الى right اليسار",
|
||||
"a\u202Db\u202Ec\u202Dd\u202Ee\u202Df\u202Eg",
|
||||
"﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽",
|
||||
"\uD83D\uDC71\uD83D\uDC71\uD83C\uDFFB\uD83D\uDC71\uD83C\uDFFC\uD83D\uDC71\uD83C\uDFFD\uD83D\uDC71\uD83C\uDFFE\uD83D\uDC71\uD83C\uDFFF",
|
||||
"\uD83E\uDDDF\u200D♀\uFE0F\uD83E\uDDDF\u200D♂\uFE0F",
|
||||
"\uD83D\uDC68\u200D❤\uFE0F\u200D\uD83D\uDC8B\u200D\uD83D\uDC68\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66\uD83C\uDFF3\uFE0F\u200D⚧\uFE0F\uD83C\uDDF5\uD83C\uDDF7",
|
||||
"田中さんにあげて下さい",
|
||||
"ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็",
|
||||
"\uD801\uDC1C \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC19\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC13/\uD801\uDC1D\uD801\uDC07\uD801\uDC17\uD801\uDC0A\uD801\uDC24\uD801\uDC14 \uD801\uDC12\uD801\uDC0B\uD801\uDC17 \uD801\uDC12\uD801\uDC0C \uD801\uDC1C \uD801\uDC21\uD801\uDC00\uD801\uDC16\uD801\uDC07\uD801\uDC24\uD801\uDC13\uD801\uDC1D \uD801\uDC31\uD801\uDC42 \uD801\uDC44 \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC0F\uD801\uDC06\uD801\uDC05\uD801\uDC24\uD801\uDC06\uD801\uDC1A\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC06\uD801\uDC13\uD801\uDC06",
|
||||
"表ポあA鷗ŒéB逍Üߪąñ丂㐀\uD840\uDC00",
|
||||
"᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜\n" +
|
||||
"᚛ ᚜\n",
|
||||
"Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗\n" +
|
||||
"\uD83C\uDFF30\uD83C\uDF08\uFE0F\n" +
|
||||
"జ్ఞ\u200Cా"
|
||||
);
|
||||
String shortText;
|
||||
byte[] shortTextBytes;
|
||||
BufDataInput shortTextInput;
|
||||
BufDataOutput shortTextOutput;
|
||||
String mediumText;
|
||||
byte[] mediumTextBytes;
|
||||
BufDataInput mediumTextInput;
|
||||
BufDataOutput mediumTextOutput;
|
||||
String longText;
|
||||
byte[] longTextBytes;
|
||||
BufDataInput longTextInput;
|
||||
BufDataOutput longTextOutput;
|
||||
|
||||
@Setup(Level.Invocation)
|
||||
public void reset() {
|
||||
longTextInput.reset();
|
||||
longTextOutput.resetUnderlyingBuffer();
|
||||
mediumTextInput.reset();
|
||||
mediumTextOutput.resetUnderlyingBuffer();
|
||||
shortTextInput.reset();
|
||||
shortTextOutput.resetUnderlyingBuffer();
|
||||
}
|
||||
|
||||
@Setup(Level.Invocation)
|
||||
public void prepare() {
|
||||
var l = new ArrayList<String>();
|
||||
var maxI = ThreadLocalRandom.current().nextInt(1, 20);
|
||||
for (int i = 0; i < maxI; i++) {
|
||||
l.addAll(WORDS);
|
||||
}
|
||||
Collections.shuffle(l);
|
||||
var fullText = String.join(" ", l);
|
||||
var out = BufDataOutput.create(Integer.BYTES + fullText.getBytes(StandardCharsets.UTF_8).length);
|
||||
|
||||
out.resetUnderlyingBuffer();
|
||||
longText = fullText;
|
||||
longTextBytes = longText.getBytes(StandardCharsets.UTF_8);
|
||||
out.writeMediumText(longText, StandardCharsets.UTF_8);
|
||||
longTextInput = BufDataInput.create(out.toList());
|
||||
longTextOutput = BufDataOutput.create(Integer.BYTES + longTextBytes.length);
|
||||
|
||||
out.resetUnderlyingBuffer();
|
||||
mediumText = fullText.substring(0, 128);
|
||||
mediumTextBytes = mediumText.getBytes(StandardCharsets.UTF_8);
|
||||
out.writeMediumText(mediumText, StandardCharsets.UTF_8);
|
||||
mediumTextInput = BufDataInput.create(out.toList());
|
||||
mediumTextOutput = BufDataOutput.create(Integer.BYTES + mediumTextBytes.length);
|
||||
|
||||
out.resetUnderlyingBuffer();
|
||||
shortText = fullText.substring(0, 15);
|
||||
shortTextBytes = shortText.getBytes(StandardCharsets.UTF_8);
|
||||
out.writeMediumText(shortText, StandardCharsets.UTF_8);
|
||||
shortTextInput = BufDataInput.create(out.toList());
|
||||
shortTextOutput = BufDataOutput.create(Integer.BYTES + shortTextBytes.length);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public Buf encodeShortTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||
var out = benchState.shortTextOutput;
|
||||
out.writeMediumTextZeroCopy(benchState.shortText);
|
||||
return out.toList();
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public Buf encodeMediumTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||
var out = benchState.mediumTextOutput;
|
||||
out.writeMediumTextZeroCopy(benchState.mediumText);
|
||||
return out.toList();
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public Buf encodeLongTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||
var out = benchState.longTextOutput;
|
||||
out.writeMediumTextZeroCopy(benchState.longText);
|
||||
return out.toList();
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public Buf encodeShortTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||
var out = benchState.shortTextOutput;
|
||||
out.writeMediumTextLegacy(benchState.shortText, StandardCharsets.UTF_8);
|
||||
return out.toList();
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public Buf encodeMediumTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||
var out = benchState.mediumTextOutput;
|
||||
out.writeMediumTextLegacy(benchState.mediumText, StandardCharsets.UTF_8);
|
||||
return out.toList();
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public Buf encodeLongTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||
var out = benchState.longTextOutput;
|
||||
out.writeMediumTextLegacy(benchState.longText, StandardCharsets.UTF_8);
|
||||
return out.toList();
|
||||
}
|
||||
|
||||
|
||||
@Benchmark
|
||||
public String decodeShortText(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||
var in = benchState.shortTextInput;
|
||||
return in.readMediumText(StandardCharsets.UTF_8);
|
||||
}
|
||||
@Benchmark
|
||||
public String decodeMediumText(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||
var in = benchState.mediumTextInput;
|
||||
return in.readMediumText(StandardCharsets.UTF_8);
|
||||
}
|
||||
@Benchmark
|
||||
public String decodeLongText(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||
var in = benchState.longTextInput;
|
||||
return in.readMediumText(StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,178 @@
|
||||
package it.cavallium.buffer;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
|
||||
@State(Scope.Benchmark)
|
||||
@OutputTimeUnit(TimeUnit.SECONDS)
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
@Fork(value = 1, warmups = 1)
|
||||
@Warmup(time = 1, iterations = 3)
|
||||
@Measurement(time = 1, iterations = 6)
|
||||
public class ZeroAllocationEncoderBench {
|
||||
|
||||
@State(Scope.Thread)
|
||||
public static class ZeroAllocationEncoderState {
|
||||
ZeroAllocationEncoder encoder;
|
||||
BufDataOutput bufOutput;
|
||||
|
||||
@Setup
|
||||
public void prepare() {
|
||||
encoder = ZeroAllocationEncoder.INSTANCE;
|
||||
bufOutput = BufDataOutput.create(1024);
|
||||
}
|
||||
}
|
||||
|
||||
@State(Scope.Benchmark)
|
||||
public static class ZeroAllocationEncoderBenchState {
|
||||
private static final List<String> WORDS = List.of(
|
||||
"\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF",
|
||||
"hello",
|
||||
"test",
|
||||
"òàòà§òè+=))=732e0",
|
||||
"ل موقع يسمح لزواره الكرام بتحويل الكتابة العربي الى كتابة مفه",
|
||||
"\uD800\uDF3C\uD800\uDF30\uD800\uDF32 \uD800\uDF32\uD800\uDF3B\uD800\uDF34\uD800\uDF43 \uD800\uDF39̈\uD800\uDF44\uD800\uDF30\uD800\uDF3D, \uD800\uDF3D\uD800\uDF39 \uD800\uDF3C\uD800\uDF39\uD800\uDF43 \uD800\uDF45\uD800\uDF3F \uD800\uDF3D\uD800\uDF33\uD800\uDF30\uD800\uDF3D \uD800\uDF31\uD800\uDF42\uD800\uDF39\uD800\uDF32\uD800\uDF32\uD800\uDF39\uD800\uDF38.",
|
||||
"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇",
|
||||
"من left اليمين to الى right اليسار",
|
||||
"a\u202Db\u202Ec\u202Dd\u202Ee\u202Df\u202Eg",
|
||||
"﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽",
|
||||
"\uD83D\uDC71\uD83D\uDC71\uD83C\uDFFB\uD83D\uDC71\uD83C\uDFFC\uD83D\uDC71\uD83C\uDFFD\uD83D\uDC71\uD83C\uDFFE\uD83D\uDC71\uD83C\uDFFF",
|
||||
"\uD83E\uDDDF\u200D♀\uFE0F\uD83E\uDDDF\u200D♂\uFE0F",
|
||||
"\uD83D\uDC68\u200D❤\uFE0F\u200D\uD83D\uDC8B\u200D\uD83D\uDC68\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66\uD83C\uDFF3\uFE0F\u200D⚧\uFE0F\uD83C\uDDF5\uD83C\uDDF7",
|
||||
"田中さんにあげて下さい",
|
||||
"ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็",
|
||||
"\uD801\uDC1C \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC19\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC13/\uD801\uDC1D\uD801\uDC07\uD801\uDC17\uD801\uDC0A\uD801\uDC24\uD801\uDC14 \uD801\uDC12\uD801\uDC0B\uD801\uDC17 \uD801\uDC12\uD801\uDC0C \uD801\uDC1C \uD801\uDC21\uD801\uDC00\uD801\uDC16\uD801\uDC07\uD801\uDC24\uD801\uDC13\uD801\uDC1D \uD801\uDC31\uD801\uDC42 \uD801\uDC44 \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC0F\uD801\uDC06\uD801\uDC05\uD801\uDC24\uD801\uDC06\uD801\uDC1A\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC06\uD801\uDC13\uD801\uDC06",
|
||||
"表ポあA鷗ŒéB逍Üߪąñ丂㐀\uD840\uDC00",
|
||||
"᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜\n" +
|
||||
"᚛ ᚜\n",
|
||||
"Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗\n" +
|
||||
"\uD83C\uDFF30\uD83C\uDF08\uFE0F\n" +
|
||||
"జ్ఞ\u200Cా"
|
||||
);
|
||||
String shortText;
|
||||
byte[] shortTextBytes;
|
||||
BufDataInput shortTextInput;
|
||||
BufDataOutput shortTextOutput;
|
||||
String mediumText;
|
||||
byte[] mediumTextBytes;
|
||||
BufDataInput mediumTextInput;
|
||||
BufDataOutput mediumTextOutput;
|
||||
String longText;
|
||||
byte[] longTextBytes;
|
||||
BufDataInput longTextInput;
|
||||
BufDataOutput longTextOutput;
|
||||
|
||||
@Setup(Level.Invocation)
|
||||
public void reset() {
|
||||
longTextInput.reset();
|
||||
longTextOutput.resetUnderlyingBuffer();
|
||||
mediumTextInput.reset();
|
||||
mediumTextOutput.resetUnderlyingBuffer();
|
||||
shortTextInput.reset();
|
||||
shortTextOutput.resetUnderlyingBuffer();
|
||||
}
|
||||
|
||||
@Setup
|
||||
public void prepare() {
|
||||
var l = new ArrayList<String>();
|
||||
var maxI = ThreadLocalRandom.current().nextInt(1, 20);
|
||||
for (int i = 0; i < maxI; i++) {
|
||||
l.addAll(WORDS);
|
||||
}
|
||||
Collections.shuffle(l);
|
||||
var fullText = String.join(" ", l);
|
||||
longText = fullText;
|
||||
longTextBytes = longText.getBytes(StandardCharsets.UTF_8);
|
||||
longTextInput = BufDataInput.create(Buf.wrap(longTextBytes));
|
||||
longTextOutput = BufDataOutput.create(longTextBytes.length);
|
||||
mediumText = fullText.substring(0, 128);
|
||||
mediumTextBytes = mediumText.getBytes(StandardCharsets.UTF_8);
|
||||
mediumTextInput = BufDataInput.create(Buf.wrap(mediumTextBytes));
|
||||
mediumTextOutput = BufDataOutput.create(mediumTextBytes.length);
|
||||
shortText = fullText.substring(0, 15);
|
||||
shortTextBytes = shortText.getBytes(StandardCharsets.UTF_8);
|
||||
shortTextInput = BufDataInput.create(Buf.wrap(shortTextBytes));
|
||||
shortTextOutput = BufDataOutput.create(shortTextBytes.length);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public BufDataOutput encodeShortTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||
var out = benchState.shortTextOutput;
|
||||
encoderState.encoder.encodeTo(benchState.shortText, out);
|
||||
return out;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public BufDataOutput encodeMediumTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||
var out = benchState.mediumTextOutput;
|
||||
encoderState.encoder.encodeTo(benchState.mediumText, out);
|
||||
return out;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public BufDataOutput encodeLongTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||
var out = benchState.longTextOutput;
|
||||
encoderState.encoder.encodeTo(benchState.longText, out);
|
||||
return out;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public BufDataOutput encodeShortTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||
var out = benchState.shortTextOutput;
|
||||
out.write(benchState.shortText.getBytes(StandardCharsets.UTF_8));
|
||||
return out;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public BufDataOutput encodeMediumTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||
var out = benchState.mediumTextOutput;
|
||||
out.write(benchState.mediumText.getBytes(StandardCharsets.UTF_8));
|
||||
return out;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public BufDataOutput encodeLongTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||
var out = benchState.longTextOutput;
|
||||
out.write(benchState.longText.getBytes(StandardCharsets.UTF_8));
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
@Benchmark
|
||||
public String decodeShortTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||
return encoderState.encoder.decodeFrom(benchState.shortTextInput, benchState.shortTextBytes.length);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String decodeMediumTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||
return encoderState.encoder.decodeFrom(benchState.mediumTextInput, benchState.mediumTextBytes.length);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String decodeLongTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||
return encoderState.encoder.decodeFrom(benchState.longTextInput, benchState.longTextBytes.length);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String decodeShortTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||
return new String(benchState.shortTextInput.readAllBytes(), StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String decodeMediumTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||
return new String(benchState.mediumTextInput.readAllBytes(), StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public String decodeLongTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||
return new String(benchState.longTextInput.readAllBytes(), StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
}
|
@ -12,22 +12,23 @@ import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.jetbrains.annotations.VisibleForTesting;
|
||||
|
||||
public class BufDataOutput implements SafeDataOutput {
|
||||
|
||||
private final SafeByteArrayOutputStream buf;
|
||||
private final SafeDataOutputStream dOut;
|
||||
private final SafeByteArrayDataOutputStream dOut;
|
||||
private final int limit;
|
||||
|
||||
public BufDataOutput(SafeByteArrayOutputStream buf) {
|
||||
this.buf = buf;
|
||||
this.dOut = new SafeDataOutputStream(buf);
|
||||
this.dOut = new SafeByteArrayDataOutputStream(buf);
|
||||
limit = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
public BufDataOutput(SafeByteArrayOutputStream buf, int maxSize) {
|
||||
this.buf = buf;
|
||||
this.dOut = new SafeDataOutputStream(buf);
|
||||
this.dOut = new SafeByteArrayDataOutputStream(buf);
|
||||
this.limit = maxSize;
|
||||
}
|
||||
|
||||
@ -218,17 +219,17 @@ public class BufDataOutput implements SafeDataOutput {
|
||||
@Override
|
||||
public void writeShortText(String s, Charset charset) {
|
||||
if (charset == StandardCharsets.UTF_8) {
|
||||
var beforeWrite = this.buf.position();
|
||||
writeShort(0);
|
||||
var beforeWrite = (int) this.position();
|
||||
this.advancePosition(Short.BYTES);
|
||||
ZeroAllocationEncoder.INSTANCE.encodeTo(s, this);
|
||||
var afterWrite = this.buf.position();
|
||||
this.buf.position(beforeWrite);
|
||||
var afterWrite = (int) this.position();
|
||||
this.rewindPosition(afterWrite - beforeWrite);
|
||||
var len = Math.toIntExact(afterWrite - beforeWrite - Short.BYTES);
|
||||
if (len > Short.MAX_VALUE) {
|
||||
throw new IndexOutOfBoundsException("String too long: " + len + " bytes");
|
||||
}
|
||||
this.writeShort(len);
|
||||
this.buf.position(afterWrite);
|
||||
dOut.writeShort(len);
|
||||
dOut.advancePosition(len);
|
||||
} else {
|
||||
var out = s.getBytes(charset);
|
||||
if (out.length > Short.MAX_VALUE) {
|
||||
@ -242,20 +243,49 @@ public class BufDataOutput implements SafeDataOutput {
|
||||
|
||||
@Override
|
||||
public void writeMediumText(String s, Charset charset) {
|
||||
if (charset == StandardCharsets.UTF_8) {
|
||||
var beforeWrite = this.buf.position();
|
||||
writeInt(0);
|
||||
ZeroAllocationEncoder.INSTANCE.encodeTo(s, this);
|
||||
var afterWrite = this.buf.position();
|
||||
this.buf.position(beforeWrite);
|
||||
this.writeInt(Math.toIntExact(afterWrite - beforeWrite - Integer.BYTES));
|
||||
this.buf.position(afterWrite);
|
||||
} else {
|
||||
var out = s.getBytes(charset);
|
||||
checkOutOfBounds(Integer.BYTES + out.length);
|
||||
dOut.writeInt(out.length);
|
||||
dOut.write(out);
|
||||
}
|
||||
// todo: charbuffer is still slow, check in future java versions
|
||||
// if (charset == StandardCharsets.UTF_8) {
|
||||
// writeMediumTextZeroCopy(s);
|
||||
// } else {
|
||||
writeMediumTextLegacy(s, charset);
|
||||
// }
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public void writeMediumTextZeroCopy(String s) {
|
||||
var beforeWrite = (int) this.position();
|
||||
this.advancePosition(Integer.BYTES);
|
||||
ZeroAllocationEncoder.INSTANCE.encodeTo(s, this);
|
||||
var afterWrite = (int) this.position();
|
||||
this.rewindPosition(afterWrite - beforeWrite);
|
||||
var len = Math.toIntExact(afterWrite - beforeWrite - Integer.BYTES);
|
||||
dOut.writeInt(len);
|
||||
dOut.advancePosition(len);
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public void writeMediumTextLegacy(String s, Charset charset) {
|
||||
var out = s.getBytes(charset);
|
||||
checkOutOfBounds(Integer.BYTES + out.length);
|
||||
dOut.writeInt(out.length);
|
||||
dOut.write(out);
|
||||
}
|
||||
|
||||
public void resetUnderlyingBuffer() {
|
||||
dOut.resetUnderlyingBuffer();
|
||||
}
|
||||
|
||||
public void rewindPosition(int count) {
|
||||
dOut.rewindPosition(count);
|
||||
}
|
||||
|
||||
public void advancePosition(int count) {
|
||||
checkOutOfBounds(count);
|
||||
dOut.advancePosition(count);
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return dOut.position();
|
||||
}
|
||||
|
||||
public Buf asList() {
|
||||
|
@ -0,0 +1,37 @@
|
||||
package it.cavallium.buffer;
|
||||
|
||||
import it.cavallium.stream.SafeByteArrayOutputStream;
|
||||
import it.cavallium.stream.SafeDataOutputStream;
|
||||
|
||||
public class SafeByteArrayDataOutputStream extends SafeDataOutputStream {
|
||||
private final SafeByteArrayOutputStream bOut;
|
||||
|
||||
public SafeByteArrayDataOutputStream(SafeByteArrayOutputStream out) {
|
||||
super(out);
|
||||
this.bOut = out;
|
||||
}
|
||||
|
||||
public void resetUnderlyingBuffer() {
|
||||
bOut.reset();
|
||||
this.written = 0;
|
||||
}
|
||||
|
||||
public void rewindPosition(int count) {
|
||||
var currentPosition = bOut.position();
|
||||
if (count > written) {
|
||||
throw new IndexOutOfBoundsException(count + " > " + written);
|
||||
}
|
||||
bOut.position(currentPosition - count);
|
||||
decCount(count);
|
||||
}
|
||||
|
||||
public void advancePosition(int count) {
|
||||
bOut.ensureWritable(count);
|
||||
bOut.position(bOut.position() + count);
|
||||
incCount(count);
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return bOut.position();
|
||||
}
|
||||
}
|
@ -27,25 +27,38 @@ public class ZeroAllocationEncoder {
|
||||
|
||||
private final ThreadLocal<AtomicReference<CharBuffer>> charBufferRefThreadLocal;
|
||||
|
||||
private final ThreadLocal<AtomicReference<ByteBuffer>> byteBufferRefThreadLocal;
|
||||
|
||||
public ZeroAllocationEncoder(int outBufferSize) {
|
||||
bufferThreadLocal = ThreadLocal.withInitial(() -> ByteBuffer.allocate(outBufferSize));
|
||||
var maxBytesPerChar = (int) Math.ceil(StandardCharsets.UTF_8.newEncoder().maxBytesPerChar());
|
||||
bufferThreadLocal = ThreadLocal.withInitial(() -> ByteBuffer.allocate(outBufferSize * maxBytesPerChar));
|
||||
charBufferRefThreadLocal = ThreadLocal.withInitial(() -> new AtomicReference<>(CharBuffer.allocate(outBufferSize)));
|
||||
byteBufferRefThreadLocal = ThreadLocal.withInitial(() -> new AtomicReference<>(ByteBuffer.allocate(outBufferSize * maxBytesPerChar)));
|
||||
}
|
||||
|
||||
public void encodeTo(String s, SafeDataOutput bufDataOutput) {
|
||||
var encoder = CHARSET_ENCODER_UTF8.get();
|
||||
encoder.reset();
|
||||
var buf = bufferThreadLocal.get();
|
||||
var charBuffer = CharBuffer.wrap(s);
|
||||
boolean endOfInput = false;
|
||||
CoderResult result;
|
||||
do {
|
||||
buf.clear();
|
||||
result = encoder.encode(charBuffer, buf, true);
|
||||
result = encoder.encode(charBuffer, buf, endOfInput);
|
||||
buf.flip();
|
||||
var bufArray = buf.array();
|
||||
var bufArrayOffset = buf.arrayOffset();
|
||||
bufDataOutput.write(bufArray, bufArrayOffset + buf.position(), buf.remaining());
|
||||
bufDataOutput.write(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining());
|
||||
if (result.isUnderflow()) {
|
||||
break;
|
||||
if (endOfInput) {
|
||||
buf.clear();
|
||||
encoder.flush(buf);
|
||||
buf.flip();
|
||||
bufDataOutput.write(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining());
|
||||
break;
|
||||
} else {
|
||||
endOfInput = true;
|
||||
continue;
|
||||
}
|
||||
} else if (result.isOverflow()) {
|
||||
continue;
|
||||
} else if (result.isError()) {
|
||||
@ -59,32 +72,41 @@ public class ZeroAllocationEncoder {
|
||||
} while (true);
|
||||
}
|
||||
|
||||
public String decodeFrom(SafeDataInput bufDataInput, int length) {
|
||||
public String decodeFrom(SafeDataInput bufDataInput, int bytesLength) {
|
||||
var decoder = CHARSET_DECODER_UTF8.get();
|
||||
var byteBuf = bufferThreadLocal.get();
|
||||
decoder.reset();
|
||||
var bufRef = byteBufferRefThreadLocal.get();
|
||||
var charBufRef = charBufferRefThreadLocal.get();
|
||||
var buf = bufRef.get();
|
||||
var charBuf = charBufRef.get();
|
||||
if (charBuf.capacity() < length) {
|
||||
charBuf = CharBuffer.allocate(length);
|
||||
assert decoder.maxCharsPerByte() == 1.0f
|
||||
: "UTF8 max chars per byte is 1.0f, but the decoder got a value of " + decoder.maxCharsPerByte();
|
||||
if (charBuf.capacity() < bytesLength) {
|
||||
charBuf = CharBuffer.allocate(bytesLength);
|
||||
charBufRef.set(charBuf);
|
||||
} else {
|
||||
charBuf.clear();
|
||||
}
|
||||
var remainingLengthToRead = length;
|
||||
if (buf.capacity() < bytesLength) {
|
||||
buf = ByteBuffer.allocate(bytesLength);
|
||||
bufRef.set(buf);
|
||||
} else {
|
||||
buf.clear();
|
||||
}
|
||||
CoderResult result;
|
||||
do {
|
||||
byteBuf.clear();
|
||||
bufDataInput.readFully(byteBuf, Math.min(remainingLengthToRead, byteBuf.limit()));
|
||||
byteBuf.flip();
|
||||
remainingLengthToRead -= byteBuf.remaining();
|
||||
result = decoder.decode(byteBuf, charBuf, true);
|
||||
buf.clear();
|
||||
assert buf.capacity() >= bytesLength;
|
||||
bufDataInput.readFully(buf, bytesLength);
|
||||
buf.flip();
|
||||
result = decoder.decode(buf, charBuf, true);
|
||||
if (result.isUnderflow()) {
|
||||
if (remainingLengthToRead > 0) {
|
||||
continue;
|
||||
} else {
|
||||
charBuf.flip();
|
||||
return charBuf.toString();
|
||||
result = decoder.flush(charBuf);
|
||||
if (result.isOverflow()) {
|
||||
throw new IllegalStateException("Unexpected overflow");
|
||||
}
|
||||
charBuf.flip();
|
||||
return charBuf.toString();
|
||||
} else if (result.isOverflow()) {
|
||||
throw new UnsupportedOperationException();
|
||||
} else if (result.isError()) {
|
||||
|
@ -131,7 +131,7 @@ public class SafeByteArrayInputStream extends SafeMeasurableInputStream implemen
|
||||
public void readNBytes(int length, ByteBuffer buffer) {
|
||||
Objects.checkFromIndexSize(0, length, buffer.remaining());
|
||||
if (this.available() < length) {
|
||||
throw new IndexOutOfBoundsException(this.length);
|
||||
throw new IndexOutOfBoundsException(length);
|
||||
}
|
||||
buffer.put(array, offset + this.position, length);
|
||||
position += length;
|
||||
@ -149,12 +149,17 @@ public class SafeByteArrayInputStream extends SafeMeasurableInputStream implemen
|
||||
return cappedLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] readAllBytes() {
|
||||
var result = Arrays.copyOfRange(this.array, this.offset + position, this.offset + length);
|
||||
position = length;
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] readNBytes(int length) {
|
||||
if (this.available() < length) {
|
||||
throw new IndexOutOfBoundsException(this.length);
|
||||
}
|
||||
var result = Arrays.copyOfRange(this.array, this.offset + position, this.offset + position + length);
|
||||
var result = Arrays.copyOfRange(this.array, this.offset + position, this.offset + position + Math.min(length, this.available()));
|
||||
position += length;
|
||||
return result;
|
||||
}
|
||||
@ -162,7 +167,7 @@ public class SafeByteArrayInputStream extends SafeMeasurableInputStream implemen
|
||||
@Override
|
||||
public String readString(int length, Charset charset) {
|
||||
if (this.available() < length) {
|
||||
throw new IndexOutOfBoundsException(this.length);
|
||||
throw new IndexOutOfBoundsException(length + " > " + this.available());
|
||||
}
|
||||
var result = new String(this.array, offset + position, length, charset);
|
||||
position += length;
|
||||
|
@ -63,9 +63,16 @@ public class SafeDataOutputStream extends SafeFilterOutputStream implements Safe
|
||||
* Increases the written counter by the specified value
|
||||
* until it reaches Integer.MAX_VALUE.
|
||||
*/
|
||||
private void incCount(int value) {
|
||||
protected void incCount(int value) {
|
||||
written = Math.addExact(written, value);
|
||||
}
|
||||
/**
 * Decreases the written counter by the specified value.
 * No floor at 0 is applied here: {@link Math#subtractExact} only throws an
 * {@link ArithmeticException} when the subtraction overflows, so callers
 * must make sure {@code value} does not exceed the current counter.
 */
|
||||
protected void decCount(int value) {
|
||||
written = Math.subtractExact(written, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the specified byte (the low eight bits of the argument
|
||||
|
@ -4,6 +4,10 @@ import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
@ -11,6 +15,48 @@ class ZeroAllocationEncoderTest {
|
||||
|
||||
private static final ZeroAllocationEncoder INSTANCE = new ZeroAllocationEncoder(16);
|
||||
|
||||
|
||||
private static final List<String> WORDS = List.of(
|
||||
"\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF",
|
||||
"hello",
|
||||
"test",
|
||||
"òàòà§òè+=))=732e0",
|
||||
"ل موقع يسمح لزواره الكرام بتحويل الكتابة العربي الى كتابة مفه",
|
||||
"\uD800\uDF3C\uD800\uDF30\uD800\uDF32 \uD800\uDF32\uD800\uDF3B\uD800\uDF34\uD800\uDF43 \uD800\uDF39̈\uD800\uDF44\uD800\uDF30\uD800\uDF3D, \uD800\uDF3D\uD800\uDF39 \uD800\uDF3C\uD800\uDF39\uD800\uDF43 \uD800\uDF45\uD800\uDF3F \uD800\uDF3D\uD800\uDF33\uD800\uDF30\uD800\uDF3D \uD800\uDF31\uD800\uDF42\uD800\uDF39\uD800\uDF32\uD800\uDF32\uD800\uDF39\uD800\uDF38.",
|
||||
"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇",
|
||||
"من left اليمين to الى right اليسار",
|
||||
"a\u202Db\u202Ec\u202Dd\u202Ee\u202Df\u202Eg",
|
||||
"﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽",
|
||||
"\uD83D\uDC71\uD83D\uDC71\uD83C\uDFFB\uD83D\uDC71\uD83C\uDFFC\uD83D\uDC71\uD83C\uDFFD\uD83D\uDC71\uD83C\uDFFE\uD83D\uDC71\uD83C\uDFFF",
|
||||
"\uD83E\uDDDF\u200D♀\uFE0F\uD83E\uDDDF\u200D♂\uFE0F",
|
||||
"\uD83D\uDC68\u200D❤\uFE0F\u200D\uD83D\uDC8B\u200D\uD83D\uDC68\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66\uD83C\uDFF3\uFE0F\u200D⚧\uFE0F\uD83C\uDDF5\uD83C\uDDF7",
|
||||
"田中さんにあげて下さい",
|
||||
"ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็",
|
||||
"\uD801\uDC1C \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC19\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC13/\uD801\uDC1D\uD801\uDC07\uD801\uDC17\uD801\uDC0A\uD801\uDC24\uD801\uDC14 \uD801\uDC12\uD801\uDC0B\uD801\uDC17 \uD801\uDC12\uD801\uDC0C \uD801\uDC1C \uD801\uDC21\uD801\uDC00\uD801\uDC16\uD801\uDC07\uD801\uDC24\uD801\uDC13\uD801\uDC1D \uD801\uDC31\uD801\uDC42 \uD801\uDC44 \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC0F\uD801\uDC06\uD801\uDC05\uD801\uDC24\uD801\uDC06\uD801\uDC1A\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC06\uD801\uDC13\uD801\uDC06",
|
||||
"表ポあA鷗ŒéB逍Üߪąñ丂㐀\uD840\uDC00",
|
||||
"᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜\n" +
|
||||
"᚛ ᚜\n",
|
||||
"Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗\n" +
|
||||
"\uD83C\uDFF30\uD83C\uDF08\uFE0F\n" +
|
||||
"జ్ఞ\u200Cా"
|
||||
);
|
||||
|
||||
@Test
|
||||
void encodeFuzzer() {
|
||||
var l = new ArrayList<>(WORDS);
|
||||
Collections.shuffle(l);
|
||||
var collected = l.stream().collect(Collectors.joining(" "));
|
||||
testEncodeString(collected);
|
||||
}
|
||||
|
||||
@Test
|
||||
void decodeFuzzer() {
|
||||
var l = new ArrayList<>(WORDS);
|
||||
Collections.shuffle(l);
|
||||
var collected = l.stream().collect(Collectors.joining(" "));
|
||||
testDecodeString(collected);
|
||||
}
|
||||
|
||||
@Test
|
||||
void encodeToEmpty() {
|
||||
testEncodeString("");
|
||||
@ -21,6 +67,26 @@ class ZeroAllocationEncoderTest {
|
||||
testDecodeString("");
|
||||
}
|
||||
|
||||
@Test
|
||||
void encodeComplex() {
|
||||
testEncodeString("\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF");
|
||||
}
|
||||
|
||||
@Test
|
||||
void decodeComplex() {
|
||||
testDecodeString("\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF");
|
||||
}
|
||||
|
||||
@Test
|
||||
void encodeComplexLong() {
|
||||
testEncodeString("\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF".repeat(10));
|
||||
}
|
||||
|
||||
@Test
|
||||
void decodeComplexLong() {
|
||||
testDecodeString("\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF".repeat(10));
|
||||
}
|
||||
|
||||
@Test
|
||||
void encodeTo1Underflow() {
|
||||
testEncodeString("ciao");
|
||||
@ -87,8 +153,15 @@ class ZeroAllocationEncoderTest {
|
||||
var out = bdo.toList();
|
||||
out.toString(StandardCharsets.UTF_8);
|
||||
Assertions.assertEquals(s, out.toString(StandardCharsets.UTF_8));
|
||||
Assertions.assertEquals(s.length(), bdo.size());
|
||||
Assertions.assertEquals(s.length(), out.size());
|
||||
Assertions.assertEquals(s.getBytes(StandardCharsets.UTF_8).length, bdo.size());
|
||||
Assertions.assertEquals(s.getBytes(StandardCharsets.UTF_8).length, out.size());
|
||||
|
||||
var bdo2 = BufDataOutput.create();
|
||||
bdo2.writeMediumText("ciao", StandardCharsets.UTF_8);
|
||||
bdo2.writeShortText("ciao2", StandardCharsets.UTF_8);
|
||||
var in = BufDataInput.create(bdo2.asList());
|
||||
Assertions.assertEquals("ciao", in.readMediumText(StandardCharsets.UTF_8));
|
||||
Assertions.assertEquals("ciao2", in.readShortText(StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
private void testDecodeString(String s) {
|
||||
|
Loading…
Reference in New Issue
Block a user