Add zero-copy text encoder
This commit is contained in:
parent
70adaaf6cf
commit
915afcbf4d
@ -198,9 +198,19 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.google.guava</groupId>
|
<groupId>com.google.guava</groupId>
|
||||||
<artifactId>guava</artifactId>
|
<artifactId>guava</artifactId>
|
||||||
<version>31.1-jre</version>
|
<version>33.0.0-jre</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.openjdk.jmh</groupId>
|
||||||
|
<artifactId>jmh-core</artifactId>
|
||||||
|
<version>1.36</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.openjdk.jmh</groupId>
|
||||||
|
<artifactId>jmh-generator-annprocess</artifactId>
|
||||||
|
<version>1.36</version>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<dependencyManagement>
|
<dependencyManagement>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
@ -232,4 +242,88 @@
|
|||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</dependencyManagement>
|
</dependencyManagement>
|
||||||
|
<profiles>
|
||||||
|
<profile>
|
||||||
|
<id>java9</id>
|
||||||
|
<activation>
|
||||||
|
<activeByDefault>true</activeByDefault>
|
||||||
|
</activation>
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.codehaus.mojo</groupId>
|
||||||
|
<artifactId>build-helper-maven-plugin</artifactId>
|
||||||
|
<version>3.6.0</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>add-source</id>
|
||||||
|
<phase>generate-sources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>add-source</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<sources>
|
||||||
|
<source>src/java9/java</source>
|
||||||
|
</sources>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
</profile>
|
||||||
|
<profile>
|
||||||
|
<id>benchmark</id>
|
||||||
|
<activation>
|
||||||
|
<activeByDefault>false</activeByDefault>
|
||||||
|
</activation>
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.openjdk.jmh</groupId>
|
||||||
|
<artifactId>jmh-core</artifactId>
|
||||||
|
<version>1.36</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.openjdk.jmh</groupId>
|
||||||
|
<artifactId>jmh-generator-annprocess</artifactId>
|
||||||
|
<version>1.36</version>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.codehaus.mojo</groupId>
|
||||||
|
<artifactId>build-helper-maven-plugin</artifactId>
|
||||||
|
<version>3.6.0</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>add-source</id>
|
||||||
|
<phase>generate-sources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>add-source</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<sources>
|
||||||
|
<source>src/benchmark/java</source>
|
||||||
|
</sources>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
|
<version>3.8.1</version>
|
||||||
|
<configuration>
|
||||||
|
<release>21</release>
|
||||||
|
<encoding>UTF-8</encoding>
|
||||||
|
<excludes>
|
||||||
|
<exclude>src/main/java/module-info.java</exclude>
|
||||||
|
</excludes>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
</profile>
|
||||||
|
</profiles>
|
||||||
</project>
|
</project>
|
@ -0,0 +1,14 @@
|
|||||||
|
package it.cavallium.buffer;
|
||||||
|
|
||||||
|
import org.openjdk.jmh.runner.Runner;
|
||||||
|
import org.openjdk.jmh.runner.RunnerException;
|
||||||
|
import org.openjdk.jmh.runner.options.Options;
|
||||||
|
import org.openjdk.jmh.runner.options.OptionsBuilder;
|
||||||
|
|
||||||
|
public class BenchmarkMain {
|
||||||
|
|
||||||
|
public static void main(String[] args) throws RunnerException {
|
||||||
|
Options opt = new OptionsBuilder().include(BufEncoderBench.class.getSimpleName()).build();
|
||||||
|
new Runner(opt).run();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,124 @@
|
|||||||
|
package it.cavallium.buffer;
|
||||||
|
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.ThreadLocalRandom;
|
||||||
|
|
||||||
|
public class BenchmarkMainManual {
|
||||||
|
|
||||||
|
public static class BenchmarkZeroCopy {
|
||||||
|
public static void main(String[] args) {
|
||||||
|
var s2 = new ZeroAllocationEncoderBenchState();
|
||||||
|
s2.prepare();
|
||||||
|
while (true) {
|
||||||
|
s2.reset();
|
||||||
|
encodeMediumTextZeroCopy(s2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class BenchmarkJava {
|
||||||
|
public static void main(String[] args) {
|
||||||
|
var s2 = new ZeroAllocationEncoderBenchState();
|
||||||
|
s2.prepare();
|
||||||
|
while (true) {
|
||||||
|
s2.reset();
|
||||||
|
encodeMediumTextJava(s2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class ZeroAllocationEncoderBenchState {
|
||||||
|
private static final List<String> WORDS = List.of(
|
||||||
|
"\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF",
|
||||||
|
"hello",
|
||||||
|
"test",
|
||||||
|
"òàòà§òè+=))=732e0",
|
||||||
|
"ل موقع يسمح لزواره الكرام بتحويل الكتابة العربي الى كتابة مفه",
|
||||||
|
"\uD800\uDF3C\uD800\uDF30\uD800\uDF32 \uD800\uDF32\uD800\uDF3B\uD800\uDF34\uD800\uDF43 \uD800\uDF39̈\uD800\uDF44\uD800\uDF30\uD800\uDF3D, \uD800\uDF3D\uD800\uDF39 \uD800\uDF3C\uD800\uDF39\uD800\uDF43 \uD800\uDF45\uD800\uDF3F \uD800\uDF3D\uD800\uDF33\uD800\uDF30\uD800\uDF3D \uD800\uDF31\uD800\uDF42\uD800\uDF39\uD800\uDF32\uD800\uDF32\uD800\uDF39\uD800\uDF38.",
|
||||||
|
"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇",
|
||||||
|
"من left اليمين to الى right اليسار",
|
||||||
|
"a\u202Db\u202Ec\u202Dd\u202Ee\u202Df\u202Eg",
|
||||||
|
"﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽",
|
||||||
|
"\uD83D\uDC71\uD83D\uDC71\uD83C\uDFFB\uD83D\uDC71\uD83C\uDFFC\uD83D\uDC71\uD83C\uDFFD\uD83D\uDC71\uD83C\uDFFE\uD83D\uDC71\uD83C\uDFFF",
|
||||||
|
"\uD83E\uDDDF\u200D♀\uFE0F\uD83E\uDDDF\u200D♂\uFE0F",
|
||||||
|
"\uD83D\uDC68\u200D❤\uFE0F\u200D\uD83D\uDC8B\u200D\uD83D\uDC68\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66\uD83C\uDFF3\uFE0F\u200D⚧\uFE0F\uD83C\uDDF5\uD83C\uDDF7",
|
||||||
|
"田中さんにあげて下さい",
|
||||||
|
"ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็",
|
||||||
|
"\uD801\uDC1C \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC19\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC13/\uD801\uDC1D\uD801\uDC07\uD801\uDC17\uD801\uDC0A\uD801\uDC24\uD801\uDC14 \uD801\uDC12\uD801\uDC0B\uD801\uDC17 \uD801\uDC12\uD801\uDC0C \uD801\uDC1C \uD801\uDC21\uD801\uDC00\uD801\uDC16\uD801\uDC07\uD801\uDC24\uD801\uDC13\uD801\uDC1D \uD801\uDC31\uD801\uDC42 \uD801\uDC44 \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC0F\uD801\uDC06\uD801\uDC05\uD801\uDC24\uD801\uDC06\uD801\uDC1A\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC06\uD801\uDC13\uD801\uDC06",
|
||||||
|
"表ポあA鷗ŒéB逍Üߪąñ丂㐀\uD840\uDC00",
|
||||||
|
"᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜\n" +
|
||||||
|
"᚛ ᚜\n",
|
||||||
|
"Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗\n" +
|
||||||
|
"\uD83C\uDFF30\uD83C\uDF08\uFE0F\n" +
|
||||||
|
"జ్ఞ\u200Cా"
|
||||||
|
);
|
||||||
|
String shortText;
|
||||||
|
byte[] shortTextBytes;
|
||||||
|
BufDataInput shortTextInput;
|
||||||
|
BufDataOutput shortTextOutput;
|
||||||
|
String mediumText;
|
||||||
|
byte[] mediumTextBytes;
|
||||||
|
BufDataInput mediumTextInput;
|
||||||
|
BufDataOutput mediumTextOutput;
|
||||||
|
String longText;
|
||||||
|
byte[] longTextBytes;
|
||||||
|
BufDataInput longTextInput;
|
||||||
|
BufDataOutput longTextOutput;
|
||||||
|
|
||||||
|
public void reset() {
|
||||||
|
longTextInput.reset();
|
||||||
|
longTextOutput.resetUnderlyingBuffer();
|
||||||
|
mediumTextInput.reset();
|
||||||
|
mediumTextOutput.resetUnderlyingBuffer();
|
||||||
|
shortTextInput.reset();
|
||||||
|
shortTextOutput.resetUnderlyingBuffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void prepare() {
|
||||||
|
var l = new ArrayList<String>();
|
||||||
|
var maxI = ThreadLocalRandom.current().nextInt(1, 20);
|
||||||
|
for (int i = 0; i < maxI; i++) {
|
||||||
|
l.addAll(WORDS);
|
||||||
|
}
|
||||||
|
Collections.shuffle(l);
|
||||||
|
var fullText = String.join(" ", l);
|
||||||
|
var out = BufDataOutput.create(Integer.BYTES + fullText.getBytes(StandardCharsets.UTF_8).length);
|
||||||
|
|
||||||
|
out.resetUnderlyingBuffer();
|
||||||
|
longText = fullText;
|
||||||
|
longTextBytes = longText.getBytes(StandardCharsets.UTF_8);
|
||||||
|
out.writeMediumText(longText, StandardCharsets.UTF_8);
|
||||||
|
longTextInput = BufDataInput.create(out.toList());
|
||||||
|
longTextOutput = BufDataOutput.create(Integer.BYTES + longTextBytes.length);
|
||||||
|
|
||||||
|
out.resetUnderlyingBuffer();
|
||||||
|
mediumText = fullText.substring(0, 128);
|
||||||
|
mediumTextBytes = mediumText.getBytes(StandardCharsets.UTF_8);
|
||||||
|
out.writeMediumText(mediumText, StandardCharsets.UTF_8);
|
||||||
|
mediumTextInput = BufDataInput.create(out.toList());
|
||||||
|
mediumTextOutput = BufDataOutput.create(Integer.BYTES + mediumTextBytes.length);
|
||||||
|
|
||||||
|
out.resetUnderlyingBuffer();
|
||||||
|
shortText = fullText.substring(0, 15);
|
||||||
|
shortTextBytes = shortText.getBytes(StandardCharsets.UTF_8);
|
||||||
|
out.writeMediumText(shortText, StandardCharsets.UTF_8);
|
||||||
|
shortTextInput = BufDataInput.create(out.toList());
|
||||||
|
shortTextOutput = BufDataOutput.create(Integer.BYTES + shortTextBytes.length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static Buf encodeMediumTextZeroCopy(ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var out = benchState.mediumTextOutput;
|
||||||
|
out.writeMediumTextZeroCopy(benchState.mediumText);
|
||||||
|
return out.toList();
|
||||||
|
}
|
||||||
|
public static Buf encodeMediumTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var out = benchState.mediumTextOutput;
|
||||||
|
out.writeMediumTextLegacy(benchState.mediumText, StandardCharsets.UTF_8);
|
||||||
|
return out.toList();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,174 @@
|
|||||||
|
package it.cavallium.buffer;
|
||||||
|
|
||||||
|
import org.openjdk.jmh.annotations.*;
|
||||||
|
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.ThreadLocalRandom;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
@State(Scope.Benchmark)
|
||||||
|
@OutputTimeUnit(TimeUnit.SECONDS)
|
||||||
|
@BenchmarkMode(Mode.Throughput)
|
||||||
|
@Fork(value = 1, warmups = 1)
|
||||||
|
@Warmup(time = 2, iterations = 6)
|
||||||
|
@Measurement(time = 2, iterations = 6)
|
||||||
|
public class BufEncoderBench {
|
||||||
|
|
||||||
|
@State(Scope.Thread)
|
||||||
|
public static class ZeroAllocationEncoderState {
|
||||||
|
ZeroAllocationEncoder encoder;
|
||||||
|
BufDataOutput bufOutput;
|
||||||
|
|
||||||
|
@Setup
|
||||||
|
public void prepare() {
|
||||||
|
encoder = ZeroAllocationEncoder.INSTANCE;
|
||||||
|
bufOutput = BufDataOutput.create(1024);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@State(Scope.Benchmark)
|
||||||
|
public static class ZeroAllocationEncoderBenchState {
|
||||||
|
private static final List<String> WORDS = List.of(
|
||||||
|
"\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF",
|
||||||
|
"hello",
|
||||||
|
"test",
|
||||||
|
"òàòà§òè+=))=732e0",
|
||||||
|
"ل موقع يسمح لزواره الكرام بتحويل الكتابة العربي الى كتابة مفه",
|
||||||
|
"\uD800\uDF3C\uD800\uDF30\uD800\uDF32 \uD800\uDF32\uD800\uDF3B\uD800\uDF34\uD800\uDF43 \uD800\uDF39̈\uD800\uDF44\uD800\uDF30\uD800\uDF3D, \uD800\uDF3D\uD800\uDF39 \uD800\uDF3C\uD800\uDF39\uD800\uDF43 \uD800\uDF45\uD800\uDF3F \uD800\uDF3D\uD800\uDF33\uD800\uDF30\uD800\uDF3D \uD800\uDF31\uD800\uDF42\uD800\uDF39\uD800\uDF32\uD800\uDF32\uD800\uDF39\uD800\uDF38.",
|
||||||
|
"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇",
|
||||||
|
"من left اليمين to الى right اليسار",
|
||||||
|
"a\u202Db\u202Ec\u202Dd\u202Ee\u202Df\u202Eg",
|
||||||
|
"﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽",
|
||||||
|
"\uD83D\uDC71\uD83D\uDC71\uD83C\uDFFB\uD83D\uDC71\uD83C\uDFFC\uD83D\uDC71\uD83C\uDFFD\uD83D\uDC71\uD83C\uDFFE\uD83D\uDC71\uD83C\uDFFF",
|
||||||
|
"\uD83E\uDDDF\u200D♀\uFE0F\uD83E\uDDDF\u200D♂\uFE0F",
|
||||||
|
"\uD83D\uDC68\u200D❤\uFE0F\u200D\uD83D\uDC8B\u200D\uD83D\uDC68\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66\uD83C\uDFF3\uFE0F\u200D⚧\uFE0F\uD83C\uDDF5\uD83C\uDDF7",
|
||||||
|
"田中さんにあげて下さい",
|
||||||
|
"ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็",
|
||||||
|
"\uD801\uDC1C \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC19\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC13/\uD801\uDC1D\uD801\uDC07\uD801\uDC17\uD801\uDC0A\uD801\uDC24\uD801\uDC14 \uD801\uDC12\uD801\uDC0B\uD801\uDC17 \uD801\uDC12\uD801\uDC0C \uD801\uDC1C \uD801\uDC21\uD801\uDC00\uD801\uDC16\uD801\uDC07\uD801\uDC24\uD801\uDC13\uD801\uDC1D \uD801\uDC31\uD801\uDC42 \uD801\uDC44 \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC0F\uD801\uDC06\uD801\uDC05\uD801\uDC24\uD801\uDC06\uD801\uDC1A\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC06\uD801\uDC13\uD801\uDC06",
|
||||||
|
"表ポあA鷗ŒéB逍Üߪąñ丂㐀\uD840\uDC00",
|
||||||
|
"᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜\n" +
|
||||||
|
"᚛ ᚜\n",
|
||||||
|
"Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗\n" +
|
||||||
|
"\uD83C\uDFF30\uD83C\uDF08\uFE0F\n" +
|
||||||
|
"జ్ఞ\u200Cా"
|
||||||
|
);
|
||||||
|
String shortText;
|
||||||
|
byte[] shortTextBytes;
|
||||||
|
BufDataInput shortTextInput;
|
||||||
|
BufDataOutput shortTextOutput;
|
||||||
|
String mediumText;
|
||||||
|
byte[] mediumTextBytes;
|
||||||
|
BufDataInput mediumTextInput;
|
||||||
|
BufDataOutput mediumTextOutput;
|
||||||
|
String longText;
|
||||||
|
byte[] longTextBytes;
|
||||||
|
BufDataInput longTextInput;
|
||||||
|
BufDataOutput longTextOutput;
|
||||||
|
|
||||||
|
@Setup(Level.Invocation)
|
||||||
|
public void reset() {
|
||||||
|
longTextInput.reset();
|
||||||
|
longTextOutput.resetUnderlyingBuffer();
|
||||||
|
mediumTextInput.reset();
|
||||||
|
mediumTextOutput.resetUnderlyingBuffer();
|
||||||
|
shortTextInput.reset();
|
||||||
|
shortTextOutput.resetUnderlyingBuffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Setup(Level.Invocation)
|
||||||
|
public void prepare() {
|
||||||
|
var l = new ArrayList<String>();
|
||||||
|
var maxI = ThreadLocalRandom.current().nextInt(1, 20);
|
||||||
|
for (int i = 0; i < maxI; i++) {
|
||||||
|
l.addAll(WORDS);
|
||||||
|
}
|
||||||
|
Collections.shuffle(l);
|
||||||
|
var fullText = String.join(" ", l);
|
||||||
|
var out = BufDataOutput.create(Integer.BYTES + fullText.getBytes(StandardCharsets.UTF_8).length);
|
||||||
|
|
||||||
|
out.resetUnderlyingBuffer();
|
||||||
|
longText = fullText;
|
||||||
|
longTextBytes = longText.getBytes(StandardCharsets.UTF_8);
|
||||||
|
out.writeMediumText(longText, StandardCharsets.UTF_8);
|
||||||
|
longTextInput = BufDataInput.create(out.toList());
|
||||||
|
longTextOutput = BufDataOutput.create(Integer.BYTES + longTextBytes.length);
|
||||||
|
|
||||||
|
out.resetUnderlyingBuffer();
|
||||||
|
mediumText = fullText.substring(0, 128);
|
||||||
|
mediumTextBytes = mediumText.getBytes(StandardCharsets.UTF_8);
|
||||||
|
out.writeMediumText(mediumText, StandardCharsets.UTF_8);
|
||||||
|
mediumTextInput = BufDataInput.create(out.toList());
|
||||||
|
mediumTextOutput = BufDataOutput.create(Integer.BYTES + mediumTextBytes.length);
|
||||||
|
|
||||||
|
out.resetUnderlyingBuffer();
|
||||||
|
shortText = fullText.substring(0, 15);
|
||||||
|
shortTextBytes = shortText.getBytes(StandardCharsets.UTF_8);
|
||||||
|
out.writeMediumText(shortText, StandardCharsets.UTF_8);
|
||||||
|
shortTextInput = BufDataInput.create(out.toList());
|
||||||
|
shortTextOutput = BufDataOutput.create(Integer.BYTES + shortTextBytes.length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public Buf encodeShortTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var out = benchState.shortTextOutput;
|
||||||
|
out.writeMediumTextZeroCopy(benchState.shortText);
|
||||||
|
return out.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public Buf encodeMediumTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var out = benchState.mediumTextOutput;
|
||||||
|
out.writeMediumTextZeroCopy(benchState.mediumText);
|
||||||
|
return out.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public Buf encodeLongTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var out = benchState.longTextOutput;
|
||||||
|
out.writeMediumTextZeroCopy(benchState.longText);
|
||||||
|
return out.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public Buf encodeShortTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var out = benchState.shortTextOutput;
|
||||||
|
out.writeMediumTextLegacy(benchState.shortText, StandardCharsets.UTF_8);
|
||||||
|
return out.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public Buf encodeMediumTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var out = benchState.mediumTextOutput;
|
||||||
|
out.writeMediumTextLegacy(benchState.mediumText, StandardCharsets.UTF_8);
|
||||||
|
return out.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public Buf encodeLongTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var out = benchState.longTextOutput;
|
||||||
|
out.writeMediumTextLegacy(benchState.longText, StandardCharsets.UTF_8);
|
||||||
|
return out.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public String decodeShortText(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var in = benchState.shortTextInput;
|
||||||
|
return in.readMediumText(StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
|
@Benchmark
|
||||||
|
public String decodeMediumText(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var in = benchState.mediumTextInput;
|
||||||
|
return in.readMediumText(StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
|
@Benchmark
|
||||||
|
public String decodeLongText(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var in = benchState.longTextInput;
|
||||||
|
return in.readMediumText(StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,178 @@
|
|||||||
|
package it.cavallium.buffer;
|
||||||
|
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.ThreadLocalRandom;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import org.openjdk.jmh.annotations.*;
|
||||||
|
|
||||||
|
@State(Scope.Benchmark)
|
||||||
|
@OutputTimeUnit(TimeUnit.SECONDS)
|
||||||
|
@BenchmarkMode(Mode.Throughput)
|
||||||
|
@Fork(value = 1, warmups = 1)
|
||||||
|
@Warmup(time = 1, iterations = 3)
|
||||||
|
@Measurement(time = 1, iterations = 6)
|
||||||
|
public class ZeroAllocationEncoderBench {
|
||||||
|
|
||||||
|
@State(Scope.Thread)
|
||||||
|
public static class ZeroAllocationEncoderState {
|
||||||
|
ZeroAllocationEncoder encoder;
|
||||||
|
BufDataOutput bufOutput;
|
||||||
|
|
||||||
|
@Setup
|
||||||
|
public void prepare() {
|
||||||
|
encoder = ZeroAllocationEncoder.INSTANCE;
|
||||||
|
bufOutput = BufDataOutput.create(1024);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@State(Scope.Benchmark)
|
||||||
|
public static class ZeroAllocationEncoderBenchState {
|
||||||
|
private static final List<String> WORDS = List.of(
|
||||||
|
"\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF",
|
||||||
|
"hello",
|
||||||
|
"test",
|
||||||
|
"òàòà§òè+=))=732e0",
|
||||||
|
"ل موقع يسمح لزواره الكرام بتحويل الكتابة العربي الى كتابة مفه",
|
||||||
|
"\uD800\uDF3C\uD800\uDF30\uD800\uDF32 \uD800\uDF32\uD800\uDF3B\uD800\uDF34\uD800\uDF43 \uD800\uDF39̈\uD800\uDF44\uD800\uDF30\uD800\uDF3D, \uD800\uDF3D\uD800\uDF39 \uD800\uDF3C\uD800\uDF39\uD800\uDF43 \uD800\uDF45\uD800\uDF3F \uD800\uDF3D\uD800\uDF33\uD800\uDF30\uD800\uDF3D \uD800\uDF31\uD800\uDF42\uD800\uDF39\uD800\uDF32\uD800\uDF32\uD800\uDF39\uD800\uDF38.",
|
||||||
|
"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇",
|
||||||
|
"من left اليمين to الى right اليسار",
|
||||||
|
"a\u202Db\u202Ec\u202Dd\u202Ee\u202Df\u202Eg",
|
||||||
|
"﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽",
|
||||||
|
"\uD83D\uDC71\uD83D\uDC71\uD83C\uDFFB\uD83D\uDC71\uD83C\uDFFC\uD83D\uDC71\uD83C\uDFFD\uD83D\uDC71\uD83C\uDFFE\uD83D\uDC71\uD83C\uDFFF",
|
||||||
|
"\uD83E\uDDDF\u200D♀\uFE0F\uD83E\uDDDF\u200D♂\uFE0F",
|
||||||
|
"\uD83D\uDC68\u200D❤\uFE0F\u200D\uD83D\uDC8B\u200D\uD83D\uDC68\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66\uD83C\uDFF3\uFE0F\u200D⚧\uFE0F\uD83C\uDDF5\uD83C\uDDF7",
|
||||||
|
"田中さんにあげて下さい",
|
||||||
|
"ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็",
|
||||||
|
"\uD801\uDC1C \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC19\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC13/\uD801\uDC1D\uD801\uDC07\uD801\uDC17\uD801\uDC0A\uD801\uDC24\uD801\uDC14 \uD801\uDC12\uD801\uDC0B\uD801\uDC17 \uD801\uDC12\uD801\uDC0C \uD801\uDC1C \uD801\uDC21\uD801\uDC00\uD801\uDC16\uD801\uDC07\uD801\uDC24\uD801\uDC13\uD801\uDC1D \uD801\uDC31\uD801\uDC42 \uD801\uDC44 \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC0F\uD801\uDC06\uD801\uDC05\uD801\uDC24\uD801\uDC06\uD801\uDC1A\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC06\uD801\uDC13\uD801\uDC06",
|
||||||
|
"表ポあA鷗ŒéB逍Üߪąñ丂㐀\uD840\uDC00",
|
||||||
|
"᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜\n" +
|
||||||
|
"᚛ ᚜\n",
|
||||||
|
"Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗\n" +
|
||||||
|
"\uD83C\uDFF30\uD83C\uDF08\uFE0F\n" +
|
||||||
|
"జ్ఞ\u200Cా"
|
||||||
|
);
|
||||||
|
String shortText;
|
||||||
|
byte[] shortTextBytes;
|
||||||
|
BufDataInput shortTextInput;
|
||||||
|
BufDataOutput shortTextOutput;
|
||||||
|
String mediumText;
|
||||||
|
byte[] mediumTextBytes;
|
||||||
|
BufDataInput mediumTextInput;
|
||||||
|
BufDataOutput mediumTextOutput;
|
||||||
|
String longText;
|
||||||
|
byte[] longTextBytes;
|
||||||
|
BufDataInput longTextInput;
|
||||||
|
BufDataOutput longTextOutput;
|
||||||
|
|
||||||
|
@Setup(Level.Invocation)
|
||||||
|
public void reset() {
|
||||||
|
longTextInput.reset();
|
||||||
|
longTextOutput.resetUnderlyingBuffer();
|
||||||
|
mediumTextInput.reset();
|
||||||
|
mediumTextOutput.resetUnderlyingBuffer();
|
||||||
|
shortTextInput.reset();
|
||||||
|
shortTextOutput.resetUnderlyingBuffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Setup
|
||||||
|
public void prepare() {
|
||||||
|
var l = new ArrayList<String>();
|
||||||
|
var maxI = ThreadLocalRandom.current().nextInt(1, 20);
|
||||||
|
for (int i = 0; i < maxI; i++) {
|
||||||
|
l.addAll(WORDS);
|
||||||
|
}
|
||||||
|
Collections.shuffle(l);
|
||||||
|
var fullText = String.join(" ", l);
|
||||||
|
longText = fullText;
|
||||||
|
longTextBytes = longText.getBytes(StandardCharsets.UTF_8);
|
||||||
|
longTextInput = BufDataInput.create(Buf.wrap(longTextBytes));
|
||||||
|
longTextOutput = BufDataOutput.create(longTextBytes.length);
|
||||||
|
mediumText = fullText.substring(0, 128);
|
||||||
|
mediumTextBytes = mediumText.getBytes(StandardCharsets.UTF_8);
|
||||||
|
mediumTextInput = BufDataInput.create(Buf.wrap(mediumTextBytes));
|
||||||
|
mediumTextOutput = BufDataOutput.create(mediumTextBytes.length);
|
||||||
|
shortText = fullText.substring(0, 15);
|
||||||
|
shortTextBytes = shortText.getBytes(StandardCharsets.UTF_8);
|
||||||
|
shortTextInput = BufDataInput.create(Buf.wrap(shortTextBytes));
|
||||||
|
shortTextOutput = BufDataOutput.create(shortTextBytes.length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public BufDataOutput encodeShortTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var out = benchState.shortTextOutput;
|
||||||
|
encoderState.encoder.encodeTo(benchState.shortText, out);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public BufDataOutput encodeMediumTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var out = benchState.mediumTextOutput;
|
||||||
|
encoderState.encoder.encodeTo(benchState.mediumText, out);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public BufDataOutput encodeLongTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var out = benchState.longTextOutput;
|
||||||
|
encoderState.encoder.encodeTo(benchState.longText, out);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public BufDataOutput encodeShortTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var out = benchState.shortTextOutput;
|
||||||
|
out.write(benchState.shortText.getBytes(StandardCharsets.UTF_8));
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public BufDataOutput encodeMediumTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var out = benchState.mediumTextOutput;
|
||||||
|
out.write(benchState.mediumText.getBytes(StandardCharsets.UTF_8));
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public BufDataOutput encodeLongTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
var out = benchState.longTextOutput;
|
||||||
|
out.write(benchState.longText.getBytes(StandardCharsets.UTF_8));
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public String decodeShortTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
return encoderState.encoder.decodeFrom(benchState.shortTextInput, benchState.shortTextBytes.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public String decodeMediumTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
return encoderState.encoder.decodeFrom(benchState.mediumTextInput, benchState.mediumTextBytes.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public String decodeLongTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
return encoderState.encoder.decodeFrom(benchState.longTextInput, benchState.longTextBytes.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public String decodeShortTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
return new String(benchState.shortTextInput.readAllBytes(), StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public String decodeMediumTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
return new String(benchState.mediumTextInput.readAllBytes(), StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public String decodeLongTextJava(ZeroAllocationEncoderBenchState benchState) {
|
||||||
|
return new String(benchState.longTextInput.readAllBytes(), StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -12,22 +12,23 @@ import java.util.Arrays;
|
|||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
import org.jetbrains.annotations.NotNull;
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
import org.jetbrains.annotations.VisibleForTesting;
|
||||||
|
|
||||||
public class BufDataOutput implements SafeDataOutput {
|
public class BufDataOutput implements SafeDataOutput {
|
||||||
|
|
||||||
private final SafeByteArrayOutputStream buf;
|
private final SafeByteArrayOutputStream buf;
|
||||||
private final SafeDataOutputStream dOut;
|
private final SafeByteArrayDataOutputStream dOut;
|
||||||
private final int limit;
|
private final int limit;
|
||||||
|
|
||||||
public BufDataOutput(SafeByteArrayOutputStream buf) {
|
public BufDataOutput(SafeByteArrayOutputStream buf) {
|
||||||
this.buf = buf;
|
this.buf = buf;
|
||||||
this.dOut = new SafeDataOutputStream(buf);
|
this.dOut = new SafeByteArrayDataOutputStream(buf);
|
||||||
limit = Integer.MAX_VALUE;
|
limit = Integer.MAX_VALUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
public BufDataOutput(SafeByteArrayOutputStream buf, int maxSize) {
|
public BufDataOutput(SafeByteArrayOutputStream buf, int maxSize) {
|
||||||
this.buf = buf;
|
this.buf = buf;
|
||||||
this.dOut = new SafeDataOutputStream(buf);
|
this.dOut = new SafeByteArrayDataOutputStream(buf);
|
||||||
this.limit = maxSize;
|
this.limit = maxSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -218,17 +219,17 @@ public class BufDataOutput implements SafeDataOutput {
|
|||||||
@Override
|
@Override
|
||||||
public void writeShortText(String s, Charset charset) {
|
public void writeShortText(String s, Charset charset) {
|
||||||
if (charset == StandardCharsets.UTF_8) {
|
if (charset == StandardCharsets.UTF_8) {
|
||||||
var beforeWrite = this.buf.position();
|
var beforeWrite = (int) this.position();
|
||||||
writeShort(0);
|
this.advancePosition(Short.BYTES);
|
||||||
ZeroAllocationEncoder.INSTANCE.encodeTo(s, this);
|
ZeroAllocationEncoder.INSTANCE.encodeTo(s, this);
|
||||||
var afterWrite = this.buf.position();
|
var afterWrite = (int) this.position();
|
||||||
this.buf.position(beforeWrite);
|
this.rewindPosition(afterWrite - beforeWrite);
|
||||||
var len = Math.toIntExact(afterWrite - beforeWrite - Short.BYTES);
|
var len = Math.toIntExact(afterWrite - beforeWrite - Short.BYTES);
|
||||||
if (len > Short.MAX_VALUE) {
|
if (len > Short.MAX_VALUE) {
|
||||||
throw new IndexOutOfBoundsException("String too long: " + len + " bytes");
|
throw new IndexOutOfBoundsException("String too long: " + len + " bytes");
|
||||||
}
|
}
|
||||||
this.writeShort(len);
|
dOut.writeShort(len);
|
||||||
this.buf.position(afterWrite);
|
dOut.advancePosition(len);
|
||||||
} else {
|
} else {
|
||||||
var out = s.getBytes(charset);
|
var out = s.getBytes(charset);
|
||||||
if (out.length > Short.MAX_VALUE) {
|
if (out.length > Short.MAX_VALUE) {
|
||||||
@ -242,20 +243,49 @@ public class BufDataOutput implements SafeDataOutput {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void writeMediumText(String s, Charset charset) {
|
public void writeMediumText(String s, Charset charset) {
|
||||||
if (charset == StandardCharsets.UTF_8) {
|
// todo: charbuffer is still slow, check in future java versions
|
||||||
var beforeWrite = this.buf.position();
|
// if (charset == StandardCharsets.UTF_8) {
|
||||||
writeInt(0);
|
// writeMediumTextZeroCopy(s);
|
||||||
ZeroAllocationEncoder.INSTANCE.encodeTo(s, this);
|
// } else {
|
||||||
var afterWrite = this.buf.position();
|
writeMediumTextLegacy(s, charset);
|
||||||
this.buf.position(beforeWrite);
|
// }
|
||||||
this.writeInt(Math.toIntExact(afterWrite - beforeWrite - Integer.BYTES));
|
}
|
||||||
this.buf.position(afterWrite);
|
|
||||||
} else {
|
@VisibleForTesting
|
||||||
var out = s.getBytes(charset);
|
public void writeMediumTextZeroCopy(String s) {
|
||||||
checkOutOfBounds(Integer.BYTES + out.length);
|
var beforeWrite = (int) this.position();
|
||||||
dOut.writeInt(out.length);
|
this.advancePosition(Integer.BYTES);
|
||||||
dOut.write(out);
|
ZeroAllocationEncoder.INSTANCE.encodeTo(s, this);
|
||||||
}
|
var afterWrite = (int) this.position();
|
||||||
|
this.rewindPosition(afterWrite - beforeWrite);
|
||||||
|
var len = Math.toIntExact(afterWrite - beforeWrite - Integer.BYTES);
|
||||||
|
dOut.writeInt(len);
|
||||||
|
dOut.advancePosition(len);
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public void writeMediumTextLegacy(String s, Charset charset) {
|
||||||
|
var out = s.getBytes(charset);
|
||||||
|
checkOutOfBounds(Integer.BYTES + out.length);
|
||||||
|
dOut.writeInt(out.length);
|
||||||
|
dOut.write(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void resetUnderlyingBuffer() {
|
||||||
|
dOut.resetUnderlyingBuffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void rewindPosition(int count) {
|
||||||
|
dOut.rewindPosition(count);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void advancePosition(int count) {
|
||||||
|
checkOutOfBounds(count);
|
||||||
|
dOut.advancePosition(count);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return dOut.position();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Buf asList() {
|
public Buf asList() {
|
||||||
|
@ -0,0 +1,37 @@
|
|||||||
|
package it.cavallium.buffer;
|
||||||
|
|
||||||
|
import it.cavallium.stream.SafeByteArrayOutputStream;
|
||||||
|
import it.cavallium.stream.SafeDataOutputStream;
|
||||||
|
|
||||||
|
public class SafeByteArrayDataOutputStream extends SafeDataOutputStream {
|
||||||
|
private final SafeByteArrayOutputStream bOut;
|
||||||
|
|
||||||
|
public SafeByteArrayDataOutputStream(SafeByteArrayOutputStream out) {
|
||||||
|
super(out);
|
||||||
|
this.bOut = out;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void resetUnderlyingBuffer() {
|
||||||
|
bOut.reset();
|
||||||
|
this.written = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void rewindPosition(int count) {
|
||||||
|
var currentPosition = bOut.position();
|
||||||
|
if (count > written) {
|
||||||
|
throw new IndexOutOfBoundsException(count + " > " + written);
|
||||||
|
}
|
||||||
|
bOut.position(currentPosition - count);
|
||||||
|
decCount(count);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void advancePosition(int count) {
|
||||||
|
bOut.ensureWritable(count);
|
||||||
|
bOut.position(bOut.position() + count);
|
||||||
|
incCount(count);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return bOut.position();
|
||||||
|
}
|
||||||
|
}
|
@ -27,25 +27,38 @@ public class ZeroAllocationEncoder {
|
|||||||
|
|
||||||
private final ThreadLocal<AtomicReference<CharBuffer>> charBufferRefThreadLocal;
|
private final ThreadLocal<AtomicReference<CharBuffer>> charBufferRefThreadLocal;
|
||||||
|
|
||||||
|
private final ThreadLocal<AtomicReference<ByteBuffer>> byteBufferRefThreadLocal;
|
||||||
|
|
||||||
public ZeroAllocationEncoder(int outBufferSize) {
|
public ZeroAllocationEncoder(int outBufferSize) {
|
||||||
bufferThreadLocal = ThreadLocal.withInitial(() -> ByteBuffer.allocate(outBufferSize));
|
var maxBytesPerChar = (int) Math.ceil(StandardCharsets.UTF_8.newEncoder().maxBytesPerChar());
|
||||||
|
bufferThreadLocal = ThreadLocal.withInitial(() -> ByteBuffer.allocate(outBufferSize * maxBytesPerChar));
|
||||||
charBufferRefThreadLocal = ThreadLocal.withInitial(() -> new AtomicReference<>(CharBuffer.allocate(outBufferSize)));
|
charBufferRefThreadLocal = ThreadLocal.withInitial(() -> new AtomicReference<>(CharBuffer.allocate(outBufferSize)));
|
||||||
|
byteBufferRefThreadLocal = ThreadLocal.withInitial(() -> new AtomicReference<>(ByteBuffer.allocate(outBufferSize * maxBytesPerChar)));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void encodeTo(String s, SafeDataOutput bufDataOutput) {
|
public void encodeTo(String s, SafeDataOutput bufDataOutput) {
|
||||||
var encoder = CHARSET_ENCODER_UTF8.get();
|
var encoder = CHARSET_ENCODER_UTF8.get();
|
||||||
|
encoder.reset();
|
||||||
var buf = bufferThreadLocal.get();
|
var buf = bufferThreadLocal.get();
|
||||||
var charBuffer = CharBuffer.wrap(s);
|
var charBuffer = CharBuffer.wrap(s);
|
||||||
|
boolean endOfInput = false;
|
||||||
CoderResult result;
|
CoderResult result;
|
||||||
do {
|
do {
|
||||||
buf.clear();
|
buf.clear();
|
||||||
result = encoder.encode(charBuffer, buf, true);
|
result = encoder.encode(charBuffer, buf, endOfInput);
|
||||||
buf.flip();
|
buf.flip();
|
||||||
var bufArray = buf.array();
|
bufDataOutput.write(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining());
|
||||||
var bufArrayOffset = buf.arrayOffset();
|
|
||||||
bufDataOutput.write(bufArray, bufArrayOffset + buf.position(), buf.remaining());
|
|
||||||
if (result.isUnderflow()) {
|
if (result.isUnderflow()) {
|
||||||
break;
|
if (endOfInput) {
|
||||||
|
buf.clear();
|
||||||
|
encoder.flush(buf);
|
||||||
|
buf.flip();
|
||||||
|
bufDataOutput.write(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining());
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
endOfInput = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
} else if (result.isOverflow()) {
|
} else if (result.isOverflow()) {
|
||||||
continue;
|
continue;
|
||||||
} else if (result.isError()) {
|
} else if (result.isError()) {
|
||||||
@ -59,32 +72,41 @@ public class ZeroAllocationEncoder {
|
|||||||
} while (true);
|
} while (true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String decodeFrom(SafeDataInput bufDataInput, int length) {
|
public String decodeFrom(SafeDataInput bufDataInput, int bytesLength) {
|
||||||
var decoder = CHARSET_DECODER_UTF8.get();
|
var decoder = CHARSET_DECODER_UTF8.get();
|
||||||
var byteBuf = bufferThreadLocal.get();
|
decoder.reset();
|
||||||
|
var bufRef = byteBufferRefThreadLocal.get();
|
||||||
var charBufRef = charBufferRefThreadLocal.get();
|
var charBufRef = charBufferRefThreadLocal.get();
|
||||||
|
var buf = bufRef.get();
|
||||||
var charBuf = charBufRef.get();
|
var charBuf = charBufRef.get();
|
||||||
if (charBuf.capacity() < length) {
|
assert decoder.maxCharsPerByte() == 1.0f
|
||||||
charBuf = CharBuffer.allocate(length);
|
: "UTF8 max chars per byte is 1.0f, but the decoder got a value of " + decoder.maxCharsPerByte();
|
||||||
|
if (charBuf.capacity() < bytesLength) {
|
||||||
|
charBuf = CharBuffer.allocate(bytesLength);
|
||||||
charBufRef.set(charBuf);
|
charBufRef.set(charBuf);
|
||||||
} else {
|
} else {
|
||||||
charBuf.clear();
|
charBuf.clear();
|
||||||
}
|
}
|
||||||
var remainingLengthToRead = length;
|
if (buf.capacity() < bytesLength) {
|
||||||
|
buf = ByteBuffer.allocate(bytesLength);
|
||||||
|
bufRef.set(buf);
|
||||||
|
} else {
|
||||||
|
buf.clear();
|
||||||
|
}
|
||||||
CoderResult result;
|
CoderResult result;
|
||||||
do {
|
do {
|
||||||
byteBuf.clear();
|
buf.clear();
|
||||||
bufDataInput.readFully(byteBuf, Math.min(remainingLengthToRead, byteBuf.limit()));
|
assert buf.capacity() >= bytesLength;
|
||||||
byteBuf.flip();
|
bufDataInput.readFully(buf, bytesLength);
|
||||||
remainingLengthToRead -= byteBuf.remaining();
|
buf.flip();
|
||||||
result = decoder.decode(byteBuf, charBuf, true);
|
result = decoder.decode(buf, charBuf, true);
|
||||||
if (result.isUnderflow()) {
|
if (result.isUnderflow()) {
|
||||||
if (remainingLengthToRead > 0) {
|
result = decoder.flush(charBuf);
|
||||||
continue;
|
if (result.isOverflow()) {
|
||||||
} else {
|
throw new IllegalStateException("Unexpected overflow");
|
||||||
charBuf.flip();
|
|
||||||
return charBuf.toString();
|
|
||||||
}
|
}
|
||||||
|
charBuf.flip();
|
||||||
|
return charBuf.toString();
|
||||||
} else if (result.isOverflow()) {
|
} else if (result.isOverflow()) {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
} else if (result.isError()) {
|
} else if (result.isError()) {
|
||||||
|
@ -131,7 +131,7 @@ public class SafeByteArrayInputStream extends SafeMeasurableInputStream implemen
|
|||||||
public void readNBytes(int length, ByteBuffer buffer) {
|
public void readNBytes(int length, ByteBuffer buffer) {
|
||||||
Objects.checkFromIndexSize(0, length, buffer.remaining());
|
Objects.checkFromIndexSize(0, length, buffer.remaining());
|
||||||
if (this.available() < length) {
|
if (this.available() < length) {
|
||||||
throw new IndexOutOfBoundsException(this.length);
|
throw new IndexOutOfBoundsException(length);
|
||||||
}
|
}
|
||||||
buffer.put(array, offset + this.position, length);
|
buffer.put(array, offset + this.position, length);
|
||||||
position += length;
|
position += length;
|
||||||
@ -149,12 +149,17 @@ public class SafeByteArrayInputStream extends SafeMeasurableInputStream implemen
|
|||||||
return cappedLength;
|
return cappedLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte[] readAllBytes() {
|
||||||
|
var result = Arrays.copyOfRange(this.array, this.offset + position, this.offset + length);
|
||||||
|
position = length;
|
||||||
|
return result;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public byte[] readNBytes(int length) {
|
public byte[] readNBytes(int length) {
|
||||||
if (this.available() < length) {
|
var result = Arrays.copyOfRange(this.array, this.offset + position, this.offset + position + Math.min(length, this.available()));
|
||||||
throw new IndexOutOfBoundsException(this.length);
|
|
||||||
}
|
|
||||||
var result = Arrays.copyOfRange(this.array, this.offset + position, this.offset + position + length);
|
|
||||||
position += length;
|
position += length;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -162,7 +167,7 @@ public class SafeByteArrayInputStream extends SafeMeasurableInputStream implemen
|
|||||||
@Override
|
@Override
|
||||||
public String readString(int length, Charset charset) {
|
public String readString(int length, Charset charset) {
|
||||||
if (this.available() < length) {
|
if (this.available() < length) {
|
||||||
throw new IndexOutOfBoundsException(this.length);
|
throw new IndexOutOfBoundsException(length + " > " + this.available());
|
||||||
}
|
}
|
||||||
var result = new String(this.array, offset + position, length, charset);
|
var result = new String(this.array, offset + position, length, charset);
|
||||||
position += length;
|
position += length;
|
||||||
|
@ -63,9 +63,16 @@ public class SafeDataOutputStream extends SafeFilterOutputStream implements Safe
|
|||||||
* Increases the written counter by the specified value
|
* Increases the written counter by the specified value
|
||||||
* until it reaches Integer.MAX_VALUE.
|
* until it reaches Integer.MAX_VALUE.
|
||||||
*/
|
*/
|
||||||
private void incCount(int value) {
|
protected void incCount(int value) {
|
||||||
written = Math.addExact(written, value);
|
written = Math.addExact(written, value);
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Decreases the written counter by the specified value
|
||||||
|
* until it reaches 0.
|
||||||
|
*/
|
||||||
|
protected void decCount(int value) {
|
||||||
|
written = Math.subtractExact(written, value);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Writes the specified byte (the low eight bits of the argument
|
* Writes the specified byte (the low eight bits of the argument
|
||||||
|
@ -4,6 +4,10 @@ import org.junit.jupiter.api.Assertions;
|
|||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
@ -11,6 +15,48 @@ class ZeroAllocationEncoderTest {
|
|||||||
|
|
||||||
private static final ZeroAllocationEncoder INSTANCE = new ZeroAllocationEncoder(16);
|
private static final ZeroAllocationEncoder INSTANCE = new ZeroAllocationEncoder(16);
|
||||||
|
|
||||||
|
|
||||||
|
private static final List<String> WORDS = List.of(
|
||||||
|
"\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF",
|
||||||
|
"hello",
|
||||||
|
"test",
|
||||||
|
"òàòà§òè+=))=732e0",
|
||||||
|
"ل موقع يسمح لزواره الكرام بتحويل الكتابة العربي الى كتابة مفه",
|
||||||
|
"\uD800\uDF3C\uD800\uDF30\uD800\uDF32 \uD800\uDF32\uD800\uDF3B\uD800\uDF34\uD800\uDF43 \uD800\uDF39̈\uD800\uDF44\uD800\uDF30\uD800\uDF3D, \uD800\uDF3D\uD800\uDF39 \uD800\uDF3C\uD800\uDF39\uD800\uDF43 \uD800\uDF45\uD800\uDF3F \uD800\uDF3D\uD800\uDF33\uD800\uDF30\uD800\uDF3D \uD800\uDF31\uD800\uDF42\uD800\uDF39\uD800\uDF32\uD800\uDF32\uD800\uDF39\uD800\uDF38.",
|
||||||
|
"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇",
|
||||||
|
"من left اليمين to الى right اليسار",
|
||||||
|
"a\u202Db\u202Ec\u202Dd\u202Ee\u202Df\u202Eg",
|
||||||
|
"﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽",
|
||||||
|
"\uD83D\uDC71\uD83D\uDC71\uD83C\uDFFB\uD83D\uDC71\uD83C\uDFFC\uD83D\uDC71\uD83C\uDFFD\uD83D\uDC71\uD83C\uDFFE\uD83D\uDC71\uD83C\uDFFF",
|
||||||
|
"\uD83E\uDDDF\u200D♀\uFE0F\uD83E\uDDDF\u200D♂\uFE0F",
|
||||||
|
"\uD83D\uDC68\u200D❤\uFE0F\u200D\uD83D\uDC8B\u200D\uD83D\uDC68\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66\uD83C\uDFF3\uFE0F\u200D⚧\uFE0F\uD83C\uDDF5\uD83C\uDDF7",
|
||||||
|
"田中さんにあげて下さい",
|
||||||
|
"ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็",
|
||||||
|
"\uD801\uDC1C \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC19\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC13/\uD801\uDC1D\uD801\uDC07\uD801\uDC17\uD801\uDC0A\uD801\uDC24\uD801\uDC14 \uD801\uDC12\uD801\uDC0B\uD801\uDC17 \uD801\uDC12\uD801\uDC0C \uD801\uDC1C \uD801\uDC21\uD801\uDC00\uD801\uDC16\uD801\uDC07\uD801\uDC24\uD801\uDC13\uD801\uDC1D \uD801\uDC31\uD801\uDC42 \uD801\uDC44 \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC0F\uD801\uDC06\uD801\uDC05\uD801\uDC24\uD801\uDC06\uD801\uDC1A\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC06\uD801\uDC13\uD801\uDC06",
|
||||||
|
"表ポあA鷗ŒéB逍Üߪąñ丂㐀\uD840\uDC00",
|
||||||
|
"᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜\n" +
|
||||||
|
"᚛ ᚜\n",
|
||||||
|
"Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗\n" +
|
||||||
|
"\uD83C\uDFF30\uD83C\uDF08\uFE0F\n" +
|
||||||
|
"జ్ఞ\u200Cా"
|
||||||
|
);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void encodeFuzzer() {
|
||||||
|
var l = new ArrayList<>(WORDS);
|
||||||
|
Collections.shuffle(l);
|
||||||
|
var collected = l.stream().collect(Collectors.joining(" "));
|
||||||
|
testEncodeString(collected);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void decodeFuzzer() {
|
||||||
|
var l = new ArrayList<>(WORDS);
|
||||||
|
Collections.shuffle(l);
|
||||||
|
var collected = l.stream().collect(Collectors.joining(" "));
|
||||||
|
testDecodeString(collected);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void encodeToEmpty() {
|
void encodeToEmpty() {
|
||||||
testEncodeString("");
|
testEncodeString("");
|
||||||
@ -21,6 +67,26 @@ class ZeroAllocationEncoderTest {
|
|||||||
testDecodeString("");
|
testDecodeString("");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void encodeComplex() {
|
||||||
|
testEncodeString("\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void decodeComplex() {
|
||||||
|
testDecodeString("\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void encodeComplexLong() {
|
||||||
|
testEncodeString("\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF".repeat(10));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void decodeComplexLong() {
|
||||||
|
testDecodeString("\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF".repeat(10));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void encodeTo1Underflow() {
|
void encodeTo1Underflow() {
|
||||||
testEncodeString("ciao");
|
testEncodeString("ciao");
|
||||||
@ -87,8 +153,15 @@ class ZeroAllocationEncoderTest {
|
|||||||
var out = bdo.toList();
|
var out = bdo.toList();
|
||||||
out.toString(StandardCharsets.UTF_8);
|
out.toString(StandardCharsets.UTF_8);
|
||||||
Assertions.assertEquals(s, out.toString(StandardCharsets.UTF_8));
|
Assertions.assertEquals(s, out.toString(StandardCharsets.UTF_8));
|
||||||
Assertions.assertEquals(s.length(), bdo.size());
|
Assertions.assertEquals(s.getBytes(StandardCharsets.UTF_8).length, bdo.size());
|
||||||
Assertions.assertEquals(s.length(), out.size());
|
Assertions.assertEquals(s.getBytes(StandardCharsets.UTF_8).length, out.size());
|
||||||
|
|
||||||
|
var bdo2 = BufDataOutput.create();
|
||||||
|
bdo2.writeMediumText("ciao", StandardCharsets.UTF_8);
|
||||||
|
bdo2.writeShortText("ciao2", StandardCharsets.UTF_8);
|
||||||
|
var in = BufDataInput.create(bdo2.asList());
|
||||||
|
Assertions.assertEquals("ciao", in.readMediumText(StandardCharsets.UTF_8));
|
||||||
|
Assertions.assertEquals("ciao2", in.readShortText(StandardCharsets.UTF_8));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void testDecodeString(String s) {
|
private void testDecodeString(String s) {
|
||||||
|
Loading…
Reference in New Issue
Block a user