Add zero-copy text encoder

This commit is contained in:
Andrea Cavalli 2024-09-26 16:36:39 +02:00
parent 70adaaf6cf
commit 915afcbf4d
12 changed files with 812 additions and 54 deletions

View File

@ -198,9 +198,19 @@
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>31.1-jre</version>
<version>33.0.0-jre</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
<version>1.36</version>
</dependency>
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-generator-annprocess</artifactId>
<version>1.36</version>
</dependency>
</dependencies>
<dependencyManagement>
<dependencies>
@ -232,4 +242,88 @@
</dependency>
</dependencies>
</dependencyManagement>
<profiles>
<profile>
<!-- Profile "java9": active by default. It uses build-helper-maven-plugin to register
     src/java9/java as an additional source root during the generate-sources phase. -->
<!-- NOTE(review): Maven deactivates activeByDefault profiles as soon as another profile
     of this POM is activated explicitly (e.g. -Pbenchmark); confirm that dropping
     src/java9/java in that case is intended. -->
<id>java9</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>3.6.0</version>
<executions>
<execution>
<id>add-source</id>
<phase>generate-sources</phase>
<goals>
<goal>add-source</goal>
</goals>
<configuration>
<sources>
<!-- Extra source root added on top of the default source directory. -->
<source>src/java9/java</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
    <!-- Profile "benchmark": opt-in (not active by default). It adds the JMH libraries,
         registers src/benchmark/java as an extra source root and compiles for Java 21
         with module-info.java left out of the compilation. -->
    <id>benchmark</id>
    <activation>
        <activeByDefault>false</activeByDefault>
    </activation>
    <dependencies>
        <!-- NOTE(review): the same two JMH artifacts also appear in the top-level
             <dependencies> of this POM; confirm whether they are meant to be
             profile-only. -->
        <dependency>
            <groupId>org.openjdk.jmh</groupId>
            <artifactId>jmh-core</artifactId>
            <version>1.36</version>
        </dependency>
        <dependency>
            <groupId>org.openjdk.jmh</groupId>
            <artifactId>jmh-generator-annprocess</artifactId>
            <version>1.36</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>build-helper-maven-plugin</artifactId>
                <version>3.6.0</version>
                <executions>
                    <execution>
                        <id>add-source</id>
                        <phase>generate-sources</phase>
                        <goals>
                            <goal>add-source</goal>
                        </goals>
                        <configuration>
                            <sources>
                                <!-- Benchmark sources live outside src/main/java. -->
                                <source>src/benchmark/java</source>
                            </sources>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <release>21</release>
                    <encoding>UTF-8</encoding>
                    <excludes>
                        <!-- Exclusion filters are matched against paths relative to each
                             source root, not relative to the project directory, so the
                             module descriptor must be named without the src/main/java
                             prefix or the filter never matches. -->
                        <exclude>module-info.java</exclude>
                    </excludes>
                </configuration>
            </plugin>
        </plugins>
    </build>
</profile>
</profiles>
</project>

View File

@ -0,0 +1,14 @@
package it.cavallium.buffer;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
/**
 * Command-line entry point that runs the JMH benchmarks of {@link BufEncoderBench}.
 */
public class BenchmarkMain {

    /**
     * Builds JMH options that include every benchmark matching the simple class name of
     * {@link BufEncoderBench} and executes them with a {@link Runner}.
     *
     * @param args command-line arguments; not read
     * @throws RunnerException if the JMH runner fails
     */
    public static void main(String[] args) throws RunnerException {
        String benchmarkPattern = BufEncoderBench.class.getSimpleName();
        Options options = new OptionsBuilder()
                .include(benchmarkPattern)
                .build();
        Runner runner = new Runner(options);
        runner.run();
    }
}

View File

@ -0,0 +1,124 @@
package it.cavallium.buffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;
public class BenchmarkMainManual {
/**
 * Manual driver that encodes the medium text with the zero-copy writer in an endless loop.
 * The loop never terminates on its own; the process has to be stopped externally.
 */
public static class BenchmarkZeroCopy {

    /**
     * Prepares the fixtures once, then resets them and encodes the medium text forever.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        final ZeroAllocationEncoderBenchState state = new ZeroAllocationEncoderBenchState();
        state.prepare();
        for (;;) {
            state.reset();
            encodeMediumTextZeroCopy(state);
        }
    }
}
/**
 * Manual driver that encodes the medium text with the {@code String.getBytes}-based writer
 * in an endless loop. The loop never terminates on its own; the process has to be stopped
 * externally.
 */
public static class BenchmarkJava {

    /**
     * Prepares the fixtures once, then resets them and encodes the medium text forever.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        final ZeroAllocationEncoderBenchState state = new ZeroAllocationEncoderBenchState();
        state.prepare();
        for (;;) {
            state.reset();
            encodeMediumTextJava(state);
        }
    }
}
public static class ZeroAllocationEncoderBenchState {
private static final List<String> WORDS = List.of(
"\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF",
"hello",
"test",
"òàòà§òè+=))=732e0",
"ل موقع يسمح لزواره الكرام بتحويل الكتابة العربي الى كتابة مفه",
"\uD800\uDF3C\uD800\uDF30\uD800\uDF32 \uD800\uDF32\uD800\uDF3B\uD800\uDF34\uD800\uDF43 \uD800\uDF39̈\uD800\uDF44\uD800\uDF30\uD800\uDF3D, \uD800\uDF3D\uD800\uDF39 \uD800\uDF3C\uD800\uDF39\uD800\uDF43 \uD800\uDF45\uD800\uDF3F \uD800\uDF3D\uD800\uDF33\uD800\uDF30\uD800\uDF3D \uD800\uDF31\uD800\uDF42\uD800\uDF39\uD800\uDF32\uD800\uDF32\uD800\uDF39\uD800\uDF38.",
"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇",
"من left اليمين to الى right اليسار",
"a\u202Db\u202Ec\u202Dd\u202Ee\u202Df\u202Eg",
"﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽",
"\uD83D\uDC71\uD83D\uDC71\uD83C\uDFFB\uD83D\uDC71\uD83C\uDFFC\uD83D\uDC71\uD83C\uDFFD\uD83D\uDC71\uD83C\uDFFE\uD83D\uDC71\uD83C\uDFFF",
"\uD83E\uDDDF\u200D♀\uFE0F\uD83E\uDDDF\u200D♂\uFE0F",
"\uD83D\uDC68\u200D❤\uFE0F\u200D\uD83D\uDC8B\u200D\uD83D\uDC68\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66\uD83C\uDFF3\uFE0F\u200D⚧\uFE0F\uD83C\uDDF5\uD83C\uDDF7",
"田中さんにあげて下さい",
"ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็",
"\uD801\uDC1C \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC19\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC13/\uD801\uDC1D\uD801\uDC07\uD801\uDC17\uD801\uDC0A\uD801\uDC24\uD801\uDC14 \uD801\uDC12\uD801\uDC0B\uD801\uDC17 \uD801\uDC12\uD801\uDC0C \uD801\uDC1C \uD801\uDC21\uD801\uDC00\uD801\uDC16\uD801\uDC07\uD801\uDC24\uD801\uDC13\uD801\uDC1D \uD801\uDC31\uD801\uDC42 \uD801\uDC44 \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC0F\uD801\uDC06\uD801\uDC05\uD801\uDC24\uD801\uDC06\uD801\uDC1A\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC06\uD801\uDC13\uD801\uDC06",
"表ポあA鷗Œé逍Üߪąñ丂㐀\uD840\uDC00",
"᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜\n" +
"᚛                 ᚜\n",
"Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗\n" +
"\uD83C\uDFF30\uD83C\uDF08\uFE0F\n" +
"జ్ఞ\u200Cా"
);
String shortText;
byte[] shortTextBytes;
BufDataInput shortTextInput;
BufDataOutput shortTextOutput;
String mediumText;
byte[] mediumTextBytes;
BufDataInput mediumTextInput;
BufDataOutput mediumTextOutput;
String longText;
byte[] longTextBytes;
BufDataInput longTextInput;
BufDataOutput longTextOutput;
/**
 * Puts every fixture back into its initial state so the next run starts from scratch:
 * the three inputs are reset and the three outputs have their underlying buffer reset.
 * Must only be called after {@link #prepare()} has assigned the fixtures.
 */
public void reset() {
    // Inputs first...
    longTextInput.reset();
    mediumTextInput.reset();
    shortTextInput.reset();
    // ...then the reusable output buffers.
    longTextOutput.resetUnderlyingBuffer();
    mediumTextOutput.resetUnderlyingBuffer();
    shortTextOutput.resetUnderlyingBuffer();
}
/**
 * Builds the fixtures used by the manual benchmarks.
 *
 * <p>The word list is repeated a random number of times (1 to 19), shuffled and joined with
 * spaces to form the long text. The medium and short texts are prefixes of at most 128 and
 * 15 UTF-16 units of that text. For every text this method stores the string, its UTF-8
 * bytes, an input holding the text written with {@code writeMediumText}, and an output
 * sized for a 4-byte length prefix plus the UTF-8 payload.
 */
public void prepare() {
    var words = new ArrayList<String>();
    var repetitions = ThreadLocalRandom.current().nextInt(1, 20);
    for (int i = 0; i < repetitions; i++) {
        words.addAll(WORDS);
    }
    Collections.shuffle(words);
    var fullText = String.join(" ", words);

    longText = fullText;
    longTextBytes = longText.getBytes(StandardCharsets.UTF_8);
    // Scratch buffer sized for the longest text; it is reset and reused for the shorter ones.
    var out = BufDataOutput.create(Integer.BYTES + longTextBytes.length);
    out.resetUnderlyingBuffer();
    out.writeMediumText(longText, StandardCharsets.UTF_8);
    longTextInput = BufDataInput.create(out.toList());
    longTextOutput = BufDataOutput.create(Integer.BYTES + longTextBytes.length);

    out.resetUnderlyingBuffer();
    mediumText = prefixAtCodePointBoundary(fullText, 128);
    mediumTextBytes = mediumText.getBytes(StandardCharsets.UTF_8);
    out.writeMediumText(mediumText, StandardCharsets.UTF_8);
    mediumTextInput = BufDataInput.create(out.toList());
    mediumTextOutput = BufDataOutput.create(Integer.BYTES + mediumTextBytes.length);

    out.resetUnderlyingBuffer();
    shortText = prefixAtCodePointBoundary(fullText, 15);
    shortTextBytes = shortText.getBytes(StandardCharsets.UTF_8);
    out.writeMediumText(shortText, StandardCharsets.UTF_8);
    shortTextInput = BufDataInput.create(out.toList());
    shortTextOutput = BufDataOutput.create(Integer.BYTES + shortTextBytes.length);
}

/**
 * Returns the longest prefix of {@code text} that has at most {@code maxChars} UTF-16 units
 * and does not end in the middle of a surrogate pair.
 *
 * <p>A plain {@code substring(0, maxChars)} can keep the high surrogate of a pair and drop
 * its low surrogate. Such a string is not well-formed: {@code String.getBytes} silently
 * replaces the lone surrogate, while a {@code CharsetEncoder} sees malformed input, so the
 * compared encoders would not be given equivalent work.
 *
 * @param text     source text
 * @param maxChars maximum number of UTF-16 units to keep
 * @return a well-formed prefix of {@code text}, one unit shorter than requested when the
 *         cut would otherwise split a surrogate pair
 */
private static String prefixAtCodePointBoundary(String text, int maxChars) {
    int end = Math.min(maxChars, text.length());
    if (end > 0 && end < text.length()
            && Character.isHighSurrogate(text.charAt(end - 1))
            && Character.isLowSurrogate(text.charAt(end))) {
        end--;
    }
    return text.substring(0, end);
}
}
/**
 * Writes the medium text into the state's medium output with the zero-copy writer.
 *
 * @param benchState prepared and freshly reset fixtures
 * @return the result of {@code toList()} on the output after the write
 */
public static Buf encodeMediumTextZeroCopy(ZeroAllocationEncoderBenchState benchState) {
    BufDataOutput sink = benchState.mediumTextOutput;
    String text = benchState.mediumText;
    sink.writeMediumTextZeroCopy(text);
    return sink.toList();
}
/**
 * Writes the medium text into the state's medium output with the legacy,
 * {@code String.getBytes}-based writer using UTF-8.
 *
 * @param benchState prepared and freshly reset fixtures
 * @return the result of {@code toList()} on the output after the write
 */
public static Buf encodeMediumTextJava(ZeroAllocationEncoderBenchState benchState) {
    BufDataOutput sink = benchState.mediumTextOutput;
    String text = benchState.mediumText;
    sink.writeMediumTextLegacy(text, StandardCharsets.UTF_8);
    return sink.toList();
}
}

View File

@ -0,0 +1,174 @@
package it.cavallium.buffer;
import org.openjdk.jmh.annotations.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
@State(Scope.Benchmark)
@OutputTimeUnit(TimeUnit.SECONDS)
@BenchmarkMode(Mode.Throughput)
@Fork(value = 1, warmups = 1)
@Warmup(time = 2, iterations = 6)
@Measurement(time = 2, iterations = 6)
public class BufEncoderBench {
/**
 * Per-thread JMH state holding the shared encoder instance and a 1024-sized output.
 */
@State(Scope.Thread)
public static class ZeroAllocationEncoderState {

    ZeroAllocationEncoder encoder;
    BufDataOutput bufOutput;

    /** Assigns the singleton encoder and creates the output via {@code BufDataOutput.create(1024)}. */
    @Setup
    public void prepare() {
        this.encoder = ZeroAllocationEncoder.INSTANCE;
        this.bufOutput = BufDataOutput.create(1024);
    }
}
@State(Scope.Benchmark)
public static class ZeroAllocationEncoderBenchState {
private static final List<String> WORDS = List.of(
"\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF",
"hello",
"test",
"òàòà§òè+=))=732e0",
"ل موقع يسمح لزواره الكرام بتحويل الكتابة العربي الى كتابة مفه",
"\uD800\uDF3C\uD800\uDF30\uD800\uDF32 \uD800\uDF32\uD800\uDF3B\uD800\uDF34\uD800\uDF43 \uD800\uDF39̈\uD800\uDF44\uD800\uDF30\uD800\uDF3D, \uD800\uDF3D\uD800\uDF39 \uD800\uDF3C\uD800\uDF39\uD800\uDF43 \uD800\uDF45\uD800\uDF3F \uD800\uDF3D\uD800\uDF33\uD800\uDF30\uD800\uDF3D \uD800\uDF31\uD800\uDF42\uD800\uDF39\uD800\uDF32\uD800\uDF32\uD800\uDF39\uD800\uDF38.",
"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇",
"من left اليمين to الى right اليسار",
"a\u202Db\u202Ec\u202Dd\u202Ee\u202Df\u202Eg",
"﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽",
"\uD83D\uDC71\uD83D\uDC71\uD83C\uDFFB\uD83D\uDC71\uD83C\uDFFC\uD83D\uDC71\uD83C\uDFFD\uD83D\uDC71\uD83C\uDFFE\uD83D\uDC71\uD83C\uDFFF",
"\uD83E\uDDDF\u200D♀\uFE0F\uD83E\uDDDF\u200D♂\uFE0F",
"\uD83D\uDC68\u200D❤\uFE0F\u200D\uD83D\uDC8B\u200D\uD83D\uDC68\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66\uD83C\uDFF3\uFE0F\u200D⚧\uFE0F\uD83C\uDDF5\uD83C\uDDF7",
"田中さんにあげて下さい",
"ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็",
"\uD801\uDC1C \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC19\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC13/\uD801\uDC1D\uD801\uDC07\uD801\uDC17\uD801\uDC0A\uD801\uDC24\uD801\uDC14 \uD801\uDC12\uD801\uDC0B\uD801\uDC17 \uD801\uDC12\uD801\uDC0C \uD801\uDC1C \uD801\uDC21\uD801\uDC00\uD801\uDC16\uD801\uDC07\uD801\uDC24\uD801\uDC13\uD801\uDC1D \uD801\uDC31\uD801\uDC42 \uD801\uDC44 \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC0F\uD801\uDC06\uD801\uDC05\uD801\uDC24\uD801\uDC06\uD801\uDC1A\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC06\uD801\uDC13\uD801\uDC06",
"表ポあA鷗Œé逍Üߪąñ丂㐀\uD840\uDC00",
"᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜\n" +
"᚛                 ᚜\n",
"Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗\n" +
"\uD83C\uDFF30\uD83C\uDF08\uFE0F\n" +
"జ్ఞ\u200Cా"
);
String shortText;
byte[] shortTextBytes;
BufDataInput shortTextInput;
BufDataOutput shortTextOutput;
String mediumText;
byte[] mediumTextBytes;
BufDataInput mediumTextInput;
BufDataOutput mediumTextOutput;
String longText;
byte[] longTextBytes;
BufDataInput longTextInput;
BufDataOutput longTextOutput;
/**
 * Invocation-level fixture: resets the three inputs and the underlying buffers of the three
 * outputs so each benchmark invocation starts from the same state.
 */
@Setup(Level.Invocation)
public void reset() {
    // Inputs first...
    longTextInput.reset();
    mediumTextInput.reset();
    shortTextInput.reset();
    // ...then the reusable output buffers.
    longTextOutput.resetUnderlyingBuffer();
    mediumTextOutput.resetUnderlyingBuffer();
    shortTextOutput.resetUnderlyingBuffer();
}
/**
 * Invocation-level fixture that (re)builds the texts, inputs and outputs used by the
 * benchmarks.
 *
 * <p>The word list is repeated a random number of times (1 to 19), shuffled and joined with
 * spaces to form the long text. The medium and short texts are prefixes of at most 128 and
 * 15 UTF-16 units of that text. For every text this method stores the string, its UTF-8
 * bytes, an input holding the text written with {@code writeMediumText}, and an output
 * sized for a 4-byte length prefix plus the UTF-8 payload.
 *
 * <p>NOTE(review): {@code reset()} is an invocation-level fixture of the same class and
 * dereferences the fields assigned here; confirm that JMH runs this method first.
 */
@Setup(Level.Invocation)
public void prepare() {
    var words = new ArrayList<String>();
    var repetitions = ThreadLocalRandom.current().nextInt(1, 20);
    for (int i = 0; i < repetitions; i++) {
        words.addAll(WORDS);
    }
    Collections.shuffle(words);
    var fullText = String.join(" ", words);

    longText = fullText;
    longTextBytes = longText.getBytes(StandardCharsets.UTF_8);
    // Scratch buffer sized for the longest text; it is reset and reused for the shorter ones.
    var out = BufDataOutput.create(Integer.BYTES + longTextBytes.length);
    out.resetUnderlyingBuffer();
    out.writeMediumText(longText, StandardCharsets.UTF_8);
    longTextInput = BufDataInput.create(out.toList());
    longTextOutput = BufDataOutput.create(Integer.BYTES + longTextBytes.length);

    out.resetUnderlyingBuffer();
    mediumText = prefixAtCodePointBoundary(fullText, 128);
    mediumTextBytes = mediumText.getBytes(StandardCharsets.UTF_8);
    out.writeMediumText(mediumText, StandardCharsets.UTF_8);
    mediumTextInput = BufDataInput.create(out.toList());
    mediumTextOutput = BufDataOutput.create(Integer.BYTES + mediumTextBytes.length);

    out.resetUnderlyingBuffer();
    shortText = prefixAtCodePointBoundary(fullText, 15);
    shortTextBytes = shortText.getBytes(StandardCharsets.UTF_8);
    out.writeMediumText(shortText, StandardCharsets.UTF_8);
    shortTextInput = BufDataInput.create(out.toList());
    shortTextOutput = BufDataOutput.create(Integer.BYTES + shortTextBytes.length);
}

/**
 * Returns the longest prefix of {@code text} that has at most {@code maxChars} UTF-16 units
 * and does not end in the middle of a surrogate pair.
 *
 * <p>A plain {@code substring(0, maxChars)} can keep the high surrogate of a pair and drop
 * its low surrogate. Such a string is not well-formed: {@code String.getBytes} silently
 * replaces the lone surrogate, while a {@code CharsetEncoder} sees malformed input, so the
 * compared encoders would not be given equivalent work.
 *
 * @param text     source text
 * @param maxChars maximum number of UTF-16 units to keep
 * @return a well-formed prefix of {@code text}, one unit shorter than requested when the
 *         cut would otherwise split a surrogate pair
 */
private static String prefixAtCodePointBoundary(String text, int maxChars) {
    int end = Math.min(maxChars, text.length());
    if (end > 0 && end < text.length()
            && Character.isHighSurrogate(text.charAt(end - 1))
            && Character.isLowSurrogate(text.charAt(end))) {
        end--;
    }
    return text.substring(0, end);
}
}
/** Encodes the short text with the zero-copy writer and returns the output's {@code toList()}. */
@Benchmark
public Buf encodeShortTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
    BufDataOutput sink = benchState.shortTextOutput;
    String text = benchState.shortText;
    sink.writeMediumTextZeroCopy(text);
    return sink.toList();
}

/** Encodes the medium text with the zero-copy writer and returns the output's {@code toList()}. */
@Benchmark
public Buf encodeMediumTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
    BufDataOutput sink = benchState.mediumTextOutput;
    String text = benchState.mediumText;
    sink.writeMediumTextZeroCopy(text);
    return sink.toList();
}

/** Encodes the long text with the zero-copy writer and returns the output's {@code toList()}. */
@Benchmark
public Buf encodeLongTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
    BufDataOutput sink = benchState.longTextOutput;
    String text = benchState.longText;
    sink.writeMediumTextZeroCopy(text);
    return sink.toList();
}
/** Encodes the short text as UTF-8 with the legacy writer and returns the output's {@code toList()}. */
@Benchmark
public Buf encodeShortTextJava(ZeroAllocationEncoderBenchState benchState) {
    BufDataOutput sink = benchState.shortTextOutput;
    String text = benchState.shortText;
    sink.writeMediumTextLegacy(text, StandardCharsets.UTF_8);
    return sink.toList();
}

/** Encodes the medium text as UTF-8 with the legacy writer and returns the output's {@code toList()}. */
@Benchmark
public Buf encodeMediumTextJava(ZeroAllocationEncoderBenchState benchState) {
    BufDataOutput sink = benchState.mediumTextOutput;
    String text = benchState.mediumText;
    sink.writeMediumTextLegacy(text, StandardCharsets.UTF_8);
    return sink.toList();
}

/** Encodes the long text as UTF-8 with the legacy writer and returns the output's {@code toList()}. */
@Benchmark
public Buf encodeLongTextJava(ZeroAllocationEncoderBenchState benchState) {
    BufDataOutput sink = benchState.longTextOutput;
    String text = benchState.longText;
    sink.writeMediumTextLegacy(text, StandardCharsets.UTF_8);
    return sink.toList();
}
/** Reads the short text back from its input with {@code readMediumText} using UTF-8. */
@Benchmark
public String decodeShortText(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
    BufDataInput source = benchState.shortTextInput;
    String decoded = source.readMediumText(StandardCharsets.UTF_8);
    return decoded;
}

/** Reads the medium text back from its input with {@code readMediumText} using UTF-8. */
@Benchmark
public String decodeMediumText(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
    BufDataInput source = benchState.mediumTextInput;
    String decoded = source.readMediumText(StandardCharsets.UTF_8);
    return decoded;
}

/** Reads the long text back from its input with {@code readMediumText} using UTF-8. */
@Benchmark
public String decodeLongText(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
    BufDataInput source = benchState.longTextInput;
    String decoded = source.readMediumText(StandardCharsets.UTF_8);
    return decoded;
}
}

View File

@ -0,0 +1,178 @@
package it.cavallium.buffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import org.openjdk.jmh.annotations.*;
@State(Scope.Benchmark)
@OutputTimeUnit(TimeUnit.SECONDS)
@BenchmarkMode(Mode.Throughput)
@Fork(value = 1, warmups = 1)
@Warmup(time = 1, iterations = 3)
@Measurement(time = 1, iterations = 6)
public class ZeroAllocationEncoderBench {
/**
 * Per-thread JMH state holding the shared encoder instance and a 1024-sized output.
 */
@State(Scope.Thread)
public static class ZeroAllocationEncoderState {

    ZeroAllocationEncoder encoder;
    BufDataOutput bufOutput;

    /** Assigns the singleton encoder and creates the output via {@code BufDataOutput.create(1024)}. */
    @Setup
    public void prepare() {
        this.encoder = ZeroAllocationEncoder.INSTANCE;
        this.bufOutput = BufDataOutput.create(1024);
    }
}
@State(Scope.Benchmark)
public static class ZeroAllocationEncoderBenchState {
private static final List<String> WORDS = List.of(
"\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF",
"hello",
"test",
"òàòà§òè+=))=732e0",
"ل موقع يسمح لزواره الكرام بتحويل الكتابة العربي الى كتابة مفه",
"\uD800\uDF3C\uD800\uDF30\uD800\uDF32 \uD800\uDF32\uD800\uDF3B\uD800\uDF34\uD800\uDF43 \uD800\uDF39̈\uD800\uDF44\uD800\uDF30\uD800\uDF3D, \uD800\uDF3D\uD800\uDF39 \uD800\uDF3C\uD800\uDF39\uD800\uDF43 \uD800\uDF45\uD800\uDF3F \uD800\uDF3D\uD800\uDF33\uD800\uDF30\uD800\uDF3D \uD800\uDF31\uD800\uDF42\uD800\uDF39\uD800\uDF32\uD800\uDF32\uD800\uDF39\uD800\uDF38.",
"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇",
"من left اليمين to الى right اليسار",
"a\u202Db\u202Ec\u202Dd\u202Ee\u202Df\u202Eg",
"﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽",
"\uD83D\uDC71\uD83D\uDC71\uD83C\uDFFB\uD83D\uDC71\uD83C\uDFFC\uD83D\uDC71\uD83C\uDFFD\uD83D\uDC71\uD83C\uDFFE\uD83D\uDC71\uD83C\uDFFF",
"\uD83E\uDDDF\u200D♀\uFE0F\uD83E\uDDDF\u200D♂\uFE0F",
"\uD83D\uDC68\u200D❤\uFE0F\u200D\uD83D\uDC8B\u200D\uD83D\uDC68\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66\uD83C\uDFF3\uFE0F\u200D⚧\uFE0F\uD83C\uDDF5\uD83C\uDDF7",
"田中さんにあげて下さい",
"ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็",
"\uD801\uDC1C \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC19\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC13/\uD801\uDC1D\uD801\uDC07\uD801\uDC17\uD801\uDC0A\uD801\uDC24\uD801\uDC14 \uD801\uDC12\uD801\uDC0B\uD801\uDC17 \uD801\uDC12\uD801\uDC0C \uD801\uDC1C \uD801\uDC21\uD801\uDC00\uD801\uDC16\uD801\uDC07\uD801\uDC24\uD801\uDC13\uD801\uDC1D \uD801\uDC31\uD801\uDC42 \uD801\uDC44 \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC0F\uD801\uDC06\uD801\uDC05\uD801\uDC24\uD801\uDC06\uD801\uDC1A\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC06\uD801\uDC13\uD801\uDC06",
"表ポあA鷗Œé逍Üߪąñ丂㐀\uD840\uDC00",
"᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜\n" +
"᚛                 ᚜\n",
"Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗\n" +
"\uD83C\uDFF30\uD83C\uDF08\uFE0F\n" +
"జ్ఞ\u200Cా"
);
String shortText;
byte[] shortTextBytes;
BufDataInput shortTextInput;
BufDataOutput shortTextOutput;
String mediumText;
byte[] mediumTextBytes;
BufDataInput mediumTextInput;
BufDataOutput mediumTextOutput;
String longText;
byte[] longTextBytes;
BufDataInput longTextInput;
BufDataOutput longTextOutput;
/**
 * Invocation-level fixture: resets the three inputs and the underlying buffers of the three
 * outputs so each benchmark invocation starts from the same state.
 */
@Setup(Level.Invocation)
public void reset() {
    // Inputs first...
    longTextInput.reset();
    mediumTextInput.reset();
    shortTextInput.reset();
    // ...then the reusable output buffers.
    longTextOutput.resetUnderlyingBuffer();
    mediumTextOutput.resetUnderlyingBuffer();
    shortTextOutput.resetUnderlyingBuffer();
}
/**
 * Builds the texts, inputs and outputs used by the benchmarks.
 *
 * <p>The word list is repeated a random number of times (1 to 19), shuffled and joined with
 * spaces to form the long text. The medium and short texts are prefixes of at most 128 and
 * 15 UTF-16 units of that text. For every text this method stores the string, its UTF-8
 * bytes, an input wrapping those bytes, and an output created with the byte length.
 */
@Setup
public void prepare() {
    var words = new ArrayList<String>();
    var repetitions = ThreadLocalRandom.current().nextInt(1, 20);
    for (int i = 0; i < repetitions; i++) {
        words.addAll(WORDS);
    }
    Collections.shuffle(words);
    var fullText = String.join(" ", words);

    longText = fullText;
    longTextBytes = longText.getBytes(StandardCharsets.UTF_8);
    longTextInput = BufDataInput.create(Buf.wrap(longTextBytes));
    longTextOutput = BufDataOutput.create(longTextBytes.length);

    mediumText = prefixAtCodePointBoundary(fullText, 128);
    mediumTextBytes = mediumText.getBytes(StandardCharsets.UTF_8);
    mediumTextInput = BufDataInput.create(Buf.wrap(mediumTextBytes));
    mediumTextOutput = BufDataOutput.create(mediumTextBytes.length);

    shortText = prefixAtCodePointBoundary(fullText, 15);
    shortTextBytes = shortText.getBytes(StandardCharsets.UTF_8);
    shortTextInput = BufDataInput.create(Buf.wrap(shortTextBytes));
    shortTextOutput = BufDataOutput.create(shortTextBytes.length);
}

/**
 * Returns the longest prefix of {@code text} that has at most {@code maxChars} UTF-16 units
 * and does not end in the middle of a surrogate pair.
 *
 * <p>A plain {@code substring(0, maxChars)} can keep the high surrogate of a pair and drop
 * its low surrogate. Such a string is not well-formed: {@code String.getBytes} silently
 * replaces the lone surrogate, while a {@code CharsetEncoder} sees malformed input, so the
 * compared encoders would not be given equivalent work.
 *
 * @param text     source text
 * @param maxChars maximum number of UTF-16 units to keep
 * @return a well-formed prefix of {@code text}, one unit shorter than requested when the
 *         cut would otherwise split a surrogate pair
 */
private static String prefixAtCodePointBoundary(String text, int maxChars) {
    int end = Math.min(maxChars, text.length());
    if (end > 0 && end < text.length()
            && Character.isHighSurrogate(text.charAt(end - 1))
            && Character.isLowSurrogate(text.charAt(end))) {
        end--;
    }
    return text.substring(0, end);
}
}
/** Encodes the short text into its output through the encoder held by the thread state. */
@Benchmark
public BufDataOutput encodeShortTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
    BufDataOutput sink = benchState.shortTextOutput;
    ZeroAllocationEncoder textEncoder = encoderState.encoder;
    textEncoder.encodeTo(benchState.shortText, sink);
    return sink;
}

/** Encodes the medium text into its output through the encoder held by the thread state. */
@Benchmark
public BufDataOutput encodeMediumTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
    BufDataOutput sink = benchState.mediumTextOutput;
    ZeroAllocationEncoder textEncoder = encoderState.encoder;
    textEncoder.encodeTo(benchState.mediumText, sink);
    return sink;
}

/** Encodes the long text into its output through the encoder held by the thread state. */
@Benchmark
public BufDataOutput encodeLongTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
    BufDataOutput sink = benchState.longTextOutput;
    ZeroAllocationEncoder textEncoder = encoderState.encoder;
    textEncoder.encodeTo(benchState.longText, sink);
    return sink;
}
/** Baseline: converts the short text with {@code String.getBytes(UTF_8)} and writes the bytes. */
@Benchmark
public BufDataOutput encodeShortTextJava(ZeroAllocationEncoderBenchState benchState) {
    BufDataOutput sink = benchState.shortTextOutput;
    byte[] utf8 = benchState.shortText.getBytes(StandardCharsets.UTF_8);
    sink.write(utf8);
    return sink;
}

/** Baseline: converts the medium text with {@code String.getBytes(UTF_8)} and writes the bytes. */
@Benchmark
public BufDataOutput encodeMediumTextJava(ZeroAllocationEncoderBenchState benchState) {
    BufDataOutput sink = benchState.mediumTextOutput;
    byte[] utf8 = benchState.mediumText.getBytes(StandardCharsets.UTF_8);
    sink.write(utf8);
    return sink;
}

/** Baseline: converts the long text with {@code String.getBytes(UTF_8)} and writes the bytes. */
@Benchmark
public BufDataOutput encodeLongTextJava(ZeroAllocationEncoderBenchState benchState) {
    BufDataOutput sink = benchState.longTextOutput;
    byte[] utf8 = benchState.longText.getBytes(StandardCharsets.UTF_8);
    sink.write(utf8);
    return sink;
}
/** Decodes the short text from its input, passing the UTF-8 byte length to the encoder. */
@Benchmark
public String decodeShortTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
    ZeroAllocationEncoder textDecoder = encoderState.encoder;
    String decoded = textDecoder.decodeFrom(benchState.shortTextInput, benchState.shortTextBytes.length);
    return decoded;
}

/** Decodes the medium text from its input, passing the UTF-8 byte length to the encoder. */
@Benchmark
public String decodeMediumTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
    ZeroAllocationEncoder textDecoder = encoderState.encoder;
    String decoded = textDecoder.decodeFrom(benchState.mediumTextInput, benchState.mediumTextBytes.length);
    return decoded;
}

/** Decodes the long text from its input, passing the UTF-8 byte length to the encoder. */
@Benchmark
public String decodeLongTextZeroCopy(ZeroAllocationEncoderState encoderState, ZeroAllocationEncoderBenchState benchState) {
    ZeroAllocationEncoder textDecoder = encoderState.encoder;
    String decoded = textDecoder.decodeFrom(benchState.longTextInput, benchState.longTextBytes.length);
    return decoded;
}
/** Baseline: reads all bytes of the short input and builds a UTF-8 {@code String} from them. */
@Benchmark
public String decodeShortTextJava(ZeroAllocationEncoderBenchState benchState) {
    byte[] raw = benchState.shortTextInput.readAllBytes();
    return new String(raw, StandardCharsets.UTF_8);
}

/** Baseline: reads all bytes of the medium input and builds a UTF-8 {@code String} from them. */
@Benchmark
public String decodeMediumTextJava(ZeroAllocationEncoderBenchState benchState) {
    byte[] raw = benchState.mediumTextInput.readAllBytes();
    return new String(raw, StandardCharsets.UTF_8);
}

/** Baseline: reads all bytes of the long input and builds a UTF-8 {@code String} from them. */
@Benchmark
public String decodeLongTextJava(ZeroAllocationEncoderBenchState benchState) {
    byte[] raw = benchState.longTextInput.readAllBytes();
    return new String(raw, StandardCharsets.UTF_8);
}
}

View File

@ -12,22 +12,23 @@ import java.util.Arrays;
import java.util.Objects;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.VisibleForTesting;
public class BufDataOutput implements SafeDataOutput {
private final SafeByteArrayOutputStream buf;
private final SafeDataOutputStream dOut;
private final SafeByteArrayDataOutputStream dOut;
private final int limit;
public BufDataOutput(SafeByteArrayOutputStream buf) {
this.buf = buf;
this.dOut = new SafeDataOutputStream(buf);
this.dOut = new SafeByteArrayDataOutputStream(buf);
limit = Integer.MAX_VALUE;
}
public BufDataOutput(SafeByteArrayOutputStream buf, int maxSize) {
this.buf = buf;
this.dOut = new SafeDataOutputStream(buf);
this.dOut = new SafeByteArrayDataOutputStream(buf);
this.limit = maxSize;
}
@ -218,17 +219,17 @@ public class BufDataOutput implements SafeDataOutput {
@Override
public void writeShortText(String s, Charset charset) {
if (charset == StandardCharsets.UTF_8) {
var beforeWrite = this.buf.position();
writeShort(0);
var beforeWrite = (int) this.position();
this.advancePosition(Short.BYTES);
ZeroAllocationEncoder.INSTANCE.encodeTo(s, this);
var afterWrite = this.buf.position();
this.buf.position(beforeWrite);
var afterWrite = (int) this.position();
this.rewindPosition(afterWrite - beforeWrite);
var len = Math.toIntExact(afterWrite - beforeWrite - Short.BYTES);
if (len > Short.MAX_VALUE) {
throw new IndexOutOfBoundsException("String too long: " + len + " bytes");
}
this.writeShort(len);
this.buf.position(afterWrite);
dOut.writeShort(len);
dOut.advancePosition(len);
} else {
var out = s.getBytes(charset);
if (out.length > Short.MAX_VALUE) {
@ -242,20 +243,49 @@ public class BufDataOutput implements SafeDataOutput {
@Override
public void writeMediumText(String s, Charset charset) {
if (charset == StandardCharsets.UTF_8) {
var beforeWrite = this.buf.position();
writeInt(0);
ZeroAllocationEncoder.INSTANCE.encodeTo(s, this);
var afterWrite = this.buf.position();
this.buf.position(beforeWrite);
this.writeInt(Math.toIntExact(afterWrite - beforeWrite - Integer.BYTES));
this.buf.position(afterWrite);
} else {
var out = s.getBytes(charset);
checkOutOfBounds(Integer.BYTES + out.length);
dOut.writeInt(out.length);
dOut.write(out);
}
// todo: charbuffer is still slow, check in future java versions
// if (charset == StandardCharsets.UTF_8) {
// writeMediumTextZeroCopy(s);
// } else {
writeMediumTextLegacy(s, charset);
// }
}
/**
 * Writes {@code s} as a length-prefixed text without materializing an intermediate byte array.
 *
 * <p>Space for the {@code int} length prefix is skipped first, the text is streamed through
 * {@link ZeroAllocationEncoder}, and only then is the prefix filled in with the number of
 * bytes the encoder produced.
 *
 * @param s text to write
 */
@VisibleForTesting
public void writeMediumTextZeroCopy(String s) {
    final int start = (int) this.position();
    // Leave a gap for the length prefix; it is unknown until the text has been encoded.
    this.advancePosition(Integer.BYTES);
    ZeroAllocationEncoder.INSTANCE.encodeTo(s, this);
    final int end = (int) this.position();
    final int totalWritten = end - start;
    // Jump back to the gap, store the payload length, then skip over the payload again.
    this.rewindPosition(totalWritten);
    final int payloadLength = totalWritten - Integer.BYTES;
    dOut.writeInt(payloadLength);
    dOut.advancePosition(payloadLength);
}
/**
 * Writes {@code s} as a length-prefixed text by first converting it with
 * {@code String.getBytes(charset)}: an {@code int} byte count followed by the bytes.
 *
 * @param s       text to write
 * @param charset charset used to convert the text to bytes
 */
@VisibleForTesting
public void writeMediumTextLegacy(String s, Charset charset) {
    final byte[] encoded = s.getBytes(charset);
    final int payloadLength = encoded.length;
    checkOutOfBounds(Integer.BYTES + payloadLength);
    dOut.writeInt(payloadLength);
    dOut.write(encoded);
}
/**
 * Resets this output by delegating to the wrapped data stream's
 * {@code resetUnderlyingBuffer()}.
 */
public void resetUnderlyingBuffer() {
dOut.resetUnderlyingBuffer();
}
/**
 * Moves the write position back by {@code count} bytes.
 * No check is performed here; the call is forwarded as-is to the wrapped data stream.
 *
 * @param count number of bytes to move back
 */
public void rewindPosition(int count) {
dOut.rewindPosition(count);
}
/**
 * Moves the write position forward by {@code count} bytes without writing data.
 *
 * @param count number of bytes to skip
 */
public void advancePosition(int count) {
// Same guard the write methods use before touching the stream
// (presumably enforces the configured size limit; verify against checkOutOfBounds).
checkOutOfBounds(count);
dOut.advancePosition(count);
}
/**
 * Returns the current position reported by the wrapped data stream.
 *
 * @return the current write position
 */
public long position() {
return dOut.position();
}
public Buf asList() {

View File

@ -0,0 +1,37 @@
package it.cavallium.buffer;
import it.cavallium.stream.SafeByteArrayOutputStream;
import it.cavallium.stream.SafeDataOutputStream;
/**
 * A {@link SafeDataOutputStream} bound to a {@link SafeByteArrayOutputStream}, which makes it
 * possible to reset the stream and to move its write position backwards and forwards while
 * keeping the inherited {@code written} counter in step.
 */
public class SafeByteArrayDataOutputStream extends SafeDataOutputStream {

    /** The byte-array stream passed to the constructor; also the target of the superclass. */
    private final SafeByteArrayOutputStream byteArrayOut;

    /**
     * Creates a data stream on top of the given byte-array stream.
     *
     * @param out stream that receives the written bytes
     */
    public SafeByteArrayDataOutputStream(SafeByteArrayOutputStream out) {
        super(out);
        this.byteArrayOut = out;
    }

    /** Resets the byte-array stream and sets the {@code written} counter back to zero. */
    public void resetUnderlyingBuffer() {
        byteArrayOut.reset();
        this.written = 0;
    }

    /**
     * Moves the write position back by {@code count} bytes and lowers the {@code written}
     * counter by the same amount.
     *
     * @param count number of bytes to move back
     * @throws IndexOutOfBoundsException if {@code count} is greater than {@code written}
     */
    public void rewindPosition(int count) {
        if (written < count) {
            throw new IndexOutOfBoundsException(count + " > " + written);
        }
        byteArrayOut.position(byteArrayOut.position() - count);
        decCount(count);
    }

    /**
     * Moves the write position forward by {@code count} bytes without writing data, after
     * asking the byte-array stream to ensure that many bytes are writable, and raises the
     * {@code written} counter by the same amount.
     *
     * @param count number of bytes to skip
     */
    public void advancePosition(int count) {
        byteArrayOut.ensureWritable(count);
        var target = byteArrayOut.position() + count;
        byteArrayOut.position(target);
        incCount(count);
    }

    /**
     * Returns the current position of the byte-array stream.
     *
     * @return the current position
     */
    public long position() {
        return byteArrayOut.position();
    }
}

View File

@ -27,25 +27,38 @@ public class ZeroAllocationEncoder {
private final ThreadLocal<AtomicReference<CharBuffer>> charBufferRefThreadLocal;
private final ThreadLocal<AtomicReference<ByteBuffer>> byteBufferRefThreadLocal;
public ZeroAllocationEncoder(int outBufferSize) {
bufferThreadLocal = ThreadLocal.withInitial(() -> ByteBuffer.allocate(outBufferSize));
var maxBytesPerChar = (int) Math.ceil(StandardCharsets.UTF_8.newEncoder().maxBytesPerChar());
bufferThreadLocal = ThreadLocal.withInitial(() -> ByteBuffer.allocate(outBufferSize * maxBytesPerChar));
charBufferRefThreadLocal = ThreadLocal.withInitial(() -> new AtomicReference<>(CharBuffer.allocate(outBufferSize)));
byteBufferRefThreadLocal = ThreadLocal.withInitial(() -> new AtomicReference<>(ByteBuffer.allocate(outBufferSize * maxBytesPerChar)));
}
public void encodeTo(String s, SafeDataOutput bufDataOutput) {
var encoder = CHARSET_ENCODER_UTF8.get();
encoder.reset();
var buf = bufferThreadLocal.get();
var charBuffer = CharBuffer.wrap(s);
boolean endOfInput = false;
CoderResult result;
do {
buf.clear();
result = encoder.encode(charBuffer, buf, true);
result = encoder.encode(charBuffer, buf, endOfInput);
buf.flip();
var bufArray = buf.array();
var bufArrayOffset = buf.arrayOffset();
bufDataOutput.write(bufArray, bufArrayOffset + buf.position(), buf.remaining());
bufDataOutput.write(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining());
if (result.isUnderflow()) {
break;
if (endOfInput) {
buf.clear();
encoder.flush(buf);
buf.flip();
bufDataOutput.write(buf.array(), buf.arrayOffset() + buf.position(), buf.remaining());
break;
} else {
endOfInput = true;
continue;
}
} else if (result.isOverflow()) {
continue;
} else if (result.isError()) {
@ -59,32 +72,41 @@ public class ZeroAllocationEncoder {
} while (true);
}
public String decodeFrom(SafeDataInput bufDataInput, int length) {
public String decodeFrom(SafeDataInput bufDataInput, int bytesLength) {
var decoder = CHARSET_DECODER_UTF8.get();
var byteBuf = bufferThreadLocal.get();
decoder.reset();
var bufRef = byteBufferRefThreadLocal.get();
var charBufRef = charBufferRefThreadLocal.get();
var buf = bufRef.get();
var charBuf = charBufRef.get();
if (charBuf.capacity() < length) {
charBuf = CharBuffer.allocate(length);
assert decoder.maxCharsPerByte() == 1.0f
: "UTF8 max chars per byte is 1.0f, but the decoder got a value of " + decoder.maxCharsPerByte();
if (charBuf.capacity() < bytesLength) {
charBuf = CharBuffer.allocate(bytesLength);
charBufRef.set(charBuf);
} else {
charBuf.clear();
}
var remainingLengthToRead = length;
if (buf.capacity() < bytesLength) {
buf = ByteBuffer.allocate(bytesLength);
bufRef.set(buf);
} else {
buf.clear();
}
CoderResult result;
do {
byteBuf.clear();
bufDataInput.readFully(byteBuf, Math.min(remainingLengthToRead, byteBuf.limit()));
byteBuf.flip();
remainingLengthToRead -= byteBuf.remaining();
result = decoder.decode(byteBuf, charBuf, true);
buf.clear();
assert buf.capacity() >= bytesLength;
bufDataInput.readFully(buf, bytesLength);
buf.flip();
result = decoder.decode(buf, charBuf, true);
if (result.isUnderflow()) {
if (remainingLengthToRead > 0) {
continue;
} else {
charBuf.flip();
return charBuf.toString();
result = decoder.flush(charBuf);
if (result.isOverflow()) {
throw new IllegalStateException("Unexpected overflow");
}
charBuf.flip();
return charBuf.toString();
} else if (result.isOverflow()) {
throw new UnsupportedOperationException();
} else if (result.isError()) {

View File

@ -131,7 +131,7 @@ public class SafeByteArrayInputStream extends SafeMeasurableInputStream implemen
public void readNBytes(int length, ByteBuffer buffer) {
Objects.checkFromIndexSize(0, length, buffer.remaining());
if (this.available() < length) {
throw new IndexOutOfBoundsException(this.length);
throw new IndexOutOfBoundsException(length);
}
buffer.put(array, offset + this.position, length);
position += length;
@ -149,12 +149,17 @@ public class SafeByteArrayInputStream extends SafeMeasurableInputStream implemen
return cappedLength;
}
/**
 * Returns a copy of every byte from the current position up to {@code length} and moves
 * the position to {@code length}.
 *
 * @return a new array holding the bytes that had not been read yet
 */
@Override
public byte[] readAllBytes() {
    final int from = this.offset + position;
    final int to = this.offset + length;
    final byte[] remaining = Arrays.copyOfRange(this.array, from, to);
    position = length;
    return remaining;
}
@Override
public byte[] readNBytes(int length) {
if (this.available() < length) {
throw new IndexOutOfBoundsException(this.length);
}
var result = Arrays.copyOfRange(this.array, this.offset + position, this.offset + position + length);
var result = Arrays.copyOfRange(this.array, this.offset + position, this.offset + position + Math.min(length, this.available()));
position += length;
return result;
}
@ -162,7 +167,7 @@ public class SafeByteArrayInputStream extends SafeMeasurableInputStream implemen
@Override
public String readString(int length, Charset charset) {
if (this.available() < length) {
throw new IndexOutOfBoundsException(this.length);
throw new IndexOutOfBoundsException(length + " > " + this.available());
}
var result = new String(this.array, offset + position, length, charset);
position += length;

View File

@ -63,9 +63,16 @@ public class SafeDataOutputStream extends SafeFilterOutputStream implements Safe
* Increases the written counter by the specified value
* until it reaches Integer.MAX_VALUE.
*/
private void incCount(int value) {
protected void incCount(int value) {
written = Math.addExact(written, value);
}
/**
 * Decreases the written counter by the specified value.
 * <p>
 * The counter is not clamped at 0: subtracting more than the current count leaves it
 * negative, so callers must make sure {@code value} does not exceed {@code written}.
 * {@link Math#subtractExact} only throws {@link ArithmeticException} when the
 * subtraction overflows.
 *
 * @param value amount to subtract from the written counter
 */
protected void decCount(int value) {
written = Math.subtractExact(written, value);
}
/**
* Writes the specified byte (the low eight bits of the argument

View File

@ -4,6 +4,10 @@ import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.*;
@ -11,6 +15,48 @@ class ZeroAllocationEncoderTest {
// Static encoder instance for this test class.
// NOTE(review): the meaning of the constructor argument 16 is not visible here — presumably a buffer size; confirm against ZeroAllocationEncoder.
private static final ZeroAllocationEncoder INSTANCE = new ZeroAllocationEncoder(16);
private static final List<String> WORDS = List.of(
"\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF",
"hello",
"test",
"òàòà§òè+=))=732e0",
"ل موقع يسمح لزواره الكرام بتحويل الكتابة العربي الى كتابة مفه",
"\uD800\uDF3C\uD800\uDF30\uD800\uDF32 \uD800\uDF32\uD800\uDF3B\uD800\uDF34\uD800\uDF43 \uD800\uDF39̈\uD800\uDF44\uD800\uDF30\uD800\uDF3D, \uD800\uDF3D\uD800\uDF39 \uD800\uDF3C\uD800\uDF39\uD800\uDF43 \uD800\uDF45\uD800\uDF3F \uD800\uDF3D\uD800\uDF33\uD800\uDF30\uD800\uDF3D \uD800\uDF31\uD800\uDF42\uD800\uDF39\uD800\uDF32\uD800\uDF32\uD800\uDF39\uD800\uDF38.",
"Z̤͔ͧ̑̓ä͖̭̈̇lͮ̒ͫǧ̗͚̚o̙̔ͮ̇͐̇",
"من left اليمين to الى right اليسار",
"a\u202Db\u202Ec\u202Dd\u202Ee\u202Df\u202Eg",
"﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽﷽",
"\uD83D\uDC71\uD83D\uDC71\uD83C\uDFFB\uD83D\uDC71\uD83C\uDFFC\uD83D\uDC71\uD83C\uDFFD\uD83D\uDC71\uD83C\uDFFE\uD83D\uDC71\uD83C\uDFFF",
"\uD83E\uDDDF\u200D♀\uFE0F\uD83E\uDDDF\u200D♂\uFE0F",
"\uD83D\uDC68\u200D❤\uFE0F\u200D\uD83D\uDC8B\u200D\uD83D\uDC68\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC66\uD83C\uDFF3\uFE0F\u200D⚧\uFE0F\uD83C\uDDF5\uD83C\uDDF7",
"田中さんにあげて下さい",
"ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็",
"\uD801\uDC1C \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC19\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC13/\uD801\uDC1D\uD801\uDC07\uD801\uDC17\uD801\uDC0A\uD801\uDC24\uD801\uDC14 \uD801\uDC12\uD801\uDC0B\uD801\uDC17 \uD801\uDC12\uD801\uDC0C \uD801\uDC1C \uD801\uDC21\uD801\uDC00\uD801\uDC16\uD801\uDC07\uD801\uDC24\uD801\uDC13\uD801\uDC1D \uD801\uDC31\uD801\uDC42 \uD801\uDC44 \uD801\uDC14\uD801\uDC07\uD801\uDC1D\uD801\uDC00\uD801\uDC21\uD801\uDC07\uD801\uDC13 \uD801\uDC0F\uD801\uDC06\uD801\uDC05\uD801\uDC24\uD801\uDC06\uD801\uDC1A\uD801\uDC0A\uD801\uDC21\uD801\uDC1D\uD801\uDC06\uD801\uDC13\uD801\uDC06",
"表ポあA鷗Œé逍Üߪąñ丂㐀\uD840\uDC00",
"᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜\n" +
"᚛                 ᚜\n",
"Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗\n" +
"\uD83C\uDFF30\uD83C\uDF08\uFE0F\n" +
"జ్ఞ\u200Cా"
);
/**
 * Passes all WORDS samples, joined by single spaces in a random order, to
 * testEncodeString. The shuffle seed is reported on failure so that the
 * failing permutation can be replayed.
 */
@Test
void encodeFuzzer() {
	// Explicit seed: an unseeded shuffle makes a failing order impossible to reproduce.
	final long seed = System.nanoTime();
	var shuffled = new ArrayList<>(WORDS);
	Collections.shuffle(shuffled, new java.util.Random(seed));
	try {
		testEncodeString(String.join(" ", shuffled));
	} catch (AssertionError e) {
		throw new AssertionError("encodeFuzzer failed for shuffle seed " + seed, e);
	}
}
/**
 * Passes all WORDS samples, joined by single spaces in a random order, to
 * testDecodeString. The shuffle seed is reported on failure so that the
 * failing permutation can be replayed.
 */
@Test
void decodeFuzzer() {
	// Explicit seed: an unseeded shuffle makes a failing order impossible to reproduce.
	final long seed = System.nanoTime();
	var shuffled = new ArrayList<>(WORDS);
	Collections.shuffle(shuffled, new java.util.Random(seed));
	try {
		testDecodeString(String.join(" ", shuffled));
	} catch (AssertionError e) {
		throw new AssertionError("decodeFuzzer failed for shuffle seed " + seed, e);
	}
}
@Test
void encodeToEmpty() {
testEncodeString("");
@ -21,6 +67,26 @@ class ZeroAllocationEncoderTest {
testDecodeString("");
}
/**
 * Encodes one emoji sequence: U+1F469, U+1F469, U+1F467 and U+1F466, each
 * followed by the U+1F3FF modifier and joined with U+200D.
 */
@Test
void encodeComplex() {
	final String zwjSequence = "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF";
	testEncodeString(zwjSequence);
}
/**
 * Decodes one emoji sequence: U+1F469, U+1F469, U+1F467 and U+1F466, each
 * followed by the U+1F3FF modifier and joined with U+200D.
 */
@Test
void decodeComplex() {
	final String zwjSequence = "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF";
	testDecodeString(zwjSequence);
}
/**
 * Encodes ten back-to-back copies of the emoji sequence used by the
 * single-sequence test (surrogate pairs joined with U+200D).
 */
@Test
void encodeComplexLong() {
	final String zwjSequence = "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF";
	final String repeated = zwjSequence.repeat(10);
	testEncodeString(repeated);
}
/**
 * Decodes ten back-to-back copies of the emoji sequence used by the
 * single-sequence test (surrogate pairs joined with U+200D).
 */
@Test
void decodeComplexLong() {
	final String zwjSequence = "\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC69\uD83C\uDFFF\u200D\uD83D\uDC67\uD83C\uDFFF\u200D\uD83D\uDC66\uD83C\uDFFF";
	final String repeated = zwjSequence.repeat(10);
	testDecodeString(repeated);
}
@Test
void encodeTo1Underflow() {
testEncodeString("ciao");
@ -87,8 +153,15 @@ class ZeroAllocationEncoderTest {
var out = bdo.toList();
out.toString(StandardCharsets.UTF_8);
Assertions.assertEquals(s, out.toString(StandardCharsets.UTF_8));
Assertions.assertEquals(s.length(), bdo.size());
Assertions.assertEquals(s.length(), out.size());
Assertions.assertEquals(s.getBytes(StandardCharsets.UTF_8).length, bdo.size());
Assertions.assertEquals(s.getBytes(StandardCharsets.UTF_8).length, out.size());
var bdo2 = BufDataOutput.create();
bdo2.writeMediumText("ciao", StandardCharsets.UTF_8);
bdo2.writeShortText("ciao2", StandardCharsets.UTF_8);
var in = BufDataInput.create(bdo2.asList());
Assertions.assertEquals("ciao", in.readMediumText(StandardCharsets.UTF_8));
Assertions.assertEquals("ciao2", in.readShortText(StandardCharsets.UTF_8));
}
private void testDecodeString(String s) {