Optimization

This commit is contained in:
Andrea Cavalli 2023-03-28 00:39:14 +02:00
parent 40d7f35a8e
commit 4ff1eb0179
2 changed files with 17 additions and 103 deletions

View File

@ -1,13 +1,11 @@
package it.cavallium.buffer;
import static it.cavallium.stream.SafeDataOutputStream.strLen;
import static it.cavallium.stream.SafeDataOutputStream.utfLen;
import it.cavallium.stream.SafeByteArrayOutputStream;
import it.cavallium.stream.SafeDataOutput;
import it.cavallium.stream.SafeDataOutputStream;
import it.unimi.dsi.fastutil.Arrays;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import org.jetbrains.annotations.NotNull;
@ -205,11 +203,13 @@ public class BufDataOutput implements SafeDataOutput {
@Override
public void writeUTF(@NotNull String str) {
var strlen = strLen(str);
var utflen = utfLen(str, strlen);
var bytes = Short.BYTES + utflen;
checkOutOfBounds(bytes);
dOut.writeUTF(strlen, utflen, str);
var out = str.getBytes(StandardCharsets.UTF_8);
if (out.length > Short.MAX_VALUE) {
throw new IndexOutOfBoundsException("String too long: " + out.length + " bytes");
}
checkOutOfBounds(Short.BYTES + out.length);
dOut.writeShort(out.length);
dOut.write(out);
}
public Buf asList() {

View File

@ -26,6 +26,7 @@
package it.cavallium.stream;
import java.io.DataOutputStream;
import java.nio.charset.StandardCharsets;
/**
* A data output stream lets an application write primitive Java data
@ -326,102 +327,15 @@ public class SafeDataOutputStream extends SafeFilterOutputStream implements Safe
* @see #writeChars(String)
*/
public final void writeUTF(String str) {
writeUTF(str, this);
}
public final void writeUTF(int strlen, int utflen, String str) {
writeUTF(strlen, utflen, str, this);
}
/**
* Writes a string to the specified DataOutput using
* <a href="DataInput.html#modified-utf-8">modified UTF-8</a>
* encoding in a machine-independent manner.
* <p>
* First, two bytes are written to out as if by the {@code writeShort}
* method giving the number of bytes to follow. This value is the number of
* bytes actually written out, not the length of the string. Following the
* length, each character of the string is output, in sequence, using the
* modified UTF-8 encoding for the character. If no exception is thrown, the
* counter {@code written} is incremented by the total number of
* bytes written to the output stream. This will be at least two
* plus the length of {@code str}, and at most two plus
* thrice the length of {@code str}.
*
* @param str a string to be written.
* @param out destination to write to
* @return The number of bytes written out.
*/
static int writeUTF(String str, SafeDataOutput out) {
int strlen = strLen(str);
int utflen = utfLen(str, strlen);
return writeUTF(strlen, utflen, str, out);
}
public static int strLen(String str) {
return str.length();
}
public static int utfLen(String str, int strLen) {
int utflen = strLen; // optimized for ASCII
for (int i = 0; i < strLen; i++) {
int c = str.charAt(i);
if (c >= 0x80 || c == 0)
utflen += (c >= 0x800) ? 2 : 1;
var outString = str.getBytes(StandardCharsets.UTF_8);
if (outString.length > Short.MAX_VALUE) {
throw new IndexOutOfBoundsException("String too long: " + outString.length + " bytes");
}
if (utflen > 65535 || /* overflow */ utflen < strLen)
throw new IllegalArgumentException(tooLongMsg(str, utflen));
return utflen;
}
static int writeUTF(int strlen, int utflen, String str, SafeDataOutput out) {
final byte[] bytearr;
if (out instanceof SafeDataOutputStream dos) {
if (dos.bytearr == null || (dos.bytearr.length < (utflen + 2)))
dos.bytearr = new byte[(utflen*2) + 2];
bytearr = dos.bytearr;
} else {
bytearr = new byte[utflen + 2];
}
int count = 0;
bytearr[count++] = (byte) ((utflen >>> 8) & 0xFF);
bytearr[count++] = (byte) ((utflen) & 0xFF);
int i;
for (i = 0; i < strlen; i++) { // optimized for initial run of ASCII
int c = str.charAt(i);
if (c >= 0x80 || c == 0) break;
bytearr[count++] = (byte) c;
}
for (; i < strlen; i++) {
int c = str.charAt(i);
if (c < 0x80 && c != 0) {
bytearr[count++] = (byte) c;
} else if (c >= 0x800) {
bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
bytearr[count++] = (byte) (0x80 | ((c) & 0x3F));
} else {
bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
bytearr[count++] = (byte) (0x80 | ((c) & 0x3F));
}
}
out.write(bytearr, 0, utflen + 2);
return utflen + 2;
}
private static String tooLongMsg(String s, int bits32) {
int slen = s.length();
String head = s.substring(0, 8);
String tail = s.substring(slen - 8, slen);
// handle int overflow with max 3x expansion
long actualLength = (long)slen + Integer.toUnsignedLong(bits32 - slen);
return "encoded string (" + head + "..." + tail + ") too long: "
+ actualLength + " bytes";
var v = outString.length;
out.write((v >>> 8) & 0xFF);
out.write((v) & 0xFF);
out.write(outString);
incCount(2 + outString.length);
}
/**