Fix readUTF implementation
This commit is contained in:
parent
58eb0d0b5c
commit
bc34559de0
@ -25,6 +25,11 @@
|
|||||||
|
|
||||||
package it.cavallium.stream;
|
package it.cavallium.stream;
|
||||||
|
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.charset.CharacterCodingException;
|
||||||
|
import java.nio.charset.CharsetDecoder;
|
||||||
|
import java.nio.charset.CodingErrorAction;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import org.jetbrains.annotations.NotNull;
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
|
||||||
public class SafeDataInputStream extends SafeFilterInputStream implements SafeDataInput {
|
public class SafeDataInputStream extends SafeFilterInputStream implements SafeDataInput {
|
||||||
@ -42,8 +47,9 @@ public class SafeDataInputStream extends SafeFilterInputStream implements SafeDa
|
|||||||
/**
|
/**
|
||||||
* working arrays initialized on demand by readUTF
|
* working arrays initialized on demand by readUTF
|
||||||
*/
|
*/
|
||||||
private byte[] bytearr = new byte[80];
|
private byte[] bytearr;
|
||||||
private char[] chararr = new char[80];
|
private char[] chararr;
|
||||||
|
private CharsetDecoder utfdec;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final int read(byte[] b) {
|
public final int read(byte[] b) {
|
||||||
@ -448,64 +454,18 @@ public class SafeDataInputStream extends SafeFilterInputStream implements SafeDa
|
|||||||
* @see SafeDataInputStream#readUnsignedShort()
|
* @see SafeDataInputStream#readUnsignedShort()
|
||||||
*/
|
*/
|
||||||
public static String readUTF(SafeDataInputStream in) {
|
public static String readUTF(SafeDataInputStream in) {
|
||||||
|
if (in.bytearr == null) in.bytearr = new byte[80];
|
||||||
|
if (in.chararr == null) in.chararr = new char[80];
|
||||||
|
if (in.utfdec == null) in.utfdec = StandardCharsets.UTF_8.newDecoder()
|
||||||
|
.onUnmappableCharacter(CodingErrorAction.REPORT)
|
||||||
|
.onMalformedInput(CodingErrorAction.REPORT);
|
||||||
int utflen = in.readUnsignedShort();
|
int utflen = in.readUnsignedShort();
|
||||||
byte[] bytearr;
|
var data = new byte[utflen];
|
||||||
char[] chararr;
|
in.readFully(data);
|
||||||
if (in.bytearr.length < utflen){
|
try {
|
||||||
in.bytearr = new byte[utflen*2];
|
return in.utfdec.reset().decode(ByteBuffer.wrap(data)).toString();
|
||||||
in.chararr = new char[utflen*2];
|
} catch (CharacterCodingException e) {
|
||||||
}
|
throw new IllegalArgumentException("malformed input string", e);
|
||||||
chararr = in.chararr;
|
|
||||||
bytearr = in.bytearr;
|
|
||||||
|
|
||||||
int c, char2, char3;
|
|
||||||
int count = 0;
|
|
||||||
int chararr_count=0;
|
|
||||||
|
|
||||||
in.readFully(bytearr, 0, utflen);
|
|
||||||
|
|
||||||
while (count < utflen) {
|
|
||||||
c = (int) bytearr[count] & 0xff;
|
|
||||||
if (c > 127) break;
|
|
||||||
count++;
|
|
||||||
chararr[chararr_count++]=(char)c;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (count < utflen) {
|
|
||||||
c = (int) bytearr[count] & 0xff;
|
|
||||||
switch (c >> 4) {
|
|
||||||
case 0, 1, 2, 3, 4, 5, 6, 7 -> {
|
|
||||||
/* 0xxxxxxx*/
|
|
||||||
count++;
|
|
||||||
chararr[chararr_count++] = (char) c;
|
|
||||||
}
|
|
||||||
case 12, 13 -> {
|
|
||||||
/* 110x xxxx 10xx xxxx*/
|
|
||||||
count += 2;
|
|
||||||
if (count > utflen)
|
|
||||||
throw new IllegalArgumentException("malformed input: partial character at end");
|
|
||||||
char2 = bytearr[count - 1];
|
|
||||||
if ((char2 & 0xC0) != 0x80)
|
|
||||||
throw new IllegalArgumentException("malformed input around byte " + count);
|
|
||||||
chararr[chararr_count++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F));
|
|
||||||
}
|
|
||||||
case 14 -> {
|
|
||||||
/* 1110 xxxx 10xx xxxx 10xx xxxx */
|
|
||||||
count += 3;
|
|
||||||
if (count > utflen)
|
|
||||||
throw new IllegalArgumentException("malformed input: partial character at end");
|
|
||||||
char2 = bytearr[count - 2];
|
|
||||||
char3 = bytearr[count - 1];
|
|
||||||
if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
|
|
||||||
throw new IllegalArgumentException("malformed input around byte " + (count - 1));
|
|
||||||
chararr[chararr_count++] = (char) (((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | ((char3 & 0x3F)));
|
|
||||||
}
|
|
||||||
default ->
|
|
||||||
/* 10xx xxxx, 1111 xxxx */
|
|
||||||
throw new IllegalArgumentException("malformed input around byte " + count);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// The number of chars produced may be less than utflen
|
|
||||||
return new String(chararr, 0, chararr_count);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user