From d9de99a63466092a7bfc2ba4973c854922bbf670 Mon Sep 17 00:00:00 2001
From: Andrea Cavalli
Date: Sat, 9 Oct 2021 00:38:42 +0200
Subject: [PATCH] Only accept mappable and valid UTF-8 during serialization

Encode and decode strings through per-thread CharsetEncoder/CharsetDecoder
instances configured with CodingErrorAction.REPORT for both malformed input
and unmappable characters. Any CharacterCodingException (or
IllegalStateException) raised while coding is rethrown as an IOException.
The length-prefixed layout (int byte count followed by the bytes) is kept.
---
 .../nativedata/StringSerializer.java          | 43 +++++++++++++++++--
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/src/main/java/it/cavallium/data/generator/nativedata/StringSerializer.java b/src/main/java/it/cavallium/data/generator/nativedata/StringSerializer.java
index 8ef7894..0628a6d 100644
--- a/src/main/java/it/cavallium/data/generator/nativedata/StringSerializer.java
+++ b/src/main/java/it/cavallium/data/generator/nativedata/StringSerializer.java
@@ -4,18 +4,48 @@ import it.cavallium.data.generator.DataSerializer;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.channels.Channels;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.MalformedInputException;
 import java.nio.charset.StandardCharsets;
+import java.nio.charset.UnmappableCharacterException;
 import org.jetbrains.annotations.NotNull;
 
 public class StringSerializer implements DataSerializer<String> {
 
 	public static final StringSerializer INSTANCE = new StringSerializer();
+	private static final ThreadLocal<CharsetEncoder> UTF8_ENCODER = ThreadLocal.withInitial(() -> StandardCharsets.UTF_8
+			.newEncoder()
+			.onUnmappableCharacter(CodingErrorAction.REPORT)
+			.onMalformedInput(CodingErrorAction.REPORT)
+	);
+	private static final ThreadLocal<CharsetDecoder> UTF8_DECODER = ThreadLocal.withInitial(() -> StandardCharsets.UTF_8
+			.newDecoder()
+			.onUnmappableCharacter(CodingErrorAction.REPORT)
+			.onMalformedInput(CodingErrorAction.REPORT)
+	);
 
 	@Override
 	public void serialize(DataOutput dataOutput, @NotNull String data) throws IOException {
-		byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
-		dataOutput.writeInt(bytes.length);
-		dataOutput.write(bytes);
+		try {
+			var bytes = UTF8_ENCODER.get().reset().encode(CharBuffer.wrap(data));
+
+			dataOutput.writeInt(bytes.limit());
+			if (bytes.hasArray()) {
+				dataOutput.write(bytes.array(), bytes.arrayOffset(), bytes.limit());
+			} else {
+				while (bytes.hasRemaining()) {
+					dataOutput.writeByte(bytes.get());
+				}
+			}
+		} catch (IllegalStateException | CharacterCodingException ex) {
+			throw new IOException("Can't encode this UTF-8 string", ex);
+		}
 	}
 
 	@NotNull
@@ -23,6 +53,11 @@ public class StringSerializer implements DataSerializer<String> {
 	public String deserialize(DataInput dataInput) throws IOException {
 		byte[] bytes = new byte[dataInput.readInt()];
 		dataInput.readFully(bytes);
-		return new String(bytes, StandardCharsets.UTF_8);
+		try {
+			CharBuffer decoded = UTF8_DECODER.get().reset().decode(ByteBuffer.wrap(bytes));
+			return decoded.toString();
+		} catch (IllegalStateException | CharacterCodingException ex) {
+			throw new IOException("Can't decode this UTF-8 string", ex);
+		}
 	}
 }