ByteBufUtil.writeUtf8 not consistent with String.getBytes(Charset)

Motivation:
f750d6e36c80e88fb302c99b5b7413e5649e6738 added support for surrogates in the writeUtf8 conversion. However exceptions are thrown if invalid input is detected, but the JDK (and slow path of writeUtf8) uses a replacement character and does not throw. We should behave the same way.

Modificiations:
- Don't throw in ByteBufUtil.writeUtf8, and instead use a replacement character consistent with the JDK

Result:
ByteBufUtil.writeUtf8 behavior is consistent with the JDK UTF_8 conversion.
This commit is contained in:
Scott Mitchell 2016-02-10 19:07:20 -08:00
parent f4798edc07
commit 8ddba82fce
2 changed files with 137 additions and 26 deletions

View File

@ -54,6 +54,7 @@ public final class ByteBufUtil {
} }
}; };
private static final byte WRITE_UTF_UNKNOWN = (byte) '?';
private static final int MAX_CHAR_BUFFER_SIZE; private static final int MAX_CHAR_BUFFER_SIZE;
private static final int THREAD_LOCAL_BUFFER_SIZE; private static final int THREAD_LOCAL_BUFFER_SIZE;
@ -382,8 +383,8 @@ public final class ByteBufUtil {
buffer._setByte(writerIndex++, (byte) (0x80 | (c & 0x3f))); buffer._setByte(writerIndex++, (byte) (0x80 | (c & 0x3f)));
} else if (isSurrogate(c)) { } else if (isSurrogate(c)) {
if (!Character.isHighSurrogate(c)) { if (!Character.isHighSurrogate(c)) {
throw new IllegalArgumentException("Invalid encoding. " + buffer._setByte(writerIndex++, WRITE_UTF_UNKNOWN);
"Expected high (leading) surrogate at index " + i + " but got " + c); continue;
} }
final char c2; final char c2;
try { try {
@ -392,12 +393,13 @@ public final class ByteBufUtil {
// re-throw a more informative exception describing the problem. // re-throw a more informative exception describing the problem.
c2 = seq.charAt(++i); c2 = seq.charAt(++i);
} catch (IndexOutOfBoundsException e) { } catch (IndexOutOfBoundsException e) {
throw new IllegalArgumentException("Underflow. " + buffer._setByte(writerIndex++, WRITE_UTF_UNKNOWN);
"Expected low (trailing) surrogate at index " + i + " but no more characters found.", e); break;
} }
if (!Character.isLowSurrogate(c2)) { if (!Character.isLowSurrogate(c2)) {
throw new IllegalArgumentException("Invalid encoding. " + buffer._setByte(writerIndex++, WRITE_UTF_UNKNOWN);
"Expected low (trailing) surrogate at index " + i + " but got " + c2); buffer._setByte(writerIndex++, Character.isHighSurrogate(c2) ? WRITE_UTF_UNKNOWN : c2);
continue;
} }
int codePoint = Character.toCodePoint(c, c2); int codePoint = Character.toCodePoint(c, c2);
// See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630. // See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630.

View File

@ -16,79 +16,188 @@
package io.netty.buffer; package io.netty.buffer;
import io.netty.util.CharsetUtil; import io.netty.util.CharsetUtil;
import io.netty.util.ReferenceCountUtil;
import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import static io.netty.buffer.Unpooled.unreleasableBuffer;
import static io.netty.util.ReferenceCountUtil.releaseLater;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class ByteBufUtilTest { public class ByteBufUtilTest {
@Test @Test
public void testWriteUsAscii() { public void testWriteUsAscii() {
String usAscii = "NettyRocks"; String usAscii = "NettyRocks";
ByteBuf buf = ReferenceCountUtil.releaseLater(Unpooled.buffer(16)); ByteBuf buf = releaseLater(Unpooled.buffer(16));
buf.writeBytes(usAscii.getBytes(CharsetUtil.US_ASCII)); buf.writeBytes(usAscii.getBytes(CharsetUtil.US_ASCII));
ByteBuf buf2 = ReferenceCountUtil.releaseLater(Unpooled.buffer(16)); ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
ByteBufUtil.writeAscii(buf2, usAscii); ByteBufUtil.writeAscii(buf2, usAscii);
Assert.assertEquals(buf, buf2); assertEquals(buf, buf2);
} }
@Test @Test
public void testWriteUsAsciiWrapped() { public void testWriteUsAsciiWrapped() {
String usAscii = "NettyRocks"; String usAscii = "NettyRocks";
ByteBuf buf = Unpooled.unreleasableBuffer(ReferenceCountUtil.releaseLater(Unpooled.buffer(16))); ByteBuf buf = unreleasableBuffer(releaseLater(Unpooled.buffer(16)));
assertWrapped(buf); assertWrapped(buf);
buf.writeBytes(usAscii.getBytes(CharsetUtil.US_ASCII)); buf.writeBytes(usAscii.getBytes(CharsetUtil.US_ASCII));
ByteBuf buf2 = Unpooled.unreleasableBuffer(ReferenceCountUtil.releaseLater(Unpooled.buffer(16))); ByteBuf buf2 = unreleasableBuffer(releaseLater(Unpooled.buffer(16)));
assertWrapped(buf2); assertWrapped(buf2);
ByteBufUtil.writeAscii(buf2, usAscii); ByteBufUtil.writeAscii(buf2, usAscii);
Assert.assertEquals(buf, buf2); assertEquals(buf, buf2);
} }
@Test @Test
public void testWriteUtf8() { public void testWriteUtf8() {
String usAscii = "Some UTF-8 like äÄ∏ŒŒ"; String usAscii = "Some UTF-8 like äÄ∏ŒŒ";
ByteBuf buf = ReferenceCountUtil.releaseLater(Unpooled.buffer(16)); ByteBuf buf = releaseLater(Unpooled.buffer(16));
buf.writeBytes(usAscii.getBytes(CharsetUtil.UTF_8)); buf.writeBytes(usAscii.getBytes(CharsetUtil.UTF_8));
ByteBuf buf2 = ReferenceCountUtil.releaseLater(Unpooled.buffer(16)); ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
ByteBufUtil.writeUtf8(buf2, usAscii); ByteBufUtil.writeUtf8(buf2, usAscii);
Assert.assertEquals(buf, buf2); assertEquals(buf, buf2);
} }
@Test @Test
public void testWriteUtf8Surrogates() { public void testWriteUtf8Surrogates() {
// leading surrogate + trailing surrogate // leading surrogate + trailing surrogate
String surrogateString = new StringBuilder(2) String surrogateString = new StringBuilder(2)
.append('a')
.append('\uD800') .append('\uD800')
.append('\uDC00') .append('\uDC00')
.append('b')
.toString(); .toString();
ByteBuf buf = ReferenceCountUtil.releaseLater(Unpooled.buffer(16)); ByteBuf buf = releaseLater(Unpooled.buffer(16));
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8)); buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
ByteBuf buf2 = ReferenceCountUtil.releaseLater(Unpooled.buffer(16)); ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
ByteBufUtil.writeUtf8(buf2, surrogateString); ByteBufUtil.writeUtf8(buf2, surrogateString);
Assert.assertEquals(buf, buf2); assertEquals(buf, buf2);
}
@Test
public void testWriteUtf8InvalidOnlyTrailingSurrogate() {
String surrogateString = new StringBuilder(2)
.append('a')
.append('\uDC00')
.append('b')
.toString();
ByteBuf buf = releaseLater(Unpooled.buffer(16));
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
ByteBufUtil.writeUtf8(buf2, surrogateString);
assertEquals(buf, buf2);
}
@Test
public void testWriteUtf8InvalidOnlyLeadingSurrogate() {
String surrogateString = new StringBuilder(2)
.append('a')
.append('\uD800')
.append('b')
.toString();
ByteBuf buf = releaseLater(Unpooled.buffer(16));
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
ByteBufUtil.writeUtf8(buf2, surrogateString);
assertEquals(buf, buf2);
}
@Test
public void testWriteUtf8InvalidSurrogatesSwitched() {
String surrogateString = new StringBuilder(2)
.append('a')
.append('\uDC00')
.append('\uD800')
.append('b')
.toString();
ByteBuf buf = releaseLater(Unpooled.buffer(16));
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
ByteBufUtil.writeUtf8(buf2, surrogateString);
assertEquals(buf, buf2);
}
@Test
public void testWriteUtf8InvalidTwoLeadingSurrogates() {
String surrogateString = new StringBuilder(2)
.append('a')
.append('\uD800')
.append('\uD800')
.append('b')
.toString();
ByteBuf buf = releaseLater(Unpooled.buffer(16));
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
ByteBufUtil.writeUtf8(buf2, surrogateString);
assertEquals(buf, buf2);
}
@Test
public void testWriteUtf8InvalidTwoTrailingSurrogates() {
String surrogateString = new StringBuilder(2)
.append('a')
.append('\uDC00')
.append('\uDC00')
.append('b')
.toString();
ByteBuf buf = releaseLater(Unpooled.buffer(16));
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
ByteBufUtil.writeUtf8(buf2, surrogateString);
assertEquals(buf, buf2);
}
@Test
public void testWriteUtf8InvalidEndOnLeadingSurrogate() {
String surrogateString = new StringBuilder(2)
.append('\uD800')
.toString();
ByteBuf buf = releaseLater(Unpooled.buffer(16));
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
ByteBufUtil.writeUtf8(buf2, surrogateString);
assertEquals(buf, buf2);
}
@Test
public void testWriteUtf8InvalidEndOnTrailingSurrogate() {
String surrogateString = new StringBuilder(2)
.append('\uDC00')
.toString();
ByteBuf buf = releaseLater(Unpooled.buffer(16));
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
ByteBufUtil.writeUtf8(buf2, surrogateString);
assertEquals(buf, buf2);
} }
@Test @Test
public void testWriteUtf8Wrapped() { public void testWriteUtf8Wrapped() {
String usAscii = "Some UTF-8 like äÄ∏ŒŒ"; String usAscii = "Some UTF-8 like äÄ∏ŒŒ";
ByteBuf buf = Unpooled.unreleasableBuffer(ReferenceCountUtil.releaseLater(Unpooled.buffer(16))); ByteBuf buf = unreleasableBuffer(releaseLater(Unpooled.buffer(16)));
assertWrapped(buf); assertWrapped(buf);
buf.writeBytes(usAscii.getBytes(CharsetUtil.UTF_8)); buf.writeBytes(usAscii.getBytes(CharsetUtil.UTF_8));
ByteBuf buf2 = Unpooled.unreleasableBuffer(ReferenceCountUtil.releaseLater(Unpooled.buffer(16))); ByteBuf buf2 = unreleasableBuffer(releaseLater(Unpooled.buffer(16)));
assertWrapped(buf2); assertWrapped(buf2);
ByteBufUtil.writeUtf8(buf2, usAscii); ByteBufUtil.writeUtf8(buf2, usAscii);
Assert.assertEquals(buf, buf2); assertEquals(buf, buf2);
} }
private static void assertWrapped(ByteBuf buf) { private static void assertWrapped(ByteBuf buf) {
Assert.assertTrue(buf instanceof WrappedByteBuf); assertTrue(buf instanceof WrappedByteBuf);
} }
@Test @Test
@ -103,7 +212,7 @@ public class ByteBufUtilTest {
private static void testDecodeString(String text, Charset charset) { private static void testDecodeString(String text, Charset charset) {
ByteBuf buffer = Unpooled.copiedBuffer(text, charset); ByteBuf buffer = Unpooled.copiedBuffer(text, charset);
Assert.assertEquals(text, ByteBufUtil.decodeString(buffer, 0, buffer.readableBytes(), charset)); assertEquals(text, ByteBufUtil.decodeString(buffer, 0, buffer.readableBytes(), charset));
buffer.release(); buffer.release();
} }
@ -114,7 +223,7 @@ public class ByteBufUtilTest {
byte[] bytes = "1234".getBytes(CharsetUtil.UTF_8); byte[] bytes = "1234".getBytes(CharsetUtil.UTF_8);
buffer.addComponent(Unpooled.buffer(bytes.length).writeBytes(bytes)); buffer.addComponent(Unpooled.buffer(bytes.length).writeBytes(bytes));
buffer.addComponent(Unpooled.buffer(bytes.length).writeBytes(bytes)); buffer.addComponent(Unpooled.buffer(bytes.length).writeBytes(bytes));
Assert.assertEquals("1234", buffer.toString(bytes.length, bytes.length, CharsetUtil.UTF_8)); assertEquals("1234", buffer.toString(bytes.length, bytes.length, CharsetUtil.UTF_8));
} finally { } finally {
buffer.release(); buffer.release();
} }