ByteBufUtil.writeUtf8 not consistent with String.getBytes(Charset)
Motivation:
f750d6e36c
added support for surrogates in the writeUtf8 conversion. However exceptions are thrown if invalid input is detected, but the JDK (and slow path of writeUtf8) uses a replacement character and does not throw. We should behave the same way.
Modificiations:
- Don't throw in ByteBufUtil.writeUtf8, and instead use a replacement character consistent with the JDK
Result:
ByteBufUtil.writeUtf8 behavior is consistent with the JDK UTF_8 conversion.
This commit is contained in:
parent
f4798edc07
commit
8ddba82fce
@ -54,6 +54,7 @@ public final class ByteBufUtil {
|
||||
}
|
||||
};
|
||||
|
||||
private static final byte WRITE_UTF_UNKNOWN = (byte) '?';
|
||||
private static final int MAX_CHAR_BUFFER_SIZE;
|
||||
private static final int THREAD_LOCAL_BUFFER_SIZE;
|
||||
|
||||
@ -382,8 +383,8 @@ public final class ByteBufUtil {
|
||||
buffer._setByte(writerIndex++, (byte) (0x80 | (c & 0x3f)));
|
||||
} else if (isSurrogate(c)) {
|
||||
if (!Character.isHighSurrogate(c)) {
|
||||
throw new IllegalArgumentException("Invalid encoding. " +
|
||||
"Expected high (leading) surrogate at index " + i + " but got " + c);
|
||||
buffer._setByte(writerIndex++, WRITE_UTF_UNKNOWN);
|
||||
continue;
|
||||
}
|
||||
final char c2;
|
||||
try {
|
||||
@ -392,12 +393,13 @@ public final class ByteBufUtil {
|
||||
// re-throw a more informative exception describing the problem.
|
||||
c2 = seq.charAt(++i);
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
throw new IllegalArgumentException("Underflow. " +
|
||||
"Expected low (trailing) surrogate at index " + i + " but no more characters found.", e);
|
||||
buffer._setByte(writerIndex++, WRITE_UTF_UNKNOWN);
|
||||
break;
|
||||
}
|
||||
if (!Character.isLowSurrogate(c2)) {
|
||||
throw new IllegalArgumentException("Invalid encoding. " +
|
||||
"Expected low (trailing) surrogate at index " + i + " but got " + c2);
|
||||
buffer._setByte(writerIndex++, WRITE_UTF_UNKNOWN);
|
||||
buffer._setByte(writerIndex++, Character.isHighSurrogate(c2) ? WRITE_UTF_UNKNOWN : c2);
|
||||
continue;
|
||||
}
|
||||
int codePoint = Character.toCodePoint(c, c2);
|
||||
// See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630.
|
||||
|
@ -16,79 +16,188 @@
|
||||
package io.netty.buffer;
|
||||
|
||||
import io.netty.util.CharsetUtil;
|
||||
import io.netty.util.ReferenceCountUtil;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import static io.netty.buffer.Unpooled.unreleasableBuffer;
|
||||
import static io.netty.util.ReferenceCountUtil.releaseLater;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class ByteBufUtilTest {
|
||||
|
||||
@Test
|
||||
public void testWriteUsAscii() {
|
||||
String usAscii = "NettyRocks";
|
||||
ByteBuf buf = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
|
||||
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||
buf.writeBytes(usAscii.getBytes(CharsetUtil.US_ASCII));
|
||||
ByteBuf buf2 = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
|
||||
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||
ByteBufUtil.writeAscii(buf2, usAscii);
|
||||
|
||||
Assert.assertEquals(buf, buf2);
|
||||
assertEquals(buf, buf2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteUsAsciiWrapped() {
|
||||
String usAscii = "NettyRocks";
|
||||
ByteBuf buf = Unpooled.unreleasableBuffer(ReferenceCountUtil.releaseLater(Unpooled.buffer(16)));
|
||||
ByteBuf buf = unreleasableBuffer(releaseLater(Unpooled.buffer(16)));
|
||||
assertWrapped(buf);
|
||||
buf.writeBytes(usAscii.getBytes(CharsetUtil.US_ASCII));
|
||||
ByteBuf buf2 = Unpooled.unreleasableBuffer(ReferenceCountUtil.releaseLater(Unpooled.buffer(16)));
|
||||
ByteBuf buf2 = unreleasableBuffer(releaseLater(Unpooled.buffer(16)));
|
||||
assertWrapped(buf2);
|
||||
ByteBufUtil.writeAscii(buf2, usAscii);
|
||||
|
||||
Assert.assertEquals(buf, buf2);
|
||||
assertEquals(buf, buf2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteUtf8() {
|
||||
String usAscii = "Some UTF-8 like äÄ∏ŒŒ";
|
||||
ByteBuf buf = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
|
||||
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||
buf.writeBytes(usAscii.getBytes(CharsetUtil.UTF_8));
|
||||
ByteBuf buf2 = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
|
||||
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||
ByteBufUtil.writeUtf8(buf2, usAscii);
|
||||
|
||||
Assert.assertEquals(buf, buf2);
|
||||
assertEquals(buf, buf2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteUtf8Surrogates() {
|
||||
// leading surrogate + trailing surrogate
|
||||
String surrogateString = new StringBuilder(2)
|
||||
.append('a')
|
||||
.append('\uD800')
|
||||
.append('\uDC00')
|
||||
.append('b')
|
||||
.toString();
|
||||
ByteBuf buf = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
|
||||
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||
ByteBuf buf2 = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
|
||||
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||
|
||||
Assert.assertEquals(buf, buf2);
|
||||
assertEquals(buf, buf2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteUtf8InvalidOnlyTrailingSurrogate() {
|
||||
String surrogateString = new StringBuilder(2)
|
||||
.append('a')
|
||||
.append('\uDC00')
|
||||
.append('b')
|
||||
.toString();
|
||||
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||
|
||||
assertEquals(buf, buf2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteUtf8InvalidOnlyLeadingSurrogate() {
|
||||
String surrogateString = new StringBuilder(2)
|
||||
.append('a')
|
||||
.append('\uD800')
|
||||
.append('b')
|
||||
.toString();
|
||||
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||
|
||||
assertEquals(buf, buf2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteUtf8InvalidSurrogatesSwitched() {
|
||||
String surrogateString = new StringBuilder(2)
|
||||
.append('a')
|
||||
.append('\uDC00')
|
||||
.append('\uD800')
|
||||
.append('b')
|
||||
.toString();
|
||||
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||
|
||||
assertEquals(buf, buf2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteUtf8InvalidTwoLeadingSurrogates() {
|
||||
String surrogateString = new StringBuilder(2)
|
||||
.append('a')
|
||||
.append('\uD800')
|
||||
.append('\uD800')
|
||||
.append('b')
|
||||
.toString();
|
||||
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||
|
||||
assertEquals(buf, buf2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteUtf8InvalidTwoTrailingSurrogates() {
|
||||
String surrogateString = new StringBuilder(2)
|
||||
.append('a')
|
||||
.append('\uDC00')
|
||||
.append('\uDC00')
|
||||
.append('b')
|
||||
.toString();
|
||||
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||
|
||||
assertEquals(buf, buf2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteUtf8InvalidEndOnLeadingSurrogate() {
|
||||
String surrogateString = new StringBuilder(2)
|
||||
.append('\uD800')
|
||||
.toString();
|
||||
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||
|
||||
assertEquals(buf, buf2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteUtf8InvalidEndOnTrailingSurrogate() {
|
||||
String surrogateString = new StringBuilder(2)
|
||||
.append('\uDC00')
|
||||
.toString();
|
||||
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||
|
||||
assertEquals(buf, buf2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteUtf8Wrapped() {
|
||||
String usAscii = "Some UTF-8 like äÄ∏ŒŒ";
|
||||
ByteBuf buf = Unpooled.unreleasableBuffer(ReferenceCountUtil.releaseLater(Unpooled.buffer(16)));
|
||||
ByteBuf buf = unreleasableBuffer(releaseLater(Unpooled.buffer(16)));
|
||||
assertWrapped(buf);
|
||||
buf.writeBytes(usAscii.getBytes(CharsetUtil.UTF_8));
|
||||
ByteBuf buf2 = Unpooled.unreleasableBuffer(ReferenceCountUtil.releaseLater(Unpooled.buffer(16)));
|
||||
ByteBuf buf2 = unreleasableBuffer(releaseLater(Unpooled.buffer(16)));
|
||||
assertWrapped(buf2);
|
||||
ByteBufUtil.writeUtf8(buf2, usAscii);
|
||||
|
||||
Assert.assertEquals(buf, buf2);
|
||||
assertEquals(buf, buf2);
|
||||
}
|
||||
|
||||
private static void assertWrapped(ByteBuf buf) {
|
||||
Assert.assertTrue(buf instanceof WrappedByteBuf);
|
||||
assertTrue(buf instanceof WrappedByteBuf);
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -103,7 +212,7 @@ public class ByteBufUtilTest {
|
||||
|
||||
private static void testDecodeString(String text, Charset charset) {
|
||||
ByteBuf buffer = Unpooled.copiedBuffer(text, charset);
|
||||
Assert.assertEquals(text, ByteBufUtil.decodeString(buffer, 0, buffer.readableBytes(), charset));
|
||||
assertEquals(text, ByteBufUtil.decodeString(buffer, 0, buffer.readableBytes(), charset));
|
||||
buffer.release();
|
||||
}
|
||||
|
||||
@ -114,7 +223,7 @@ public class ByteBufUtilTest {
|
||||
byte[] bytes = "1234".getBytes(CharsetUtil.UTF_8);
|
||||
buffer.addComponent(Unpooled.buffer(bytes.length).writeBytes(bytes));
|
||||
buffer.addComponent(Unpooled.buffer(bytes.length).writeBytes(bytes));
|
||||
Assert.assertEquals("1234", buffer.toString(bytes.length, bytes.length, CharsetUtil.UTF_8));
|
||||
assertEquals("1234", buffer.toString(bytes.length, bytes.length, CharsetUtil.UTF_8));
|
||||
} finally {
|
||||
buffer.release();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user