ByteBufUtil.writeUtf8 not consistent with String.getBytes(Charset)
Motivation: f750d6e36c80e88fb302c99b5b7413e5649e6738 added support for surrogates in the writeUtf8 conversion. However exceptions are thrown if invalid input is detected, but the JDK (and slow path of writeUtf8) uses a replacement character and does not throw. We should behave the same way. Modificiations: - Don't throw in ByteBufUtil.writeUtf8, and instead use a replacement character consistent with the JDK Result: ByteBufUtil.writeUtf8 behavior is consistent with the JDK UTF_8 conversion.
This commit is contained in:
parent
f4798edc07
commit
8ddba82fce
@ -54,6 +54,7 @@ public final class ByteBufUtil {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
private static final byte WRITE_UTF_UNKNOWN = (byte) '?';
|
||||||
private static final int MAX_CHAR_BUFFER_SIZE;
|
private static final int MAX_CHAR_BUFFER_SIZE;
|
||||||
private static final int THREAD_LOCAL_BUFFER_SIZE;
|
private static final int THREAD_LOCAL_BUFFER_SIZE;
|
||||||
|
|
||||||
@ -382,8 +383,8 @@ public final class ByteBufUtil {
|
|||||||
buffer._setByte(writerIndex++, (byte) (0x80 | (c & 0x3f)));
|
buffer._setByte(writerIndex++, (byte) (0x80 | (c & 0x3f)));
|
||||||
} else if (isSurrogate(c)) {
|
} else if (isSurrogate(c)) {
|
||||||
if (!Character.isHighSurrogate(c)) {
|
if (!Character.isHighSurrogate(c)) {
|
||||||
throw new IllegalArgumentException("Invalid encoding. " +
|
buffer._setByte(writerIndex++, WRITE_UTF_UNKNOWN);
|
||||||
"Expected high (leading) surrogate at index " + i + " but got " + c);
|
continue;
|
||||||
}
|
}
|
||||||
final char c2;
|
final char c2;
|
||||||
try {
|
try {
|
||||||
@ -392,12 +393,13 @@ public final class ByteBufUtil {
|
|||||||
// re-throw a more informative exception describing the problem.
|
// re-throw a more informative exception describing the problem.
|
||||||
c2 = seq.charAt(++i);
|
c2 = seq.charAt(++i);
|
||||||
} catch (IndexOutOfBoundsException e) {
|
} catch (IndexOutOfBoundsException e) {
|
||||||
throw new IllegalArgumentException("Underflow. " +
|
buffer._setByte(writerIndex++, WRITE_UTF_UNKNOWN);
|
||||||
"Expected low (trailing) surrogate at index " + i + " but no more characters found.", e);
|
break;
|
||||||
}
|
}
|
||||||
if (!Character.isLowSurrogate(c2)) {
|
if (!Character.isLowSurrogate(c2)) {
|
||||||
throw new IllegalArgumentException("Invalid encoding. " +
|
buffer._setByte(writerIndex++, WRITE_UTF_UNKNOWN);
|
||||||
"Expected low (trailing) surrogate at index " + i + " but got " + c2);
|
buffer._setByte(writerIndex++, Character.isHighSurrogate(c2) ? WRITE_UTF_UNKNOWN : c2);
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
int codePoint = Character.toCodePoint(c, c2);
|
int codePoint = Character.toCodePoint(c, c2);
|
||||||
// See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630.
|
// See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630.
|
||||||
|
@ -16,79 +16,188 @@
|
|||||||
package io.netty.buffer;
|
package io.netty.buffer;
|
||||||
|
|
||||||
import io.netty.util.CharsetUtil;
|
import io.netty.util.CharsetUtil;
|
||||||
import io.netty.util.ReferenceCountUtil;
|
|
||||||
import org.junit.Assert;
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
|
|
||||||
|
import static io.netty.buffer.Unpooled.unreleasableBuffer;
|
||||||
|
import static io.netty.util.ReferenceCountUtil.releaseLater;
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
public class ByteBufUtilTest {
|
public class ByteBufUtilTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testWriteUsAscii() {
|
public void testWriteUsAscii() {
|
||||||
String usAscii = "NettyRocks";
|
String usAscii = "NettyRocks";
|
||||||
ByteBuf buf = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
|
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||||
buf.writeBytes(usAscii.getBytes(CharsetUtil.US_ASCII));
|
buf.writeBytes(usAscii.getBytes(CharsetUtil.US_ASCII));
|
||||||
ByteBuf buf2 = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
|
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||||
ByteBufUtil.writeAscii(buf2, usAscii);
|
ByteBufUtil.writeAscii(buf2, usAscii);
|
||||||
|
|
||||||
Assert.assertEquals(buf, buf2);
|
assertEquals(buf, buf2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testWriteUsAsciiWrapped() {
|
public void testWriteUsAsciiWrapped() {
|
||||||
String usAscii = "NettyRocks";
|
String usAscii = "NettyRocks";
|
||||||
ByteBuf buf = Unpooled.unreleasableBuffer(ReferenceCountUtil.releaseLater(Unpooled.buffer(16)));
|
ByteBuf buf = unreleasableBuffer(releaseLater(Unpooled.buffer(16)));
|
||||||
assertWrapped(buf);
|
assertWrapped(buf);
|
||||||
buf.writeBytes(usAscii.getBytes(CharsetUtil.US_ASCII));
|
buf.writeBytes(usAscii.getBytes(CharsetUtil.US_ASCII));
|
||||||
ByteBuf buf2 = Unpooled.unreleasableBuffer(ReferenceCountUtil.releaseLater(Unpooled.buffer(16)));
|
ByteBuf buf2 = unreleasableBuffer(releaseLater(Unpooled.buffer(16)));
|
||||||
assertWrapped(buf2);
|
assertWrapped(buf2);
|
||||||
ByteBufUtil.writeAscii(buf2, usAscii);
|
ByteBufUtil.writeAscii(buf2, usAscii);
|
||||||
|
|
||||||
Assert.assertEquals(buf, buf2);
|
assertEquals(buf, buf2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testWriteUtf8() {
|
public void testWriteUtf8() {
|
||||||
String usAscii = "Some UTF-8 like äÄ∏ŒŒ";
|
String usAscii = "Some UTF-8 like äÄ∏ŒŒ";
|
||||||
ByteBuf buf = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
|
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||||
buf.writeBytes(usAscii.getBytes(CharsetUtil.UTF_8));
|
buf.writeBytes(usAscii.getBytes(CharsetUtil.UTF_8));
|
||||||
ByteBuf buf2 = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
|
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||||
ByteBufUtil.writeUtf8(buf2, usAscii);
|
ByteBufUtil.writeUtf8(buf2, usAscii);
|
||||||
|
|
||||||
Assert.assertEquals(buf, buf2);
|
assertEquals(buf, buf2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testWriteUtf8Surrogates() {
|
public void testWriteUtf8Surrogates() {
|
||||||
// leading surrogate + trailing surrogate
|
// leading surrogate + trailing surrogate
|
||||||
String surrogateString = new StringBuilder(2)
|
String surrogateString = new StringBuilder(2)
|
||||||
|
.append('a')
|
||||||
.append('\uD800')
|
.append('\uD800')
|
||||||
.append('\uDC00')
|
.append('\uDC00')
|
||||||
|
.append('b')
|
||||||
.toString();
|
.toString();
|
||||||
ByteBuf buf = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
|
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||||
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||||
ByteBuf buf2 = ReferenceCountUtil.releaseLater(Unpooled.buffer(16));
|
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||||
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||||
|
|
||||||
Assert.assertEquals(buf, buf2);
|
assertEquals(buf, buf2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWriteUtf8InvalidOnlyTrailingSurrogate() {
|
||||||
|
String surrogateString = new StringBuilder(2)
|
||||||
|
.append('a')
|
||||||
|
.append('\uDC00')
|
||||||
|
.append('b')
|
||||||
|
.toString();
|
||||||
|
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||||
|
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||||
|
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||||
|
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||||
|
|
||||||
|
assertEquals(buf, buf2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWriteUtf8InvalidOnlyLeadingSurrogate() {
|
||||||
|
String surrogateString = new StringBuilder(2)
|
||||||
|
.append('a')
|
||||||
|
.append('\uD800')
|
||||||
|
.append('b')
|
||||||
|
.toString();
|
||||||
|
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||||
|
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||||
|
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||||
|
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||||
|
|
||||||
|
assertEquals(buf, buf2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWriteUtf8InvalidSurrogatesSwitched() {
|
||||||
|
String surrogateString = new StringBuilder(2)
|
||||||
|
.append('a')
|
||||||
|
.append('\uDC00')
|
||||||
|
.append('\uD800')
|
||||||
|
.append('b')
|
||||||
|
.toString();
|
||||||
|
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||||
|
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||||
|
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||||
|
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||||
|
|
||||||
|
assertEquals(buf, buf2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWriteUtf8InvalidTwoLeadingSurrogates() {
|
||||||
|
String surrogateString = new StringBuilder(2)
|
||||||
|
.append('a')
|
||||||
|
.append('\uD800')
|
||||||
|
.append('\uD800')
|
||||||
|
.append('b')
|
||||||
|
.toString();
|
||||||
|
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||||
|
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||||
|
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||||
|
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||||
|
|
||||||
|
assertEquals(buf, buf2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWriteUtf8InvalidTwoTrailingSurrogates() {
|
||||||
|
String surrogateString = new StringBuilder(2)
|
||||||
|
.append('a')
|
||||||
|
.append('\uDC00')
|
||||||
|
.append('\uDC00')
|
||||||
|
.append('b')
|
||||||
|
.toString();
|
||||||
|
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||||
|
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||||
|
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||||
|
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||||
|
|
||||||
|
assertEquals(buf, buf2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWriteUtf8InvalidEndOnLeadingSurrogate() {
|
||||||
|
String surrogateString = new StringBuilder(2)
|
||||||
|
.append('\uD800')
|
||||||
|
.toString();
|
||||||
|
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||||
|
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||||
|
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||||
|
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||||
|
|
||||||
|
assertEquals(buf, buf2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWriteUtf8InvalidEndOnTrailingSurrogate() {
|
||||||
|
String surrogateString = new StringBuilder(2)
|
||||||
|
.append('\uDC00')
|
||||||
|
.toString();
|
||||||
|
ByteBuf buf = releaseLater(Unpooled.buffer(16));
|
||||||
|
buf.writeBytes(surrogateString.getBytes(CharsetUtil.UTF_8));
|
||||||
|
ByteBuf buf2 = releaseLater(Unpooled.buffer(16));
|
||||||
|
ByteBufUtil.writeUtf8(buf2, surrogateString);
|
||||||
|
|
||||||
|
assertEquals(buf, buf2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testWriteUtf8Wrapped() {
|
public void testWriteUtf8Wrapped() {
|
||||||
String usAscii = "Some UTF-8 like äÄ∏ŒŒ";
|
String usAscii = "Some UTF-8 like äÄ∏ŒŒ";
|
||||||
ByteBuf buf = Unpooled.unreleasableBuffer(ReferenceCountUtil.releaseLater(Unpooled.buffer(16)));
|
ByteBuf buf = unreleasableBuffer(releaseLater(Unpooled.buffer(16)));
|
||||||
assertWrapped(buf);
|
assertWrapped(buf);
|
||||||
buf.writeBytes(usAscii.getBytes(CharsetUtil.UTF_8));
|
buf.writeBytes(usAscii.getBytes(CharsetUtil.UTF_8));
|
||||||
ByteBuf buf2 = Unpooled.unreleasableBuffer(ReferenceCountUtil.releaseLater(Unpooled.buffer(16)));
|
ByteBuf buf2 = unreleasableBuffer(releaseLater(Unpooled.buffer(16)));
|
||||||
assertWrapped(buf2);
|
assertWrapped(buf2);
|
||||||
ByteBufUtil.writeUtf8(buf2, usAscii);
|
ByteBufUtil.writeUtf8(buf2, usAscii);
|
||||||
|
|
||||||
Assert.assertEquals(buf, buf2);
|
assertEquals(buf, buf2);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void assertWrapped(ByteBuf buf) {
|
private static void assertWrapped(ByteBuf buf) {
|
||||||
Assert.assertTrue(buf instanceof WrappedByteBuf);
|
assertTrue(buf instanceof WrappedByteBuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -103,7 +212,7 @@ public class ByteBufUtilTest {
|
|||||||
|
|
||||||
private static void testDecodeString(String text, Charset charset) {
|
private static void testDecodeString(String text, Charset charset) {
|
||||||
ByteBuf buffer = Unpooled.copiedBuffer(text, charset);
|
ByteBuf buffer = Unpooled.copiedBuffer(text, charset);
|
||||||
Assert.assertEquals(text, ByteBufUtil.decodeString(buffer, 0, buffer.readableBytes(), charset));
|
assertEquals(text, ByteBufUtil.decodeString(buffer, 0, buffer.readableBytes(), charset));
|
||||||
buffer.release();
|
buffer.release();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -114,7 +223,7 @@ public class ByteBufUtilTest {
|
|||||||
byte[] bytes = "1234".getBytes(CharsetUtil.UTF_8);
|
byte[] bytes = "1234".getBytes(CharsetUtil.UTF_8);
|
||||||
buffer.addComponent(Unpooled.buffer(bytes.length).writeBytes(bytes));
|
buffer.addComponent(Unpooled.buffer(bytes.length).writeBytes(bytes));
|
||||||
buffer.addComponent(Unpooled.buffer(bytes.length).writeBytes(bytes));
|
buffer.addComponent(Unpooled.buffer(bytes.length).writeBytes(bytes));
|
||||||
Assert.assertEquals("1234", buffer.toString(bytes.length, bytes.length, CharsetUtil.UTF_8));
|
assertEquals("1234", buffer.toString(bytes.length, bytes.length, CharsetUtil.UTF_8));
|
||||||
} finally {
|
} finally {
|
||||||
buffer.release();
|
buffer.release();
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user