Fix Snappy decoding of large 2-byte literal lengths and copy offsets
Motivation: The Snappy decoder was failing on valid inputs containing literals with 2-byte lengths > 0x8000 or copies with 2-byte offsets >= 0x8000. The decoder was also enforcing an artificially low offset limit of 0x7FFF, something the Snappy format description advises against, and which prevents decoding valid inputs generated by other encoders. Modifications: Interpret 2-byte literal lengths and 2-byte copy offsets as unsigned shorts, in accordance with the format description and reference implementation. Allow any positive offset value. Throw an appropriate exception for negative values (which can theoretically occur due to arithmetic overflow on 4-byte offsets, but are unlikely to occur in the wild). Result: The Snappy decoder can handle valid inputs that previously caused it to throw exceptions.
This commit is contained in:
parent
ce241bd11e
commit
f40ecc3f10
@ -403,7 +403,7 @@ public final class Snappy {
|
||||
if (in.readableBytes() < 2) {
|
||||
return NOT_ENOUGH_INPUT;
|
||||
}
|
||||
length = in.readShortLE();
|
||||
length = in.readUnsignedShortLE();
|
||||
break;
|
||||
case 62:
|
||||
if (in.readableBytes() < 3) {
|
||||
@ -495,7 +495,7 @@ public final class Snappy {
|
||||
|
||||
int initialIndex = out.writerIndex();
|
||||
int length = 1 + (tag >> 2 & 0x03f);
|
||||
int offset = in.readShortLE();
|
||||
int offset = in.readUnsignedShortLE();
|
||||
|
||||
validateOffset(offset, writtenSoFar);
|
||||
|
||||
@ -565,7 +565,7 @@ public final class Snappy {
|
||||
|
||||
/**
|
||||
* Validates that the offset extracted from a compressed reference is within
|
||||
* the permissible bounds of an offset (4 <= offset <= 32768), and does not
|
||||
* the permissible bounds of an offset (0 < offset < Integer.MAX_VALUE), and does not
|
||||
* exceed the length of the chunk currently read so far.
|
||||
*
|
||||
* @param offset The offset extracted from the compressed reference
|
||||
@ -573,12 +573,13 @@ public final class Snappy {
|
||||
* @throws DecompressionException if the offset is invalid
|
||||
*/
|
||||
private static void validateOffset(int offset, int chunkSizeSoFar) {
|
||||
if (offset > Short.MAX_VALUE) {
|
||||
throw new DecompressionException("Offset exceeds maximum permissible value");
|
||||
if (offset == 0) {
|
||||
throw new DecompressionException("Offset is less than minimum permissible value");
|
||||
}
|
||||
|
||||
if (offset <= 0) {
|
||||
throw new DecompressionException("Offset is less than minimum permissible value");
|
||||
if (offset < 0) {
|
||||
// Due to arithmetic overflow
|
||||
throw new DecompressionException("Offset is greater than maximum value supported by this implementation");
|
||||
}
|
||||
|
||||
if (offset > chunkSizeSoFar) {
|
||||
|
@ -284,4 +284,39 @@ public class SnappyTest {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLarge2ByteLiteralLengthAndCopyOffset() {
|
||||
ByteBuf compressed = Unpooled.buffer();
|
||||
ByteBuf actualDecompressed = Unpooled.buffer();
|
||||
ByteBuf expectedDecompressed = Unpooled.buffer().writeByte(0x01).writeZero(0x8000).writeByte(0x01);
|
||||
try {
|
||||
// Generate a Snappy-encoded buffer that can only be decompressed correctly if
|
||||
// the decoder treats 2-byte literal lengths and 2-byte copy offsets as unsigned values.
|
||||
|
||||
// Write preamble, uncompressed content length (0x8002) encoded as varint.
|
||||
compressed.writeByte(0x82).writeByte(0x80).writeByte(0x02);
|
||||
|
||||
// Write a literal consisting of 0x01 followed by 0x8000 zeroes.
|
||||
// The total length of this literal is 0x8001, which gets encoded as 0x8000 (length - 1).
|
||||
// This length was selected because the encoded form is one larger than the maximum value
|
||||
// representable using a signed 16-bit integer, and we want to assert the decoder is reading
|
||||
// the length as an unsigned value.
|
||||
compressed.writeByte(61 << 2); // tag for LITERAL with a 2-byte length
|
||||
compressed.writeShortLE(0x8000); // length - 1
|
||||
compressed.writeByte(0x01).writeZero(0x8000); // literal content
|
||||
|
||||
// Similarly, for a 2-byte copy operation we want to ensure the offset is treated as unsigned.
|
||||
// Copy the initial 0x01 which was written 0x8001 bytes back in the stream.
|
||||
compressed.writeByte(0x02); // tag for COPY with 2-byte offset, length = 1
|
||||
compressed.writeShortLE(0x8001); // offset
|
||||
|
||||
snappy.decode(compressed, actualDecompressed);
|
||||
assertEquals(expectedDecompressed, actualDecompressed);
|
||||
} finally {
|
||||
compressed.release();
|
||||
actualDecompressed.release();
|
||||
expectedDecompressed.release();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user