Fix Snappy decoding of large 2-byte literal lengths and copy offsets

Motivation:

The Snappy decoder was failing on valid inputs containing literals
with 2-byte lengths > 0x8000 or copies with 2-byte offsets >= 0x8000.

The decoder was also enforcing an artificially low offset limit of
0x7FFF, something the Snappy format description advises against,
and which prevents decoding valid inputs generated by other encoders.

Modifications:

Interpret 2-byte literal lengths and 2-byte copy offsets as unsigned
shorts, in accordance with the format description and reference
implementation.

Allow any positive offset value. Throw an appropriate exception
for negative values (which can theoretically occur due to arithmetic
overflow on 4-byte offsets, but are unlikely to occur in the wild).

Result:

The Snappy decoder can handle valid inputs that previously caused
it to throw exceptions.
This commit is contained in:
David Nault 2018-02-19 00:37:15 -08:00 committed by Norman Maurer
parent ce241bd11e
commit f40ecc3f10
2 changed files with 43 additions and 7 deletions

View File

@ -403,7 +403,7 @@ public final class Snappy {
if (in.readableBytes() < 2) {
return NOT_ENOUGH_INPUT;
}
length = in.readShortLE();
length = in.readUnsignedShortLE();
break;
case 62:
if (in.readableBytes() < 3) {
@ -495,7 +495,7 @@ public final class Snappy {
int initialIndex = out.writerIndex();
int length = 1 + (tag >> 2 & 0x03f);
int offset = in.readShortLE();
int offset = in.readUnsignedShortLE();
validateOffset(offset, writtenSoFar);
@ -565,7 +565,7 @@ public final class Snappy {
/**
* Validates that the offset extracted from a compressed reference is within
* the permissible bounds of an offset (4 <= offset <= 32768), and does not
* the permissible bounds of an offset (0 < offset < Integer.MAX_VALUE), and does not
* exceed the length of the chunk currently read so far.
*
* @param offset The offset extracted from the compressed reference
@ -573,12 +573,13 @@ public final class Snappy {
* @throws DecompressionException if the offset is invalid
*/
private static void validateOffset(int offset, int chunkSizeSoFar) {
if (offset > Short.MAX_VALUE) {
throw new DecompressionException("Offset exceeds maximum permissible value");
if (offset == 0) {
throw new DecompressionException("Offset is less than minimum permissible value");
}
if (offset <= 0) {
throw new DecompressionException("Offset is less than minimum permissible value");
if (offset < 0) {
// Due to arithmetic overflow
throw new DecompressionException("Offset is greater than maximum value supported by this implementation");
}
if (offset > chunkSizeSoFar) {

View File

@ -284,4 +284,39 @@ public class SnappyTest {
}
}
}
@Test
public void testLarge2ByteLiteralLengthAndCopyOffset() {
ByteBuf compressed = Unpooled.buffer();
ByteBuf actualDecompressed = Unpooled.buffer();
ByteBuf expectedDecompressed = Unpooled.buffer().writeByte(0x01).writeZero(0x8000).writeByte(0x01);
try {
// Generate a Snappy-encoded buffer that can only be decompressed correctly if
// the decoder treats 2-byte literal lengths and 2-byte copy offsets as unsigned values.
// Write preamble, uncompressed content length (0x8002) encoded as varint.
compressed.writeByte(0x82).writeByte(0x80).writeByte(0x02);
// Write a literal consisting of 0x01 followed by 0x8000 zeroes.
// The total length of this literal is 0x8001, which gets encoded as 0x8000 (length - 1).
// This length was selected because the encoded form is one larger than the maximum value
// representable using a signed 16-bit integer, and we want to assert the decoder is reading
// the length as an unsigned value.
compressed.writeByte(61 << 2); // tag for LITERAL with a 2-byte length
compressed.writeShortLE(0x8000); // length - 1
compressed.writeByte(0x01).writeZero(0x8000); // literal content
// Similarly, for a 2-byte copy operation we want to ensure the offset is treated as unsigned.
// Copy the initial 0x01 which was written 0x8001 bytes back in the stream.
compressed.writeByte(0x02); // tag for COPY with 2-byte offset, length = 1
compressed.writeShortLE(0x8001); // offset
snappy.decode(compressed, actualDecompressed);
assertEquals(expectedDecompressed, actualDecompressed);
} finally {
compressed.release();
actualDecompressed.release();
expectedDecompressed.release();
}
}
}