Fix hash function and hash table size in Snappy

Motivation:

1. Hash function in the Snappy encoding is wrong probably: used '+' instead of '*'. See the reference implementation [1].
2. Size of the hash table is calculated, but not applied.

Modifications:

1. Fix hash function: replace addition by multiplication.
2. Allocate hash table with calculated size.
3. Use an `Integer.numberOfLeadingZeros` trick for calculate log2.
4. Release buffers in tests.

Result:

1. Better compression. In the test `encodeAndDecodeLongTextUsesCopy` now compressed size is 175 instead of 180 before this change.
2. No redundant allocations for hash table.
3. A bit faster the calc of shift (less an expensive math operations).

[1] 513df5fb5a/snappy.cc (L67)
This commit is contained in:
Nikolay Fedorovskikh 2017-07-31 00:55:07 +05:00 committed by Norman Maurer
parent 2988fb8eeb
commit 6ab9c177ac
2 changed files with 97 additions and 44 deletions

View File

@ -72,7 +72,7 @@ public final class Snappy {
final int baseIndex = inIndex; final int baseIndex = inIndex;
final short[] table = getHashTable(length); final short[] table = getHashTable(length);
final int shift = 32 - (int) Math.floor(Math.log(table.length) / Math.log(2)); final int shift = Integer.numberOfLeadingZeros(table.length) + 1;
int nextEmit = inIndex; int nextEmit = inIndex;
@ -148,7 +148,7 @@ public final class Snappy {
* @return A 32-bit hash of 4 bytes located at index * @return A 32-bit hash of 4 bytes located at index
*/ */
private static int hash(ByteBuf in, int index, int shift) { private static int hash(ByteBuf in, int index, int shift) {
return in.getInt(index) + 0x1e35a7bd >>> shift; return in.getInt(index) * 0x1e35a7bd >>> shift;
} }
/** /**
@ -162,15 +162,7 @@ public final class Snappy {
while (htSize < MAX_HT_SIZE && htSize < inputSize) { while (htSize < MAX_HT_SIZE && htSize < inputSize) {
htSize <<= 1; htSize <<= 1;
} }
return new short[htSize];
short[] table;
if (htSize <= 256) {
table = new short[256];
} else {
table = new short[MAX_HT_SIZE];
}
return table;
} }
/** /**

View File

@ -17,6 +17,7 @@ package io.netty.handler.codec.compression;
import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled; import io.netty.buffer.Unpooled;
import io.netty.util.CharsetUtil;
import org.junit.After; import org.junit.After;
import org.junit.Test; import org.junit.Test;
@ -46,6 +47,10 @@ public class SnappyTest {
0x6e, 0x65, 0x74, 0x74, 0x79 0x6e, 0x65, 0x74, 0x74, 0x79
}); });
assertEquals("Literal was not decoded correctly", expected, out); assertEquals("Literal was not decoded correctly", expected, out);
in.release();
out.release();
expected.release();
} }
@Test @Test
@ -65,6 +70,10 @@ public class SnappyTest {
0x6e, 0x65, 0x74, 0x74, 0x79, 0x6e, 0x65, 0x74, 0x74, 0x79 0x6e, 0x65, 0x74, 0x74, 0x79, 0x6e, 0x65, 0x74, 0x74, 0x79
}); });
assertEquals("Copy was not decoded correctly", expected, out); assertEquals("Copy was not decoded correctly", expected, out);
in.release();
out.release();
expected.release();
} }
@Test(expected = DecompressionException.class) @Test(expected = DecompressionException.class)
@ -77,7 +86,12 @@ public class SnappyTest {
0x00 // INVALID offset (< 1) 0x00 // INVALID offset (< 1)
}); });
ByteBuf out = Unpooled.buffer(10); ByteBuf out = Unpooled.buffer(10);
try {
snappy.decode(in, out); snappy.decode(in, out);
} finally {
in.release();
out.release();
}
} }
@Test(expected = DecompressionException.class) @Test(expected = DecompressionException.class)
@ -90,7 +104,12 @@ public class SnappyTest {
0x0b // INVALID offset (greater than chunk size) 0x0b // INVALID offset (greater than chunk size)
}); });
ByteBuf out = Unpooled.buffer(10); ByteBuf out = Unpooled.buffer(10);
try {
snappy.decode(in, out); snappy.decode(in, out);
} finally {
in.release();
out.release();
}
} }
@Test(expected = DecompressionException.class) @Test(expected = DecompressionException.class)
@ -101,7 +120,12 @@ public class SnappyTest {
0x6e, 0x65, 0x74, 0x74, 0x79, // "netty" 0x6e, 0x65, 0x74, 0x74, 0x79, // "netty"
}); });
ByteBuf out = Unpooled.buffer(10); ByteBuf out = Unpooled.buffer(10);
try {
snappy.decode(in, out); snappy.decode(in, out);
} finally {
in.release();
out.release();
}
} }
@Test @Test
@ -118,22 +142,25 @@ public class SnappyTest {
0x6e, 0x65, 0x74, 0x74, 0x79 // "netty" 0x6e, 0x65, 0x74, 0x74, 0x79 // "netty"
}); });
assertEquals("Encoded literal was invalid", expected, out); assertEquals("Encoded literal was invalid", expected, out);
in.release();
out.release();
expected.release();
} }
@Test @Test
public void encodeLongTextUsesCopy() throws Exception { public void encodeAndDecodeLongTextUsesCopy() throws Exception {
ByteBuf in = Unpooled.wrappedBuffer( String srcStr = "Netty has been designed carefully with the experiences " +
("Netty has been designed carefully with the experiences " +
"earned from the implementation of a lot of protocols " + "earned from the implementation of a lot of protocols " +
"such as FTP, SMTP, HTTP, and various binary and " + "such as FTP, SMTP, HTTP, and various binary and " +
"text-based legacy protocols").getBytes("US-ASCII") "text-based legacy protocols";
); ByteBuf in = Unpooled.wrappedBuffer(srcStr.getBytes("US-ASCII"));
ByteBuf out = Unpooled.buffer(180); ByteBuf out = Unpooled.buffer(180);
snappy.encode(in, out, in.readableBytes()); snappy.encode(in, out, in.readableBytes());
// The only compressibility in the above are the words "the ", // The only compressibility in the above are the words:
// and "protocols", so this is a literal, followed by a copy // "the ", "rotocols", " of ", "TP, " and "and ". So this is a literal,
// followed by another literal, followed by another copy // followed by a copy followed by another literal, followed by another copy...
ByteBuf expected = Unpooled.wrappedBuffer(new byte[] { ByteBuf expected = Unpooled.wrappedBuffer(new byte[] {
-0x49, 0x01, // preamble length -0x49, 0x01, // preamble length
-0x10, 0x42, // literal tag + length -0x10, 0x42, // literal tag + length
@ -147,27 +174,56 @@ public class SnappyTest {
0x6e, 0x63, 0x65, 0x73, 0x20, 0x65, 0x61, 0x72, 0x6e, 0x65, 0x6e, 0x63, 0x65, 0x73, 0x20, 0x65, 0x61, 0x72, 0x6e, 0x65,
0x64, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x64, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20,
// First copy (the) // copy of "the "
0x01, 0x1C, -0x10, 0x01, 0x1c, 0x58,
// Next literal // Next literal
0x66, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61,
0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x6f, 0x66, 0x20, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x6f, 0x66, 0x20, 0x61, 0x20,
0x20, 0x6c, 0x6f, 0x74, 0x20, 0x6f, 0x66, 0x20, 0x70, 0x72, 0x6c, 0x6f, 0x74,
0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x73, 0x20, 0x73, 0x75,
0x63, 0x68, 0x20, 0x61, 0x73, 0x20, 0x46, 0x54, 0x50, 0x2c,
0x20, 0x53, 0x4d, 0x54, 0x50, 0x2c, 0x20, 0x48, 0x54, 0x54,
0x50, 0x2c, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x76, 0x61, 0x72,
0x69, 0x6f, 0x75, 0x73, 0x20, 0x62, 0x69, 0x6e, 0x61, 0x72,
0x79, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x74, 0x65, 0x78, 0x74,
0x2d, 0x62, 0x61, 0x73, 0x65, 0x64, 0x20, 0x6c, 0x65, 0x67,
0x61, 0x63, 0x79, 0x20,
// Second copy (protocols) // copy of " of "
0x15, 0x4c 0x01, 0x09, 0x60,
// literal
0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x73, 0x20,
0x73, 0x75, 0x63, 0x68, 0x20, 0x61, 0x73, 0x20, 0x46, 0x54,
0x50, 0x2c, 0x20, 0x53, 0x4d,
// copy of " TP, "
0x01, 0x06, 0x04,
// literal
0x48, 0x54,
// copy of " TP, "
0x01, 0x06, 0x44,
// literal
0x61, 0x6e, 0x64, 0x20, 0x76, 0x61, 0x72, 0x69, 0x6f, 0x75,
0x73, 0x20, 0x62, 0x69, 0x6e, 0x61, 0x72, 0x79,
// copy of "and "
0x05, 0x13, 0x48,
// literal
0x74, 0x65, 0x78, 0x74, 0x2d, 0x62, 0x61, 0x73, 0x65,
0x64, 0x20, 0x6c, 0x65, 0x67, 0x61, 0x63, 0x79, 0x20, 0x70,
// copy of "rotocols"
0x11, 0x4c,
}); });
assertEquals("Encoded result was incorrect", expected, out); assertEquals("Encoded result was incorrect", expected, out);
// Decode
ByteBuf outDecoded = Unpooled.buffer();
snappy.decode(out, outDecoded);
assertEquals(srcStr, outDecoded.getCharSequence(0, outDecoded.writerIndex(), CharsetUtil.US_ASCII));
in.release();
out.release();
outDecoded.release();
} }
@Test @Test
@ -176,6 +232,7 @@ public class SnappyTest {
'n', 'e', 't', 't', 'y' 'n', 'e', 't', 't', 'y'
}); });
assertEquals(maskChecksum(0xd6cb8b55), calculateChecksum(input)); assertEquals(maskChecksum(0xd6cb8b55), calculateChecksum(input));
input.release();
} }
@Test @Test
@ -185,6 +242,7 @@ public class SnappyTest {
}); });
validateChecksum(maskChecksum(0x2d4d3535), input); validateChecksum(maskChecksum(0x2d4d3535), input);
input.release();
} }
@Test(expected = DecompressionException.class) @Test(expected = DecompressionException.class)
@ -192,13 +250,16 @@ public class SnappyTest {
ByteBuf input = Unpooled.wrappedBuffer(new byte[] { ByteBuf input = Unpooled.wrappedBuffer(new byte[] {
'y', 't', 't', 'e', 'n' 'y', 't', 't', 'e', 'n'
}); });
try {
validateChecksum(maskChecksum(0xd6cb8b55), input); validateChecksum(maskChecksum(0xd6cb8b55), input);
} finally {
input.release();
}
} }
@Test @Test
public void testEncodeLiteralAndDecodeLiteral() { public void testEncodeLiteralAndDecodeLiteral() {
int[] lengths = new int[] { int[] lengths = {
0x11, // default 0x11, // default
0x100, // case 60 0x100, // case 60
0x1000, // case 61 0x1000, // case 61
@ -211,9 +272,9 @@ public class SnappyTest {
ByteBuf decoded = Unpooled.buffer(10); ByteBuf decoded = Unpooled.buffer(10);
ByteBuf expected = Unpooled.wrappedBuffer(new byte[len]); ByteBuf expected = Unpooled.wrappedBuffer(new byte[len]);
try { try {
Snappy.encodeLiteral(in, encoded, len); encodeLiteral(in, encoded, len);
byte tag = encoded.readByte(); byte tag = encoded.readByte();
Snappy.decodeLiteral(tag, encoded, decoded); decodeLiteral(tag, encoded, decoded);
assertEquals("Encoded or decoded literal was incorrect", expected, decoded); assertEquals("Encoded or decoded literal was incorrect", expected, decoded);
} finally { } finally {
in.release(); in.release();