Fix hash function and hash table size in Snappy

Motivation:

1. Hash function in the Snappy encoding is wrong probably: used '+' instead of '*'. See the reference implementation [1].
2. Size of the hash table is calculated, but not applied.

Modifications:

1. Fix hash function: replace addition by multiplication.
2. Allocate hash table with calculated size.
3. Use an `Integer.numberOfLeadingZeros` trick for calculate log2.
4. Release buffers in tests.

Result:

1. Better compression. In the test `encodeAndDecodeLongTextUsesCopy` now compressed size is 175 instead of 180 before this change.
2. No redundant allocations for hash table.
3. A bit faster the calc of shift (less an expensive math operations).

[1] 513df5fb5a/snappy.cc (L67)
This commit is contained in:
Nikolay Fedorovskikh 2017-07-31 00:55:07 +05:00 committed by Norman Maurer
parent 2988fb8eeb
commit 6ab9c177ac
2 changed files with 97 additions and 44 deletions

View File

@ -72,7 +72,7 @@ public final class Snappy {
final int baseIndex = inIndex;
final short[] table = getHashTable(length);
final int shift = 32 - (int) Math.floor(Math.log(table.length) / Math.log(2));
final int shift = Integer.numberOfLeadingZeros(table.length) + 1;
int nextEmit = inIndex;
@ -148,7 +148,7 @@ public final class Snappy {
* @return A 32-bit hash of 4 bytes located at index
*/
private static int hash(ByteBuf in, int index, int shift) {
return in.getInt(index) + 0x1e35a7bd >>> shift;
return in.getInt(index) * 0x1e35a7bd >>> shift;
}
/**
@ -162,15 +162,7 @@ public final class Snappy {
while (htSize < MAX_HT_SIZE && htSize < inputSize) {
htSize <<= 1;
}
short[] table;
if (htSize <= 256) {
table = new short[256];
} else {
table = new short[MAX_HT_SIZE];
}
return table;
return new short[htSize];
}
/**

View File

@ -17,6 +17,7 @@ package io.netty.handler.codec.compression;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import io.netty.util.CharsetUtil;
import org.junit.After;
import org.junit.Test;
@ -46,6 +47,10 @@ public class SnappyTest {
0x6e, 0x65, 0x74, 0x74, 0x79
});
assertEquals("Literal was not decoded correctly", expected, out);
in.release();
out.release();
expected.release();
}
@Test
@ -65,6 +70,10 @@ public class SnappyTest {
0x6e, 0x65, 0x74, 0x74, 0x79, 0x6e, 0x65, 0x74, 0x74, 0x79
});
assertEquals("Copy was not decoded correctly", expected, out);
in.release();
out.release();
expected.release();
}
@Test(expected = DecompressionException.class)
@ -77,7 +86,12 @@ public class SnappyTest {
0x00 // INVALID offset (< 1)
});
ByteBuf out = Unpooled.buffer(10);
try {
snappy.decode(in, out);
} finally {
in.release();
out.release();
}
}
@Test(expected = DecompressionException.class)
@ -90,7 +104,12 @@ public class SnappyTest {
0x0b // INVALID offset (greater than chunk size)
});
ByteBuf out = Unpooled.buffer(10);
try {
snappy.decode(in, out);
} finally {
in.release();
out.release();
}
}
@Test(expected = DecompressionException.class)
@ -101,7 +120,12 @@ public class SnappyTest {
0x6e, 0x65, 0x74, 0x74, 0x79, // "netty"
});
ByteBuf out = Unpooled.buffer(10);
try {
snappy.decode(in, out);
} finally {
in.release();
out.release();
}
}
@Test
@ -118,22 +142,25 @@ public class SnappyTest {
0x6e, 0x65, 0x74, 0x74, 0x79 // "netty"
});
assertEquals("Encoded literal was invalid", expected, out);
in.release();
out.release();
expected.release();
}
@Test
public void encodeLongTextUsesCopy() throws Exception {
ByteBuf in = Unpooled.wrappedBuffer(
("Netty has been designed carefully with the experiences " +
public void encodeAndDecodeLongTextUsesCopy() throws Exception {
String srcStr = "Netty has been designed carefully with the experiences " +
"earned from the implementation of a lot of protocols " +
"such as FTP, SMTP, HTTP, and various binary and " +
"text-based legacy protocols").getBytes("US-ASCII")
);
"text-based legacy protocols";
ByteBuf in = Unpooled.wrappedBuffer(srcStr.getBytes("US-ASCII"));
ByteBuf out = Unpooled.buffer(180);
snappy.encode(in, out, in.readableBytes());
// The only compressibility in the above are the words "the ",
// and "protocols", so this is a literal, followed by a copy
// followed by another literal, followed by another copy
// The only compressibility in the above are the words:
// "the ", "rotocols", " of ", "TP, " and "and ". So this is a literal,
// followed by a copy followed by another literal, followed by another copy...
ByteBuf expected = Unpooled.wrappedBuffer(new byte[] {
-0x49, 0x01, // preamble length
-0x10, 0x42, // literal tag + length
@ -147,27 +174,56 @@ public class SnappyTest {
0x6e, 0x63, 0x65, 0x73, 0x20, 0x65, 0x61, 0x72, 0x6e, 0x65,
0x64, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20,
// First copy (the)
0x01, 0x1C, -0x10,
// copy of "the "
0x01, 0x1c, 0x58,
// Next literal
0x66, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74,
0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x6f, 0x66, 0x20, 0x61,
0x20, 0x6c, 0x6f, 0x74, 0x20, 0x6f, 0x66, 0x20, 0x70, 0x72,
0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x73, 0x20, 0x73, 0x75,
0x63, 0x68, 0x20, 0x61, 0x73, 0x20, 0x46, 0x54, 0x50, 0x2c,
0x20, 0x53, 0x4d, 0x54, 0x50, 0x2c, 0x20, 0x48, 0x54, 0x54,
0x50, 0x2c, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x76, 0x61, 0x72,
0x69, 0x6f, 0x75, 0x73, 0x20, 0x62, 0x69, 0x6e, 0x61, 0x72,
0x79, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x74, 0x65, 0x78, 0x74,
0x2d, 0x62, 0x61, 0x73, 0x65, 0x64, 0x20, 0x6c, 0x65, 0x67,
0x61, 0x63, 0x79, 0x20,
0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61,
0x74, 0x69, 0x6f, 0x6e, 0x20, 0x6f, 0x66, 0x20, 0x61, 0x20,
0x6c, 0x6f, 0x74,
// Second copy (protocols)
0x15, 0x4c
// copy of " of "
0x01, 0x09, 0x60,
// literal
0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x73, 0x20,
0x73, 0x75, 0x63, 0x68, 0x20, 0x61, 0x73, 0x20, 0x46, 0x54,
0x50, 0x2c, 0x20, 0x53, 0x4d,
// copy of " TP, "
0x01, 0x06, 0x04,
// literal
0x48, 0x54,
// copy of " TP, "
0x01, 0x06, 0x44,
// literal
0x61, 0x6e, 0x64, 0x20, 0x76, 0x61, 0x72, 0x69, 0x6f, 0x75,
0x73, 0x20, 0x62, 0x69, 0x6e, 0x61, 0x72, 0x79,
// copy of "and "
0x05, 0x13, 0x48,
// literal
0x74, 0x65, 0x78, 0x74, 0x2d, 0x62, 0x61, 0x73, 0x65,
0x64, 0x20, 0x6c, 0x65, 0x67, 0x61, 0x63, 0x79, 0x20, 0x70,
// copy of "rotocols"
0x11, 0x4c,
});
assertEquals("Encoded result was incorrect", expected, out);
// Decode
ByteBuf outDecoded = Unpooled.buffer();
snappy.decode(out, outDecoded);
assertEquals(srcStr, outDecoded.getCharSequence(0, outDecoded.writerIndex(), CharsetUtil.US_ASCII));
in.release();
out.release();
outDecoded.release();
}
@Test
@ -176,6 +232,7 @@ public class SnappyTest {
'n', 'e', 't', 't', 'y'
});
assertEquals(maskChecksum(0xd6cb8b55), calculateChecksum(input));
input.release();
}
@Test
@ -185,6 +242,7 @@ public class SnappyTest {
});
validateChecksum(maskChecksum(0x2d4d3535), input);
input.release();
}
@Test(expected = DecompressionException.class)
@ -192,13 +250,16 @@ public class SnappyTest {
ByteBuf input = Unpooled.wrappedBuffer(new byte[] {
'y', 't', 't', 'e', 'n'
});
try {
validateChecksum(maskChecksum(0xd6cb8b55), input);
} finally {
input.release();
}
}
@Test
public void testEncodeLiteralAndDecodeLiteral() {
int[] lengths = new int[] {
int[] lengths = {
0x11, // default
0x100, // case 60
0x1000, // case 61
@ -211,9 +272,9 @@ public class SnappyTest {
ByteBuf decoded = Unpooled.buffer(10);
ByteBuf expected = Unpooled.wrappedBuffer(new byte[len]);
try {
Snappy.encodeLiteral(in, encoded, len);
encodeLiteral(in, encoded, len);
byte tag = encoded.readByte();
Snappy.decodeLiteral(tag, encoded, decoded);
decodeLiteral(tag, encoded, decoded);
assertEquals("Encoded or decoded literal was incorrect", expected, decoded);
} finally {
in.release();