Fix hash function and hash table size in Snappy
Motivation:
1. Hash function in the Snappy encoding is wrong probably: used '+' instead of '*'. See the reference implementation [1].
2. Size of the hash table is calculated, but not applied.
Modifications:
1. Fix hash function: replace addition by multiplication.
2. Allocate hash table with calculated size.
3. Use an `Integer.numberOfLeadingZeros` trick for calculate log2.
4. Release buffers in tests.
Result:
1. Better compression. In the test `encodeAndDecodeLongTextUsesCopy` now compressed size is 175 instead of 180 before this change.
2. No redundant allocations for hash table.
3. A bit faster the calc of shift (less an expensive math operations).
[1] 513df5fb5a/snappy.cc (L67)
This commit is contained in:
parent
2988fb8eeb
commit
6ab9c177ac
@ -72,7 +72,7 @@ public final class Snappy {
|
|||||||
final int baseIndex = inIndex;
|
final int baseIndex = inIndex;
|
||||||
|
|
||||||
final short[] table = getHashTable(length);
|
final short[] table = getHashTable(length);
|
||||||
final int shift = 32 - (int) Math.floor(Math.log(table.length) / Math.log(2));
|
final int shift = Integer.numberOfLeadingZeros(table.length) + 1;
|
||||||
|
|
||||||
int nextEmit = inIndex;
|
int nextEmit = inIndex;
|
||||||
|
|
||||||
@ -148,7 +148,7 @@ public final class Snappy {
|
|||||||
* @return A 32-bit hash of 4 bytes located at index
|
* @return A 32-bit hash of 4 bytes located at index
|
||||||
*/
|
*/
|
||||||
private static int hash(ByteBuf in, int index, int shift) {
|
private static int hash(ByteBuf in, int index, int shift) {
|
||||||
return in.getInt(index) + 0x1e35a7bd >>> shift;
|
return in.getInt(index) * 0x1e35a7bd >>> shift;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -162,15 +162,7 @@ public final class Snappy {
|
|||||||
while (htSize < MAX_HT_SIZE && htSize < inputSize) {
|
while (htSize < MAX_HT_SIZE && htSize < inputSize) {
|
||||||
htSize <<= 1;
|
htSize <<= 1;
|
||||||
}
|
}
|
||||||
|
return new short[htSize];
|
||||||
short[] table;
|
|
||||||
if (htSize <= 256) {
|
|
||||||
table = new short[256];
|
|
||||||
} else {
|
|
||||||
table = new short[MAX_HT_SIZE];
|
|
||||||
}
|
|
||||||
|
|
||||||
return table;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -17,6 +17,7 @@ package io.netty.handler.codec.compression;
|
|||||||
|
|
||||||
import io.netty.buffer.ByteBuf;
|
import io.netty.buffer.ByteBuf;
|
||||||
import io.netty.buffer.Unpooled;
|
import io.netty.buffer.Unpooled;
|
||||||
|
import io.netty.util.CharsetUtil;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
@ -46,6 +47,10 @@ public class SnappyTest {
|
|||||||
0x6e, 0x65, 0x74, 0x74, 0x79
|
0x6e, 0x65, 0x74, 0x74, 0x79
|
||||||
});
|
});
|
||||||
assertEquals("Literal was not decoded correctly", expected, out);
|
assertEquals("Literal was not decoded correctly", expected, out);
|
||||||
|
|
||||||
|
in.release();
|
||||||
|
out.release();
|
||||||
|
expected.release();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -65,6 +70,10 @@ public class SnappyTest {
|
|||||||
0x6e, 0x65, 0x74, 0x74, 0x79, 0x6e, 0x65, 0x74, 0x74, 0x79
|
0x6e, 0x65, 0x74, 0x74, 0x79, 0x6e, 0x65, 0x74, 0x74, 0x79
|
||||||
});
|
});
|
||||||
assertEquals("Copy was not decoded correctly", expected, out);
|
assertEquals("Copy was not decoded correctly", expected, out);
|
||||||
|
|
||||||
|
in.release();
|
||||||
|
out.release();
|
||||||
|
expected.release();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expected = DecompressionException.class)
|
@Test(expected = DecompressionException.class)
|
||||||
@ -77,7 +86,12 @@ public class SnappyTest {
|
|||||||
0x00 // INVALID offset (< 1)
|
0x00 // INVALID offset (< 1)
|
||||||
});
|
});
|
||||||
ByteBuf out = Unpooled.buffer(10);
|
ByteBuf out = Unpooled.buffer(10);
|
||||||
snappy.decode(in, out);
|
try {
|
||||||
|
snappy.decode(in, out);
|
||||||
|
} finally {
|
||||||
|
in.release();
|
||||||
|
out.release();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expected = DecompressionException.class)
|
@Test(expected = DecompressionException.class)
|
||||||
@ -90,7 +104,12 @@ public class SnappyTest {
|
|||||||
0x0b // INVALID offset (greater than chunk size)
|
0x0b // INVALID offset (greater than chunk size)
|
||||||
});
|
});
|
||||||
ByteBuf out = Unpooled.buffer(10);
|
ByteBuf out = Unpooled.buffer(10);
|
||||||
snappy.decode(in, out);
|
try {
|
||||||
|
snappy.decode(in, out);
|
||||||
|
} finally {
|
||||||
|
in.release();
|
||||||
|
out.release();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expected = DecompressionException.class)
|
@Test(expected = DecompressionException.class)
|
||||||
@ -101,7 +120,12 @@ public class SnappyTest {
|
|||||||
0x6e, 0x65, 0x74, 0x74, 0x79, // "netty"
|
0x6e, 0x65, 0x74, 0x74, 0x79, // "netty"
|
||||||
});
|
});
|
||||||
ByteBuf out = Unpooled.buffer(10);
|
ByteBuf out = Unpooled.buffer(10);
|
||||||
snappy.decode(in, out);
|
try {
|
||||||
|
snappy.decode(in, out);
|
||||||
|
} finally {
|
||||||
|
in.release();
|
||||||
|
out.release();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -118,22 +142,25 @@ public class SnappyTest {
|
|||||||
0x6e, 0x65, 0x74, 0x74, 0x79 // "netty"
|
0x6e, 0x65, 0x74, 0x74, 0x79 // "netty"
|
||||||
});
|
});
|
||||||
assertEquals("Encoded literal was invalid", expected, out);
|
assertEquals("Encoded literal was invalid", expected, out);
|
||||||
|
|
||||||
|
in.release();
|
||||||
|
out.release();
|
||||||
|
expected.release();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void encodeLongTextUsesCopy() throws Exception {
|
public void encodeAndDecodeLongTextUsesCopy() throws Exception {
|
||||||
ByteBuf in = Unpooled.wrappedBuffer(
|
String srcStr = "Netty has been designed carefully with the experiences " +
|
||||||
("Netty has been designed carefully with the experiences " +
|
"earned from the implementation of a lot of protocols " +
|
||||||
"earned from the implementation of a lot of protocols " +
|
"such as FTP, SMTP, HTTP, and various binary and " +
|
||||||
"such as FTP, SMTP, HTTP, and various binary and " +
|
"text-based legacy protocols";
|
||||||
"text-based legacy protocols").getBytes("US-ASCII")
|
ByteBuf in = Unpooled.wrappedBuffer(srcStr.getBytes("US-ASCII"));
|
||||||
);
|
|
||||||
ByteBuf out = Unpooled.buffer(180);
|
ByteBuf out = Unpooled.buffer(180);
|
||||||
snappy.encode(in, out, in.readableBytes());
|
snappy.encode(in, out, in.readableBytes());
|
||||||
|
|
||||||
// The only compressibility in the above are the words "the ",
|
// The only compressibility in the above are the words:
|
||||||
// and "protocols", so this is a literal, followed by a copy
|
// "the ", "rotocols", " of ", "TP, " and "and ". So this is a literal,
|
||||||
// followed by another literal, followed by another copy
|
// followed by a copy followed by another literal, followed by another copy...
|
||||||
ByteBuf expected = Unpooled.wrappedBuffer(new byte[] {
|
ByteBuf expected = Unpooled.wrappedBuffer(new byte[] {
|
||||||
-0x49, 0x01, // preamble length
|
-0x49, 0x01, // preamble length
|
||||||
-0x10, 0x42, // literal tag + length
|
-0x10, 0x42, // literal tag + length
|
||||||
@ -147,27 +174,56 @@ public class SnappyTest {
|
|||||||
0x6e, 0x63, 0x65, 0x73, 0x20, 0x65, 0x61, 0x72, 0x6e, 0x65,
|
0x6e, 0x63, 0x65, 0x73, 0x20, 0x65, 0x61, 0x72, 0x6e, 0x65,
|
||||||
0x64, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20,
|
0x64, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20,
|
||||||
|
|
||||||
// First copy (the)
|
// copy of "the "
|
||||||
0x01, 0x1C, -0x10,
|
0x01, 0x1c, 0x58,
|
||||||
|
|
||||||
// Next literal
|
// Next literal
|
||||||
0x66, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74,
|
0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61,
|
||||||
0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x6f, 0x66, 0x20, 0x61,
|
0x74, 0x69, 0x6f, 0x6e, 0x20, 0x6f, 0x66, 0x20, 0x61, 0x20,
|
||||||
0x20, 0x6c, 0x6f, 0x74, 0x20, 0x6f, 0x66, 0x20, 0x70, 0x72,
|
0x6c, 0x6f, 0x74,
|
||||||
0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x73, 0x20, 0x73, 0x75,
|
|
||||||
0x63, 0x68, 0x20, 0x61, 0x73, 0x20, 0x46, 0x54, 0x50, 0x2c,
|
|
||||||
0x20, 0x53, 0x4d, 0x54, 0x50, 0x2c, 0x20, 0x48, 0x54, 0x54,
|
|
||||||
0x50, 0x2c, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x76, 0x61, 0x72,
|
|
||||||
0x69, 0x6f, 0x75, 0x73, 0x20, 0x62, 0x69, 0x6e, 0x61, 0x72,
|
|
||||||
0x79, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x74, 0x65, 0x78, 0x74,
|
|
||||||
0x2d, 0x62, 0x61, 0x73, 0x65, 0x64, 0x20, 0x6c, 0x65, 0x67,
|
|
||||||
0x61, 0x63, 0x79, 0x20,
|
|
||||||
|
|
||||||
// Second copy (protocols)
|
// copy of " of "
|
||||||
0x15, 0x4c
|
0x01, 0x09, 0x60,
|
||||||
|
|
||||||
|
// literal
|
||||||
|
0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x73, 0x20,
|
||||||
|
0x73, 0x75, 0x63, 0x68, 0x20, 0x61, 0x73, 0x20, 0x46, 0x54,
|
||||||
|
0x50, 0x2c, 0x20, 0x53, 0x4d,
|
||||||
|
|
||||||
|
// copy of " TP, "
|
||||||
|
0x01, 0x06, 0x04,
|
||||||
|
|
||||||
|
// literal
|
||||||
|
0x48, 0x54,
|
||||||
|
|
||||||
|
// copy of " TP, "
|
||||||
|
0x01, 0x06, 0x44,
|
||||||
|
|
||||||
|
// literal
|
||||||
|
0x61, 0x6e, 0x64, 0x20, 0x76, 0x61, 0x72, 0x69, 0x6f, 0x75,
|
||||||
|
0x73, 0x20, 0x62, 0x69, 0x6e, 0x61, 0x72, 0x79,
|
||||||
|
|
||||||
|
// copy of "and "
|
||||||
|
0x05, 0x13, 0x48,
|
||||||
|
|
||||||
|
// literal
|
||||||
|
0x74, 0x65, 0x78, 0x74, 0x2d, 0x62, 0x61, 0x73, 0x65,
|
||||||
|
0x64, 0x20, 0x6c, 0x65, 0x67, 0x61, 0x63, 0x79, 0x20, 0x70,
|
||||||
|
|
||||||
|
// copy of "rotocols"
|
||||||
|
0x11, 0x4c,
|
||||||
});
|
});
|
||||||
|
|
||||||
assertEquals("Encoded result was incorrect", expected, out);
|
assertEquals("Encoded result was incorrect", expected, out);
|
||||||
|
|
||||||
|
// Decode
|
||||||
|
ByteBuf outDecoded = Unpooled.buffer();
|
||||||
|
snappy.decode(out, outDecoded);
|
||||||
|
assertEquals(srcStr, outDecoded.getCharSequence(0, outDecoded.writerIndex(), CharsetUtil.US_ASCII));
|
||||||
|
|
||||||
|
in.release();
|
||||||
|
out.release();
|
||||||
|
outDecoded.release();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -176,6 +232,7 @@ public class SnappyTest {
|
|||||||
'n', 'e', 't', 't', 'y'
|
'n', 'e', 't', 't', 'y'
|
||||||
});
|
});
|
||||||
assertEquals(maskChecksum(0xd6cb8b55), calculateChecksum(input));
|
assertEquals(maskChecksum(0xd6cb8b55), calculateChecksum(input));
|
||||||
|
input.release();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -185,6 +242,7 @@ public class SnappyTest {
|
|||||||
});
|
});
|
||||||
|
|
||||||
validateChecksum(maskChecksum(0x2d4d3535), input);
|
validateChecksum(maskChecksum(0x2d4d3535), input);
|
||||||
|
input.release();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expected = DecompressionException.class)
|
@Test(expected = DecompressionException.class)
|
||||||
@ -192,13 +250,16 @@ public class SnappyTest {
|
|||||||
ByteBuf input = Unpooled.wrappedBuffer(new byte[] {
|
ByteBuf input = Unpooled.wrappedBuffer(new byte[] {
|
||||||
'y', 't', 't', 'e', 'n'
|
'y', 't', 't', 'e', 'n'
|
||||||
});
|
});
|
||||||
|
try {
|
||||||
validateChecksum(maskChecksum(0xd6cb8b55), input);
|
validateChecksum(maskChecksum(0xd6cb8b55), input);
|
||||||
|
} finally {
|
||||||
|
input.release();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testEncodeLiteralAndDecodeLiteral() {
|
public void testEncodeLiteralAndDecodeLiteral() {
|
||||||
int[] lengths = new int[] {
|
int[] lengths = {
|
||||||
0x11, // default
|
0x11, // default
|
||||||
0x100, // case 60
|
0x100, // case 60
|
||||||
0x1000, // case 61
|
0x1000, // case 61
|
||||||
@ -211,9 +272,9 @@ public class SnappyTest {
|
|||||||
ByteBuf decoded = Unpooled.buffer(10);
|
ByteBuf decoded = Unpooled.buffer(10);
|
||||||
ByteBuf expected = Unpooled.wrappedBuffer(new byte[len]);
|
ByteBuf expected = Unpooled.wrappedBuffer(new byte[len]);
|
||||||
try {
|
try {
|
||||||
Snappy.encodeLiteral(in, encoded, len);
|
encodeLiteral(in, encoded, len);
|
||||||
byte tag = encoded.readByte();
|
byte tag = encoded.readByte();
|
||||||
Snappy.decodeLiteral(tag, encoded, decoded);
|
decodeLiteral(tag, encoded, decoded);
|
||||||
assertEquals("Encoded or decoded literal was incorrect", expected, decoded);
|
assertEquals("Encoded or decoded literal was incorrect", expected, decoded);
|
||||||
} finally {
|
} finally {
|
||||||
in.release();
|
in.release();
|
||||||
|
Loading…
Reference in New Issue
Block a user