Fix hash function and hash table size in Snappy

Motivation: 1. Hash function in the Snappy encoding is wrong probably: used '+' instead of '*'. See the reference implementation [1]. 2. Size of the hash table is calculated, but not applied. Modifications: 1. Fix hash function: replace addition by multiplication. 2. Allocate hash table with calculated size. 3. Use an `Integer.numberOfLeadingZeros` trick for calculate log2. 4. Release buffers in tests. Result: 1. Better compression. In the test `encodeAndDecodeLongTextUsesCopy` now compressed size is 175 instead of 180 before this change. 2. No redundant allocations for hash table. 3. A bit faster the calc of shift (less an expensive math operations). [1] 513df5fb5a/snappy.cc (L67)
2017-07-31 00:55:07 +05:00 · 2017-07-31 00:55:07 +05:00 · 6ab9c177ac
commit 6ab9c177ac
parent 2988fb8eeb
2 changed files with 97 additions and 44 deletions
--- a/codec/src/main/java/io/netty/handler/codec/compression/Snappy.java
+++ b/codec/src/main/java/io/netty/handler/codec/compression/Snappy.java
@ -72,7 +72,7 @@ public final class Snappy {
        final int baseIndex = inIndex;
        final short[] table = getHashTable(length);
-        final int shift = 32 - (int) Math.floor(Math.log(table.length) / Math.log(2));
+        final int shift = Integer.numberOfLeadingZeros(table.length) + 1;
        int nextEmit = inIndex;
@ -148,7 +148,7 @@ public final class Snappy {
     * @return A 32-bit hash of 4 bytes located at index
     */
    private static int hash(ByteBuf in, int index, int shift) {
-        return in.getInt(index) + 0x1e35a7bd >>> shift;
+        return in.getInt(index) * 0x1e35a7bd >>> shift;
    }
    /**
@ -162,15 +162,7 @@ public final class Snappy {
        while (htSize < MAX_HT_SIZE && htSize < inputSize) {
            htSize <<= 1;
        }
-
+        return new short[htSize];
        short[] table;
        if (htSize <= 256) {
            table = new short[256];
        } else {
            table = new short[MAX_HT_SIZE];
        }
        return table;
    }
    /**
--- a/codec/src/test/java/io/netty/handler/codec/compression/SnappyTest.java
+++ b/codec/src/test/java/io/netty/handler/codec/compression/SnappyTest.java
@ -17,6 +17,7 @@ package io.netty.handler.codec.compression;
 import io.netty.buffer.ByteBuf;
 import io.netty.buffer.Unpooled;
 import io.netty.util.CharsetUtil;
 import org.junit.After;
 import org.junit.Test;
@ -46,6 +47,10 @@ public class SnappyTest {
            0x6e, 0x65, 0x74, 0x74, 0x79
        });
        assertEquals("Literal was not decoded correctly", expected, out);
        in.release();
        out.release();
        expected.release();
    }
    @Test
@ -65,6 +70,10 @@ public class SnappyTest {
            0x6e, 0x65, 0x74, 0x74, 0x79, 0x6e, 0x65, 0x74, 0x74, 0x79
        });
        assertEquals("Copy was not decoded correctly", expected, out);
        in.release();
        out.release();
        expected.release();
    }
    @Test(expected = DecompressionException.class)
@ -77,7 +86,12 @@ public class SnappyTest {
            0x00 // INVALID offset (< 1)
        });
        ByteBuf out = Unpooled.buffer(10);
        try {
            snappy.decode(in, out);
        } finally {
            in.release();
            out.release();
        }
    }
    @Test(expected = DecompressionException.class)
@ -90,7 +104,12 @@ public class SnappyTest {
            0x0b // INVALID offset (greater than chunk size)
        });
        ByteBuf out = Unpooled.buffer(10);
        try {
            snappy.decode(in, out);
        } finally {
            in.release();
            out.release();
        }
    }
    @Test(expected = DecompressionException.class)
@ -101,7 +120,12 @@ public class SnappyTest {
            0x6e, 0x65, 0x74, 0x74, 0x79, // "netty"
        });
        ByteBuf out = Unpooled.buffer(10);
        try {
            snappy.decode(in, out);
        } finally {
            in.release();
            out.release();
        }
    }
    @Test
@ -118,22 +142,25 @@ public class SnappyTest {
            0x6e, 0x65, 0x74, 0x74, 0x79 // "netty"
        });
        assertEquals("Encoded literal was invalid", expected, out);
        in.release();
        out.release();
        expected.release();
    }
    @Test
-    public void encodeLongTextUsesCopy() throws Exception {
+    public void encodeAndDecodeLongTextUsesCopy() throws Exception {
-        ByteBuf in = Unpooled.wrappedBuffer(
+        String srcStr = "Netty has been designed carefully with the experiences " +
            ("Netty has been designed carefully with the experiences " +
                   "earned from the implementation of a lot of protocols " +
                   "such as FTP, SMTP, HTTP, and various binary and " +
-            "text-based legacy protocols").getBytes("US-ASCII")
+                   "text-based legacy protocols";
-        );
+        ByteBuf in = Unpooled.wrappedBuffer(srcStr.getBytes("US-ASCII"));
        ByteBuf out = Unpooled.buffer(180);
        snappy.encode(in, out, in.readableBytes());
-        // The only compressibility in the above are the words "the ",
+        // The only compressibility in the above are the words:
-        // and "protocols", so this is a literal, followed by a copy
+        // "the ", "rotocols", " of ", "TP, " and "and ". So this is a literal,
-        // followed by another literal, followed by another copy
+        // followed by a copy followed by another literal, followed by another copy...
        ByteBuf expected = Unpooled.wrappedBuffer(new byte[] {
            -0x49, 0x01, // preamble length
            -0x10, 0x42, // literal tag + length
@ -147,27 +174,56 @@ public class SnappyTest {
            0x6e, 0x63, 0x65, 0x73, 0x20, 0x65, 0x61, 0x72, 0x6e, 0x65,
            0x64, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20,
-            // First copy (the)
+            // copy of "the "
-            0x01, 0x1C, -0x10,
+            0x01, 0x1c, 0x58,
            // Next literal
-            0x66, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74,
+            0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61,
-            0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x6f, 0x66, 0x20, 0x61,
+            0x74, 0x69, 0x6f, 0x6e, 0x20, 0x6f, 0x66, 0x20, 0x61, 0x20,
-            0x20, 0x6c, 0x6f, 0x74, 0x20, 0x6f, 0x66, 0x20, 0x70, 0x72,
+            0x6c, 0x6f, 0x74,
            0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x73, 0x20, 0x73, 0x75,
            0x63, 0x68, 0x20, 0x61, 0x73, 0x20, 0x46, 0x54, 0x50, 0x2c,
            0x20, 0x53, 0x4d, 0x54, 0x50, 0x2c, 0x20, 0x48, 0x54, 0x54,
            0x50, 0x2c, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x76, 0x61, 0x72,
            0x69, 0x6f, 0x75, 0x73, 0x20, 0x62, 0x69, 0x6e, 0x61, 0x72,
            0x79, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x74, 0x65, 0x78, 0x74,
            0x2d, 0x62, 0x61, 0x73, 0x65, 0x64, 0x20, 0x6c, 0x65, 0x67,
            0x61, 0x63, 0x79, 0x20,
-            // Second copy (protocols)
+            // copy of " of "
-            0x15, 0x4c
+            0x01, 0x09, 0x60,
            // literal
            0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x73, 0x20,
            0x73, 0x75, 0x63, 0x68, 0x20, 0x61, 0x73, 0x20, 0x46, 0x54,
            0x50, 0x2c, 0x20, 0x53, 0x4d,
            // copy of " TP, "
            0x01, 0x06, 0x04,
            // literal
            0x48, 0x54,
            // copy of " TP, "
            0x01, 0x06, 0x44,
            // literal
            0x61, 0x6e, 0x64, 0x20, 0x76, 0x61, 0x72, 0x69, 0x6f, 0x75,
            0x73, 0x20, 0x62, 0x69, 0x6e, 0x61, 0x72, 0x79,
            // copy of "and "
            0x05, 0x13, 0x48,
            // literal
            0x74, 0x65, 0x78, 0x74, 0x2d, 0x62, 0x61, 0x73, 0x65,
            0x64, 0x20, 0x6c, 0x65, 0x67, 0x61, 0x63, 0x79, 0x20, 0x70,
            // copy of "rotocols"
            0x11, 0x4c,
        });
        assertEquals("Encoded result was incorrect", expected, out);
        // Decode
        ByteBuf outDecoded = Unpooled.buffer();
        snappy.decode(out, outDecoded);
        assertEquals(srcStr, outDecoded.getCharSequence(0, outDecoded.writerIndex(), CharsetUtil.US_ASCII));
        in.release();
        out.release();
        outDecoded.release();
    }
    @Test
@ -176,6 +232,7 @@ public class SnappyTest {
                'n', 'e', 't', 't', 'y'
        });
        assertEquals(maskChecksum(0xd6cb8b55), calculateChecksum(input));
        input.release();
    }
    @Test
@ -185,6 +242,7 @@ public class SnappyTest {
        });
        validateChecksum(maskChecksum(0x2d4d3535), input);
        input.release();
    }
    @Test(expected = DecompressionException.class)
@ -192,13 +250,16 @@ public class SnappyTest {
        ByteBuf input = Unpooled.wrappedBuffer(new byte[] {
                'y', 't', 't', 'e', 'n'
        });
-
+        try {
            validateChecksum(maskChecksum(0xd6cb8b55), input);
        } finally {
            input.release();
        }
    }
    @Test
    public void testEncodeLiteralAndDecodeLiteral() {
-        int[] lengths = new int[] {
+        int[] lengths = {
            0x11, // default
            0x100, // case 60
            0x1000, // case 61
@ -211,9 +272,9 @@ public class SnappyTest {
            ByteBuf decoded = Unpooled.buffer(10);
            ByteBuf expected = Unpooled.wrappedBuffer(new byte[len]);
            try {
-                Snappy.encodeLiteral(in, encoded, len);
+                encodeLiteral(in, encoded, len);
                byte tag = encoded.readByte();
-                Snappy.decodeLiteral(tag, encoded, decoded);
+                decodeLiteral(tag, encoded, decoded);
                assertEquals("Encoded or decoded literal was incorrect", expected, decoded);
            } finally {
                in.release();