Fix LZ4 encoder/decoder performance with (default) xxHash32 (#9249)
Motivation: Lz4FrameEncoder and Lz4FrameDecoder in their default configuration use an extremely inefficient way to checksum direct byte buffers. In particular, for every byte checksummed, a single-element byte array is being allocated and a JNI call is made, which in some internal testing makes a 25x difference in total throughput and allocates *a lot* of garbage. Modifications: Lz4XXHash32, an implementation of ByteBufChecksum specifically for use by Lz4FrameEncoder and Lz4FrameDecoder, is introduced. It utilises the xxHash32 block API, which provides a hash() method that accepts a ByteBuffer as an argument. Lz4FrameEncoder and Lz4FrameDecoder are modified to use this implementation by default. Result: Lz4FrameEncoder and Lz4FrameDecoder perform well again when operating on direct byte buffers with default checksum configuration; a public implementation is provided for those who need to override the seed.
This commit is contained in:
parent
a2583d0d3c
commit
93414db1f3
@ -66,6 +66,9 @@ abstract class ByteBufChecksum implements Checksum {
|
|||||||
|
|
||||||
static ByteBufChecksum wrapChecksum(Checksum checksum) {
|
static ByteBufChecksum wrapChecksum(Checksum checksum) {
|
||||||
ObjectUtil.checkNotNull(checksum, "checksum");
|
ObjectUtil.checkNotNull(checksum, "checksum");
|
||||||
|
if (checksum instanceof ByteBufChecksum) {
|
||||||
|
return (ByteBufChecksum) checksum;
|
||||||
|
}
|
||||||
if (checksum instanceof Adler32 && ADLER32_UPDATE_METHOD != null) {
|
if (checksum instanceof Adler32 && ADLER32_UPDATE_METHOD != null) {
|
||||||
return new ReflectiveByteBufChecksum(checksum, ADLER32_UPDATE_METHOD);
|
return new ReflectiveByteBufChecksum(checksum, ADLER32_UPDATE_METHOD);
|
||||||
}
|
}
|
||||||
|
@ -21,7 +21,6 @@ import io.netty.handler.codec.ByteToMessageDecoder;
|
|||||||
import net.jpountz.lz4.LZ4Exception;
|
import net.jpountz.lz4.LZ4Exception;
|
||||||
import net.jpountz.lz4.LZ4Factory;
|
import net.jpountz.lz4.LZ4Factory;
|
||||||
import net.jpountz.lz4.LZ4FastDecompressor;
|
import net.jpountz.lz4.LZ4FastDecompressor;
|
||||||
import net.jpountz.xxhash.XXHashFactory;
|
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.zip.Checksum;
|
import java.util.zip.Checksum;
|
||||||
@ -124,9 +123,7 @@ public class Lz4FrameDecoder extends ByteToMessageDecoder {
|
|||||||
* <a href="https://github.com/Cyan4973/xxHash">Github</a>.
|
* <a href="https://github.com/Cyan4973/xxHash">Github</a>.
|
||||||
*/
|
*/
|
||||||
public Lz4FrameDecoder(LZ4Factory factory, boolean validateChecksums) {
|
public Lz4FrameDecoder(LZ4Factory factory, boolean validateChecksums) {
|
||||||
this(factory, validateChecksums ?
|
this(factory, validateChecksums ? new Lz4XXHash32(DEFAULT_SEED) : null);
|
||||||
XXHashFactory.fastestInstance().newStreamingHash32(DEFAULT_SEED).asChecksum()
|
|
||||||
: null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -31,7 +31,6 @@ import io.netty.util.internal.ObjectUtil;
|
|||||||
import net.jpountz.lz4.LZ4Compressor;
|
import net.jpountz.lz4.LZ4Compressor;
|
||||||
import net.jpountz.lz4.LZ4Exception;
|
import net.jpountz.lz4.LZ4Exception;
|
||||||
import net.jpountz.lz4.LZ4Factory;
|
import net.jpountz.lz4.LZ4Factory;
|
||||||
import net.jpountz.xxhash.XXHashFactory;
|
|
||||||
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
@ -125,8 +124,7 @@ public class Lz4FrameEncoder extends MessageToByteEncoder<ByteBuf> {
|
|||||||
* and is slower but compresses more efficiently
|
* and is slower but compresses more efficiently
|
||||||
*/
|
*/
|
||||||
public Lz4FrameEncoder(boolean highCompressor) {
|
public Lz4FrameEncoder(boolean highCompressor) {
|
||||||
this(LZ4Factory.fastestInstance(), highCompressor, DEFAULT_BLOCK_SIZE,
|
this(LZ4Factory.fastestInstance(), highCompressor, DEFAULT_BLOCK_SIZE, new Lz4XXHash32(DEFAULT_SEED));
|
||||||
XXHashFactory.fastestInstance().newStreamingHash32(DEFAULT_SEED).asChecksum());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -0,0 +1,107 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2019 The Netty Project
|
||||||
|
*
|
||||||
|
* The Netty Project licenses this file to you under the Apache License,
|
||||||
|
* version 2.0 (the "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at:
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package io.netty.handler.codec.compression;
|
||||||
|
|
||||||
|
import io.netty.buffer.ByteBuf;
|
||||||
|
import net.jpountz.xxhash.StreamingXXHash32;
|
||||||
|
import net.jpountz.xxhash.XXHash32;
|
||||||
|
import net.jpountz.xxhash.XXHashFactory;
|
||||||
|
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.util.zip.Checksum;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A special-purpose {@link ByteBufChecksum} implementation for use with
|
||||||
|
* {@link Lz4FrameEncoder} and {@link Lz4FrameDecoder}.
|
||||||
|
*
|
||||||
|
* {@link StreamingXXHash32#asChecksum()} has a particularly nasty implementation
|
||||||
|
* of {@link Checksum#update(int)} that allocates a single-element byte array for
|
||||||
|
* every invocation.
|
||||||
|
*
|
||||||
|
* In addition to that, it doesn't implement an overload that accepts a {@link ByteBuffer}
|
||||||
|
* as an argument.
|
||||||
|
*
|
||||||
|
* Combined, this means that we can't use {@code ReflectiveByteBufChecksum} at all,
|
||||||
|
* and can't use {@code SlowByteBufChecksum} because of its atrocious performance
|
||||||
|
* with direct byte buffers (allocating an array and making a JNI call for every byte
|
||||||
|
* checksummed might be considered sub-optimal by some).
|
||||||
|
*
|
||||||
|
* Block version of xxHash32 ({@link XXHash32}), however, does provide
|
||||||
|
* {@link XXHash32#hash(ByteBuffer, int)} method that is efficient and does exactly
|
||||||
|
* what we need, with a caveat that we can only invoke it once before having to reset.
|
||||||
|
* This, however, is fine for our purposes, given the way we use it in
|
||||||
|
* {@link Lz4FrameEncoder} and {@link Lz4FrameDecoder}:
|
||||||
|
* {@code reset()}, followed by one {@code update()}, followed by {@code getValue()}.
|
||||||
|
*/
|
||||||
|
public final class Lz4XXHash32 extends ByteBufChecksum {
|
||||||
|
|
||||||
|
/** Block-API xxHash32 hasher from {@code XXHashFactory.fastestInstance()}, shared by all instances. */
private static final XXHash32 XXHASH32 = XXHashFactory.fastestInstance().hash32();
|
||||||
|
|
||||||
|
/** Seed handed to every {@code XXHASH32.hash(...)} call made by this instance. */
private final int seed;
|
||||||
|
/** {@code true} once an {@code update(...)} has stored a hash; set back to {@code false} by {@link #reset()}. */
private boolean used;
|
||||||
|
/** Hash stored by the most recent successful {@code update(...)}; not cleared by {@link #reset()}. */
private int value;
|
||||||
|
|
||||||
|
/**
 * Creates a checksum that hashes with the given seed.
 *
 * @param seed the seed passed to xxHash32 on every update
 */
@SuppressWarnings("WeakerAccess")
|
||||||
|
public Lz4XXHash32(int seed) {
|
||||||
|
this.seed = seed;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Single-byte updates are not supported by this implementation.
 *
 * @throws UnsupportedOperationException always
 */
@Override
|
||||||
|
public void update(int b) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Hashes {@code len} bytes of {@code b} starting at {@code off} in a single call and stores the result.
 *
 * @throws IllegalStateException if an update has already been performed since the last {@link #reset()}
 */
@Override
|
||||||
|
public void update(byte[] b, int off, int len) {
|
||||||
|
// only one update is allowed between resets
if (used) {
|
||||||
|
throw new IllegalStateException();
|
||||||
|
}
|
||||||
|
value = XXHASH32.hash(b, off, len, seed);
|
||||||
|
used = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Hashes {@code len} bytes of the buffer {@code b} starting at index {@code off} in a single call
 * and stores the result. Array-backed buffers are hashed through their backing array; all other
 * buffers are hashed through the result of {@code CompressionUtil.safeNioBuffer(b, off, len)}.
 *
 * @throws IllegalStateException if an update has already been performed since the last {@link #reset()}
 */
@Override
|
||||||
|
public void update(ByteBuf b, int off, int len) {
|
||||||
|
// only one update is allowed between resets
if (used) {
|
||||||
|
throw new IllegalStateException();
|
||||||
|
}
|
||||||
|
if (b.hasArray()) {
|
||||||
|
// off is relative to the buffer, so shift it by arrayOffset() to index the backing array
value = XXHASH32.hash(b.array(), b.arrayOffset() + off, len, seed);
|
||||||
|
} else {
|
||||||
|
// NOTE(review): safeNioBuffer presumably yields a ByteBuffer covering [off, off + len) - confirm
value = XXHASH32.hash(CompressionUtil.safeNioBuffer(b, off, len), seed);
|
||||||
|
}
|
||||||
|
used = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the hash stored by the preceding update, masked with {@code 0xFFFFFFF} (the low 28 bits).
 *
 * @return the masked hash as a non-negative {@code long}
 * @throws IllegalStateException if no update has been performed since the last {@link #reset()}
 */
@Override
|
||||||
|
public long getValue() {
|
||||||
|
if (!used) {
|
||||||
|
throw new IllegalStateException();
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* If you look carefully, you'll notice that the most significant nibble
|
||||||
|
* is being discarded; we believe this to be a bug, but this is what
|
||||||
|
* StreamingXXHash32#asChecksum() implementation of getValue() does,
|
||||||
|
* so we have to retain this behaviour for compatibility reasons.
|
||||||
|
*/
|
||||||
|
return value & 0xFFFFFFFL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Clears the {@code used} flag so that one further update may be performed. */
@Override
|
||||||
|
public void reset() {
|
||||||
|
used = false;
|
||||||
|
}
|
||||||
|
}
|
@ -26,6 +26,7 @@ import java.util.zip.Adler32;
|
|||||||
import java.util.zip.CRC32;
|
import java.util.zip.CRC32;
|
||||||
import java.util.zip.Checksum;
|
import java.util.zip.Checksum;
|
||||||
|
|
||||||
|
import static io.netty.handler.codec.compression.Lz4Constants.DEFAULT_SEED;
|
||||||
import static org.junit.Assert.*;
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
public class ByteBufChecksumTest {
|
public class ByteBufChecksumTest {
|
||||||
@ -51,7 +52,14 @@ public class ByteBufChecksumTest {
|
|||||||
|
|
||||||
private static void testUpdate(ByteBuf buf) {
|
private static void testUpdate(ByteBuf buf) {
|
||||||
try {
|
try {
|
||||||
testUpdate(xxHash32(), ByteBufChecksum.wrapChecksum(xxHash32()), buf);
|
// all variations of xxHash32: slow and naive, optimised, wrapped optimised;
|
||||||
|
// the last two should be literally identical, but it's best to guard against
|
||||||
|
// an accidental regression in ByteBufChecksum#wrapChecksum(Checksum)
|
||||||
|
testUpdate(xxHash32(DEFAULT_SEED), ByteBufChecksum.wrapChecksum(xxHash32(DEFAULT_SEED)), buf);
|
||||||
|
testUpdate(xxHash32(DEFAULT_SEED), new Lz4XXHash32(DEFAULT_SEED), buf);
|
||||||
|
testUpdate(xxHash32(DEFAULT_SEED), ByteBufChecksum.wrapChecksum(new Lz4XXHash32(DEFAULT_SEED)), buf);
|
||||||
|
|
||||||
|
// CRC32 and Adler32, special-cased to use ReflectiveByteBufChecksum
|
||||||
testUpdate(new CRC32(), ByteBufChecksum.wrapChecksum(new CRC32()), buf);
|
testUpdate(new CRC32(), ByteBufChecksum.wrapChecksum(new CRC32()), buf);
|
||||||
testUpdate(new Adler32(), ByteBufChecksum.wrapChecksum(new Adler32()), buf);
|
testUpdate(new Adler32(), ByteBufChecksum.wrapChecksum(new Adler32()), buf);
|
||||||
} finally {
|
} finally {
|
||||||
@ -76,7 +84,7 @@ public class ByteBufChecksumTest {
|
|||||||
assertEquals(checksum.getValue(), wrapped.getValue());
|
assertEquals(checksum.getValue(), wrapped.getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Checksum xxHash32() {
|
private static Checksum xxHash32(int seed) {
|
||||||
return XXHashFactory.fastestInstance().newStreamingHash32(Lz4Constants.DEFAULT_SEED).asChecksum();
|
return XXHashFactory.fastestInstance().newStreamingHash32(seed).asChecksum();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user