Fix LZ4 encoder/decoder performance with (default) xxHash32 (#9249)
Motivation: Lz4FrameEncoder and Lz4FrameDecoder in their default configuration use an extremely inefficient way to checksum direct byte buffers. In particular, for every byte checksummed, a single-element byte array is being allocated and a JNI cal is made, which in some internal testing makes a 25x difference in total throughput and allocates *a lot* of garbage. Modifications: Lz4XXHash32, an implementation of ByteBufChecksum specifically for use by Lz4FrameEncoder and Lz4FrameDecoder, is introduced. It utilises xxHash32 block API which provides a hash() method that accepts a ByteBuffer as an argument. Lz4FrameEncoder and Lz4FrameDecoder are modified to use this implementation by default. Result: Lz4FrameEncoder and Lz4FrameDecoder perform well again when operating on direct byte buffers with default checksum configuration; a public implementation is provided for those who need to override the seed.
This commit is contained in:
parent
a2583d0d3c
commit
93414db1f3
@ -66,6 +66,9 @@ abstract class ByteBufChecksum implements Checksum {
|
||||
|
||||
static ByteBufChecksum wrapChecksum(Checksum checksum) {
|
||||
ObjectUtil.checkNotNull(checksum, "checksum");
|
||||
if (checksum instanceof ByteBufChecksum) {
|
||||
return (ByteBufChecksum) checksum;
|
||||
}
|
||||
if (checksum instanceof Adler32 && ADLER32_UPDATE_METHOD != null) {
|
||||
return new ReflectiveByteBufChecksum(checksum, ADLER32_UPDATE_METHOD);
|
||||
}
|
||||
|
@ -21,7 +21,6 @@ import io.netty.handler.codec.ByteToMessageDecoder;
|
||||
import net.jpountz.lz4.LZ4Exception;
|
||||
import net.jpountz.lz4.LZ4Factory;
|
||||
import net.jpountz.lz4.LZ4FastDecompressor;
|
||||
import net.jpountz.xxhash.XXHashFactory;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.zip.Checksum;
|
||||
@ -124,9 +123,7 @@ public class Lz4FrameDecoder extends ByteToMessageDecoder {
|
||||
* <a href="https://github.com/Cyan4973/xxHash">Github</a>.
|
||||
*/
|
||||
public Lz4FrameDecoder(LZ4Factory factory, boolean validateChecksums) {
|
||||
this(factory, validateChecksums ?
|
||||
XXHashFactory.fastestInstance().newStreamingHash32(DEFAULT_SEED).asChecksum()
|
||||
: null);
|
||||
this(factory, validateChecksums ? new Lz4XXHash32(DEFAULT_SEED) : null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -31,7 +31,6 @@ import io.netty.util.internal.ObjectUtil;
|
||||
import net.jpountz.lz4.LZ4Compressor;
|
||||
import net.jpountz.lz4.LZ4Exception;
|
||||
import net.jpountz.lz4.LZ4Factory;
|
||||
import net.jpountz.xxhash.XXHashFactory;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
@ -125,8 +124,7 @@ public class Lz4FrameEncoder extends MessageToByteEncoder<ByteBuf> {
|
||||
* and is slower but compresses more efficiently
|
||||
*/
|
||||
public Lz4FrameEncoder(boolean highCompressor) {
|
||||
this(LZ4Factory.fastestInstance(), highCompressor, DEFAULT_BLOCK_SIZE,
|
||||
XXHashFactory.fastestInstance().newStreamingHash32(DEFAULT_SEED).asChecksum());
|
||||
this(LZ4Factory.fastestInstance(), highCompressor, DEFAULT_BLOCK_SIZE, new Lz4XXHash32(DEFAULT_SEED));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright 2019 The Netty Project
|
||||
*
|
||||
* The Netty Project licenses this file to you under the Apache License,
|
||||
* version 2.0 (the "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package io.netty.handler.codec.compression;
|
||||
|
||||
import io.netty.buffer.ByteBuf;
|
||||
import net.jpountz.xxhash.StreamingXXHash32;
|
||||
import net.jpountz.xxhash.XXHash32;
|
||||
import net.jpountz.xxhash.XXHashFactory;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.zip.Checksum;
|
||||
|
||||
/**
|
||||
* A special-purpose {@link ByteBufChecksum} implementation for use with
|
||||
* {@link Lz4FrameEncoder} and {@link Lz4FrameDecoder}.
|
||||
*
|
||||
* {@link StreamingXXHash32#asChecksum()} has a particularly nasty implementation
|
||||
* of {@link Checksum#update(int)} that allocates a single-element byte array for
|
||||
* every invocation.
|
||||
*
|
||||
* In addition to that, it doesn't implement an overload that accepts a {@link ByteBuffer}
|
||||
* as an argument.
|
||||
*
|
||||
* Combined, this means that we can't use {@code ReflectiveByteBufChecksum} at all,
|
||||
* and can't use {@code SlowByteBufChecksum} because of its atrocious performance
|
||||
* with direct byte buffers (allocating an array and making a JNI call for every byte
|
||||
* checksummed might be considered sub-optimal by some).
|
||||
*
|
||||
* Block version of xxHash32 ({@link XXHash32}), however, does provide
|
||||
* {@link XXHash32#hash(ByteBuffer, int)} method that is efficient and does exactly
|
||||
* what we need, with a caveat that we can only invoke it once before having to reset.
|
||||
* This, however, is fine for our purposes, given the way we use it in
|
||||
* {@link Lz4FrameEncoder} and {@link Lz4FrameDecoder}:
|
||||
* {@code reset()}, followed by one {@code update()}, followed by {@code getValue()}.
|
||||
*/
|
||||
public final class Lz4XXHash32 extends ByteBufChecksum {
|
||||
|
||||
private static final XXHash32 XXHASH32 = XXHashFactory.fastestInstance().hash32();
|
||||
|
||||
private final int seed;
|
||||
private boolean used;
|
||||
private int value;
|
||||
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
public Lz4XXHash32(int seed) {
|
||||
this.seed = seed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void update(int b) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void update(byte[] b, int off, int len) {
|
||||
if (used) {
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
value = XXHASH32.hash(b, off, len, seed);
|
||||
used = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void update(ByteBuf b, int off, int len) {
|
||||
if (used) {
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
if (b.hasArray()) {
|
||||
value = XXHASH32.hash(b.array(), b.arrayOffset() + off, len, seed);
|
||||
} else {
|
||||
value = XXHASH32.hash(CompressionUtil.safeNioBuffer(b, off, len), seed);
|
||||
}
|
||||
used = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getValue() {
|
||||
if (!used) {
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
/*
|
||||
* If you look carefully, you'll notice that the most significant nibble
|
||||
* is being discarded; we believe this to be a bug, but this is what
|
||||
* StreamingXXHash32#asChecksum() implementation of getValue() does,
|
||||
* so we have to retain this behaviour for compatibility reasons.
|
||||
*/
|
||||
return value & 0xFFFFFFFL;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
used = false;
|
||||
}
|
||||
}
|
@ -26,6 +26,7 @@ import java.util.zip.Adler32;
|
||||
import java.util.zip.CRC32;
|
||||
import java.util.zip.Checksum;
|
||||
|
||||
import static io.netty.handler.codec.compression.Lz4Constants.DEFAULT_SEED;
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
public class ByteBufChecksumTest {
|
||||
@ -51,7 +52,14 @@ public class ByteBufChecksumTest {
|
||||
|
||||
private static void testUpdate(ByteBuf buf) {
|
||||
try {
|
||||
testUpdate(xxHash32(), ByteBufChecksum.wrapChecksum(xxHash32()), buf);
|
||||
// all variations of xxHash32: slow and naive, optimised, wrapped optimised;
|
||||
// the last two should be literally identical, but it's best to guard against
|
||||
// an accidental regression in ByteBufChecksum#wrapChecksum(Checksum)
|
||||
testUpdate(xxHash32(DEFAULT_SEED), ByteBufChecksum.wrapChecksum(xxHash32(DEFAULT_SEED)), buf);
|
||||
testUpdate(xxHash32(DEFAULT_SEED), new Lz4XXHash32(DEFAULT_SEED), buf);
|
||||
testUpdate(xxHash32(DEFAULT_SEED), ByteBufChecksum.wrapChecksum(new Lz4XXHash32(DEFAULT_SEED)), buf);
|
||||
|
||||
// CRC32 and Adler32, special-cased to use ReflectiveByteBufChecksum
|
||||
testUpdate(new CRC32(), ByteBufChecksum.wrapChecksum(new CRC32()), buf);
|
||||
testUpdate(new Adler32(), ByteBufChecksum.wrapChecksum(new Adler32()), buf);
|
||||
} finally {
|
||||
@ -76,7 +84,7 @@ public class ByteBufChecksumTest {
|
||||
assertEquals(checksum.getValue(), wrapped.getValue());
|
||||
}
|
||||
|
||||
private static Checksum xxHash32() {
|
||||
return XXHashFactory.fastestInstance().newStreamingHash32(Lz4Constants.DEFAULT_SEED).asChecksum();
|
||||
private static Checksum xxHash32(int seed) {
|
||||
return XXHashFactory.fastestInstance().newStreamingHash32(seed).asChecksum();
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user