FastLzFrameEncoder should not depend on the buffer type (#11516)

Motivation:

At the moment FastLzFrameEncoder depends on the buffer type. This is not needed and may cause memory copies

Modifications:

Rewrite to be able to just act on the ByteBuf

Result:

Less memory copies
This commit is contained in:
Norman Maurer 2021-07-27 09:21:02 +02:00 committed by GitHub
parent c423891fb5
commit e1be815a94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 75 additions and 146 deletions

View File

@ -53,13 +53,13 @@ final class FastLz {
static final int MAX_CHUNK_LENGTH = 0xFFFF;
/**
* Do not call {@link #compress(byte[], int, int, byte[], int, int)} for input buffers
* Do not call {@link #compress(ByteBuf, int, int, ByteBuf, int, int)} for input buffers
* which length less than this value.
*/
static final int MIN_LENGTH_TO_COMPRESSION = 32;
/**
* In this case {@link #compress(byte[], int, int, byte[], int, int)} will choose level
* In this case {@link #compress(ByteBuf, int, int, ByteBuf, int, int)} will choose level
* automatically depending on the length of the input buffer. If length less than
* {@link #MIN_RECOMENDED_LENGTH_FOR_LEVEL_2} {@link #LEVEL_1} will be chosen,
* otherwise {@link #LEVEL_2}.
@ -93,8 +93,8 @@ final class FastLz {
* If the input is not compressible, the return value might be larger than length (input buffer size).
*/
@SuppressWarnings("IdentityBinaryExpression")
static int compress(final byte[] input, final int inOffset, final int inLength,
final byte[] output, final int outOffset, final int proposedLevel) {
static int compress(final ByteBuf input, final int inOffset, final int inLength,
final ByteBuf output, final int outOffset, final int proposedLevel) {
final int level;
if (proposedLevel == LEVEL_AUTO) {
level = inLength < MIN_RECOMENDED_LENGTH_FOR_LEVEL_2 ? LEVEL_1 : LEVEL_2;
@ -123,10 +123,10 @@ final class FastLz {
if (inLength < 4) {
if (inLength != 0) {
// *op++ = length-1;
output[outOffset + op++] = (byte) (inLength - 1);
output.setByte(outOffset + op++, (byte) (inLength - 1));
ipBound++;
while (ip <= ipBound) {
output[outOffset + op++] = input[inOffset + ip++];
output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
}
return inLength + 1;
}
@ -143,9 +143,9 @@ final class FastLz {
/* we start with literal copy */
copy = 2;
output[outOffset + op++] = MAX_COPY - 1;
output[outOffset + op++] = input[inOffset + ip++];
output[outOffset + op++] = input[inOffset + ip++];
output.setByte(outOffset + op++, MAX_COPY - 1);
output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
/* main loop */
while (ip < ipLimit) {
@ -166,7 +166,7 @@ final class FastLz {
/* check for a run */
if (level == LEVEL_2) {
//if(ip[0] == ip[-1] && FASTLZ_READU16(ip-1)==FASTLZ_READU16(ip+1))
if (input[inOffset + ip] == input[inOffset + ip - 1] &&
if (input.getByte(inOffset + ip) == input.getByte(inOffset + ip - 1) &&
readU16(input, inOffset + ip - 1) == readU16(input, inOffset + ip + 1)) {
distance = 1;
ip += 3;
@ -197,18 +197,18 @@ final class FastLz {
/* is this a match? check the first 3 bytes */
if (distance == 0
|| (level == LEVEL_1 ? distance >= MAX_DISTANCE : distance >= MAX_FARDISTANCE)
|| input[inOffset + ref++] != input[inOffset + ip++]
|| input[inOffset + ref++] != input[inOffset + ip++]
|| input[inOffset + ref++] != input[inOffset + ip++]) {
|| input.getByte(inOffset + ref++) != input.getByte(inOffset + ip++)
|| input.getByte(inOffset + ref++) != input.getByte(inOffset + ip++)
|| input.getByte(inOffset + ref++) != input.getByte(inOffset + ip++)) {
/*
* goto literal;
*/
output[outOffset + op++] = input[inOffset + anchor++];
output.setByte(outOffset + op++, input.getByte(inOffset + anchor++));
ip = anchor;
copy++;
if (copy == MAX_COPY) {
copy = 0;
output[outOffset + op++] = MAX_COPY - 1;
output.setByte(outOffset + op++, MAX_COPY - 1);
}
continue;
}
@ -216,17 +216,17 @@ final class FastLz {
if (level == LEVEL_2) {
/* far, needs at least 5-byte match */
if (distance >= MAX_DISTANCE) {
if (input[inOffset + ip++] != input[inOffset + ref++]
|| input[inOffset + ip++] != input[inOffset + ref++]) {
if (input.getByte(inOffset + ip++) != input.getByte(inOffset + ref++)
|| input.getByte(inOffset + ip++) != input.getByte(inOffset + ref++)) {
/*
* goto literal;
*/
output[outOffset + op++] = input[inOffset + anchor++];
output.setByte(outOffset + op++, input.getByte(inOffset + anchor++));
ip = anchor;
copy++;
if (copy == MAX_COPY) {
copy = 0;
output[outOffset + op++] = MAX_COPY - 1;
output.setByte(outOffset + op++, MAX_COPY - 1);
}
continue;
}
@ -246,47 +246,29 @@ final class FastLz {
if (distance == 0) {
/* zero distance means a run */
//flzuint8 x = ip[-1];
byte x = input[inOffset + ip - 1];
byte x = input.getByte(inOffset + ip - 1);
while (ip < ipBound) {
if (input[inOffset + ref++] != x) {
if (input.getByte(inOffset + ref++) != x) {
break;
} else {
ip++;
}
}
} else {
for (;;) {
/* safe because the outer check against ip limit */
if (input[inOffset + ref++] != input[inOffset + ip++]) {
break;
}
if (input[inOffset + ref++] != input[inOffset + ip++]) {
break;
}
if (input[inOffset + ref++] != input[inOffset + ip++]) {
break;
}
if (input[inOffset + ref++] != input[inOffset + ip++]) {
break;
}
if (input[inOffset + ref++] != input[inOffset + ip++]) {
break;
}
if (input[inOffset + ref++] != input[inOffset + ip++]) {
break;
}
if (input[inOffset + ref++] != input[inOffset + ip++]) {
break;
}
if (input[inOffset + ref++] != input[inOffset + ip++]) {
/* safe because the outer check against ip limit */
boolean missMatch = false;
for (int i = 0; i < 8; i++) {
if (input.getByte(inOffset + ref++) != input.getByte(inOffset + ip++)) {
missMatch = true;
break;
}
}
if (!missMatch) {
while (ip < ipBound) {
if (input[inOffset + ref++] != input[inOffset + ip++]) {
if (input.getByte(inOffset + ref++) != input.getByte(inOffset + ip++)) {
break;
}
}
break;
}
}
@ -294,7 +276,7 @@ final class FastLz {
if (copy != 0) {
/* copy is biased, '0' means 1 byte copy */
// *(op-copy-1) = copy-1;
output[outOffset + op - copy - 1] = (byte) (copy - 1);
output.setByte(outOffset + op - copy - 1, (byte) (copy - 1));
} else {
/* back, to overwrite the copy count */
op--;
@ -311,53 +293,53 @@ final class FastLz {
if (level == LEVEL_2) {
if (distance < MAX_DISTANCE) {
if (len < 7) {
output[outOffset + op++] = (byte) ((len << 5) + (distance >>> 8));
output[outOffset + op++] = (byte) (distance & 255);
output.setByte(outOffset + op++, (byte) ((len << 5) + (distance >>> 8)));
output.setByte(outOffset + op++, (byte) (distance & 255));
} else {
output[outOffset + op++] = (byte) ((7 << 5) + (distance >>> 8));
output.setByte(outOffset + op++, (byte) ((7 << 5) + (distance >>> 8)));
for (len -= 7; len >= 255; len -= 255) {
output[outOffset + op++] = (byte) 255;
output.setByte(outOffset + op++, (byte) 255);
}
output[outOffset + op++] = (byte) len;
output[outOffset + op++] = (byte) (distance & 255);
output.setByte(outOffset + op++, (byte) len);
output.setByte(outOffset + op++, (byte) (distance & 255));
}
} else {
/* far away, but not yet in the another galaxy... */
if (len < 7) {
distance -= MAX_DISTANCE;
output[outOffset + op++] = (byte) ((len << 5) + 31);
output[outOffset + op++] = (byte) 255;
output[outOffset + op++] = (byte) (distance >>> 8);
output[outOffset + op++] = (byte) (distance & 255);
output.setByte(outOffset + op++, (byte) ((len << 5) + 31));
output.setByte(outOffset + op++, (byte) 255);
output.setByte(outOffset + op++, (byte) (distance >>> 8));
output.setByte(outOffset + op++, (byte) (distance & 255));
} else {
distance -= MAX_DISTANCE;
output[outOffset + op++] = (byte) ((7 << 5) + 31);
output.setByte(outOffset + op++, (byte) ((7 << 5) + 31));
for (len -= 7; len >= 255; len -= 255) {
output[outOffset + op++] = (byte) 255;
output.setByte(outOffset + op++, (byte) 255);
}
output[outOffset + op++] = (byte) len;
output[outOffset + op++] = (byte) 255;
output[outOffset + op++] = (byte) (distance >>> 8);
output[outOffset + op++] = (byte) (distance & 255);
output.setByte(outOffset + op++, (byte) len);
output.setByte(outOffset + op++, (byte) 255);
output.setByte(outOffset + op++, (byte) (distance >>> 8));
output.setByte(outOffset + op++, (byte) (distance & 255));
}
}
} else {
if (len > MAX_LEN - 2) {
while (len > MAX_LEN - 2) {
output[outOffset + op++] = (byte) ((7 << 5) + (distance >>> 8));
output[outOffset + op++] = (byte) (MAX_LEN - 2 - 7 - 2);
output[outOffset + op++] = (byte) (distance & 255);
output.setByte(outOffset + op++, (byte) ((7 << 5) + (distance >>> 8)));
output.setByte(outOffset + op++, (byte) (MAX_LEN - 2 - 7 - 2));
output.setByte(outOffset + op++, (byte) (distance & 255));
len -= MAX_LEN - 2;
}
}
if (len < 7) {
output[outOffset + op++] = (byte) ((len << 5) + (distance >>> 8));
output[outOffset + op++] = (byte) (distance & 255);
output.setByte(outOffset + op++, (byte) ((len << 5) + (distance >>> 8)));
output.setByte(outOffset + op++, (byte) (distance & 255));
} else {
output[outOffset + op++] = (byte) ((7 << 5) + (distance >>> 8));
output[outOffset + op++] = (byte) (len - 7);
output[outOffset + op++] = (byte) (distance & 255);
output.setByte(outOffset + op++, (byte) ((7 << 5) + (distance >>> 8)));
output.setByte(outOffset + op++, (byte) (len - 7));
output.setByte(outOffset + op++, (byte) (distance & 255));
}
}
@ -371,7 +353,7 @@ final class FastLz {
htab[hval] = ip++;
/* assuming literal copy */
output[outOffset + op++] = MAX_COPY - 1;
output.setByte(outOffset + op++, MAX_COPY - 1);
continue;
@ -392,25 +374,25 @@ final class FastLz {
/* left-over as literal copy */
ipBound++;
while (ip <= ipBound) {
output[outOffset + op++] = input[inOffset + ip++];
output.setByte(outOffset + op++, input.getByte(inOffset + ip++));
copy++;
if (copy == MAX_COPY) {
copy = 0;
output[outOffset + op++] = MAX_COPY - 1;
output.setByte(outOffset + op++, MAX_COPY - 1);
}
}
/* if we have copied something, adjust the copy length */
if (copy != 0) {
//*(op-copy-1) = copy-1;
output[outOffset + op - copy - 1] = (byte) (copy - 1);
output.setByte(outOffset + op - copy - 1, (byte) (copy - 1));
} else {
op--;
}
if (level == LEVEL_2) {
/* marker for fastlz2 */
output[outOffset] |= 1 << 5;
output.setByte(outOffset, output.getByte(outOffset) | 1 << 5);
}
return op;
@ -560,18 +542,18 @@ final class FastLz {
return op;
}
private static int hashFunction(byte[] p, int offset) {
private static int hashFunction(ByteBuf p, int offset) {
int v = readU16(p, offset);
v ^= readU16(p, offset + 1) ^ v >> 16 - HASH_LOG;
v &= HASH_MASK;
return v;
}
private static int readU16(byte[] data, int offset) {
if (offset + 1 >= data.length) {
return data[offset] & 0xff;
private static int readU16(ByteBuf data, int offset) {
if (offset + 1 >= data.readableBytes()) {
return data.getUnsignedByte(offset);
}
return (data[offset + 1] & 0xff) << 8 | data[offset] & 0xff;
return data.getUnsignedByte(offset + 1) << 8 | data.getUnsignedByte(offset);
}
private FastLz() { }

View File

@ -38,7 +38,7 @@ public class FastLzFrameEncoder extends MessageToByteEncoder<ByteBuf> {
/**
* Underlying checksum calculator in use.
*/
private final Checksum checksum;
private final ByteBufChecksum checksum;
/**
* Creates a FastLZ encoder without checksum calculator and with auto detection of compression level.
@ -85,18 +85,17 @@ public class FastLzFrameEncoder extends MessageToByteEncoder<ByteBuf> {
* You may set {@code null} if you don't want to validate checksum of each block.
*/
public FastLzFrameEncoder(int level, Checksum checksum) {
super(false);
if (level != LEVEL_AUTO && level != LEVEL_1 && level != LEVEL_2) {
throw new IllegalArgumentException(String.format(
"level: %d (expected: %d or %d or %d)", level, LEVEL_AUTO, LEVEL_1, LEVEL_2));
}
this.level = level;
this.checksum = checksum;
this.checksum = checksum == null ? null : ByteBufChecksum.wrapChecksum(checksum);
}
@Override
protected void encode(ChannelHandlerContext ctx, ByteBuf in, ByteBuf out) throws Exception {
final Checksum checksum = this.checksum;
final ByteBufChecksum checksum = this.checksum;
for (;;) {
if (!in.isReadable()) {
@ -115,74 +114,28 @@ public class FastLzFrameEncoder extends MessageToByteEncoder<ByteBuf> {
blockType = BLOCK_TYPE_NON_COMPRESSED;
out.ensureWritable(outputOffset + 2 + length);
final byte[] output;
final int outputPtr;
if (out.hasArray()) {
output = out.array();
outputPtr = out.arrayOffset() + outputOffset + 2;
} else {
output = new byte[length];
outputPtr = 0;
}
final int outputPtr = outputOffset + 2;
if (checksum != null) {
final byte[] input;
final int inputPtr;
if (in.hasArray()) {
input = in.array();
inputPtr = in.arrayOffset() + idx;
} else {
input = new byte[length];
in.getBytes(idx, input);
inputPtr = 0;
}
checksum.reset();
checksum.update(input, inputPtr, length);
checksum.update(in, idx, length);
out.setInt(outputIdx + CHECKSUM_OFFSET, (int) checksum.getValue());
System.arraycopy(input, inputPtr, output, outputPtr, length);
} else {
in.getBytes(idx, output, outputPtr, length);
}
if (!out.hasArray()) {
out.setBytes(outputOffset + 2, output);
}
out.setBytes(outputPtr, in, idx, length);
chunkLength = length;
} else {
// try to compress
final byte[] input;
final int inputPtr;
if (in.hasArray()) {
input = in.array();
inputPtr = in.arrayOffset() + idx;
} else {
input = new byte[length];
in.getBytes(idx, input);
inputPtr = 0;
}
if (checksum != null) {
checksum.reset();
checksum.update(input, inputPtr, length);
checksum.update(in, idx, length);
out.setInt(outputIdx + CHECKSUM_OFFSET, (int) checksum.getValue());
}
final int maxOutputLength = calculateOutputBufferLength(length);
out.ensureWritable(outputOffset + 4 + maxOutputLength);
final byte[] output;
final int outputPtr;
if (out.hasArray()) {
output = out.array();
outputPtr = out.arrayOffset() + outputOffset + 4;
} else {
output = new byte[maxOutputLength];
outputPtr = 0;
}
final int compressedLength = compress(input, inputPtr, length, output, outputPtr, level);
if (!out.hasArray()) {
out.setBytes(outputOffset + 4, output, 0, compressedLength);
}
final int outputPtr = outputOffset + 4;
final int compressedLength = compress(in, in.readerIndex(), length, out, outputPtr, level);
if (compressedLength < length) {
blockType = BLOCK_TYPE_COMPRESSED;
chunkLength = compressedLength;
@ -191,13 +144,7 @@ public class FastLzFrameEncoder extends MessageToByteEncoder<ByteBuf> {
outputOffset += 2;
} else {
blockType = BLOCK_TYPE_NON_COMPRESSED;
if (out.hasArray()) {
System.arraycopy(input, inputPtr, output, outputPtr - 2, length);
} else {
for (int i = 0; i < length; i++) {
out.setByte(outputOffset + 2 + i, input[inputPtr + i]);
}
}
out.setBytes(outputOffset + 2, in, idx, length);
chunkLength = length;
}
}