From daa58f74e704c78a6624c9c2e15920043c3d998d Mon Sep 17 00:00:00 2001 From: Idel Pivnitskiy Date: Tue, 17 Jun 2014 03:46:34 +0400 Subject: [PATCH] Implement a Bzip2Decoder Motivation: Bzip2Decoder makes it possible to receive data compressed in the bzip2 format. Modifications: Added classes: - Bzip2Decoder - Bzip2Constants - Bzip2BlockDecompressor - Bzip2HuffmanStageDecoder - Bzip2MoveToFrontTable - Bzip2Rand - Crc32 - Bzip2DecoderTest Result: Implemented and tested a new decoder which can uncompress incoming data in the bzip2 format. --- NOTICE.txt | 10 +- codec/pom.xml | 9 +- .../compression/Bzip2BlockDecompressor.java | 320 +++++++++++++++++ .../codec/compression/Bzip2Constants.java | 92 +++++ .../codec/compression/Bzip2Decoder.java | 338 ++++++++++++++++++ .../compression/Bzip2HuffmanStageDecoder.java | 200 +++++++++++ .../compression/Bzip2MoveToFrontTable.java | 84 +++++ .../handler/codec/compression/Bzip2Rand.java | 74 ++++ .../handler/codec/compression/Crc32.java | 123 +++++++ .../codec/compression/Bzip2DecoderTest.java | 236 ++++++++++++ license/LICENSE.jbzip2.txt | 19 + pom.xml | 20 +- 12 files changed, 1516 insertions(+), 9 deletions(-) create mode 100644 codec/src/main/java/io/netty/handler/codec/compression/Bzip2BlockDecompressor.java create mode 100644 codec/src/main/java/io/netty/handler/codec/compression/Bzip2Constants.java create mode 100644 codec/src/main/java/io/netty/handler/codec/compression/Bzip2Decoder.java create mode 100644 codec/src/main/java/io/netty/handler/codec/compression/Bzip2HuffmanStageDecoder.java create mode 100644 codec/src/main/java/io/netty/handler/codec/compression/Bzip2MoveToFrontTable.java create mode 100644 codec/src/main/java/io/netty/handler/codec/compression/Bzip2Rand.java create mode 100644 codec/src/main/java/io/netty/handler/codec/compression/Crc32.java create mode 100644 codec/src/test/java/io/netty/handler/codec/compression/Bzip2DecoderTest.java create mode 100644 license/LICENSE.jbzip2.txt diff --git a/NOTICE.txt b/NOTICE.txt index 
6e6fb8b678..f30b1f531d 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -6,7 +6,7 @@ Please visit the Netty web site for more information: * http://netty.io/ -Copyright 2011 The Netty Project +Copyright 2014 The Netty Project The Netty Project licenses this file to you under the Apache License, version 2.0 (the "License"); you may not use this file except in compliance @@ -81,6 +81,14 @@ It can be obtained at: * HOMEPAGE: * https://github.com/akka/akka/blob/wip-2.2.3-for-scala-2.11/akka-actor/src/main/java/akka/dispatch/AbstractNodeQueue.java +This product contains a modified portion of 'jbzip2', a Java bzip2 compression +and decompression library written by Matthew J. Francis. It can be obtained at: + + * LICENSE: + * license/LICENSE.jbzip2.txt (MIT License) + * HOMEPAGE: + * https://code.google.com/p/jbzip2/ + This product optionally depends on 'JZlib', a re-implementation of zlib in pure Java, which can be obtained at: diff --git a/codec/pom.xml b/codec/pom.xml index 6b0003dd4e..95109f48f4 100644 --- a/codec/pom.xml +++ b/codec/pom.xml @@ -49,7 +49,7 @@ jzlib true - + org.jboss.marshalling @@ -61,6 +61,13 @@ jboss-marshalling-river test + + + + org.apache.commons + commons-compress + test + diff --git a/codec/src/main/java/io/netty/handler/codec/compression/Bzip2BlockDecompressor.java b/codec/src/main/java/io/netty/handler/codec/compression/Bzip2BlockDecompressor.java new file mode 100644 index 0000000000..a10133b255 --- /dev/null +++ b/codec/src/main/java/io/netty/handler/codec/compression/Bzip2BlockDecompressor.java @@ -0,0 +1,320 @@ +/* + * Copyright 2014 The Netty Project + * + * The Netty Project licenses this file to you under the Apache License, + * version 2.0 (the "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package io.netty.handler.codec.compression; + +import io.netty.buffer.ByteBuf; + +import static io.netty.handler.codec.compression.Bzip2Constants.*; + +final class Bzip2BlockDecompressor { + /** + * Calculates the block CRC from the fully decoded bytes of the block. + */ + private final Crc32 crc = new Crc32(); + + /** + * The CRC of the current block as read from the block header. + */ + private final int blockCRC; + + /** + * {@code true} if the current block is randomised, otherwise {@code false}. + */ + private final boolean blockRandomised; + + /* Huffman Decoding stage */ + /** + * The end-of-block Huffman symbol. Decoding of the block ends when this is encountered. + */ + int huffmanEndOfBlockSymbol; + + /** + * Bitmap, of ranges of 16 bytes, present/not present. + */ + int huffmanInUse16; + + /** + * A map from Huffman symbol index to output character. Some types of data (e.g. ASCII text) + * may contain only a limited number of byte values; Huffman symbols are only allocated to + * those values that actually occur in the uncompressed data. + */ + final byte[] huffmanSymbolMap = new byte[256]; + + /* Move To Front stage */ + /** + * Counts of each byte value within the {@link Bzip2BlockDecompressor#huffmanSymbolMap} data. + * Collected at the Move To Front stage, consumed by the Inverse Burrows Wheeler Transform stage. + */ + private final int[] bwtByteCounts = new int[256]; + + /** + * The Burrows-Wheeler Transform processed data. Read at the Move To Front stage, consumed by the + * Inverse Burrows Wheeler Transform stage. 
+ */ + private final byte[] bwtBlock; + + /** + * Starting pointer into BWT for after untransform. + */ + private final int bwtStartPointer; + + /* Inverse Burrows-Wheeler Transform stage */ + /** + * At each position contains the union of :- + * An output character (8 bits) + * A pointer from each position to its successor (24 bits, left shifted 8 bits) + * As the pointer cannot exceed the maximum block size of 900k, 24 bits is more than enough to + * hold it; Folding the character data into the spare bits while performing the inverse BWT, + * when both pieces of information are available, saves a large number of memory accesses in + * the final decoding stages. + */ + private int[] bwtMergedPointers; + + /** + * The current merged pointer into the Burrows-Wheeler Transform array. + */ + private int bwtCurrentMergedPointer; + + /** + * The actual length in bytes of the current block at the Inverse Burrows Wheeler Transform + * stage (before final Run-Length Decoding). + */ + private int bwtBlockLength; + + /** + * The number of output bytes that have been decoded up to the Inverse Burrows Wheeler Transform stage. + */ + private int bwtBytesDecoded; + + /* Run-Length Encoding and Random Perturbation stage */ + /** + * The most recently RLE decoded byte. + */ + private int rleLastDecodedByte = -1; + + /** + * The number of previous identical output bytes decoded. After 4 identical bytes, the next byte + * decoded is an RLE repeat count. + */ + private int rleAccumulator; + + /** + * The RLE repeat count of the current decoded byte. When this reaches zero, a new byte is decoded. + */ + private int rleRepeat; + + /** + * If the current block is randomised, the position within the RNUMS randomisation array. + */ + private int randomIndex; + + /** + * If the current block is randomised, the remaining count at the current RNUMS position. + */ + private int randomCount = Bzip2Rand.rNums(0) - 1; + + /** + * Table for Move To Front transformations. 
+ */ + final Bzip2MoveToFrontTable symbolMTF = new Bzip2MoveToFrontTable(); + + int repeatCount; + int repeatIncrement = 1; + int mtfValue; + + Bzip2BlockDecompressor(int blockSize, int blockCRC, boolean blockRandomised, int bwtStartPointer) { + bwtBlock = new byte[blockSize]; + + this.blockCRC = blockCRC; + this.blockRandomised = blockRandomised; + this.bwtStartPointer = bwtStartPointer; + } + + /** + * Reads Huffman encoded data, undoing Run-Length and Move To Front coding to rebuild the BWT array. + * Returns {@code false} (state saved) if fewer than 3 bytes are readable, {@code true} at end of block. + */ + boolean decodeHuffmanData(final Bzip2HuffmanStageDecoder huffmanDecoder, ByteBuf in) { + final byte[] bwtBlock = this.bwtBlock; + final byte[] huffmanSymbolMap = this.huffmanSymbolMap; + final int streamBlockSize = this.bwtBlock.length; + final int huffmanEndOfBlockSymbol = this.huffmanEndOfBlockSymbol; + final int[] bwtByteCounts = this.bwtByteCounts; + final Bzip2MoveToFrontTable symbolMTF = this.symbolMTF; + + int bwtBlockLength = this.bwtBlockLength; + int repeatCount = this.repeatCount; + int repeatIncrement = this.repeatIncrement; + int mtfValue = this.mtfValue; + + for (;;) { + if (in.readableBytes() < 3) { // 3 = (HUFFMAN_DECODE_MAX_CODE_LENGTH + 1) bits / 8 + this.bwtBlockLength = bwtBlockLength; + this.repeatCount = repeatCount; + this.repeatIncrement = repeatIncrement; + this.mtfValue = mtfValue; + return false; + } + final int nextSymbol = huffmanDecoder.nextSymbol(in); + + if (nextSymbol == HUFFMAN_SYMBOL_RUNA) { + repeatCount += repeatIncrement; + repeatIncrement <<= 1; + } else if (nextSymbol == HUFFMAN_SYMBOL_RUNB) { + repeatCount += repeatIncrement << 1; + repeatIncrement <<= 1; + } else { + if (repeatCount > 0) { + if (bwtBlockLength + repeatCount > streamBlockSize) { + throw new DecompressionException("block exceeds declared block size"); + } + final byte nextByte = huffmanSymbolMap[mtfValue]; + bwtByteCounts[nextByte & 0xff] += repeatCount; + while (--repeatCount 
>= 0) { + bwtBlock[bwtBlockLength++] = nextByte; + } + + repeatCount = 0; + repeatIncrement = 1; + } + + if (nextSymbol == huffmanEndOfBlockSymbol) { + break; + } + + if (bwtBlockLength >= streamBlockSize) { + throw new DecompressionException("block exceeds declared block size"); + } + + mtfValue = symbolMTF.indexToFront(nextSymbol - 1) & 0xff; + + final byte nextByte = huffmanSymbolMap[mtfValue]; + bwtByteCounts[nextByte & 0xff]++; + bwtBlock[bwtBlockLength++] = nextByte; + } + } + this.bwtBlockLength = bwtBlockLength; + initialiseInverseBWT(); + return true; + } + + /** + * Set up the Inverse Burrows-Wheeler Transform merged pointer array. + */ + private void initialiseInverseBWT() { + final int bwtStartPointer = this.bwtStartPointer; + final byte[] bwtBlock = this.bwtBlock; + final int[] bwtMergedPointers = new int[bwtBlockLength]; + final int[] characterBase = new int[256]; + + if (bwtStartPointer < 0 || bwtStartPointer >= bwtBlockLength) { + throw new DecompressionException("start pointer invalid"); + } + + // Cumulative character counts + System.arraycopy(bwtByteCounts, 0, characterBase, 1, 255); + for (int i = 2; i <= 255; i++) { + characterBase[i] += characterBase[i - 1]; + } + + // Merged-Array Inverse Burrows-Wheeler Transform + // Combining the output characters and forward pointers into a single array here, where we + // have already read both of the corresponding values, cuts down on memory accesses in the + // final walk through the array + for (int i = 0; i < bwtBlockLength; i++) { + int value = bwtBlock[i] & 0xff; + bwtMergedPointers[characterBase[value]++] = (i << 8) + value; + } + + this.bwtMergedPointers = bwtMergedPointers; + bwtCurrentMergedPointer = bwtMergedPointers[bwtStartPointer]; + } + + /** + * Decodes a byte from the final Run-Length Encoding stage, pulling a new byte from the + * Burrows-Wheeler Transform stage when required. 
+ * @return The decoded byte, or -1 if there are no more bytes + */ + public int read() { + while (rleRepeat < 1) { + if (bwtBytesDecoded == bwtBlockLength) { + return -1; + } + + int nextByte = decodeNextBWTByte(); + if (nextByte != rleLastDecodedByte) { + // New byte, restart accumulation + rleLastDecodedByte = nextByte; + rleRepeat = 1; + rleAccumulator = 1; + crc.updateCRC(nextByte); + } else { + if (++rleAccumulator == 4) { + // Accumulation complete, start repetition + int rleRepeat = decodeNextBWTByte() + 1; + this.rleRepeat = rleRepeat; + rleAccumulator = 0; + crc.updateCRC(nextByte, rleRepeat); + } else { + rleRepeat = 1; + crc.updateCRC(nextByte); + } + } + } + rleRepeat--; + + return rleLastDecodedByte; + } + + /** + * Decodes a byte from the Burrows-Wheeler Transform stage. If the block has randomisation + * applied, reverses the randomisation. + * @return The decoded byte + */ + private int decodeNextBWTByte() { + int mergedPointer = bwtCurrentMergedPointer; + int nextDecodedByte = mergedPointer & 0xff; + bwtCurrentMergedPointer = bwtMergedPointers[mergedPointer >>> 8]; + + if (blockRandomised) { + if (--randomCount == 0) { + nextDecodedByte ^= 1; + randomIndex = (randomIndex + 1) % 512; + randomCount = Bzip2Rand.rNums(randomIndex); + } + } + bwtBytesDecoded++; + + return nextDecodedByte; + } + + public int blockLength() { + return bwtBlockLength; + } + + /** + * Verify and return the block CRC. This method may only be called + * after all of the block's bytes have been read. 
+ * @return The block CRC + */ + int checkCRC() { + final int computedBlockCRC = crc.getCRC(); + if (blockCRC != computedBlockCRC) { + throw new DecompressionException("block CRC error"); + } + return computedBlockCRC; + } +} diff --git a/codec/src/main/java/io/netty/handler/codec/compression/Bzip2Constants.java b/codec/src/main/java/io/netty/handler/codec/compression/Bzip2Constants.java new file mode 100644 index 0000000000..cb1276d866 --- /dev/null +++ b/codec/src/main/java/io/netty/handler/codec/compression/Bzip2Constants.java @@ -0,0 +1,92 @@ +/* + * Copyright 2014 The Netty Project + * + * The Netty Project licenses this file to you under the Apache License, + * version 2.0 (the "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package io.netty.handler.codec.compression; + +/** + * Constants for {@link Bzip2Decoder}. + */ +final class Bzip2Constants { + + /** + * Magic number of Bzip2 stream. + */ + static final int MAGIC_NUMBER = 'B' << 16 | 'Z' << 8 | 'h'; + + /** + * Block header magic number. Equal to BCD (pi). + */ + static final long COMPRESSED_MAGIC = 0x314159265359L; + + /** + * End of stream magic number. Equal to BCD sqrt(pi). + */ + static final long END_OF_STREAM_MAGIC = 0x177245385090L; + + /** + * Base block size. + */ + static final int BASE_BLOCK_SIZE = 100000; + + /** + * Minimum and maximum size of one block. + * Must be multiplied by {@link Bzip2Constants#BASE_BLOCK_SIZE}. 
+ */ + static final int MIN_BLOCK_SIZE = 1; + static final int MAX_BLOCK_SIZE = 9; + + /** + * Maximum possible Huffman alphabet size. + */ + static final int HUFFMAN_MAX_ALPHABET_SIZE = 258; + + /** + * The longest Huffman code length created by the encoder. + */ + static final int HUFFMAN_ENCODE_MAX_CODE_LENGTH = 20; + + /** + * The longest Huffman code length accepted by the decoder. + */ + static final int HUFFMAN_DECODE_MAX_CODE_LENGTH = 23; + + /** + * Huffman symbols used for run-length encoding. + */ + static final int HUFFMAN_SYMBOL_RUNA = 0; + static final int HUFFMAN_SYMBOL_RUNB = 1; + + /** + * Number of symbols decoded after which a new Huffman table is selected. + */ + static final int HUFFMAN_GROUP_RUN_LENGTH = 50; + + /** + * Maximum possible number of Huffman table selectors. + */ + static final int MAX_SELECTORS = 2 + 900000 / HUFFMAN_GROUP_RUN_LENGTH; // 18002 + + /** + * Minimum number of alternative Huffman tables. + */ + static final int HUFFMAN_MINIMUM_TABLES = 2; + + /** + * Maximum number of alternative Huffman tables. + */ + static final int HUFFMAN_MAXIMUM_TABLES = 6; + + private Bzip2Constants() { } +} diff --git a/codec/src/main/java/io/netty/handler/codec/compression/Bzip2Decoder.java b/codec/src/main/java/io/netty/handler/codec/compression/Bzip2Decoder.java new file mode 100644 index 0000000000..f845e74495 --- /dev/null +++ b/codec/src/main/java/io/netty/handler/codec/compression/Bzip2Decoder.java @@ -0,0 +1,338 @@ +/* + * Copyright 2014 The Netty Project + * + * The Netty Project licenses this file to you under the Apache License, + * version 2.0 (the "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package io.netty.handler.codec.compression; + +import io.netty.buffer.ByteBuf; +import io.netty.channel.ChannelHandlerContext; +import io.netty.handler.codec.ByteToMessageDecoder; + +import java.util.List; + +import static io.netty.handler.codec.compression.Bzip2Constants.*; + +/** + * Uncompresses a {@link ByteBuf} encoded with the Bzip2 format. + * + * See Bzip2. + */ +public class Bzip2Decoder extends ByteToMessageDecoder { + /** + * Current state of stream. + */ + enum State { + INIT, + INIT_BLOCK, + INIT_BLOCK_PARAMS, + RECEIVE_HUFFMAN_USED_MAP, + RECEIVE_HUFFMAN_USED_BITMAPS, + RECEIVE_SELECTORS_NUMBER, + RECEIVE_SELECTORS, + RECEIVE_HUFFMAN_LENGTH, + DECODE_HUFFMAN_DATA, + END_BLOCK, + EOF + } + private State currentState = State.INIT; + + /** + * The decompressor for the current block. + */ + private Bzip2BlockDecompressor blockDecompressor; + + /** + * BZip2 Huffman coding stage. + */ + private Bzip2HuffmanStageDecoder huffmanStageDecoder; + + /** + * The maximum block size in bytes: the stream header digit (1 .. 9) multiplied by 100000. + */ + private int blockSize; + + /** + * The CRC of the current block as read from the block header. + */ + private int blockCRC; + + /** + * The merged CRC of all blocks decompressed so far. + */ + private int streamCRC; + + // For bitwise access + /** + * A buffer of bits read from the input stream that have not yet been returned. + */ + private int bitBuffer; + + /** + * The number of bits currently buffered in {@link #bitBuffer}. 
+ */ + private int bitCount; + + @Override + protected void decode(ChannelHandlerContext ctx, ByteBuf in, List out) throws Exception { + if (!in.isReadable()) { + return; + } + + for (;;) { + switch (currentState) { + case INIT: + if (in.readableBytes() < 4) { + return; + } + int magicNumber = in.readUnsignedMedium(); + if (magicNumber != MAGIC_NUMBER) { + throw new DecompressionException("Unexpected stream identifier contents. Mismatched bzip2 " + + "protocol version?"); + } + int blockSize = in.readByte() - '0'; + if (blockSize < MIN_BLOCK_SIZE || blockSize > MAX_BLOCK_SIZE) { + throw new DecompressionException("block size is invalid"); + } + this.blockSize = blockSize * BASE_BLOCK_SIZE; + + streamCRC = 0; + currentState = State.INIT_BLOCK; + case INIT_BLOCK: + if (in.readableBytes() < 10) { + return; + } + // Get the block magic bytes. + final long magic = (long) readBits(in, 24) << 24 | readBits(in, 24); + if (magic == END_OF_STREAM_MAGIC) { + // End of stream was reached. Check the combined CRC. 
+ final int storedCombinedCRC = readInt(in); + if (storedCombinedCRC != streamCRC) { + throw new DecompressionException("stream CRC error"); + } + currentState = State.EOF; + break; + } + if (magic != COMPRESSED_MAGIC) { + throw new DecompressionException("bad block header"); + } + blockCRC = readInt(in); + currentState = State.INIT_BLOCK_PARAMS; + case INIT_BLOCK_PARAMS: + if (in.readableBytes() < 4) { + return; + } + final boolean blockRandomised = readBoolean(in); + final int bwtStartPointer = readBits(in, 24); + + blockDecompressor = new Bzip2BlockDecompressor(this.blockSize, blockCRC, + blockRandomised, bwtStartPointer); + currentState = State.RECEIVE_HUFFMAN_USED_MAP; + case RECEIVE_HUFFMAN_USED_MAP: + if (in.readableBytes() < 2) { + return; + } + blockDecompressor.huffmanInUse16 = readBits(in, 16); + currentState = State.RECEIVE_HUFFMAN_USED_BITMAPS; + case RECEIVE_HUFFMAN_USED_BITMAPS: + Bzip2BlockDecompressor blockDecompressor = this.blockDecompressor; + final int inUse16 = blockDecompressor.huffmanInUse16; + final int bitNumber = Integer.bitCount(inUse16); + final byte[] huffmanSymbolMap = blockDecompressor.huffmanSymbolMap; + + if (in.readableBytes() < bitNumber * 16 / 8 + 1) { + return; + } + + int huffmanSymbolCount = 0; + if (bitNumber > 0) { + for (int i = 0; i < 16; i++) { + if ((inUse16 & ((1 << 15) >>> i)) != 0) { + for (int j = 0, k = i << 4; j < 16; j++, k++) { + if (readBoolean(in)) { + huffmanSymbolMap[huffmanSymbolCount++] = (byte) k; + } + } + } + } + } + blockDecompressor.huffmanEndOfBlockSymbol = huffmanSymbolCount + 1; + + int totalTables = readBits(in, 3); + if (totalTables < HUFFMAN_MINIMUM_TABLES || totalTables > HUFFMAN_MAXIMUM_TABLES) { + throw new DecompressionException("incorrect huffman groups number"); + } + int alphaSize = huffmanSymbolCount + 2; + if (alphaSize > HUFFMAN_MAX_ALPHABET_SIZE) { + throw new DecompressionException("incorrect alphabet size"); + } + huffmanStageDecoder = new Bzip2HuffmanStageDecoder(this, totalTables, 
alphaSize); + currentState = State.RECEIVE_SELECTORS_NUMBER; + case RECEIVE_SELECTORS_NUMBER: + if (in.readableBytes() < 2) { + return; + } + int totalSelectors = readBits(in, 15); + if (totalSelectors < 1 || totalSelectors > MAX_SELECTORS) { + throw new DecompressionException("incorrect selectors number"); + } + huffmanStageDecoder.selectors = new byte[totalSelectors]; + + currentState = State.RECEIVE_SELECTORS; + case RECEIVE_SELECTORS: + Bzip2HuffmanStageDecoder huffmanStageDecoder = this.huffmanStageDecoder; + byte[] selectors = huffmanStageDecoder.selectors; + totalSelectors = selectors.length; + final Bzip2MoveToFrontTable tableMtf = huffmanStageDecoder.tableMTF; + + int currSelector; + // Get zero-terminated bit runs (0..62) of MTF'ed Huffman table. length = 1..6 + for (currSelector = huffmanStageDecoder.currentSelector; + currSelector < totalSelectors; currSelector++) { + if (!in.isReadable()) { + // Save state if end of current ByteBuf was reached + huffmanStageDecoder.currentSelector = currSelector; + return; + } + int index = 0; + while (readBoolean(in)) { + index++; + } + selectors[currSelector] = tableMtf.indexToFront(index); + } + + currentState = State.RECEIVE_HUFFMAN_LENGTH; + case RECEIVE_HUFFMAN_LENGTH: + huffmanStageDecoder = this.huffmanStageDecoder; + totalTables = huffmanStageDecoder.totalTables; + final byte[][] codeLength = huffmanStageDecoder.tableCodeLengths; + alphaSize = huffmanStageDecoder.alphabetSize; + + /* Now the coding tables */ + int currGroup; + int currLength = huffmanStageDecoder.currentLength; + int currAlpha = 0; + boolean modifyLength = huffmanStageDecoder.modifyLength; + boolean saveStateAndReturn = false; + loop: for (currGroup = huffmanStageDecoder.currentGroup; currGroup < totalTables; currGroup++) { + // start_huffman_length + if (!in.isReadable()) { + saveStateAndReturn = true; + break; + } + if (currLength < 0) { + currLength = readBits(in, 5); + } + for (currAlpha = huffmanStageDecoder.currentAlpha; currAlpha < 
alphaSize; currAlpha++) { + // delta_bit_length: 1..40 + if (!hasBit(in)) { + saveStateAndReturn = true; + break loop; + } + while (modifyLength || readBoolean(in)) { // 0=>next symbol; 1=>alter length + if (!hasBit(in)) { + modifyLength = true; + saveStateAndReturn = true; + break loop; + } + currLength += readBoolean(in) ? -1 : 1; // 1=>decrement length; 0=>increment length + modifyLength = false; + if (!hasBit(in)) { + saveStateAndReturn = true; + break loop; + } + } + codeLength[currGroup][currAlpha] = (byte) currLength; + } + currLength = -1; + currAlpha = huffmanStageDecoder.currentAlpha = 0; + modifyLength = false; + } + if (saveStateAndReturn) { + // Save state if end of current ByteBuf was reached + huffmanStageDecoder.currentGroup = currGroup; + huffmanStageDecoder.currentLength = currLength; + huffmanStageDecoder.currentAlpha = currAlpha; + huffmanStageDecoder.modifyLength = modifyLength; + return; + } + + // Finally create the Huffman tables + huffmanStageDecoder.createHuffmanDecodingTables(); + currentState = State.DECODE_HUFFMAN_DATA; + case DECODE_HUFFMAN_DATA: + blockDecompressor = this.blockDecompressor; + final boolean decoded = blockDecompressor.decodeHuffmanData(this.huffmanStageDecoder, in); + if (!decoded) { + return; + } + int blockLength = blockDecompressor.blockLength(); + ByteBuf uncompressed = ctx.alloc().buffer(blockLength); + int uncByte; + while ((uncByte = blockDecompressor.read()) >= 0) { + uncompressed.writeByte(uncByte); + } + + int currentBlockCRC = blockDecompressor.checkCRC(); + streamCRC = (streamCRC << 1 | streamCRC >>> 31) ^ currentBlockCRC; + + out.add(uncompressed); + currentState = State.INIT_BLOCK; + break; + case EOF: + in.skipBytes(in.readableBytes()); + return; + default: + throw new IllegalStateException(); + } + } + } + + /** + * Returns {@code true} if and only if the end of the compressed stream + * has been reached. 
+ */ + public boolean isClosed() { + return currentState == State.EOF; + } + + int readBits(ByteBuf in, final int n) { + int bitCount = this.bitCount; + int bitBuffer = this.bitBuffer; + + if (bitCount < n) { + do { + int uByte = in.readUnsignedByte(); + bitBuffer = bitBuffer << 8 | uByte; + bitCount += 8; + } while (bitCount < n); + + this.bitBuffer = bitBuffer; + } + + this.bitCount = bitCount -= n; + return (bitBuffer >>> bitCount) & ((1 << n) - 1); + } + + private boolean readBoolean(ByteBuf in) { + return readBits(in, 1) != 0; + } + + private int readInt(ByteBuf in) { + return readBits(in, 16) << 16 | readBits(in, 16); + } + + private boolean hasBit(ByteBuf in) { + return bitCount > 0 || in.isReadable(); + } +} diff --git a/codec/src/main/java/io/netty/handler/codec/compression/Bzip2HuffmanStageDecoder.java b/codec/src/main/java/io/netty/handler/codec/compression/Bzip2HuffmanStageDecoder.java new file mode 100644 index 0000000000..a95d2171ce --- /dev/null +++ b/codec/src/main/java/io/netty/handler/codec/compression/Bzip2HuffmanStageDecoder.java @@ -0,0 +1,200 @@ +/* + * Copyright 2014 The Netty Project + * + * The Netty Project licenses this file to you under the Apache License, + * version 2.0 (the "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +package io.netty.handler.codec.compression; + +import io.netty.buffer.ByteBuf; + +import static io.netty.handler.codec.compression.Bzip2Constants.*; +/** + * A decoder for the BZip2 Huffman coding stage. + */ +final class Bzip2HuffmanStageDecoder { + + /** + * The Huffman table number to use for each group of 50 symbols. + */ + byte[] selectors; + + /** + * The minimum code length for each Huffman table. + */ + private final int[] minimumLengths; + + /** + * An array of values for each Huffman table that must be subtracted from the numerical value of + * a Huffman code of a given bit length to give its canonical code index. + */ + private final int[][] codeBases; + + /** + * An array of values for each Huffman table that gives the highest numerical value of a Huffman + * code of a given bit length. + */ + private final int[][] codeLimits; + + /** + * A mapping for each Huffman table from canonical code index to output symbol. + */ + private final int[][] codeSymbols; + + /** + * The Huffman table for the current group. + */ + private int currentTable; + + /** + * The index of the current group within the selectors array. + */ + private int groupIndex = -1; + + /** + * The index of the most recently requested symbol; never reset. A new group is selected every 50 symbols. + */ + private int groupPosition = -1; + + /** + * Total number of used Huffman tables in range 2..6. + */ + final int totalTables; + + /** + * The total number of codes (uniform for each table). + */ + final int alphabetSize; + + /** + * Table for Move To Front transformations. + */ + final Bzip2MoveToFrontTable tableMTF = new Bzip2MoveToFrontTable(); + + // For saving state if end of current ByteBuf was reached + int currentSelector; + + /** + * The Canonical Huffman code lengths for each table. 
+ */ + final byte[][] tableCodeLengths; + + // For saving state if end of current ByteBuf was reached + int currentGroup; + int currentLength = -1; + int currentAlpha; + boolean modifyLength; + + final Bzip2Decoder decoder; + + Bzip2HuffmanStageDecoder(final Bzip2Decoder decoder, final int totalTables, final int alphabetSize) { + this.decoder = decoder; + this.totalTables = totalTables; + this.alphabetSize = alphabetSize; + + minimumLengths = new int[totalTables]; + codeBases = new int[totalTables][HUFFMAN_DECODE_MAX_CODE_LENGTH + 2]; + codeLimits = new int[totalTables][HUFFMAN_DECODE_MAX_CODE_LENGTH + 1]; + codeSymbols = new int[totalTables][HUFFMAN_MAX_ALPHABET_SIZE]; + tableCodeLengths = new byte[totalTables][HUFFMAN_MAX_ALPHABET_SIZE]; + } + + /** + * Constructs Huffman decoding tables from lists of Canonical Huffman code lengths. + */ + void createHuffmanDecodingTables() { + final int alphabetSize = this.alphabetSize; + + for (int table = 0; table < tableCodeLengths.length; table++) { + final int[] tableBases = codeBases[table]; + final int[] tableLimits = codeLimits[table]; + final int[] tableSymbols = codeSymbols[table]; + final byte[] codeLengths = tableCodeLengths[table]; + + int minimumLength = HUFFMAN_DECODE_MAX_CODE_LENGTH; + int maximumLength = 0; + + // Find the minimum and maximum code length for the table + for (int i = 0; i < alphabetSize; i++) { + final byte currLength = codeLengths[i]; + maximumLength = Math.max(currLength, maximumLength); + minimumLength = Math.min(currLength, minimumLength); + } + minimumLengths[table] = minimumLength; + + // Calculate the first output symbol for each code length + for (int i = 0; i < alphabetSize; i++) { + tableBases[codeLengths[i] + 1]++; + } + for (int i = 1, b = tableBases[0]; i < HUFFMAN_DECODE_MAX_CODE_LENGTH + 2; i++) { + b += tableBases[i]; + tableBases[i] = b; + } + + // Calculate the first and last Huffman code for each code length (codes at a given + // length are sequential in value) + for (int i = 
minimumLength, code = 0; i <= maximumLength; i++) { + int base = code; + code += tableBases[i + 1] - tableBases[i]; + tableBases[i] = base - tableBases[i]; + tableLimits[i] = code - 1; + code <<= 1; + } + + // Populate the mapping from canonical code index to output symbol + for (int bitLength = minimumLength, codeIndex = 0; bitLength <= maximumLength; bitLength++) { + for (int symbol = 0; symbol < alphabetSize; symbol++) { + if (codeLengths[symbol] == bitLength) { + tableSymbols[codeIndex++] = symbol; + } + } + } + } + + currentTable = selectors[0]; + } + + /** + * Decodes and returns the next symbol. + * @return The decoded symbol + */ + int nextSymbol(ByteBuf in) { + // Move to next group selector if required + if (++groupPosition % HUFFMAN_GROUP_RUN_LENGTH == 0) { + groupIndex++; + if (groupIndex == selectors.length) { + throw new DecompressionException("error decoding block"); + } + currentTable = selectors[groupIndex] & 0xff; + } + + final Bzip2Decoder decoder = this.decoder; + final int currentTable = this.currentTable; + final int[] tableLimits = codeLimits[currentTable]; + final int[] tableBases = codeBases[currentTable]; + final int[] tableSymbols = codeSymbols[currentTable]; + int codeLength = minimumLengths[currentTable]; + + // Starting with the minimum bit length for the table, read additional bits one at a time + // until a complete code is recognised + int codeBits = decoder.readBits(in, codeLength); + for (; codeLength <= HUFFMAN_DECODE_MAX_CODE_LENGTH; codeLength++) { + if (codeBits <= tableLimits[codeLength]) { + // Convert the code to a symbol index and return + return tableSymbols[codeBits - tableBases[codeLength]]; + } + codeBits = codeBits << 1 | decoder.readBits(in, 1); + } + + throw new DecompressionException("a valid code was not recognised"); + } +} diff --git a/codec/src/main/java/io/netty/handler/codec/compression/Bzip2MoveToFrontTable.java b/codec/src/main/java/io/netty/handler/codec/compression/Bzip2MoveToFrontTable.java new file mode 
/**
 * A Move To Front transform over the 256 possible byte values.
 */
class Bzip2MoveToFrontTable {
    /**
     * The Move To Front list. It starts out as the identity permutation (entry {@code i}
     * holds the byte value {@code i}) and is reordered by every call below.
     */
    private final byte[] mtf = identityList();

    /**
     * Builds the initial list {@code 0, 1, 2, ..., 255} (stored as bytes).
     */
    private static byte[] identityList() {
        final byte[] list = new byte[256];
        for (int i = 0; i < list.length; i++) {
            list[i] = (byte) i;
        }
        return list;
    }

    /**
     * Forward Move To Front transform: locates a value and moves it to the head of the list.
     * @param value The value to move
     * @return The position the value moved from
     */
    int valueToFront(final byte value) {
        // Scan for the current position of the value.
        int position = 0;
        while (mtf[position] != value) {
            position++;
        }

        // Slide everything in front of it one slot towards the tail, then re-insert at the head.
        for (int i = position; i > 0; i--) {
            mtf[i] = mtf[i - 1];
        }
        mtf[0] = value;

        return position;
    }

    /**
     * Inverse Move To Front transform: fetches the value stored at an index and moves it
     * to the head of the list.
     * @param index The index to move
     * @return The value at the given index
     */
    byte indexToFront(final int index) {
        final byte moved = mtf[index];
        for (int i = index; i > 0; i--) {
            mtf[i] = mtf[i - 1];
        }
        mtf[0] = moved;

        return moved;
    }
}

/**
 * Holder of the bzip2 block randomisation table.
 */
final class Bzip2Rand {
    /**
     * The BZip2 specification originally included the optional addition of a slight pseudo-random
     * perturbation to the input data, in order to work around the block sorting algorithm's non-
     * optimal performance on some types of input. The current mainline bzip2 does not require this
     * and will not create randomised blocks, but compatibility is still required for old data (and
     * third party compressors that haven't caught up). When decompressing a randomised block, for
     * each value N in this array, a 1 will be XOR'd onto the output of the Burrows-Wheeler
     * transform stage after N bytes, then the next N taken from the following entry.
     */
    private static final int[] RNUMS = {
        619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 985, 724, 205, 454, 863, 491,
        741, 242, 949, 214, 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, 419, 436,
        278, 496, 867, 210, 399, 680, 480, 51, 878, 465, 811, 169, 869, 675, 611, 697,
        867, 561, 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, 150, 238, 59, 379,
        684, 877, 625, 169, 643, 105, 170, 607, 520, 932, 727, 476, 693, 425, 174, 647,
        73, 122, 335, 530, 442, 853, 695, 249, 445, 515, 909, 545, 703, 919, 874, 474,
        882, 500, 594, 612, 641, 801, 220, 162, 819, 984, 589, 513, 495, 799, 161, 604,
        958, 533, 221, 400, 386, 867, 600, 782, 382, 596, 414, 171, 516, 375, 682, 485,
        911, 276, 98, 553, 163, 354, 666, 933, 424, 341, 533, 870, 227, 730, 475, 186,
        263, 647, 537, 686, 600, 224, 469, 68, 770, 919, 190, 373, 294, 822, 808, 206,
        184, 943, 795, 384, 383, 461, 404, 758, 839, 887, 715, 67, 618, 276, 204, 918,
        873, 777, 604, 560, 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, 652, 934,
        970, 447, 318, 353, 859, 672, 112, 785, 645, 863, 803, 350, 139, 93, 354, 99,
        820, 908, 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, 653, 282, 762, 623,
        680, 81, 927, 626, 789, 125, 411, 521, 938, 300, 821, 78, 343, 175, 128, 250,
        170, 774, 972, 275, 999, 639, 495, 78, 352, 126, 857, 956, 358, 619, 580, 124,
        737, 594, 701, 612, 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, 944, 375,
        748, 52, 600, 747, 642, 182, 862, 81, 344, 805, 988, 739, 511, 655, 814, 334,
        249, 515, 897, 955, 664, 981, 649, 113, 974, 459, 893, 228, 433, 837, 553, 268,
        926, 240, 102, 654, 459, 51, 686, 754, 806, 760, 493, 403, 415, 394, 687, 700,
        946, 670, 656, 610, 738, 392, 760, 799, 887, 653, 978, 321, 576, 617, 626, 502,
        894, 679, 243, 440, 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, 707, 151,
        457, 449, 797, 195, 791, 558, 945, 679, 297, 59, 87, 824, 713, 663, 412, 693,
        342, 606, 134, 108, 571, 364, 631, 212, 174, 643, 304, 329, 343, 97, 430, 751,
        497, 314, 983, 374, 822, 928, 140, 206, 73, 263, 980, 736, 876, 478, 430, 305,
        170, 514, 364, 692, 829, 82, 855, 953, 676, 246, 369, 970, 294, 750, 807, 827,
        150, 790, 288, 923, 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, 896, 831,
        547, 261, 524, 462, 293, 465, 502, 56, 661, 821, 976, 991, 658, 869, 905, 758,
        745, 193, 768, 550, 608, 933, 378, 286, 215, 979, 792, 961, 61, 688, 793, 644,
        986, 403, 106, 366, 905, 644, 372, 567, 466, 434, 645, 210, 389, 550, 919, 135,
        780, 773, 635, 389, 707, 100, 626, 958, 165, 504, 920, 176, 193, 713, 857, 265,
        203, 50, 668, 108, 645, 990, 626, 197, 510, 357, 358, 850, 858, 364, 936, 638
    };

    // Static holder only; never instantiated.
    private Bzip2Rand() { }

    /**
     * Looks up one entry of the randomisation table.
     *
     * @param i the index
     * @return the random number
     */
    static int rNums(int i) {
        final int number = RNUMS[i];
        return number;
    }
}

/**
 * A CRC32 calculator (most-significant-bit-first, generator polynomial 0x04c11db7).
 */
final class Crc32 {
    /**
     * Generator polynomial the lookup table is derived from.
     */
    private static final int POLYNOMIAL = 0x04c11db7;

    /**
     * A static CRC lookup table: entry {@code i} is the remainder of the byte {@code i}
     * (placed in the top eight bits) after eight shift/XOR steps with {@link #POLYNOMIAL}.
     */
    private static final int[] CRC32_TABLE = buildTable();

    /**
     * The current CRC.
     */
    private int crc = 0xffffffff;

    /**
     * Computes the 256 table entries bit by bit.
     */
    private static int[] buildTable() {
        final int[] table = new int[256];
        for (int i = 0; i < table.length; i++) {
            int remainder = i << 24;
            for (int bit = 0; bit < 8; bit++) {
                // A set top bit means the polynomial divides at this position.
                if (remainder < 0) {
                    remainder = remainder << 1 ^ POLYNOMIAL;
                } else {
                    remainder <<= 1;
                }
            }
            table[i] = remainder;
        }
        return table;
    }

    /**
     * @return The current CRC.
     */
    public int getCRC() {
        return ~crc;
    }

    /**
     * Update the CRC with a single byte.
     * @param value The value to update the CRC with
     */
    public void updateCRC(final int value) {
        final int previous = crc;
        // Only the low eight bits of (top byte XOR value) select the table entry.
        final int tableIndex = (previous >> 24 ^ value) & 0xff;
        crc = previous << 8 ^ CRC32_TABLE[tableIndex];
    }

    /**
     * Update the CRC with a sequence of identical bytes.
     * @param value The value to update the CRC with
     * @param count The number of bytes
     */
    public void updateCRC(final int value, int count) {
        for (int remaining = count; remaining > 0; remaining--) {
            updateCRC(value);
        }
    }
}
+ */ +package io.netty.handler.codec.compression; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.embedded.EmbeddedChannel; +import io.netty.util.internal.ThreadLocalRandom; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import java.io.ByteArrayOutputStream; + +import static io.netty.handler.codec.compression.Bzip2Constants.*; +import static org.junit.Assert.*; + +public class Bzip2DecoderTest { + + private static final ThreadLocalRandom rand; + + private static final byte[] BYTES_SMALL = new byte[256]; + private static final byte[] BYTES_LARGE = new byte[MAX_BLOCK_SIZE * BASE_BLOCK_SIZE * 2]; + + static { + rand = ThreadLocalRandom.current(); + rand.nextBytes(BYTES_SMALL); + rand.nextBytes(BYTES_LARGE); + } + + @Rule + public ExpectedException expected = ExpectedException.none(); + + private EmbeddedChannel channel; + + @Before + public void initChannel() { + channel = new EmbeddedChannel(new Bzip2Decoder()); + } + + @Test + public void testUnexpectedStreamIdentifier() throws Exception { + expected.expect(DecompressionException.class); + expected.expectMessage("Unexpected stream identifier contents"); + + ByteBuf in = Unpooled.buffer(); + in.writeLong(1823080128301928729L); //random value + + channel.writeInbound(in); + } + + @Test + public void testInvalidBlockSize() throws Exception { + expected.expect(DecompressionException.class); + expected.expectMessage("block size is invalid"); + + ByteBuf in = Unpooled.buffer(); + in.writeMedium(MAGIC_NUMBER); + in.writeByte('0'); //incorrect block size + + channel.writeInbound(in); + } + + @Test + public void testBadBlockHeader() throws Exception { + expected.expect(DecompressionException.class); + expected.expectMessage("bad block header"); + + ByteBuf in = Unpooled.buffer(); + in.writeMedium(MAGIC_NUMBER); + 
in.writeByte('1'); //block size + in.writeInt(11111); //random value + in.writeShort(111); //random value + in.writeInt(111); //block CRC + + channel.writeInbound(in); + } + + @Test + public void testStreamCrcErrorOfEmptyBlock() throws Exception { + expected.expect(DecompressionException.class); + expected.expectMessage("stream CRC error"); + + ByteBuf in = Unpooled.buffer(); + in.writeMedium(MAGIC_NUMBER); + in.writeByte('1'); //block size + in.writeInt((int) (END_OF_STREAM_MAGIC >> 16)); + in.writeShort((int) END_OF_STREAM_MAGIC); + in.writeInt(1); //wrong storedCombinedCRC + + channel.writeInbound(in); + } + + @Test + public void testStreamCrcError() throws Exception { + expected.expect(DecompressionException.class); + expected.expectMessage("stream CRC error"); + + final byte[] data = { 0x42, 0x5A, 0x68, 0x37, 0x31, 0x41, 0x59, 0x26, 0x53, + 0x59, 0x77, 0x7B, (byte) 0xCA, (byte) 0xC0, 0x00, 0x00, + 0x00, 0x05, (byte) 0x80, 0x00, 0x01, 0x02, 0x00, 0x04, + 0x20, 0x20, 0x00, 0x30, (byte) 0xCD, 0x34, 0x19, (byte) 0xA6, + (byte) 0x89, (byte) 0x99, (byte) 0xC5, (byte) 0xDC, (byte) 0x91, + 0x4E, 0x14, 0x24, 0x1D, (byte) 0xDD, (byte) 0xF2, (byte) 0xB0, 0x00 }; + + ByteBuf in = Unpooled.wrappedBuffer(data); + channel.writeInbound(in); + } + + @Test + public void testIncorrectHuffmanGroupsNumber() throws Exception { + expected.expect(DecompressionException.class); + expected.expectMessage("incorrect huffman groups number"); + + final byte[] data = { 0x42, 0x5A, 0x68, 0x37, 0x31, 0x41, 0x59, 0x26, 0x53, + 0x59, 0x77, 0x7B, (byte) 0xCA, (byte) 0xC0, 0x00, 0x00, + 0x00, 0x05, (byte) 0x80, 0x00, 0x01, 0x02, 0x00, 0x04, + 0x20, 0x70, 0x00, 0x30, (byte) 0xCD, 0x34, 0x19, (byte) 0xA6, + (byte) 0x89, (byte) 0x99, (byte) 0xC5, (byte) 0xDC, (byte) 0x91, + 0x4E, 0x14, 0x24, 0x1D, (byte) 0xDE, (byte) 0xF2, (byte) 0xB0, 0x00 }; + + ByteBuf in = Unpooled.wrappedBuffer(data); + channel.writeInbound(in); + } + + @Test + public void testIncorrectSelectorsNumber() throws Exception { + 
expected.expect(DecompressionException.class); + expected.expectMessage("incorrect selectors number"); + + final byte[] data = { 0x42, 0x5A, 0x68, 0x37, 0x31, 0x41, 0x59, 0x26, 0x53, + 0x59, 0x77, 0x7B, (byte) 0xCA, (byte) 0xC0, 0x00, 0x00, + 0x00, 0x05, (byte) 0x80, 0x00, 0x01, 0x02, 0x00, 0x04, + 0x20, 0x2F, (byte) 0xFF, 0x30, (byte) 0xCD, 0x34, 0x19, (byte) 0xA6, + (byte) 0x89, (byte) 0x99, (byte) 0xC5, (byte) 0xDC, (byte) 0x91, + 0x4E, 0x14, 0x24, 0x1D, (byte) 0xDE, (byte) 0xF2, (byte) 0xB0, 0x00 }; + + ByteBuf in = Unpooled.wrappedBuffer(data); + channel.writeInbound(in); + } + + @Test + public void testBlockCrcError() throws Exception { + expected.expect(DecompressionException.class); + expected.expectMessage("block CRC error"); + + final byte[] data = { 0x42, 0x5A, 0x68, 0x37, 0x31, 0x41, 0x59, 0x26, 0x53, + 0x59, 0x77, 0x77, (byte) 0xCA, (byte) 0xC0, 0x00, 0x00, + 0x00, 0x05, (byte) 0x80, 0x00, 0x01, 0x02, 0x00, 0x04, + 0x20, 0x20, 0x00, 0x30, (byte) 0xCD, 0x34, 0x19, (byte) 0xA6, + (byte) 0x89, (byte) 0x99, (byte) 0xC5, (byte) 0xDC, (byte) 0x91, + 0x4E, 0x14, 0x24, 0x1D, (byte) 0xDE, (byte) 0xF2, (byte) 0xB0, 0x00 }; + + ByteBuf in = Unpooled.wrappedBuffer(data); + channel.writeInbound(in); + } + + private static void testDecompression(final byte[] data) throws Exception { + for (int blockSize = MIN_BLOCK_SIZE; blockSize <= MAX_BLOCK_SIZE; blockSize++) { + final EmbeddedChannel channel = new EmbeddedChannel(new Bzip2Decoder()); + + ByteArrayOutputStream os = new ByteArrayOutputStream(); + BZip2CompressorOutputStream bZip2Os = new BZip2CompressorOutputStream(os, blockSize); + bZip2Os.write(data); + bZip2Os.close(); + + ByteBuf compressed = Unpooled.wrappedBuffer(os.toByteArray()); + channel.writeInbound(compressed); + + ByteBuf uncompressed = Unpooled.buffer(); + ByteBuf msg; + while ((msg = channel.readInbound()) != null) { + uncompressed.writeBytes(msg); + } + final byte[] result = new byte[uncompressed.readableBytes()]; + uncompressed.readBytes(result); 
+ + assertArrayEquals(data, result); + } + } + + @Test + public void testDecompressionOfSmallChunkOfData() throws Exception { + testDecompression(BYTES_SMALL); + } + + @Test + public void testDecompressionOfLargeChunkOfData() throws Exception { + testDecompression(BYTES_LARGE); + } + + @Test + public void testDecompressionOfBatchedFlowOfData() throws Exception { + ByteArrayOutputStream os = new ByteArrayOutputStream(); + BZip2CompressorOutputStream bZip2Os = new BZip2CompressorOutputStream(os, + rand.nextInt(MIN_BLOCK_SIZE, MAX_BLOCK_SIZE + 1)); + bZip2Os.write(BYTES_LARGE); + bZip2Os.close(); + + final byte[] compressedArray = os.toByteArray(); + int written = 0, length = rand.nextInt(100); + while (written + length < compressedArray.length) { + ByteBuf compressed = Unpooled.wrappedBuffer(compressedArray, written, length); + channel.writeInbound(compressed); + written += length; + length = rand.nextInt(100); + } + ByteBuf compressed = Unpooled.wrappedBuffer(compressedArray, written, compressedArray.length - written); + channel.writeInbound(compressed); + + ByteBuf uncompressed = Unpooled.buffer(); + ByteBuf msg; + while ((msg = channel.readInbound()) != null) { + uncompressed.writeBytes(msg); + } + final byte[] result = new byte[uncompressed.readableBytes()]; + uncompressed.readBytes(result); + + assertArrayEquals(BYTES_LARGE, result); + } +} diff --git a/license/LICENSE.jbzip2.txt b/license/LICENSE.jbzip2.txt new file mode 100644 index 0000000000..6d6f1d40c0 --- /dev/null +++ b/license/LICENSE.jbzip2.txt @@ -0,0 +1,19 @@ +Copyright (c) 2010-2011 Matthew J. 
Francis and Contributors of the jbzip2 Project + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/pom.xml b/pom.xml index c4b8ba813f..0008a73b7d 100644 --- a/pom.xml +++ b/pom.xml @@ -508,6 +508,12 @@ 2.6.0 test + + org.mockito + mockito-all + 1.9.5 + test + ch.qos.logback logback-classic @@ -537,13 +543,13 @@ test - - - org.mockito - mockito-all - 1.9.5 - test - + + + org.apache.commons + commons-compress + 1.8.1 + test +