Moved bit-level read operations from Bzip2Decoder to the new Bzip2BitReader

Motivation:

Collecting all bit-level read operations in one class is cleaner, and it makes them easy to reuse outside Bzip2Decoder, for example in Bzip2HuffmanStageDecoder.

Modifications:

Created a new class - Bzip2BitReader which provides bit-level reads.
Removed bit-level read operations from Bzip2Decoder.
Improved javadoc.

Result:

Bzip2BitReader allows the reading of single bit booleans, bit strings of arbitrary length (up to 24 bits), and bit aligned 32-bit integers.
This commit is contained in:
Idel Pivnitskiy 2014-07-06 23:23:21 +04:00 committed by Norman Maurer
parent 6b71089373
commit 9d08a34c34
5 changed files with 146 additions and 69 deletions

View File

@ -0,0 +1,86 @@
/*
* Copyright 2014 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.handler.codec.compression;
import io.netty.buffer.ByteBuf;
/**
 * A bit-level reader over a {@link ByteBuf}. It can return a single bit as a boolean,
 * a bit string of up to 24 bits, or a 32-bit integer that need not start on a byte
 * boundary. Input is pulled from the {@link ByteBuf} one byte at a time, and only when
 * the bits already buffered are not enough to satisfy a request.
 */
class Bzip2BitReader {
    /**
     * Accumulator for bits that were pulled from the input but not yet handed out.
     * New bytes are shifted in at the low end.
     */
    private int bitBuffer;

    /**
     * How many of the low-order bits of {@link #bitBuffer} are still unread.
     */
    private int bitCount;

    /**
     * Reads between 0 and 24 bits from the {@link ByteBuf}.
     *
     * @param in    the buffer from which additional bytes are taken when needed
     * @param count the number of bits to read; the limit of {@code 24} keeps the bits left
     *              over from earlier reads plus the newly read bytes within the
     *              {@code int}-sized {@link #bitBuffer}
     * @return the requested bits, right-aligned within the integer
     * @throws IllegalArgumentException if {@code count} is negative or greater than {@code 24}
     */
    int readBits(ByteBuf in, final int count) {
        if (count < 0 || count > 24) {
            throw new IllegalArgumentException("count: " + count + " (expected: 0-24)");
        }

        int available = bitCount;
        int accumulator = bitBuffer;

        // Top up the accumulator one byte at a time until the request can be served.
        // The fields are only written back once all needed bytes were read successfully.
        while (available < count) {
            final int nextByte = in.readUnsignedByte();
            accumulator = accumulator << 8 | nextByte;
            available += 8;
        }
        bitBuffer = accumulator;

        final int remaining = available - count;
        bitCount = remaining;

        // Drop the bits that stay buffered, then keep only the low 'count' bits.
        final int mask = (1 << count) - 1;
        return (accumulator >>> remaining) & mask;
    }

    /**
     * Reads one bit from the {@link ByteBuf} and interprets it as a boolean.
     *
     * @param in the buffer from which an additional byte is taken when needed
     * @return {@code true} if the bit was {@code 1}, {@code false} if it was {@code 0}
     */
    boolean readBoolean(ByteBuf in) {
        final int bit = readBits(in, 1);
        return bit != 0;
    }

    /**
     * Reads 32 bits as an integer, assembled from two consecutive 16-bit reads
     * (most significant half first).
     *
     * @param in the buffer from which additional bytes are taken when needed
     * @return the integer read
     */
    int readInt(ByteBuf in) {
        final int highHalf = readBits(in, 16);
        final int lowHalf = readBits(in, 16);
        return highHalf << 16 | lowHalf;
    }

    /**
     * Tells whether at least one more bit can be read, either from the bits already
     * buffered or from the {@link ByteBuf}.
     *
     * @param in the buffer that is consulted when no bits are currently buffered
     * @return {@code true} if one bit is available for reading, otherwise {@code false}
     */
    boolean hasBit(ByteBuf in) {
        if (bitCount > 0) {
            return true;
        }
        return in.isReadable();
    }
}

View File

@ -19,6 +19,19 @@ import io.netty.buffer.ByteBuf;
import static io.netty.handler.codec.compression.Bzip2Constants.*; import static io.netty.handler.codec.compression.Bzip2Constants.*;
/**
* Reads and decompresses a single Bzip2 block.<br><br>
*
* Block decoding consists of the following stages:<br>
* 1. Read block header<br>
* 2. Read Huffman tables<br>
* 3. Read and decode Huffman encoded data - {@link #decodeHuffmanData(Bzip2HuffmanStageDecoder, ByteBuf)}<br>
* 4. Run-Length Decoding[2] - {@link #decodeHuffmanData(Bzip2HuffmanStageDecoder, ByteBuf)}<br>
* 5. Inverse Move To Front Transform - {@link #decodeHuffmanData(Bzip2HuffmanStageDecoder, ByteBuf)}<br>
* 6. Inverse Burrows Wheeler Transform - {@link #initialiseInverseBWT()}<br>
* 7. Run-Length Decoding[1] - {@link #read()}<br>
* 8. Optional Block De-Randomisation - {@link #read()} (through {@link #decodeNextBWTByte()})
*/
final class Bzip2BlockDecompressor { final class Bzip2BlockDecompressor {
/** /**
* Calculates the block CRC from the fully decoded bytes of the block. * Calculates the block CRC from the fully decoded bytes of the block.

View File

@ -32,7 +32,7 @@ public class Bzip2Decoder extends ByteToMessageDecoder {
/** /**
* Current state of stream. * Current state of stream.
*/ */
enum State { private enum State {
INIT, INIT,
INIT_BLOCK, INIT_BLOCK,
INIT_BLOCK_PARAMS, INIT_BLOCK_PARAMS,
@ -47,13 +47,18 @@ public class Bzip2Decoder extends ByteToMessageDecoder {
} }
private State currentState = State.INIT; private State currentState = State.INIT;
/**
* A reader that provides bit-level reads.
*/
private final Bzip2BitReader reader = new Bzip2BitReader();
/** /**
* The decompressor for the current block. * The decompressor for the current block.
*/ */
private Bzip2BlockDecompressor blockDecompressor; private Bzip2BlockDecompressor blockDecompressor;
/** /**
* BZip2 Huffman coding stage. * Bzip2 Huffman coding stage.
*/ */
private Bzip2HuffmanStageDecoder huffmanStageDecoder; private Bzip2HuffmanStageDecoder huffmanStageDecoder;
@ -72,17 +77,6 @@ public class Bzip2Decoder extends ByteToMessageDecoder {
*/ */
private int streamCRC; private int streamCRC;
// For bitwise access
/**
* A buffer of bits read from the input stream that have not yet been returned.
*/
private int bitBuffer;
/**
* The number of bits currently buffered in {@link #bitBuffer}.
*/
private int bitCount;
@Override @Override
protected void decode(ChannelHandlerContext ctx, ByteBuf in, List<Object> out) throws Exception { protected void decode(ChannelHandlerContext ctx, ByteBuf in, List<Object> out) throws Exception {
if (!in.isReadable()) { if (!in.isReadable()) {
@ -112,11 +106,12 @@ public class Bzip2Decoder extends ByteToMessageDecoder {
if (in.readableBytes() < 10) { if (in.readableBytes() < 10) {
return; return;
} }
Bzip2BitReader reader = this.reader;
// Get the block magic bytes. // Get the block magic bytes.
final long magic = (long) readBits(in, 24) << 24 | readBits(in, 24); final long magic = (long) reader.readBits(in, 24) << 24 | reader.readBits(in, 24);
if (magic == END_OF_STREAM_MAGIC) { if (magic == END_OF_STREAM_MAGIC) {
// End of stream was reached. Check the combined CRC. // End of stream was reached. Check the combined CRC.
final int storedCombinedCRC = readInt(in); final int storedCombinedCRC = reader.readInt(in);
if (storedCombinedCRC != streamCRC) { if (storedCombinedCRC != streamCRC) {
throw new DecompressionException("stream CRC error"); throw new DecompressionException("stream CRC error");
} }
@ -126,14 +121,15 @@ public class Bzip2Decoder extends ByteToMessageDecoder {
if (magic != COMPRESSED_MAGIC) { if (magic != COMPRESSED_MAGIC) {
throw new DecompressionException("bad block header"); throw new DecompressionException("bad block header");
} }
blockCRC = readInt(in); blockCRC = reader.readInt(in);
currentState = State.INIT_BLOCK_PARAMS; currentState = State.INIT_BLOCK_PARAMS;
case INIT_BLOCK_PARAMS: case INIT_BLOCK_PARAMS:
if (in.readableBytes() < 4) { if (in.readableBytes() < 4) {
return; return;
} }
final boolean blockRandomised = readBoolean(in); reader = this.reader;
final int bwtStartPointer = readBits(in, 24); final boolean blockRandomised = reader.readBoolean(in);
final int bwtStartPointer = reader.readBits(in, 24);
blockDecompressor = new Bzip2BlockDecompressor(this.blockSize, blockCRC, blockDecompressor = new Bzip2BlockDecompressor(this.blockSize, blockCRC,
blockRandomised, bwtStartPointer); blockRandomised, bwtStartPointer);
@ -142,7 +138,8 @@ public class Bzip2Decoder extends ByteToMessageDecoder {
if (in.readableBytes() < 2) { if (in.readableBytes() < 2) {
return; return;
} }
blockDecompressor.huffmanInUse16 = readBits(in, 16); reader = this.reader;
blockDecompressor.huffmanInUse16 = reader.readBits(in, 16);
currentState = State.RECEIVE_HUFFMAN_USED_BITMAPS; currentState = State.RECEIVE_HUFFMAN_USED_BITMAPS;
case RECEIVE_HUFFMAN_USED_BITMAPS: case RECEIVE_HUFFMAN_USED_BITMAPS:
Bzip2BlockDecompressor blockDecompressor = this.blockDecompressor; Bzip2BlockDecompressor blockDecompressor = this.blockDecompressor;
@ -153,13 +150,14 @@ public class Bzip2Decoder extends ByteToMessageDecoder {
if (in.readableBytes() < bitNumber * 16 / 8 + 1) { if (in.readableBytes() < bitNumber * 16 / 8 + 1) {
return; return;
} }
reader = this.reader;
int huffmanSymbolCount = 0; int huffmanSymbolCount = 0;
if (bitNumber > 0) { if (bitNumber > 0) {
for (int i = 0; i < 16; i++) { for (int i = 0; i < 16; i++) {
if ((inUse16 & 1 << 15 >>> i) != 0) { if ((inUse16 & 1 << 15 >>> i) != 0) {
for (int j = 0, k = i << 4; j < 16; j++, k++) { for (int j = 0, k = i << 4; j < 16; j++, k++) {
if (readBoolean(in)) { if (reader.readBoolean(in)) {
huffmanSymbolMap[huffmanSymbolCount++] = (byte) k; huffmanSymbolMap[huffmanSymbolCount++] = (byte) k;
} }
} }
@ -168,7 +166,7 @@ public class Bzip2Decoder extends ByteToMessageDecoder {
} }
blockDecompressor.huffmanEndOfBlockSymbol = huffmanSymbolCount + 1; blockDecompressor.huffmanEndOfBlockSymbol = huffmanSymbolCount + 1;
int totalTables = readBits(in, 3); int totalTables = reader.readBits(in, 3);
if (totalTables < HUFFMAN_MINIMUM_TABLES || totalTables > HUFFMAN_MAXIMUM_TABLES) { if (totalTables < HUFFMAN_MINIMUM_TABLES || totalTables > HUFFMAN_MAXIMUM_TABLES) {
throw new DecompressionException("incorrect huffman groups number"); throw new DecompressionException("incorrect huffman groups number");
} }
@ -176,13 +174,14 @@ public class Bzip2Decoder extends ByteToMessageDecoder {
if (alphaSize > HUFFMAN_MAX_ALPHABET_SIZE) { if (alphaSize > HUFFMAN_MAX_ALPHABET_SIZE) {
throw new DecompressionException("incorrect alphabet size"); throw new DecompressionException("incorrect alphabet size");
} }
huffmanStageDecoder = new Bzip2HuffmanStageDecoder(this, totalTables, alphaSize); huffmanStageDecoder = new Bzip2HuffmanStageDecoder(reader, totalTables, alphaSize);
currentState = State.RECEIVE_SELECTORS_NUMBER; currentState = State.RECEIVE_SELECTORS_NUMBER;
case RECEIVE_SELECTORS_NUMBER: case RECEIVE_SELECTORS_NUMBER:
if (in.readableBytes() < 2) { if (in.readableBytes() < 2) {
return; return;
} }
int totalSelectors = readBits(in, 15); reader = this.reader;
int totalSelectors = reader.readBits(in, 15);
if (totalSelectors < 1 || totalSelectors > MAX_SELECTORS) { if (totalSelectors < 1 || totalSelectors > MAX_SELECTORS) {
throw new DecompressionException("incorrect selectors number"); throw new DecompressionException("incorrect selectors number");
} }
@ -194,6 +193,7 @@ public class Bzip2Decoder extends ByteToMessageDecoder {
byte[] selectors = huffmanStageDecoder.selectors; byte[] selectors = huffmanStageDecoder.selectors;
totalSelectors = selectors.length; totalSelectors = selectors.length;
final Bzip2MoveToFrontTable tableMtf = huffmanStageDecoder.tableMTF; final Bzip2MoveToFrontTable tableMtf = huffmanStageDecoder.tableMTF;
reader = this.reader;
int currSelector; int currSelector;
// Get zero-terminated bit runs (0..62) of MTF'ed Huffman table. length = 1..6 // Get zero-terminated bit runs (0..62) of MTF'ed Huffman table. length = 1..6
@ -205,7 +205,7 @@ public class Bzip2Decoder extends ByteToMessageDecoder {
return; return;
} }
int index = 0; int index = 0;
while (readBoolean(in)) { while (reader.readBoolean(in)) {
index++; index++;
} }
selectors[currSelector] = tableMtf.indexToFront(index); selectors[currSelector] = tableMtf.indexToFront(index);
@ -217,6 +217,7 @@ public class Bzip2Decoder extends ByteToMessageDecoder {
totalTables = huffmanStageDecoder.totalTables; totalTables = huffmanStageDecoder.totalTables;
final byte[][] codeLength = huffmanStageDecoder.tableCodeLengths; final byte[][] codeLength = huffmanStageDecoder.tableCodeLengths;
alphaSize = huffmanStageDecoder.alphabetSize; alphaSize = huffmanStageDecoder.alphabetSize;
reader = this.reader;
/* Now the coding tables */ /* Now the coding tables */
int currGroup; int currGroup;
@ -231,23 +232,24 @@ public class Bzip2Decoder extends ByteToMessageDecoder {
break; break;
} }
if (currLength < 0) { if (currLength < 0) {
currLength = readBits(in, 5); currLength = reader.readBits(in, 5);
} }
for (currAlpha = huffmanStageDecoder.currentAlpha; currAlpha < alphaSize; currAlpha++) { for (currAlpha = huffmanStageDecoder.currentAlpha; currAlpha < alphaSize; currAlpha++) {
// delta_bit_length: 1..40 // delta_bit_length: 1..40
if (!hasBit(in)) { if (!reader.hasBit(in)) {
saveStateAndReturn = true; saveStateAndReturn = true;
break loop; break loop;
} }
while (modifyLength || readBoolean(in)) { // 0=>next symbol; 1=>alter length while (modifyLength || reader.readBoolean(in)) { // 0=>next symbol; 1=>alter length
if (!hasBit(in)) { if (!reader.hasBit(in)) {
modifyLength = true; modifyLength = true;
saveStateAndReturn = true; saveStateAndReturn = true;
break loop; break loop;
} }
currLength += readBoolean(in) ? -1 : 1; // 1=>decrement length; 0=>increment length // 1=>decrement length; 0=>increment length
currLength += reader.readBoolean(in) ? -1 : 1;
modifyLength = false; modifyLength = false;
if (!hasBit(in)) { if (!reader.hasBit(in)) {
saveStateAndReturn = true; saveStateAndReturn = true;
break loop; break loop;
} }
@ -314,34 +316,4 @@ public class Bzip2Decoder extends ByteToMessageDecoder {
public boolean isClosed() { public boolean isClosed() {
return currentState == State.EOF; return currentState == State.EOF;
} }
int readBits(ByteBuf in, final int n) {
int bitCount = this.bitCount;
int bitBuffer = this.bitBuffer;
if (bitCount < n) {
do {
int uByte = in.readUnsignedByte();
bitBuffer = bitBuffer << 8 | uByte;
bitCount += 8;
} while (bitCount < n);
this.bitBuffer = bitBuffer;
}
this.bitCount = bitCount -= n;
return bitBuffer >>> bitCount & (1 << n) - 1;
}
private boolean readBoolean(ByteBuf in) {
return readBits(in, 1) != 0;
}
private int readInt(ByteBuf in) {
return readBits(in, 16) << 16 | readBits(in, 16);
}
private boolean hasBit(ByteBuf in) {
return bitCount > 0 || in.isReadable();
}
} }

View File

@ -18,10 +18,15 @@ package io.netty.handler.codec.compression;
import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBuf;
import static io.netty.handler.codec.compression.Bzip2Constants.*; import static io.netty.handler.codec.compression.Bzip2Constants.*;
/** /**
* A decoder for the BZip2 Huffman coding stage * A decoder for the Bzip2 Huffman coding stage.
*/ */
final class Bzip2HuffmanStageDecoder { final class Bzip2HuffmanStageDecoder {
/**
* A reader that provides bit-level reads.
*/
private final Bzip2BitReader reader;
/** /**
* The Huffman table number to use for each group of 50 symbols. * The Huffman table number to use for each group of 50 symbols.
@ -94,10 +99,8 @@ final class Bzip2HuffmanStageDecoder {
int currentAlpha; int currentAlpha;
boolean modifyLength; boolean modifyLength;
final Bzip2Decoder decoder; Bzip2HuffmanStageDecoder(final Bzip2BitReader reader, final int totalTables, final int alphabetSize) {
this.reader = reader;
Bzip2HuffmanStageDecoder(final Bzip2Decoder decoder, final int totalTables, final int alphabetSize) {
this.decoder = decoder;
this.totalTables = totalTables; this.totalTables = totalTables;
this.alphabetSize = alphabetSize; this.alphabetSize = alphabetSize;
@ -177,7 +180,7 @@ final class Bzip2HuffmanStageDecoder {
currentTable = selectors[groupIndex] & 0xff; currentTable = selectors[groupIndex] & 0xff;
} }
final Bzip2Decoder decoder = this.decoder; final Bzip2BitReader reader = this.reader;
final int currentTable = this.currentTable; final int currentTable = this.currentTable;
final int[] tableLimits = codeLimits[currentTable]; final int[] tableLimits = codeLimits[currentTable];
final int[] tableBases = codeBases[currentTable]; final int[] tableBases = codeBases[currentTable];
@ -186,13 +189,13 @@ final class Bzip2HuffmanStageDecoder {
// Starting with the minimum bit length for the table, read additional bits one at a time // Starting with the minimum bit length for the table, read additional bits one at a time
// until a complete code is recognised // until a complete code is recognised
int codeBits = decoder.readBits(in, codeLength); int codeBits = reader.readBits(in, codeLength);
for (; codeLength <= HUFFMAN_DECODE_MAX_CODE_LENGTH; codeLength++) { for (; codeLength <= HUFFMAN_DECODE_MAX_CODE_LENGTH; codeLength++) {
if (codeBits <= tableLimits[codeLength]) { if (codeBits <= tableLimits[codeLength]) {
// Convert the code to a symbol index and return // Convert the code to a symbol index and return
return tableSymbols[codeBits - tableBases[codeLength]]; return tableSymbols[codeBits - tableBases[codeLength]];
} }
codeBits = codeBits << 1 | decoder.readBits(in, 1); codeBits = codeBits << 1 | reader.readBits(in, 1);
} }
throw new DecompressionException("a valid code was not recognised"); throw new DecompressionException("a valid code was not recognised");

View File

@ -15,9 +15,12 @@
*/ */
package io.netty.handler.codec.compression; package io.netty.handler.codec.compression;
/**
* Random numbers for decompressing Bzip2 blocks.
*/
final class Bzip2Rand { final class Bzip2Rand {
/** /**
* The BZip2 specification originally included the optional addition of a slight pseudo-random * The Bzip2 specification originally included the optional addition of a slight pseudo-random
* perturbation to the input data, in order to work around the block sorting algorithm's non- * perturbation to the input data, in order to work around the block sorting algorithm's non-
* optimal performance on some types of input. The current mainline bzip2 does not require this * optimal performance on some types of input. The current mainline bzip2 does not require this
* and will not create randomised blocks, but compatibility is still required for old data (and * and will not create randomised blocks, but compatibility is still required for old data (and