Windows2000/private/inet/urlmon/compress/gzip/stdenc.c
2020-09-30 17:12:32 +02:00

516 lines
16 KiB
C

/*
* stdenc.c
* Standard encoder
*/
#include <string.h>
#include <stdio.h>
#include <crtdbg.h>
#include "deflate.h"
// Fold the next input byte c into the rolling hash h: the previous value is
// shifted left by STD_ENCODER_HASH_SHIFT bits and XORed with c.
// h must be an lvalue.  The expansion already ends with a semicolon.
#define UPDATE_HASH(h,c) \
    (h) = (((h) << STD_ENCODER_HASH_SHIFT) ^ (c));
// Insert the string starting at window[bufpos] into the hash chains.
//
// The rolling hash is advanced with window[bufpos+2], the previous head of
// that hash bucket is stored in "search" (callers treat 0 as "no candidate"),
// bufpos becomes the new bucket head, and the old head is linked behind it in
// prev[bufpos & WINDOW_MASK].
//
// Expects these names in the calling scope: context, hash, window, lookup, prev.
//
// Assertions check that we never attempt to insert near the end of the buffer
// (since our hash value is based on values at bufpos, bufpos+1, bufpos+2) and
// that our hash value is always valid for the bytes we are inserting.
//
// Wrapped in do { } while (0) so that "INSERT_STRING(s, p);" is a single
// statement, safe as the body of an if/else.  Both arguments are evaluated
// more than once, so they must be free of side effects.
#define INSERT_STRING(search,bufpos) \
do { \
    _ASSERT(((bufpos) + 2) < context->bufpos_end); \
    UPDATE_HASH(hash, window[(bufpos) + 2]); \
    _ASSERT((unsigned int) STD_ENCODER_RECALCULATE_HASH(bufpos) == (unsigned int) (hash & STD_ENCODER_HASH_MASK)); \
    (search) = lookup[hash & STD_ENCODER_HASH_MASK]; \
    lookup[hash & STD_ENCODER_HASH_MASK] = (t_search_node) (bufpos); \
    prev[(bufpos) & WINDOW_MASK] = (t_search_node) (search); \
} while (0)
// If at least 16 bits are pending in recording_bitbuf, store the low 16 bits
// (low byte first) at recording_bufptr, advance the pointer by two bytes and
// drop those bits from the bit buffer.
//
// Expects recording_bitbuf, recording_bitcount and recording_bufptr in the
// calling scope.  Wrapped in do { } while (0) so the macro is one statement
// and cannot capture a following "else".
#define CHECK_FLUSH_RECORDING_BUFFER() \
do { \
    if (recording_bitcount >= 16) \
    { \
        *recording_bufptr++ = (BYTE) recording_bitbuf; \
        *recording_bufptr++ = (BYTE) (recording_bitbuf >> 8); \
        recording_bitbuf >>= 16; \
        recording_bitcount -= 16; \
    } \
} while (0)
// Append the low "count" bits of "data" to the recording bit buffer: data is
// ORed in above the recording_bitcount bits already pending, and
// recording_bitcount grows by count.  Nothing is written to memory here; see
// CHECK_FLUSH_RECORDING_BUFFER.
//
// data is converted to unsigned long (the type of recording_bitbuf) before
// shifting so the shift is never performed on a signed int.
// Wrapped in do { } while (0) so both updates stay together when the macro is
// used as the body of an if/else.
#define OUTPUT_RECORDING_DATA(count,data) \
do { \
    recording_bitbuf |= ((unsigned long) (data) << recording_bitcount); \
    recording_bitcount += (count); \
} while (0)
// Record unmatched symbol c
//
// Counts one more item in the block being built, bumps the literal frequency
// for c, and appends c's recording-tree code to the recording bit buffer,
// flushing 16 bits to the recording buffer when enough have accumulated.
//
// Expects context and the recording_* locals in the calling scope.  c is
// evaluated several times, so it must be free of side effects.
// Wrapped in do { } while (0) so that "RECORD_CHAR(x);" is a single statement.
#define RECORD_CHAR(c) \
do { \
    context->outputting_block_num_literals++; \
    context->std_encoder->literal_tree_freq[c]++; \
    _ASSERT(context->std_encoder->recording_literal_tree_len[c] != 0); \
    OUTPUT_RECORDING_DATA(context->std_encoder->recording_literal_tree_len[c], context->std_encoder->recording_literal_tree_code[c]); \
    CHECK_FLUSH_RECORDING_BUFFER(); \
} while (0)
// Record a match with length match_len (>= MIN_MATCH) and displacement match_pos
//
// Counts one more item in the block being built, bumps the frequency of the
// length symbol ((NUM_CHARS+1) + length slot) and of the distance slot, then
// appends to the recording bit buffer, in order: the length symbol's code, the
// extra length bits (if any), the distance slot's code, and the extra distance
// bits (if any).  The buffer is flushed after each piece as needed.
//
// Expects context and the recording_* locals in the calling scope.  Both
// arguments are evaluated several times, so they must be free of side effects.
//
// The match_len range assertion runs before match_len is used to index
// g_LengthLookup, so a debug build reports a bad length before the table is
// read.  Wrapped in do { } while (0) so that "RECORD_MATCH(l, p);" is a single
// statement.
#define RECORD_MATCH(match_len, match_pos) \
do { \
    int pos_slot; \
    int len_slot; \
    int item; \
    int extra_dist_bits; \
    int extra_len_bits; \
    _ASSERT((match_len) >= MIN_MATCH && (match_len) <= MAX_MATCH); \
    _ASSERT(context->outputting_block_num_literals >= 0 && context->outputting_block_num_literals < STD_ENCODER_MAX_ITEMS); \
    pos_slot = POS_SLOT(match_pos); \
    len_slot = g_LengthLookup[(match_len) - MIN_MATCH]; \
    item = (NUM_CHARS+1) + len_slot; \
    extra_dist_bits = g_ExtraDistanceBits[pos_slot]; \
    extra_len_bits = g_ExtraLengthBits[len_slot]; \
    _ASSERT(context->std_encoder->recording_literal_tree_len[item] != 0); \
    _ASSERT(context->std_encoder->recording_dist_tree_len[pos_slot] != 0); \
    context->outputting_block_num_literals++; \
    context->std_encoder->literal_tree_freq[item]++; \
    context->std_encoder->dist_tree_freq[pos_slot]++; \
    OUTPUT_RECORDING_DATA(context->std_encoder->recording_literal_tree_len[item], context->std_encoder->recording_literal_tree_code[item]); \
    CHECK_FLUSH_RECORDING_BUFFER(); \
    if (extra_len_bits > 0) \
    { \
        OUTPUT_RECORDING_DATA(extra_len_bits, ((match_len) - MIN_MATCH) & ((1 << extra_len_bits) - 1)); \
        CHECK_FLUSH_RECORDING_BUFFER(); \
    } \
    OUTPUT_RECORDING_DATA(context->std_encoder->recording_dist_tree_len[pos_slot], context->std_encoder->recording_dist_tree_code[pos_slot]); \
    CHECK_FLUSH_RECORDING_BUFFER(); \
    if (extra_dist_bits > 0) \
    { \
        OUTPUT_RECORDING_DATA(extra_dist_bits, (match_pos) & ((1 << extra_dist_bits) - 1)); \
        CHECK_FLUSH_RECORDING_BUFFER(); \
    } \
} while (0)
// Unconditionally store the low 16 bits of recording_bitbuf (low byte first)
// at recording_bufptr and advance the pointer by two bytes.  The bit buffer
// and bit count themselves are left unchanged.
//
// Wrapped in do { } while (0) so both stores stay together when the macro is
// used as the body of an if/else.
#define FLUSH_RECORDING_BITBUF() \
do { \
    *recording_bufptr++ = (BYTE) recording_bitbuf; \
    *recording_bufptr++ = (BYTE) (recording_bitbuf >> 8); \
} while (0)
// Debug-only consistency check: verifies that all of the hash pointers in the
// hash table are correct, and that everything in the same hash chain has the
// same hash value.
//
// Outside FULL_DEBUG builds the macro expands to an empty statement.  In
// FULL_DEBUG builds it calls StdEncoderVerifyHashes(context, bufpos), so a
// variable named "context" must be in scope at the call site.
#ifndef FULL_DEBUG
#define VERIFY_HASHES(bufpos) ;
#else
#define VERIFY_HASHES(bufpos) StdEncoderVerifyHashes(context, bufpos)
#endif
// Forward declarations for the file-local helpers defined after StdEncoderDeflate.

// Slides the window contents and the hash/chain tables back by WINDOW_SIZE
// once context->bufpos has reached 2*WINDOW_SIZE.
static void StdEncoderMoveWindows(t_encoder_context *context);
// Walks the hash chain starting at "search" looking for the longest match for
// the string at window[bufpos].  Returns the match length and stores the
// match displacement through match_pos; cutoff limits the number of chain
// entries examined and nice_length ends the search early on a long match.
static int StdEncoderFindMatch(
const BYTE * window,
const USHORT * prev,
long bufpos,
long search,
unsigned int * match_pos,
int cutoff,
int nice_length
);
// Run the standard encoder over the window bytes from context->bufpos up to
// context->bufpos_end.
//
// For each position the string is inserted into the hash chains and the chain
// is searched for a match.  A literal or a match is then recorded into the
// encoder's recording buffer (lit_dist_buffer) and the frequency counts are
// updated.  When the item count or the recording buffer gets close to full,
// StdEncoderOutputBlock() is called; if that leaves context->state other than
// STATE_NORMAL the loop stops early.  On exit the recording state and bufpos
// are saved back, and the window is slid if bufpos has reached 2*WINDOW_SIZE.
//
// Parameters:
//   context              - encoder context; std_encoder must already be set up
//   search_depth         - chain search limit passed to StdEncoderFindMatch as
//                          its cutoff
//   lazy_match_threshold - a match longer than this is taken immediately; a
//                          shorter one is compared against the match at the
//                          next position first
//   good_length          - when the first match is at least this long, the
//                          search at the next position uses search_depth >> 2
//   nice_length          - passed through to StdEncoderFindMatch
void StdEncoderDeflate(
t_encoder_context * context,
int search_depth,
int lazy_match_threshold,
int good_length,
int nice_length
)
{
long bufpos;
unsigned int hash;
t_std_encoder * encoder = context->std_encoder;
// window, prev and lookup are referenced by name inside INSERT_STRING
byte * window = encoder->window;
t_search_node * prev = encoder->prev;
t_search_node * lookup = encoder->lookup;
// local copies of the recording bit-buffer state, used by the RECORD_* macros
unsigned long recording_bitbuf;
int recording_bitcount;
byte * recording_bufptr;
byte * end_recording_bufptr;
// restore literal/match bitmap variables
// (the "full" mark is 8 bytes short of the end of lit_dist_buffer)
end_recording_bufptr = &encoder->lit_dist_buffer[STD_ENCODER_LIT_DIST_BUFFER_SIZE-8];
recording_bufptr = encoder->recording_bufptr;
recording_bitbuf = encoder->recording_bitbuf;
recording_bitcount = encoder->recording_bitcount;
bufpos = context->bufpos;
VERIFY_HASHES(bufpos);
// Recalculate our hash
// One disadvantage of the way we do our hashing is that matches are not permitted in the last
// few characters near bufpos_end.
// The hash is primed with the first two bytes; INSERT_STRING folds in the third.
hash = 0;
UPDATE_HASH(hash, window[bufpos]);
UPDATE_HASH(hash, window[bufpos+1]);
while (bufpos < context->bufpos_end)
{
int match_len;
t_match_pos match_pos;
t_match_pos search;
if (context->bufpos_end - bufpos <= 3)
{
// don't insert any strings when we get close to the end of the buffer,
// since we will end up using corrupted hash values (the data after bufpos_end
// is undefined, and those bytes would be swept into the hash value if we
// calculated a hash at bufpos_end-2, for example, since our hash value is
// built from 3 consecutive characters in the buffer).
match_len = 0;
}
else
{
INSERT_STRING(search,bufpos);
// find a match at what we'll call position X
// (search is the previous head of this hash bucket; 0 means no candidate)
if (search != 0)
{
match_len = StdEncoderFindMatch(window, prev, bufpos, search, &match_pos, search_depth, nice_length);
// truncate match if we're too close to the end of the buffer
if (bufpos + match_len > context->bufpos_end)
match_len = context->bufpos_end - bufpos;
}
else
{
match_len = 0;
}
}
if (match_len < MIN_MATCH)
{
// didn't find a match, so output unmatched char
RECORD_CHAR(window[bufpos]);
bufpos++;
}
else
{
// bufpos now points to X+1
bufpos++;
// is this match so good (long) that we should take it automatically without
// checking X+1 ?
if (match_len <= lazy_match_threshold)
{
int next_match_len;
t_match_pos next_match_pos;
// sets search
INSERT_STRING(search,bufpos);
// no, so check for a better match at X+1
if (search != 0)
{
// the search at X+1 is only given a quarter of the depth when the
// match at X is already at least good_length long
next_match_len = StdEncoderFindMatch(
window,
prev,
bufpos,
search,
&next_match_pos,
match_len < good_length ? search_depth : (search_depth >> 2),
nice_length
);
// truncate match if we're too close to the end of the buffer
// note: next_match_len could now be < MIN_MATCH
if (bufpos + next_match_len > context->bufpos_end)
next_match_len = context->bufpos_end - bufpos;
}
else
{
next_match_len = 0;
}
// right now X and X+1 are both inserted into the search tree
if (next_match_len > match_len)
{
// since next_match_len > match_len, it can't be < MIN_MATCH here
// match at X+1 is better, so output unmatched char at X
RECORD_CHAR(window[bufpos-1]);
// now output match at location X+1
RECORD_MATCH(next_match_len, next_match_pos);
// insert remainder of second match into search tree
// example: (*=inserted already)
// X X+1 X+2 X+3 X+4
// * *
// nextmatchlen=3
// bufpos
// If next_match_len == 3, we want to perform 2
// insertions (at X+2 and X+3). However, first we must
// inc bufpos.
bufpos++; // now points to X+2
match_len = next_match_len;
goto insert;
}
else
{
// match at X is better, so take it
RECORD_MATCH(match_len, match_pos);
// Insert remainder of first match into search tree, minus the first
// two locations, which were already inserted by the INSERT_STRING
// that precedes each FindMatch() call.
// For example, if match_len == 3, then we've inserted at X and X+1
// already (and bufpos is now pointing at X+1), and now we need to insert
// only at X+2.
match_len--;
bufpos++; // now bufpos points to X+2
goto insert;
}
}
else /* match_len > lazy_match_threshold */
{
// on entry: bufpos points to X+1, location X inserted already
// first match is so good that we're not even going to check at X+1
RECORD_MATCH(match_len, match_pos);
// insert remainder of match at X into search tree
insert:
// At this label the loop below performs match_len-1 insertions starting at
// bufpos.  If that would reach the last few bytes before bufpos_end,
// skip the insertions and just step bufpos past the match.
if (context->bufpos_end - bufpos <= match_len)
{
bufpos += (match_len-1);
}
else
{
while (--match_len > 0)
{
t_match_pos ignore; // we're not interested in the search position
INSERT_STRING(ignore,bufpos);
bufpos++;
}
}
}
}
// literal buffer or distance buffer filled up (or close to filling up)?
if (context->outputting_block_num_literals >= STD_ENCODER_MAX_ITEMS-4 ||
recording_bufptr >= end_recording_bufptr)
{
// yes, then we must output a block
_ASSERT(context->outputting_block_num_literals <= STD_ENCODER_MAX_ITEMS);
// flush our recording matches bit buffer
FLUSH_RECORDING_BITBUF();
StdEncoderOutputBlock(context);
// did we output the whole block?
if (context->state != STATE_NORMAL)
break;
// we did output the whole block, so reset literal encoding
// (reload the local copies from the encoder's recording state)
recording_bufptr = encoder->recording_bufptr;
recording_bitbuf = encoder->recording_bitbuf;
recording_bitcount = encoder->recording_bitcount;
}
} /* end ... while (bufpos < bufpos_end) */
_ASSERT(bufpos <= context->bufpos_end);
// save recording state
encoder->recording_bufptr = recording_bufptr;
encoder->recording_bitbuf = recording_bitbuf;
encoder->recording_bitcount = recording_bitcount;
context->bufpos = bufpos;
VERIFY_HASHES(bufpos);
// the buffer is full once bufpos reaches 2*WINDOW_SIZE; slide it back
if (context->bufpos == 2*WINDOW_SIZE)
StdEncoderMoveWindows(context);
}
// Search the hash chain that starts at "search" for the longest match for the
// string at window[bufpos].
//
// Parameters:
//   window      - the window buffer
//   prev        - hash chain links, indexed by (position & WINDOW_MASK)
//   bufpos      - position of the string being matched
//   search      - first candidate position (the previous hash bucket head)
//   match_pos   - receives the match displacement, stored as
//                 (bufpos - match position - 1); set to 0 when no candidate
//                 matched at all; left unwritten when a length-3 match is
//                 rejected for being too distant
//   cutoff      - number of chain entries to examine before giving up
//   nice_length - stop searching as soon as a match longer than this is found
//
// Returns the length of the best match found (up to MAX_MATCH).  The result
// can be shorter than MIN_MATCH; callers check for that.  A match of length 3
// at a displacement of STD_ENCODER_MATCH3_DIST_THRESHOLD or more is reported
// as 0.
//
// The byte comparison can run up to MAX_MATCH bytes from bufpos without regard
// to the end of valid data; callers truncate the returned length themselves.
//
// NOTE(review): cutoff is pre-decremented and compared with == 0, so a cutoff
// of 0 or less (for example search_depth >> 2 with search_depth < 4) never
// triggers that exit and the walk is then limited only by "earliest".
// Confirm whether callers can pass such values.
static int StdEncoderFindMatch(
    const BYTE * window,
    const USHORT * prev,
    long bufpos,
    long search,
    unsigned int * match_pos,
    int cutoff,
    int nice_length
)
{
    const BYTE * window_bufpos = &window[bufpos];
    long earliest; // how far back we can look
    int best_match = 0; // best match length found so far
    t_match_pos l_match_pos = 0; // absolute position of the best match

    _ASSERT(bufpos >= 0 && bufpos < 2*WINDOW_SIZE);
    _ASSERT(search < bufpos);
    _ASSERT(STD_ENCODER_RECALCULATE_HASH(search) == STD_ENCODER_RECALCULATE_HASH(bufpos));

    earliest = bufpos - WINDOW_SIZE;
    _ASSERT(earliest >= 0);

    while (search > earliest)
    {
        _ASSERT(STD_ENCODER_RECALCULATE_HASH(search) == STD_ENCODER_RECALCULATE_HASH(bufpos));
        _ASSERT(search < bufpos);

        // Cheap rejection: a candidate can only beat the current best if it
        // also matches at offset best_match.
        if (window_bufpos[best_match] == window[search + best_match])
        {
            int j;

            for (j = 0; j < MAX_MATCH; j++)
            {
                if (window_bufpos[j] != window[search+j])
                    break;
            }

            if (j > best_match)
            {
                best_match = j;
                l_match_pos = search; // absolute position

                if (j > nice_length)
                    break;
            }
        }

        if (--cutoff == 0)
            break;

        search = (long) prev[search & WINDOW_MASK];
    }

    // Nothing matched: either no candidate was inside the window or every
    // candidate differed in its first byte.  There is no position to convert,
    // so report a defined displacement instead of reading an unset one.
    if (best_match == 0)
    {
        *match_pos = 0;
        return 0;
    }

    // turn l_match_pos into relative position
    l_match_pos = bufpos - l_match_pos - 1;

    if (best_match == 3 && l_match_pos >= STD_ENCODER_MATCH3_DIST_THRESHOLD)
        return 0;

    _ASSERT(best_match < MIN_MATCH || l_match_pos < WINDOW_SIZE);
    *match_pos = l_match_pos;
    return best_match;
}
// Slide the window back by WINDOW_SIZE.  Does nothing unless context->bufpos
// has reached at least 2*WINDOW_SIZE.
//
// The WINDOW_SIZE bytes preceding bufpos are copied to the start of the
// window, and every position stored in the lookup and prev tables is lowered
// by WINDOW_SIZE; entries that would become zero or negative are cleared to 0.
// Afterwards bufpos and bufpos_end are both set to WINDOW_SIZE.
static void StdEncoderMoveWindows(t_encoder_context *context)
{
    t_search_node *hash_heads;
    t_search_node *chain_links;
    BYTE *buf;
    long rebased;
    int n;

    if (context->bufpos < 2*WINDOW_SIZE)
        return;

    hash_heads = context->std_encoder->lookup;
    chain_links = context->std_encoder->prev;
    buf = context->std_encoder->window;

    VERIFY_HASHES(2*WINDOW_SIZE);

    // keep the most recent WINDOW_SIZE bytes as history
    memcpy(&buf[0], &buf[context->bufpos - WINDOW_SIZE], WINDOW_SIZE);

    // rebase the hash bucket heads
    for (n = 0; n < STD_ENCODER_HASH_TABLE_SIZE; n++)
    {
        rebased = ((long) hash_heads[n]) - WINDOW_SIZE;
        hash_heads[n] = (rebased > 0) ? (t_search_node) rebased : (t_search_node) 0;
    }

    // rebase the chain links
    for (n = 0; n < WINDOW_SIZE; n++)
    {
        rebased = ((long) chain_links[n]) - WINDOW_SIZE;
        chain_links[n] = (rebased > 0) ? (t_search_node) rebased : (t_search_node) 0;
    }

#ifdef FULL_DEBUG
    // debug builds wipe the upper half of the window after the copy
    memset(&buf[WINDOW_SIZE], 0, WINDOW_SIZE);
#endif

    VERIFY_HASHES(2*WINDOW_SIZE);

    context->bufpos = WINDOW_SIZE;
    context->bufpos_end = context->bufpos;
}
// Clear the running literal/length and distance frequency tables, then give
// END_OF_BLOCK_CODE a count of 1.
void StdEncoderZeroFrequencyCounts(t_std_encoder *encoder)
{
    _ASSERT(encoder != NULL);

    memset(encoder->dist_tree_freq, 0, sizeof encoder->dist_tree_freq);
    memset(encoder->literal_tree_freq, 0, sizeof encoder->literal_tree_freq);

    encoder->literal_tree_freq[END_OF_BLOCK_CODE] = 1;
}
// Put the standard encoder back into its starting state: empty hash table,
// window size WINDOW_SIZE with bufpos and bufpos_end both at that offset, an
// empty recording bit buffer pointing at the start of lit_dist_buffer, freshly
// initialised recording tables, and zeroed frequency counts.
void StdEncoderReset(t_encoder_context *context)
{
    t_std_encoder *enc = context->std_encoder;

    _ASSERT(enc != NULL);

    // hash table
    memset(enc->lookup, 0, sizeof(enc->lookup));

    // window bookkeeping: no pending input yet
    context->window_size = WINDOW_SIZE;
    context->bufpos_end = context->bufpos = context->window_size;

    // recording state
    enc->recording_bufptr = enc->lit_dist_buffer;
    enc->recording_bitcount = 0;
    enc->recording_bitbuf = 0;

    DeflateInitRecordingTables(
        enc->recording_literal_tree_len,
        enc->recording_literal_tree_code,
        enc->recording_dist_tree_len,
        enc->recording_dist_tree_code
    );

    StdEncoderZeroFrequencyCounts(enc);
}
// Allocate the standard encoder state with LocalAlloc, attach it to the
// context and reset it.  Returns TRUE on success; if the allocation fails,
// context->std_encoder is left NULL and FALSE is returned.
BOOL StdEncoderInit(t_encoder_context *context)
{
    t_std_encoder *fresh = (t_std_encoder *) LocalAlloc(LMEM_FIXED, sizeof(t_std_encoder));

    context->std_encoder = fresh;

    if (fresh != NULL)
    {
        StdEncoderReset(context);
        return TRUE;
    }

    return FALSE;
}