/* * stdenc.c * Standard encoder */ #include #include #include #include "deflate.h" // Update hash variable "h" with character c #define UPDATE_HASH(h,c) \ h = ((h) << STD_ENCODER_HASH_SHIFT) ^ (c); // Insert a string into the hash chain at location bufpos // Assertions check that we never attempt to insert near the end of the buffer // (since our hash value is based on values at bufpos, bufpos+1, bufpos+2) and // that our hash value is always valid for the bytes we are inserting. #define INSERT_STRING(search,bufpos) \ { \ _ASSERT((bufpos + 2) < context->bufpos_end); \ UPDATE_HASH(hash, window[bufpos+2]); \ _ASSERT((unsigned int) STD_ENCODER_RECALCULATE_HASH(bufpos) == (unsigned int) (hash & STD_ENCODER_HASH_MASK)); \ search = lookup[hash & STD_ENCODER_HASH_MASK]; \ lookup[hash & STD_ENCODER_HASH_MASK] = (t_search_node) (bufpos); \ prev[bufpos & WINDOW_MASK] = (t_search_node) search; \ } #define CHECK_FLUSH_RECORDING_BUFFER() \ if (recording_bitcount >= 16) \ { \ *recording_bufptr++ = (BYTE) recording_bitbuf; \ *recording_bufptr++ = (BYTE) (recording_bitbuf >> 8); \ recording_bitbuf >>= 16; \ recording_bitcount -= 16; \ } #define OUTPUT_RECORDING_DATA(count,data) \ recording_bitbuf |= ((data) << recording_bitcount); \ recording_bitcount += (count); // Record unmatched symbol c #define RECORD_CHAR(c) \ context->outputting_block_num_literals++; \ context->std_encoder->literal_tree_freq[c]++; \ _ASSERT(context->std_encoder->recording_literal_tree_len[c] != 0); \ OUTPUT_RECORDING_DATA(context->std_encoder->recording_literal_tree_len[c], context->std_encoder->recording_literal_tree_code[c]); \ CHECK_FLUSH_RECORDING_BUFFER(); // Record a match with length match_len (>= MIN_MATCH) and displacement match_pos #define RECORD_MATCH(match_len, match_pos) \ { \ int pos_slot = POS_SLOT(match_pos); \ int len_slot = g_LengthLookup[match_len - MIN_MATCH]; \ int item = (NUM_CHARS+1) + len_slot; \ int extra_dist_bits = g_ExtraDistanceBits[pos_slot]; \ int extra_len_bits = g_ExtraLengthBits[len_slot]; \ _ASSERT(match_len >= MIN_MATCH && match_len <= MAX_MATCH); \ _ASSERT(context->outputting_block_num_literals >= 0 && context->outputting_block_num_literals < STD_ENCODER_MAX_ITEMS); \ _ASSERT(context->std_encoder->recording_literal_tree_len[item] != 0); \ _ASSERT(context->std_encoder->recording_dist_tree_len[pos_slot] != 0); \ context->outputting_block_num_literals++; \ context->std_encoder->literal_tree_freq[(NUM_CHARS + 1) + len_slot]++; \ context->std_encoder->dist_tree_freq[pos_slot]++; \ OUTPUT_RECORDING_DATA(context->std_encoder->recording_literal_tree_len[item], context->std_encoder->recording_literal_tree_code[item]); \ CHECK_FLUSH_RECORDING_BUFFER(); \ if (extra_len_bits > 0) \ { \ OUTPUT_RECORDING_DATA(extra_len_bits, (match_len-MIN_MATCH) & ((1 << extra_len_bits)-1)); \ CHECK_FLUSH_RECORDING_BUFFER(); \ } \ OUTPUT_RECORDING_DATA(context->std_encoder->recording_dist_tree_len[pos_slot], context->std_encoder->recording_dist_tree_code[pos_slot]); \ CHECK_FLUSH_RECORDING_BUFFER(); \ if (extra_dist_bits > 0) \ { \ OUTPUT_RECORDING_DATA(extra_dist_bits, match_pos & ((1 << extra_dist_bits)-1)); \ CHECK_FLUSH_RECORDING_BUFFER(); \ } \ } #define FLUSH_RECORDING_BITBUF() \ *recording_bufptr++ = (BYTE) recording_bitbuf; \ *recording_bufptr++ = (BYTE) (recording_bitbuf >> 8); // Verifies that all of the hash pointers in the hash table are correct, and that everything // in the same hash chain has the same hash value #ifdef FULL_DEBUG #define VERIFY_HASHES(bufpos) StdEncoderVerifyHashes(context, bufpos) #else #define VERIFY_HASHES(bufpos) ; #endif static void StdEncoderMoveWindows(t_encoder_context *context); static int StdEncoderFindMatch( const BYTE * window, const USHORT * prev, long bufpos, long search, unsigned int * match_pos, int cutoff, int nice_length ); void StdEncoderDeflate( t_encoder_context * context, int search_depth, int lazy_match_threshold, int good_length, int nice_length ) { long bufpos; unsigned int hash; t_std_encoder * encoder = context->std_encoder; byte * window = encoder->window; t_search_node * prev = encoder->prev; t_search_node * lookup = encoder->lookup; unsigned long recording_bitbuf; int recording_bitcount; byte * recording_bufptr; byte * end_recording_bufptr; // restore literal/match bitmap variables end_recording_bufptr = &encoder->lit_dist_buffer[STD_ENCODER_LIT_DIST_BUFFER_SIZE-8]; recording_bufptr = encoder->recording_bufptr; recording_bitbuf = encoder->recording_bitbuf; recording_bitcount = encoder->recording_bitcount; bufpos = context->bufpos; VERIFY_HASHES(bufpos); // Recalculate our hash // One disadvantage of the way we do our hashing is that matches are not permitted in the last // few characters near bufpos_end. hash = 0; UPDATE_HASH(hash, window[bufpos]); UPDATE_HASH(hash, window[bufpos+1]); while (bufpos < context->bufpos_end) { int match_len; t_match_pos match_pos; t_match_pos search; if (context->bufpos_end - bufpos <= 3) { // don't insert any strings when we get close to the end of the buffer, // since we will end up using corrupted hash values (the data after bufpos_end // is undefined, and those bytes would be swept into the hash value if we // calculated a hash at bufpos_end-2, for example, since our hash value is // build from 3 consecutive characters in the buffer). match_len = 0; } else { INSERT_STRING(search,bufpos); // find a match at what we'll call position X if (search != 0) { match_len = StdEncoderFindMatch(window, prev, bufpos, search, &match_pos, search_depth, nice_length); // truncate match if we're too close to the end of the buffer if (bufpos + match_len > context->bufpos_end) match_len = context->bufpos_end - bufpos; } else { match_len = 0; } } if (match_len < MIN_MATCH) { // didn't find a match, so output unmatched char RECORD_CHAR(window[bufpos]); bufpos++; } else { // bufpos now points to X+1 bufpos++; // is this match so good (long) that we should take it automatically without // checking X+1 ? if (match_len <= lazy_match_threshold) { int next_match_len; t_match_pos next_match_pos; // sets search INSERT_STRING(search,bufpos); // no, so check for a better match at X+1 if (search != 0) { next_match_len = StdEncoderFindMatch( window, prev, bufpos, search, &next_match_pos, match_len < good_length ? search_depth : (search_depth >> 2), nice_length ); // truncate match if we're too close to the end of the buffer // note: next_match_len could now be < MIN_MATCH if (bufpos + next_match_len > context->bufpos_end) next_match_len = context->bufpos_end - bufpos; } else { next_match_len = 0; } // right now X and X+1 are both inserted into the search tree if (next_match_len > match_len) { // since next_match_len > match_len, it can't be < MIN_MATCH here // match at X+1 is better, so output unmatched char at X RECORD_CHAR(window[bufpos-1]); // now output match at location X+1 RECORD_MATCH(next_match_len, next_match_pos); // insert remainder of second match into search tree // example: (*=inserted already) // X X+1 X+2 X+3 X+4 // * * // nextmatchlen=3 // bufpos // If next_match_len == 3, we want to perform 2 // insertions (at X+2 and X+3). However, first we must // inc bufpos. bufpos++; // now points to X+2 match_len = next_match_len; goto insert; } else { // match at X is better, so take it RECORD_MATCH(match_len, match_pos); // Insert remainder of first match into search tree, minus the first // two locations, which were inserted by the FindMatch() calls. // For example, if match_len == 3, then we've inserted at X and X+1 // already (and bufpos is now pointing at X+1), and now we need to insert // only at X+2. match_len--; bufpos++; // now bufpos points to X+2 goto insert; } } else /* match_length >= good_match */ { // in assertion: bufpos points to X+1, location X inserted already // first match is so good that we're not even going to check at X+1 RECORD_MATCH(match_len, match_pos); // insert remainder of match at X into search tree insert: if (context->bufpos_end - bufpos <= match_len) { bufpos += (match_len-1); } else { while (--match_len > 0) { t_match_pos ignore; // we're not interested in the search position INSERT_STRING(ignore,bufpos); bufpos++; } } } } // literal buffer or distance buffer filled up (or close to filling up)? if (context->outputting_block_num_literals >= STD_ENCODER_MAX_ITEMS-4 || recording_bufptr >= end_recording_bufptr) { // yes, then we must output a block _ASSERT(context->outputting_block_num_literals <= STD_ENCODER_MAX_ITEMS); // flush our recording matches bit buffer FLUSH_RECORDING_BITBUF(); StdEncoderOutputBlock(context); // did we output the whole block? if (context->state != STATE_NORMAL) break; // we did output the whole block, so reset literal encoding recording_bufptr = encoder->recording_bufptr; recording_bitbuf = encoder->recording_bitbuf; recording_bitcount = encoder->recording_bitcount; } } /* end ... while (bufpos < bufpos_end) */ _ASSERT(bufpos <= context->bufpos_end); // save recording state encoder->recording_bufptr = recording_bufptr; encoder->recording_bitbuf = recording_bitbuf; encoder->recording_bitcount = recording_bitcount; context->bufpos = bufpos; VERIFY_HASHES(bufpos); if (context->bufpos == 2*WINDOW_SIZE) StdEncoderMoveWindows(context); } static int StdEncoderFindMatch( const BYTE * window, const USHORT * prev, long bufpos, long search, unsigned int * match_pos, int cutoff, int nice_length ) { const BYTE * window_bufpos = &window[bufpos]; long earliest; // how far back we can look int best_match = 0; // best match length found so far t_match_pos l_match_pos; _ASSERT(bufpos >= 0 && bufpos < 2*WINDOW_SIZE); _ASSERT(search < bufpos); _ASSERT(STD_ENCODER_RECALCULATE_HASH(search) == STD_ENCODER_RECALCULATE_HASH(bufpos)); earliest = bufpos - WINDOW_SIZE; _ASSERT(earliest >= 0); while (search > earliest) { _ASSERT(STD_ENCODER_RECALCULATE_HASH(search) == STD_ENCODER_RECALCULATE_HASH(bufpos)); _ASSERT(search < bufpos); if (window_bufpos[best_match] == window[search + best_match]) { int j; for (j = 0; j < MAX_MATCH; j++) { if (window_bufpos[j] != window[search+j]) break; } if (j > best_match) { best_match = j; l_match_pos = search; // absolute position if (j > nice_length) break; } } if (--cutoff == 0) break; search = (long) prev[search & WINDOW_MASK]; } // turn l_match_pos into relative position l_match_pos = bufpos - l_match_pos - 1; if (best_match == 3 && l_match_pos >= STD_ENCODER_MATCH3_DIST_THRESHOLD) return 0; _ASSERT(best_match < MIN_MATCH || l_match_pos < WINDOW_SIZE); *match_pos = l_match_pos; return best_match; } static void StdEncoderMoveWindows(t_encoder_context *context) { if (context->bufpos >= 2*WINDOW_SIZE) { int i; t_search_node *lookup = context->std_encoder->lookup; t_search_node *prev = context->std_encoder->prev; BYTE *window = context->std_encoder->window; VERIFY_HASHES(2*WINDOW_SIZE); memcpy(&window[0], &window[context->bufpos - WINDOW_SIZE], WINDOW_SIZE); for (i = 0; i < STD_ENCODER_HASH_TABLE_SIZE; i++) { long val = ((long) lookup[i]) - WINDOW_SIZE; if (val <= 0) lookup[i] = (t_search_node) 0; else lookup[i] = (t_search_node) val; } for (i = 0; i < WINDOW_SIZE; i++) { long val = ((long) prev[i]) - WINDOW_SIZE; if (val <= 0) prev[i] = (t_search_node) 0; else prev[i] = (t_search_node) val; } #ifdef FULL_DEBUG memset(&window[WINDOW_SIZE], 0, WINDOW_SIZE); #endif VERIFY_HASHES(2*WINDOW_SIZE); context->bufpos = WINDOW_SIZE; context->bufpos_end = context->bufpos; } } // Zero the running frequency counts // Also set freq[END_OF_BLOCK_CODE] = 1 void StdEncoderZeroFrequencyCounts(t_std_encoder *encoder) { _ASSERT(encoder != NULL); memset(encoder->literal_tree_freq, 0, sizeof(encoder->literal_tree_freq)); memset(encoder->dist_tree_freq, 0, sizeof(encoder->dist_tree_freq)); encoder->literal_tree_freq[END_OF_BLOCK_CODE] = 1; } void StdEncoderReset(t_encoder_context *context) { t_std_encoder *encoder = context->std_encoder; _ASSERT(encoder != NULL); memset(encoder->lookup, 0, sizeof(encoder->lookup)); context->window_size = WINDOW_SIZE; context->bufpos = context->window_size; context->bufpos_end = context->bufpos; encoder->recording_bitbuf = 0; encoder->recording_bitcount = 0; encoder->recording_bufptr = encoder->lit_dist_buffer; DeflateInitRecordingTables( encoder->recording_literal_tree_len, encoder->recording_literal_tree_code, encoder->recording_dist_tree_len, encoder->recording_dist_tree_code ); StdEncoderZeroFrequencyCounts(encoder); } BOOL StdEncoderInit(t_encoder_context *context) { context->std_encoder = (t_std_encoder *) LocalAlloc(LMEM_FIXED, sizeof(t_std_encoder)); if (context->std_encoder == NULL) return FALSE; StdEncoderReset(context); return TRUE; }