1355d67fcSMatthew Dillon /* deflate.h -- internal compression state 2355d67fcSMatthew Dillon * Copyright (C) 1995-2012 Jean-loup Gailly 3355d67fcSMatthew Dillon * For conditions of distribution and use, see copyright notice in zlib.h 4355d67fcSMatthew Dillon */ 5355d67fcSMatthew Dillon 6355d67fcSMatthew Dillon /* WARNING: this file should *not* be used by applications. It is 7355d67fcSMatthew Dillon part of the implementation of the compression library and is 8355d67fcSMatthew Dillon subject to change. Applications should only use zlib.h. 9355d67fcSMatthew Dillon */ 10355d67fcSMatthew Dillon 11355d67fcSMatthew Dillon /* @(#) $Id$ */ 12355d67fcSMatthew Dillon 13355d67fcSMatthew Dillon #ifndef DEFLATE_H 14355d67fcSMatthew Dillon #define DEFLATE_H 15355d67fcSMatthew Dillon 16355d67fcSMatthew Dillon #include "hammer2_zlib_zutil.h" 17355d67fcSMatthew Dillon 18355d67fcSMatthew Dillon /* =========================================================================== 19355d67fcSMatthew Dillon * Internal compression state. 20355d67fcSMatthew Dillon */ 21355d67fcSMatthew Dillon 22355d67fcSMatthew Dillon #define LENGTH_CODES 29 23355d67fcSMatthew Dillon /* number of length codes, not counting the special END_BLOCK code */ 24355d67fcSMatthew Dillon 25355d67fcSMatthew Dillon #define LITERALS 256 26355d67fcSMatthew Dillon /* number of literal bytes 0..255 */ 27355d67fcSMatthew Dillon 28355d67fcSMatthew Dillon #define L_CODES (LITERALS+1+LENGTH_CODES) 29355d67fcSMatthew Dillon /* number of Literal or Length codes, including the END_BLOCK code */ 30355d67fcSMatthew Dillon 31355d67fcSMatthew Dillon #define D_CODES 30 32355d67fcSMatthew Dillon /* number of distance codes */ 33355d67fcSMatthew Dillon 34355d67fcSMatthew Dillon #define BL_CODES 19 35355d67fcSMatthew Dillon /* number of codes used to transfer the bit lengths */ 36355d67fcSMatthew Dillon 37355d67fcSMatthew Dillon #define HEAP_SIZE (2*L_CODES+1) 38355d67fcSMatthew Dillon /* maximum heap size */ 39355d67fcSMatthew Dillon 40355d67fcSMatthew Dillon #define MAX_BITS 15 41355d67fcSMatthew Dillon /* All codes must not exceed MAX_BITS bits */ 42355d67fcSMatthew Dillon 43355d67fcSMatthew Dillon #define Buf_size 16 44355d67fcSMatthew Dillon /* size of bit buffer in bi_buf */ 45355d67fcSMatthew Dillon 46355d67fcSMatthew Dillon #define INIT_STATE 42 47355d67fcSMatthew Dillon #define EXTRA_STATE 69 48355d67fcSMatthew Dillon #define NAME_STATE 73 49355d67fcSMatthew Dillon #define COMMENT_STATE 91 50355d67fcSMatthew Dillon #define HCRC_STATE 103 51355d67fcSMatthew Dillon #define BUSY_STATE 113 52355d67fcSMatthew Dillon #define FINISH_STATE 666 53355d67fcSMatthew Dillon /* Stream status */ 54355d67fcSMatthew Dillon 55355d67fcSMatthew Dillon 56355d67fcSMatthew Dillon /* Data structure describing a single value and its code string. */ 57355d67fcSMatthew Dillon typedef struct ct_data_s { 58355d67fcSMatthew Dillon union { 59355d67fcSMatthew Dillon ush freq; /* frequency count */ 60355d67fcSMatthew Dillon ush code; /* bit string */ 61355d67fcSMatthew Dillon } fc; 62355d67fcSMatthew Dillon union { 63355d67fcSMatthew Dillon ush dad; /* father node in Huffman tree */ 64355d67fcSMatthew Dillon ush len; /* length of bit string */ 65355d67fcSMatthew Dillon } dl; 66355d67fcSMatthew Dillon } FAR ct_data; 67355d67fcSMatthew Dillon 68355d67fcSMatthew Dillon #define Freq fc.freq 69355d67fcSMatthew Dillon #define Code fc.code 70355d67fcSMatthew Dillon #define Dad dl.dad 71355d67fcSMatthew Dillon #define Len dl.len 72355d67fcSMatthew Dillon 73355d67fcSMatthew Dillon typedef struct static_tree_desc_s static_tree_desc; 74355d67fcSMatthew Dillon 75355d67fcSMatthew Dillon typedef struct tree_desc_s { 76355d67fcSMatthew Dillon ct_data *dyn_tree; /* the dynamic tree */ 77355d67fcSMatthew Dillon int max_code; /* largest code with non zero frequency */ 78355d67fcSMatthew Dillon static_tree_desc *stat_desc; /* the corresponding static tree */ 79355d67fcSMatthew Dillon } FAR tree_desc; 80355d67fcSMatthew Dillon 81355d67fcSMatthew Dillon typedef ush Pos; 82355d67fcSMatthew Dillon typedef Pos FAR Posf; 83355d67fcSMatthew Dillon typedef unsigned IPos; 84355d67fcSMatthew Dillon 85355d67fcSMatthew Dillon /* A Pos is an index in the character window. We use short instead of int to 86355d67fcSMatthew Dillon * save space in the various tables. IPos is used only for parameter passing. 87355d67fcSMatthew Dillon */ 88355d67fcSMatthew Dillon 89355d67fcSMatthew Dillon typedef struct internal_state { 90355d67fcSMatthew Dillon z_streamp strm; /* pointer back to this zlib stream */ 91355d67fcSMatthew Dillon int status; /* as the name implies */ 92355d67fcSMatthew Dillon Bytef *pending_buf; /* output still pending */ 93355d67fcSMatthew Dillon ulg pending_buf_size; /* size of pending_buf */ 94355d67fcSMatthew Dillon Bytef *pending_out; /* next pending byte to output to the stream */ 95355d67fcSMatthew Dillon uInt pending; /* nb of bytes in the pending buffer */ 96355d67fcSMatthew Dillon int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ 97355d67fcSMatthew Dillon uInt gzindex; /* where in extra, name, or comment */ 98355d67fcSMatthew Dillon Byte method; /* can only be DEFLATED */ 99355d67fcSMatthew Dillon int last_flush; /* value of flush param for previous deflate call */ 100355d67fcSMatthew Dillon 101355d67fcSMatthew Dillon /* used by deflate.c: */ 102355d67fcSMatthew Dillon 103355d67fcSMatthew Dillon uInt w_size; /* LZ77 window size (32K by default) */ 104355d67fcSMatthew Dillon uInt w_bits; /* log2(w_size) (8..16) */ 105355d67fcSMatthew Dillon uInt w_mask; /* w_size - 1 */ 106355d67fcSMatthew Dillon 107355d67fcSMatthew Dillon Bytef *window; 108355d67fcSMatthew Dillon /* Sliding window. Input bytes are read into the second half of the window, 109355d67fcSMatthew Dillon * and move to the first half later to keep a dictionary of at least wSize 110355d67fcSMatthew Dillon * bytes. With this organization, matches are limited to a distance of 111355d67fcSMatthew Dillon * wSize-MAX_MATCH bytes, but this ensures that IO is always 112355d67fcSMatthew Dillon * performed with a length multiple of the block size. Also, it limits 113355d67fcSMatthew Dillon * the window size to 64K, which is quite useful on MSDOS. 114355d67fcSMatthew Dillon * To do: use the user input buffer as sliding window. 115355d67fcSMatthew Dillon */ 116355d67fcSMatthew Dillon 117355d67fcSMatthew Dillon ulg window_size; 118355d67fcSMatthew Dillon /* Actual size of window: 2*wSize, except when the user input buffer 119355d67fcSMatthew Dillon * is directly used as sliding window. 120355d67fcSMatthew Dillon */ 121355d67fcSMatthew Dillon 122355d67fcSMatthew Dillon Posf *prev; 123355d67fcSMatthew Dillon /* Link to older string with same hash index. To limit the size of this 124355d67fcSMatthew Dillon * array to 64K, this link is maintained only for the last 32K strings. 125355d67fcSMatthew Dillon * An index in this array is thus a window index modulo 32K. 126355d67fcSMatthew Dillon */ 127355d67fcSMatthew Dillon 128355d67fcSMatthew Dillon Posf *head; /* Heads of the hash chains or NIL. */ 129355d67fcSMatthew Dillon 130355d67fcSMatthew Dillon uInt ins_h; /* hash index of string to be inserted */ 131355d67fcSMatthew Dillon uInt hash_size; /* number of elements in hash table */ 132355d67fcSMatthew Dillon uInt hash_bits; /* log2(hash_size) */ 133355d67fcSMatthew Dillon uInt hash_mask; /* hash_size-1 */ 134355d67fcSMatthew Dillon 135355d67fcSMatthew Dillon uInt hash_shift; 136355d67fcSMatthew Dillon /* Number of bits by which ins_h must be shifted at each input 137355d67fcSMatthew Dillon * step. It must be such that after MIN_MATCH steps, the oldest 138355d67fcSMatthew Dillon * byte no longer takes part in the hash key, that is: 139355d67fcSMatthew Dillon * hash_shift * MIN_MATCH >= hash_bits 140355d67fcSMatthew Dillon */ 141355d67fcSMatthew Dillon 142355d67fcSMatthew Dillon long block_start; 143355d67fcSMatthew Dillon /* Window position at the beginning of the current output block. Gets 144355d67fcSMatthew Dillon * negative when the window is moved backwards. 145355d67fcSMatthew Dillon */ 146355d67fcSMatthew Dillon 147355d67fcSMatthew Dillon uInt match_length; /* length of best match */ 148355d67fcSMatthew Dillon IPos prev_match; /* previous match */ 149355d67fcSMatthew Dillon int match_available; /* set if previous match exists */ 150355d67fcSMatthew Dillon uInt strstart; /* start of string to insert */ 151355d67fcSMatthew Dillon uInt match_start; /* start of matching string */ 152355d67fcSMatthew Dillon uInt lookahead; /* number of valid bytes ahead in window */ 153355d67fcSMatthew Dillon 154355d67fcSMatthew Dillon uInt prev_length; 155355d67fcSMatthew Dillon /* Length of the best match at previous step. Matches not greater than this 156355d67fcSMatthew Dillon * are discarded. This is used in the lazy match evaluation. 157355d67fcSMatthew Dillon */ 158355d67fcSMatthew Dillon 159355d67fcSMatthew Dillon uInt max_chain_length; 160355d67fcSMatthew Dillon /* To speed up deflation, hash chains are never searched beyond this 161355d67fcSMatthew Dillon * length. A higher limit improves compression ratio but degrades the 162355d67fcSMatthew Dillon * speed. 163355d67fcSMatthew Dillon */ 164355d67fcSMatthew Dillon 165355d67fcSMatthew Dillon uInt max_lazy_match; 166355d67fcSMatthew Dillon /* Attempt to find a better match only when the current match is strictly 167355d67fcSMatthew Dillon * smaller than this value. This mechanism is used only for compression 168355d67fcSMatthew Dillon * levels >= 4. 169355d67fcSMatthew Dillon */ 170355d67fcSMatthew Dillon # define max_insert_length max_lazy_match 171355d67fcSMatthew Dillon /* Insert new strings in the hash table only if the match length is not 172355d67fcSMatthew Dillon * greater than this length. This saves time but degrades compression. 173355d67fcSMatthew Dillon * max_insert_length is used only for compression levels <= 3. 174355d67fcSMatthew Dillon */ 175355d67fcSMatthew Dillon 176355d67fcSMatthew Dillon int level; /* compression level (1..9) */ 177355d67fcSMatthew Dillon int strategy; /* favor or force Huffman coding*/ 178355d67fcSMatthew Dillon 179355d67fcSMatthew Dillon uInt good_match; 180355d67fcSMatthew Dillon /* Use a faster search when the previous match is longer than this */ 181355d67fcSMatthew Dillon 182355d67fcSMatthew Dillon int nice_match; /* Stop searching when current match exceeds this */ 183355d67fcSMatthew Dillon 184355d67fcSMatthew Dillon /* used by trees.c: */ 185355d67fcSMatthew Dillon /* Didn't use ct_data typedef below to suppress compiler warning */ 186355d67fcSMatthew Dillon struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ 187355d67fcSMatthew Dillon struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ 188355d67fcSMatthew Dillon struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ 189355d67fcSMatthew Dillon 190355d67fcSMatthew Dillon struct tree_desc_s l_desc; /* desc. for literal tree */ 191355d67fcSMatthew Dillon struct tree_desc_s d_desc; /* desc. for distance tree */ 192355d67fcSMatthew Dillon struct tree_desc_s bl_desc; /* desc. for bit length tree */ 193355d67fcSMatthew Dillon 194355d67fcSMatthew Dillon ush bl_count[MAX_BITS+1]; 195355d67fcSMatthew Dillon /* number of codes at each bit length for an optimal tree */ 196355d67fcSMatthew Dillon 197355d67fcSMatthew Dillon int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ 198355d67fcSMatthew Dillon int heap_len; /* number of elements in the heap */ 199355d67fcSMatthew Dillon int heap_max; /* element of largest frequency */ 200355d67fcSMatthew Dillon /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. 201355d67fcSMatthew Dillon * The same heap array is used to build all trees. 202355d67fcSMatthew Dillon */ 203355d67fcSMatthew Dillon 204355d67fcSMatthew Dillon uch depth[2*L_CODES+1]; 205355d67fcSMatthew Dillon /* Depth of each subtree used as tie breaker for trees of equal frequency 206355d67fcSMatthew Dillon */ 207355d67fcSMatthew Dillon 208355d67fcSMatthew Dillon uchf *l_buf; /* buffer for literals or lengths */ 209355d67fcSMatthew Dillon 210355d67fcSMatthew Dillon uInt lit_bufsize; 211355d67fcSMatthew Dillon /* Size of match buffer for literals/lengths. There are 4 reasons for 212355d67fcSMatthew Dillon * limiting lit_bufsize to 64K: 213355d67fcSMatthew Dillon * - frequencies can be kept in 16 bit counters 214355d67fcSMatthew Dillon * - if compression is not successful for the first block, all input 215355d67fcSMatthew Dillon * data is still in the window so we can still emit a stored block even 216355d67fcSMatthew Dillon * when input comes from standard input. (This can also be done for 217355d67fcSMatthew Dillon * all blocks if lit_bufsize is not greater than 32K.) 218355d67fcSMatthew Dillon * - if compression is not successful for a file smaller than 64K, we can 219355d67fcSMatthew Dillon * even emit a stored file instead of a stored block (saving 5 bytes). 220355d67fcSMatthew Dillon * This is applicable only for zip (not gzip or zlib). 221355d67fcSMatthew Dillon * - creating new Huffman trees less frequently may not provide fast 222355d67fcSMatthew Dillon * adaptation to changes in the input data statistics. (Take for 223355d67fcSMatthew Dillon * example a binary file with poorly compressible code followed by 224355d67fcSMatthew Dillon * a highly compressible string table.) Smaller buffer sizes give 225355d67fcSMatthew Dillon * fast adaptation but have of course the overhead of transmitting 226355d67fcSMatthew Dillon * trees more frequently. 227355d67fcSMatthew Dillon * - I can't count above 4 228355d67fcSMatthew Dillon */ 229355d67fcSMatthew Dillon 230355d67fcSMatthew Dillon uInt last_lit; /* running index in l_buf */ 231355d67fcSMatthew Dillon 232355d67fcSMatthew Dillon ushf *d_buf; 233355d67fcSMatthew Dillon /* Buffer for distances. To simplify the code, d_buf and l_buf have 234355d67fcSMatthew Dillon * the same number of elements. To use different lengths, an extra flag 235355d67fcSMatthew Dillon * array would be necessary. 236355d67fcSMatthew Dillon */ 237355d67fcSMatthew Dillon 238355d67fcSMatthew Dillon ulg opt_len; /* bit length of current block with optimal trees */ 239355d67fcSMatthew Dillon ulg static_len; /* bit length of current block with static trees */ 240355d67fcSMatthew Dillon uInt matches; /* number of string matches in current block */ 241355d67fcSMatthew Dillon uInt insert; /* bytes at end of window left to insert */ 242355d67fcSMatthew Dillon 243*a46112e5SSascha Wildner #ifdef H2_ZLIB_DEBUG 244355d67fcSMatthew Dillon ulg compressed_len; /* total bit length of compressed file mod 2^32 */ 245355d67fcSMatthew Dillon ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ 246355d67fcSMatthew Dillon #endif 247355d67fcSMatthew Dillon 248355d67fcSMatthew Dillon ush bi_buf; 249355d67fcSMatthew Dillon /* Output buffer. bits are inserted starting at the bottom (least 250355d67fcSMatthew Dillon * significant bits). 251355d67fcSMatthew Dillon */ 252355d67fcSMatthew Dillon int bi_valid; 253355d67fcSMatthew Dillon /* Number of valid bits in bi_buf. All bits above the last valid bit 254355d67fcSMatthew Dillon * are always zero. 255355d67fcSMatthew Dillon */ 256355d67fcSMatthew Dillon 257355d67fcSMatthew Dillon ulg high_water; 258355d67fcSMatthew Dillon /* High water mark offset in window for initialized bytes -- bytes above 259355d67fcSMatthew Dillon * this are set to zero in order to avoid memory check warnings when 260355d67fcSMatthew Dillon * longest match routines access bytes past the input. This is then 261355d67fcSMatthew Dillon * updated to the new high water mark. 262355d67fcSMatthew Dillon */ 263355d67fcSMatthew Dillon 264355d67fcSMatthew Dillon } FAR deflate_state; 265355d67fcSMatthew Dillon 266355d67fcSMatthew Dillon /* Output a byte on the stream. 267355d67fcSMatthew Dillon * IN assertion: there is enough room in pending_buf. 268355d67fcSMatthew Dillon */ 269355d67fcSMatthew Dillon #define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} 270355d67fcSMatthew Dillon 271355d67fcSMatthew Dillon 272355d67fcSMatthew Dillon #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) 273355d67fcSMatthew Dillon /* Minimum amount of lookahead, except at the end of the input file. 274355d67fcSMatthew Dillon * See deflate.c for comments about the MIN_MATCH+1. 275355d67fcSMatthew Dillon */ 276355d67fcSMatthew Dillon 277355d67fcSMatthew Dillon #define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) 278355d67fcSMatthew Dillon /* In order to simplify the code, particularly on 16 bit machines, match 279355d67fcSMatthew Dillon * distances are limited to MAX_DIST instead of WSIZE. 280355d67fcSMatthew Dillon */ 281355d67fcSMatthew Dillon 282355d67fcSMatthew Dillon #define WIN_INIT MAX_MATCH 283355d67fcSMatthew Dillon /* Number of bytes after end of data in window to initialize in order to avoid 284355d67fcSMatthew Dillon memory checker errors from longest match routines */ 285355d67fcSMatthew Dillon 286355d67fcSMatthew Dillon /* in trees.c */ 287355d67fcSMatthew Dillon void ZLIB_INTERNAL _tr_init(deflate_state *s); 288355d67fcSMatthew Dillon int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc); 289355d67fcSMatthew Dillon void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf, 290355d67fcSMatthew Dillon ulg stored_len, int last); 291355d67fcSMatthew Dillon void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s); 292355d67fcSMatthew Dillon void ZLIB_INTERNAL _tr_align(deflate_state *s); 293355d67fcSMatthew Dillon void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, 294355d67fcSMatthew Dillon ulg stored_len, int last); 295355d67fcSMatthew Dillon 296355d67fcSMatthew Dillon #define d_code(dist) \ 297355d67fcSMatthew Dillon ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) 298355d67fcSMatthew Dillon /* Mapping from a distance to a distance code. dist is the distance - 1 and 299355d67fcSMatthew Dillon * must not have side effects. _dist_code[256] and _dist_code[257] are never 300355d67fcSMatthew Dillon * used. 301355d67fcSMatthew Dillon */ 302355d67fcSMatthew Dillon 303*a46112e5SSascha Wildner #ifndef H2_ZLIB_DEBUG 304355d67fcSMatthew Dillon /* Inline versions of _tr_tally for speed: */ 305355d67fcSMatthew Dillon 306355d67fcSMatthew Dillon #if defined(GEN_TREES_H) || !defined(STDC) 307355d67fcSMatthew Dillon extern uch ZLIB_INTERNAL _length_code[]; 308355d67fcSMatthew Dillon extern uch ZLIB_INTERNAL _dist_code[]; 309355d67fcSMatthew Dillon #else 310355d67fcSMatthew Dillon extern const uch ZLIB_INTERNAL _length_code[]; 311355d67fcSMatthew Dillon extern const uch ZLIB_INTERNAL _dist_code[]; 312355d67fcSMatthew Dillon #endif 313355d67fcSMatthew Dillon 314355d67fcSMatthew Dillon # define _tr_tally_lit(s, c, flush) \ 315355d67fcSMatthew Dillon { uch cc = (c); \ 316355d67fcSMatthew Dillon s->d_buf[s->last_lit] = 0; \ 317355d67fcSMatthew Dillon s->l_buf[s->last_lit++] = cc; \ 318355d67fcSMatthew Dillon s->dyn_ltree[cc].Freq++; \ 319355d67fcSMatthew Dillon flush = (s->last_lit == s->lit_bufsize-1); \ 320355d67fcSMatthew Dillon } 321355d67fcSMatthew Dillon # define _tr_tally_dist(s, distance, length, flush) \ 322355d67fcSMatthew Dillon { uch len = (length); \ 323355d67fcSMatthew Dillon ush dist = (distance); \ 324355d67fcSMatthew Dillon s->d_buf[s->last_lit] = dist; \ 325355d67fcSMatthew Dillon s->l_buf[s->last_lit++] = len; \ 326355d67fcSMatthew Dillon dist--; \ 327355d67fcSMatthew Dillon s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ 328355d67fcSMatthew Dillon s->dyn_dtree[d_code(dist)].Freq++; \ 329355d67fcSMatthew Dillon flush = (s->last_lit == s->lit_bufsize-1); \ 330355d67fcSMatthew Dillon } 331355d67fcSMatthew Dillon #else 332355d67fcSMatthew Dillon # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) 333355d67fcSMatthew Dillon # define _tr_tally_dist(s, distance, length, flush) \ 334355d67fcSMatthew Dillon flush = _tr_tally(s, distance, length) 335355d67fcSMatthew Dillon #endif 336355d67fcSMatthew Dillon 337355d67fcSMatthew Dillon #endif /* DEFLATE_H */ 338