1*44bedb31SLionel Sambuc /* $NetBSD: inftrees.c,v 1.2 2006/01/16 03:23:10 christos Exp $ */
2*44bedb31SLionel Sambuc
3*44bedb31SLionel Sambuc /* inftrees.c -- generate Huffman trees for efficient decoding
4*44bedb31SLionel Sambuc * Copyright (C) 1995-2005 Mark Adler
5*44bedb31SLionel Sambuc * For conditions of distribution and use, see copyright notice in zlib.h
6*44bedb31SLionel Sambuc */
7*44bedb31SLionel Sambuc
8*44bedb31SLionel Sambuc #include "zutil.h"
9*44bedb31SLionel Sambuc #include "inftrees.h"
10*44bedb31SLionel Sambuc
/* Longest Huffman code length, in bits, accepted by inflate_table(); the
   per-length count[] and offs[] arrays there have MAXBITS+1 entries. */
#define MAXBITS 15

/* Version and copyright banner kept as a string in the compiled object. */
const char inflate_copyright[] =
    " inflate 1.2.3 Copyright 1995-2005 Mark Adler ";
15*44bedb31SLionel Sambuc /*
16*44bedb31SLionel Sambuc If you use the zlib library in a product, an acknowledgment is welcome
17*44bedb31SLionel Sambuc in the documentation of your product. If for some reason you cannot
18*44bedb31SLionel Sambuc include such an acknowledgment, I would appreciate that you keep this
19*44bedb31SLionel Sambuc copyright string in the executable of your product.
20*44bedb31SLionel Sambuc */
21*44bedb31SLionel Sambuc
22*44bedb31SLionel Sambuc /*
23*44bedb31SLionel Sambuc Build a set of tables to decode the provided canonical Huffman code.
24*44bedb31SLionel Sambuc The code lengths are lens[0..codes-1]. The result starts at *table,
25*44bedb31SLionel Sambuc whose indices are 0..2^bits-1. work is a writable array of at least
26*44bedb31SLionel Sambuc lens shorts, which is used as a work area. type is the type of code
27*44bedb31SLionel Sambuc to be generated, CODES, LENS, or DISTS. On return, zero is success,
28*44bedb31SLionel Sambuc -1 is an invalid code, and +1 means that ENOUGH isn't enough. table
29*44bedb31SLionel Sambuc on return points to the next available entry's address. bits is the
30*44bedb31SLionel Sambuc requested root table index bits, and on return it is the actual root
31*44bedb31SLionel Sambuc table index bits. It will differ if the request is greater than the
32*44bedb31SLionel Sambuc longest code or if it is less than the shortest code.
33*44bedb31SLionel Sambuc */
inflate_table(type,lens,codes,table,bits,work)34*44bedb31SLionel Sambuc int inflate_table(type, lens, codes, table, bits, work)
35*44bedb31SLionel Sambuc codetype type;
36*44bedb31SLionel Sambuc unsigned short FAR *lens;
37*44bedb31SLionel Sambuc unsigned codes;
38*44bedb31SLionel Sambuc code FAR * FAR *table;
39*44bedb31SLionel Sambuc unsigned FAR *bits;
40*44bedb31SLionel Sambuc unsigned short FAR *work;
41*44bedb31SLionel Sambuc {
42*44bedb31SLionel Sambuc unsigned len; /* a code's length in bits */
43*44bedb31SLionel Sambuc unsigned sym; /* index of code symbols */
44*44bedb31SLionel Sambuc unsigned mmin, mmax; /* minimum and maximum code lengths */
45*44bedb31SLionel Sambuc unsigned root; /* number of index bits for root table */
46*44bedb31SLionel Sambuc unsigned curr; /* number of index bits for current table */
47*44bedb31SLionel Sambuc unsigned drop; /* code bits to drop for sub-table */
48*44bedb31SLionel Sambuc int left; /* number of prefix codes available */
49*44bedb31SLionel Sambuc unsigned used; /* code entries in table used */
50*44bedb31SLionel Sambuc unsigned huff; /* Huffman code */
51*44bedb31SLionel Sambuc unsigned incr; /* for incrementing code, index */
52*44bedb31SLionel Sambuc unsigned fill; /* index for replicating entries */
53*44bedb31SLionel Sambuc unsigned low; /* low bits for current root entry */
54*44bedb31SLionel Sambuc unsigned mask; /* mask for low root bits */
55*44bedb31SLionel Sambuc code this; /* table entry for duplication */
56*44bedb31SLionel Sambuc code FAR *next; /* next available space in table */
57*44bedb31SLionel Sambuc const unsigned short FAR *base; /* base value table to use */
58*44bedb31SLionel Sambuc const unsigned short FAR *extra; /* extra bits table to use */
59*44bedb31SLionel Sambuc int end; /* use base and extra for symbol > end */
60*44bedb31SLionel Sambuc unsigned short count[MAXBITS+1]; /* number of codes of each length */
61*44bedb31SLionel Sambuc unsigned short offs[MAXBITS+1]; /* offsets in table for each length */
62*44bedb31SLionel Sambuc static const unsigned short lbase[31] = { /* Length codes 257..285 base */
63*44bedb31SLionel Sambuc 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
64*44bedb31SLionel Sambuc 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
65*44bedb31SLionel Sambuc static const unsigned short lext[31] = { /* Length codes 257..285 extra */
66*44bedb31SLionel Sambuc 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
67*44bedb31SLionel Sambuc 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196};
68*44bedb31SLionel Sambuc static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
69*44bedb31SLionel Sambuc 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
70*44bedb31SLionel Sambuc 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
71*44bedb31SLionel Sambuc 8193, 12289, 16385, 24577, 0, 0};
72*44bedb31SLionel Sambuc static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
73*44bedb31SLionel Sambuc 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
74*44bedb31SLionel Sambuc 23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
75*44bedb31SLionel Sambuc 28, 28, 29, 29, 64, 64};
76*44bedb31SLionel Sambuc
77*44bedb31SLionel Sambuc /*
78*44bedb31SLionel Sambuc Process a set of code lengths to create a canonical Huffman code. The
79*44bedb31SLionel Sambuc code lengths are lens[0..codes-1]. Each length corresponds to the
80*44bedb31SLionel Sambuc symbols 0..codes-1. The Huffman code is generated by first sorting the
81*44bedb31SLionel Sambuc symbols by length from short to long, and retaining the symbol order
82*44bedb31SLionel Sambuc for codes with equal lengths. Then the code starts with all zero bits
83*44bedb31SLionel Sambuc for the first code of the shortest length, and the codes are integer
84*44bedb31SLionel Sambuc increments for the same length, and zeros are appended as the length
85*44bedb31SLionel Sambuc increases. For the deflate format, these bits are stored backwards
86*44bedb31SLionel Sambuc from their more natural integer increment ordering, and so when the
87*44bedb31SLionel Sambuc decoding tables are built in the large loop below, the integer codes
88*44bedb31SLionel Sambuc are incremented backwards.
89*44bedb31SLionel Sambuc
90*44bedb31SLionel Sambuc This routine assumes, but does not check, that all of the entries in
91*44bedb31SLionel Sambuc lens[] are in the range 0..MAXBITS. The caller must assure this.
92*44bedb31SLionel Sambuc 1..MAXBITS is interpreted as that code length. zero means that that
93*44bedb31SLionel Sambuc symbol does not occur in this code.
94*44bedb31SLionel Sambuc
95*44bedb31SLionel Sambuc The codes are sorted by computing a count of codes for each length,
96*44bedb31SLionel Sambuc creating from that a table of starting indices for each length in the
97*44bedb31SLionel Sambuc sorted table, and then entering the symbols in order in the sorted
98*44bedb31SLionel Sambuc table. The sorted table is work[], with that space being provided by
99*44bedb31SLionel Sambuc the caller.
100*44bedb31SLionel Sambuc
101*44bedb31SLionel Sambuc The length counts are used for other purposes as well, i.e. finding
102*44bedb31SLionel Sambuc the minimum and maximum length codes, determining if there are any
103*44bedb31SLionel Sambuc codes at all, checking for a valid set of lengths, and looking ahead
104*44bedb31SLionel Sambuc at length counts to determine sub-table sizes when building the
105*44bedb31SLionel Sambuc decoding tables.
106*44bedb31SLionel Sambuc */
107*44bedb31SLionel Sambuc
108*44bedb31SLionel Sambuc /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
109*44bedb31SLionel Sambuc for (len = 0; len <= MAXBITS; len++)
110*44bedb31SLionel Sambuc count[len] = 0;
111*44bedb31SLionel Sambuc for (sym = 0; sym < codes; sym++)
112*44bedb31SLionel Sambuc count[lens[sym]]++;
113*44bedb31SLionel Sambuc
114*44bedb31SLionel Sambuc /* bound code lengths, force root to be within code lengths */
115*44bedb31SLionel Sambuc root = *bits;
116*44bedb31SLionel Sambuc for (mmax = MAXBITS; mmax >= 1; mmax--)
117*44bedb31SLionel Sambuc if (count[mmax] != 0) break;
118*44bedb31SLionel Sambuc if (root > mmax) root = mmax;
119*44bedb31SLionel Sambuc if (mmax == 0) { /* no symbols to code at all */
120*44bedb31SLionel Sambuc this.op = (unsigned char)64; /* invalid code marker */
121*44bedb31SLionel Sambuc this.bits = (unsigned char)1;
122*44bedb31SLionel Sambuc this.val = (unsigned short)0;
123*44bedb31SLionel Sambuc *(*table)++ = this; /* make a table to force an error */
124*44bedb31SLionel Sambuc *(*table)++ = this;
125*44bedb31SLionel Sambuc *bits = 1;
126*44bedb31SLionel Sambuc return 0; /* no symbols, but wait for decoding to report error */
127*44bedb31SLionel Sambuc }
128*44bedb31SLionel Sambuc for (mmin = 1; mmin <= MAXBITS; mmin++)
129*44bedb31SLionel Sambuc if (count[mmin] != 0) break;
130*44bedb31SLionel Sambuc if (root < mmin) root = mmin;
131*44bedb31SLionel Sambuc
132*44bedb31SLionel Sambuc /* check for an over-subscribed or incomplete set of lengths */
133*44bedb31SLionel Sambuc left = 1;
134*44bedb31SLionel Sambuc for (len = 1; len <= MAXBITS; len++) {
135*44bedb31SLionel Sambuc left <<= 1;
136*44bedb31SLionel Sambuc left -= count[len];
137*44bedb31SLionel Sambuc if (left < 0) return -1; /* over-subscribed */
138*44bedb31SLionel Sambuc }
139*44bedb31SLionel Sambuc if (left > 0 && (type == CODES || mmax != 1))
140*44bedb31SLionel Sambuc return -1; /* incomplete set */
141*44bedb31SLionel Sambuc
142*44bedb31SLionel Sambuc /* generate offsets into symbol table for each length for sorting */
143*44bedb31SLionel Sambuc offs[1] = 0;
144*44bedb31SLionel Sambuc for (len = 1; len < MAXBITS; len++)
145*44bedb31SLionel Sambuc offs[len + 1] = offs[len] + count[len];
146*44bedb31SLionel Sambuc
147*44bedb31SLionel Sambuc /* sort symbols by length, by symbol order within each length */
148*44bedb31SLionel Sambuc for (sym = 0; sym < codes; sym++)
149*44bedb31SLionel Sambuc if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
150*44bedb31SLionel Sambuc
151*44bedb31SLionel Sambuc /*
152*44bedb31SLionel Sambuc Create and fill in decoding tables. In this loop, the table being
153*44bedb31SLionel Sambuc filled is at next and has curr index bits. The code being used is huff
154*44bedb31SLionel Sambuc with length len. That code is converted to an index by dropping drop
155*44bedb31SLionel Sambuc bits off of the bottom. For codes where len is less than drop + curr,
156*44bedb31SLionel Sambuc those top drop + curr - len bits are incremented through all values to
157*44bedb31SLionel Sambuc fill the table with replicated entries.
158*44bedb31SLionel Sambuc
159*44bedb31SLionel Sambuc root is the number of index bits for the root table. When len exceeds
160*44bedb31SLionel Sambuc root, sub-tables are created pointed to by the root entry with an index
161*44bedb31SLionel Sambuc of the low root bits of huff. This is saved in low to check for when a
162*44bedb31SLionel Sambuc new sub-table should be started. drop is zero when the root table is
163*44bedb31SLionel Sambuc being filled, and drop is root when sub-tables are being filled.
164*44bedb31SLionel Sambuc
165*44bedb31SLionel Sambuc When a new sub-table is needed, it is necessary to look ahead in the
166*44bedb31SLionel Sambuc code lengths to determine what size sub-table is needed. The length
167*44bedb31SLionel Sambuc counts are used for this, and so count[] is decremented as codes are
168*44bedb31SLionel Sambuc entered in the tables.
169*44bedb31SLionel Sambuc
170*44bedb31SLionel Sambuc used keeps track of how many table entries have been allocated from the
171*44bedb31SLionel Sambuc provided *table space. It is checked when a LENS table is being made
172*44bedb31SLionel Sambuc against the space in *table, ENOUGH, minus the maximum space needed by
173*44bedb31SLionel Sambuc the worst case distance code, MAXD. This should never happen, but the
174*44bedb31SLionel Sambuc sufficiency of ENOUGH has not been proven exhaustively, hence the check.
175*44bedb31SLionel Sambuc This assumes that when type == LENS, bits == 9.
176*44bedb31SLionel Sambuc
177*44bedb31SLionel Sambuc sym increments through all symbols, and the loop terminates when
178*44bedb31SLionel Sambuc all codes of length mmax, i.e. all codes, have been processed. This
179*44bedb31SLionel Sambuc routine permits incomplete codes, so another loop after this one fills
180*44bedb31SLionel Sambuc in the rest of the decoding tables with invalid code markers.
181*44bedb31SLionel Sambuc */
182*44bedb31SLionel Sambuc
183*44bedb31SLionel Sambuc /* set up for code type */
184*44bedb31SLionel Sambuc switch (type) {
185*44bedb31SLionel Sambuc case CODES:
186*44bedb31SLionel Sambuc base = extra = work; /* dummy value--not used */
187*44bedb31SLionel Sambuc end = 19;
188*44bedb31SLionel Sambuc break;
189*44bedb31SLionel Sambuc case LENS:
190*44bedb31SLionel Sambuc base = lbase;
191*44bedb31SLionel Sambuc base -= 257;
192*44bedb31SLionel Sambuc extra = lext;
193*44bedb31SLionel Sambuc extra -= 257;
194*44bedb31SLionel Sambuc end = 256;
195*44bedb31SLionel Sambuc break;
196*44bedb31SLionel Sambuc default: /* DISTS */
197*44bedb31SLionel Sambuc base = dbase;
198*44bedb31SLionel Sambuc extra = dext;
199*44bedb31SLionel Sambuc end = -1;
200*44bedb31SLionel Sambuc }
201*44bedb31SLionel Sambuc
202*44bedb31SLionel Sambuc /* initialize state for loop */
203*44bedb31SLionel Sambuc huff = 0; /* starting code */
204*44bedb31SLionel Sambuc sym = 0; /* starting code symbol */
205*44bedb31SLionel Sambuc len = mmin; /* starting code length */
206*44bedb31SLionel Sambuc next = *table; /* current table to fill in */
207*44bedb31SLionel Sambuc curr = root; /* current table index bits */
208*44bedb31SLionel Sambuc drop = 0; /* current bits to drop from code for index */
209*44bedb31SLionel Sambuc low = (unsigned)(-1); /* trigger new sub-table when len > root */
210*44bedb31SLionel Sambuc used = 1U << root; /* use root table entries */
211*44bedb31SLionel Sambuc mask = used - 1; /* mask for comparing low */
212*44bedb31SLionel Sambuc
213*44bedb31SLionel Sambuc /* check available table space */
214*44bedb31SLionel Sambuc if (type == LENS && used >= ENOUGH - MAXD)
215*44bedb31SLionel Sambuc return 1;
216*44bedb31SLionel Sambuc
217*44bedb31SLionel Sambuc /* process all codes and make table entries */
218*44bedb31SLionel Sambuc for (;;) {
219*44bedb31SLionel Sambuc /* create table entry */
220*44bedb31SLionel Sambuc this.bits = (unsigned char)(len - drop);
221*44bedb31SLionel Sambuc if ((int)(work[sym]) < end) {
222*44bedb31SLionel Sambuc this.op = (unsigned char)0;
223*44bedb31SLionel Sambuc this.val = work[sym];
224*44bedb31SLionel Sambuc }
225*44bedb31SLionel Sambuc else if ((int)(work[sym]) > end) {
226*44bedb31SLionel Sambuc this.op = (unsigned char)(extra[work[sym]]);
227*44bedb31SLionel Sambuc this.val = base[work[sym]];
228*44bedb31SLionel Sambuc }
229*44bedb31SLionel Sambuc else {
230*44bedb31SLionel Sambuc this.op = (unsigned char)(32 + 64); /* end of block */
231*44bedb31SLionel Sambuc this.val = 0;
232*44bedb31SLionel Sambuc }
233*44bedb31SLionel Sambuc
234*44bedb31SLionel Sambuc /* replicate for those indices with low len bits equal to huff */
235*44bedb31SLionel Sambuc incr = 1U << (len - drop);
236*44bedb31SLionel Sambuc fill = 1U << curr;
237*44bedb31SLionel Sambuc mmin = fill; /* save offset to next table */
238*44bedb31SLionel Sambuc do {
239*44bedb31SLionel Sambuc fill -= incr;
240*44bedb31SLionel Sambuc next[(huff >> drop) + fill] = this;
241*44bedb31SLionel Sambuc } while (fill != 0);
242*44bedb31SLionel Sambuc
243*44bedb31SLionel Sambuc /* backwards increment the len-bit code huff */
244*44bedb31SLionel Sambuc incr = 1U << (len - 1);
245*44bedb31SLionel Sambuc while (huff & incr)
246*44bedb31SLionel Sambuc incr >>= 1;
247*44bedb31SLionel Sambuc if (incr != 0) {
248*44bedb31SLionel Sambuc huff &= incr - 1;
249*44bedb31SLionel Sambuc huff += incr;
250*44bedb31SLionel Sambuc }
251*44bedb31SLionel Sambuc else
252*44bedb31SLionel Sambuc huff = 0;
253*44bedb31SLionel Sambuc
254*44bedb31SLionel Sambuc /* go to next symbol, update count, len */
255*44bedb31SLionel Sambuc sym++;
256*44bedb31SLionel Sambuc if (--(count[len]) == 0) {
257*44bedb31SLionel Sambuc if (len == mmax) break;
258*44bedb31SLionel Sambuc len = lens[work[sym]];
259*44bedb31SLionel Sambuc }
260*44bedb31SLionel Sambuc
261*44bedb31SLionel Sambuc /* create new sub-table if needed */
262*44bedb31SLionel Sambuc if (len > root && (huff & mask) != low) {
263*44bedb31SLionel Sambuc /* if first time, transition to sub-tables */
264*44bedb31SLionel Sambuc if (drop == 0)
265*44bedb31SLionel Sambuc drop = root;
266*44bedb31SLionel Sambuc
267*44bedb31SLionel Sambuc /* increment past last table */
268*44bedb31SLionel Sambuc next += mmin; /* here mmin is 1 << curr */
269*44bedb31SLionel Sambuc
270*44bedb31SLionel Sambuc /* determine length of next table */
271*44bedb31SLionel Sambuc curr = len - drop;
272*44bedb31SLionel Sambuc left = (int)(1 << curr);
273*44bedb31SLionel Sambuc while (curr + drop < mmax) {
274*44bedb31SLionel Sambuc left -= count[curr + drop];
275*44bedb31SLionel Sambuc if (left <= 0) break;
276*44bedb31SLionel Sambuc curr++;
277*44bedb31SLionel Sambuc left <<= 1;
278*44bedb31SLionel Sambuc }
279*44bedb31SLionel Sambuc
280*44bedb31SLionel Sambuc /* check for enough space */
281*44bedb31SLionel Sambuc used += 1U << curr;
282*44bedb31SLionel Sambuc if (type == LENS && used >= ENOUGH - MAXD)
283*44bedb31SLionel Sambuc return 1;
284*44bedb31SLionel Sambuc
285*44bedb31SLionel Sambuc /* point entry in root table to sub-table */
286*44bedb31SLionel Sambuc low = huff & mask;
287*44bedb31SLionel Sambuc (*table)[low].op = (unsigned char)curr;
288*44bedb31SLionel Sambuc (*table)[low].bits = (unsigned char)root;
289*44bedb31SLionel Sambuc (*table)[low].val = (unsigned short)(next - *table);
290*44bedb31SLionel Sambuc }
291*44bedb31SLionel Sambuc }
292*44bedb31SLionel Sambuc
293*44bedb31SLionel Sambuc /*
294*44bedb31SLionel Sambuc Fill in rest of table for incomplete codes. This loop is similar to the
295*44bedb31SLionel Sambuc loop above in incrementing huff for table indices. It is assumed that
296*44bedb31SLionel Sambuc len is equal to curr + drop, so there is no loop needed to increment
297*44bedb31SLionel Sambuc through high index bits. When the current sub-table is filled, the loop
298*44bedb31SLionel Sambuc drops back to the root table to fill in any remaining entries there.
299*44bedb31SLionel Sambuc */
300*44bedb31SLionel Sambuc this.op = (unsigned char)64; /* invalid code marker */
301*44bedb31SLionel Sambuc this.bits = (unsigned char)(len - drop);
302*44bedb31SLionel Sambuc this.val = (unsigned short)0;
303*44bedb31SLionel Sambuc while (huff != 0) {
304*44bedb31SLionel Sambuc /* when done with sub-table, drop back to root table */
305*44bedb31SLionel Sambuc if (drop != 0 && (huff & mask) != low) {
306*44bedb31SLionel Sambuc drop = 0;
307*44bedb31SLionel Sambuc len = root;
308*44bedb31SLionel Sambuc next = *table;
309*44bedb31SLionel Sambuc this.bits = (unsigned char)len;
310*44bedb31SLionel Sambuc }
311*44bedb31SLionel Sambuc
312*44bedb31SLionel Sambuc /* put invalid code marker in table */
313*44bedb31SLionel Sambuc next[huff >> drop] = this;
314*44bedb31SLionel Sambuc
315*44bedb31SLionel Sambuc /* backwards increment the len-bit code huff */
316*44bedb31SLionel Sambuc incr = 1U << (len - 1);
317*44bedb31SLionel Sambuc while (huff & incr)
318*44bedb31SLionel Sambuc incr >>= 1;
319*44bedb31SLionel Sambuc if (incr != 0) {
320*44bedb31SLionel Sambuc huff &= incr - 1;
321*44bedb31SLionel Sambuc huff += incr;
322*44bedb31SLionel Sambuc }
323*44bedb31SLionel Sambuc else
324*44bedb31SLionel Sambuc huff = 0;
325*44bedb31SLionel Sambuc }
326*44bedb31SLionel Sambuc
327*44bedb31SLionel Sambuc /* set return parameters */
328*44bedb31SLionel Sambuc *table += used;
329*44bedb31SLionel Sambuc *bits = root;
330*44bedb31SLionel Sambuc return 0;
331*44bedb31SLionel Sambuc }
332