/* $NetBSD: inftree9.c,v 1.1.1.1 2006/01/14 20:10:52 christos Exp $ */

/* inftree9.c -- generate Huffman trees for efficient decoding
 * Copyright (C) 1995-2005 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

#include "zutil.h"
#include "inftree9.h"

/* Longest code length, in bits, that an entry of lens[] may hold; also sizes
   the per-length count[] and offs[] arrays (MAXBITS+1 entries each). */
#define MAXBITS 15

/* Copyright notice kept as a string so that it is present in the object
   file. */
const char inflate9_copyright[] =
   " inflate9 1.2.3 Copyright 1995-2005 Mark Adler ";
/*
  If you use the zlib library in a product, an acknowledgment is welcome
  in the documentation of your product. If for some reason you cannot
  include such an acknowledgment, I would appreciate that you keep this
  copyright string in the executable of your product.
 */

/*
   Build a set of tables to decode the provided canonical Huffman code.
   The code lengths are lens[0..codes-1]. The result starts at *table,
   whose indices are 0..2^bits-1. work is a writable array of at least
   codes shorts, which is used as a work area. type is the type of code
   to be generated: CODES, LENS, or DISTS. On return, zero is success,
   -1 is an invalid code, and +1 means that ENOUGH isn't enough. table
   on return points to the next available entry's address. bits is the
   requested root table index bits, and on return it is the actual root
   table index bits. It will differ if the request is greater than the
   longest code or if it is less than the shortest code.
 */
inflate_table9(type,lens,codes,table,bits,work)34*44bedb31SLionel Sambuc int inflate_table9(type, lens, codes, table, bits, work)
35*44bedb31SLionel Sambuc codetype type;
36*44bedb31SLionel Sambuc unsigned short FAR *lens;
37*44bedb31SLionel Sambuc unsigned codes;
38*44bedb31SLionel Sambuc code FAR * FAR *table;
39*44bedb31SLionel Sambuc unsigned FAR *bits;
40*44bedb31SLionel Sambuc unsigned short FAR *work;
41*44bedb31SLionel Sambuc {
42*44bedb31SLionel Sambuc unsigned len; /* a code's length in bits */
43*44bedb31SLionel Sambuc unsigned sym; /* index of code symbols */
44*44bedb31SLionel Sambuc unsigned min, max; /* minimum and maximum code lengths */
45*44bedb31SLionel Sambuc unsigned root; /* number of index bits for root table */
46*44bedb31SLionel Sambuc unsigned curr; /* number of index bits for current table */
47*44bedb31SLionel Sambuc unsigned drop; /* code bits to drop for sub-table */
48*44bedb31SLionel Sambuc int left; /* number of prefix codes available */
49*44bedb31SLionel Sambuc unsigned used; /* code entries in table used */
50*44bedb31SLionel Sambuc unsigned huff; /* Huffman code */
51*44bedb31SLionel Sambuc unsigned incr; /* for incrementing code, index */
52*44bedb31SLionel Sambuc unsigned fill; /* index for replicating entries */
53*44bedb31SLionel Sambuc unsigned low; /* low bits for current root entry */
54*44bedb31SLionel Sambuc unsigned mask; /* mask for low root bits */
55*44bedb31SLionel Sambuc code this; /* table entry for duplication */
56*44bedb31SLionel Sambuc code FAR *next; /* next available space in table */
57*44bedb31SLionel Sambuc const unsigned short FAR *base; /* base value table to use */
58*44bedb31SLionel Sambuc const unsigned short FAR *extra; /* extra bits table to use */
59*44bedb31SLionel Sambuc int end; /* use base and extra for symbol > end */
60*44bedb31SLionel Sambuc unsigned short count[MAXBITS+1]; /* number of codes of each length */
61*44bedb31SLionel Sambuc unsigned short offs[MAXBITS+1]; /* offsets in table for each length */
62*44bedb31SLionel Sambuc static const unsigned short lbase[31] = { /* Length codes 257..285 base */
63*44bedb31SLionel Sambuc 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17,
64*44bedb31SLionel Sambuc 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115,
65*44bedb31SLionel Sambuc 131, 163, 195, 227, 3, 0, 0};
66*44bedb31SLionel Sambuc static const unsigned short lext[31] = { /* Length codes 257..285 extra */
67*44bedb31SLionel Sambuc 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129,
68*44bedb31SLionel Sambuc 130, 130, 130, 130, 131, 131, 131, 131, 132, 132, 132, 132,
69*44bedb31SLionel Sambuc 133, 133, 133, 133, 144, 201, 196};
70*44bedb31SLionel Sambuc static const unsigned short dbase[32] = { /* Distance codes 0..31 base */
71*44bedb31SLionel Sambuc 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49,
72*44bedb31SLionel Sambuc 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073,
73*44bedb31SLionel Sambuc 4097, 6145, 8193, 12289, 16385, 24577, 32769, 49153};
74*44bedb31SLionel Sambuc static const unsigned short dext[32] = { /* Distance codes 0..31 extra */
75*44bedb31SLionel Sambuc 128, 128, 128, 128, 129, 129, 130, 130, 131, 131, 132, 132,
76*44bedb31SLionel Sambuc 133, 133, 134, 134, 135, 135, 136, 136, 137, 137, 138, 138,
77*44bedb31SLionel Sambuc 139, 139, 140, 140, 141, 141, 142, 142};
78*44bedb31SLionel Sambuc
79*44bedb31SLionel Sambuc /*
80*44bedb31SLionel Sambuc Process a set of code lengths to create a canonical Huffman code. The
81*44bedb31SLionel Sambuc code lengths are lens[0..codes-1]. Each length corresponds to the
82*44bedb31SLionel Sambuc symbols 0..codes-1. The Huffman code is generated by first sorting the
83*44bedb31SLionel Sambuc symbols by length from short to long, and retaining the symbol order
84*44bedb31SLionel Sambuc for codes with equal lengths. Then the code starts with all zero bits
85*44bedb31SLionel Sambuc for the first code of the shortest length, and the codes are integer
86*44bedb31SLionel Sambuc increments for the same length, and zeros are appended as the length
87*44bedb31SLionel Sambuc increases. For the deflate format, these bits are stored backwards
88*44bedb31SLionel Sambuc from their more natural integer increment ordering, and so when the
89*44bedb31SLionel Sambuc decoding tables are built in the large loop below, the integer codes
90*44bedb31SLionel Sambuc are incremented backwards.
91*44bedb31SLionel Sambuc
92*44bedb31SLionel Sambuc This routine assumes, but does not check, that all of the entries in
93*44bedb31SLionel Sambuc lens[] are in the range 0..MAXBITS. The caller must assure this.
94*44bedb31SLionel Sambuc 1..MAXBITS is interpreted as that code length. zero means that that
95*44bedb31SLionel Sambuc symbol does not occur in this code.
96*44bedb31SLionel Sambuc
97*44bedb31SLionel Sambuc The codes are sorted by computing a count of codes for each length,
98*44bedb31SLionel Sambuc creating from that a table of starting indices for each length in the
99*44bedb31SLionel Sambuc sorted table, and then entering the symbols in order in the sorted
100*44bedb31SLionel Sambuc table. The sorted table is work[], with that space being provided by
101*44bedb31SLionel Sambuc the caller.
102*44bedb31SLionel Sambuc
103*44bedb31SLionel Sambuc The length counts are used for other purposes as well, i.e. finding
104*44bedb31SLionel Sambuc the minimum and maximum length codes, determining if there are any
105*44bedb31SLionel Sambuc codes at all, checking for a valid set of lengths, and looking ahead
106*44bedb31SLionel Sambuc at length counts to determine sub-table sizes when building the
107*44bedb31SLionel Sambuc decoding tables.
108*44bedb31SLionel Sambuc */
109*44bedb31SLionel Sambuc
110*44bedb31SLionel Sambuc /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
111*44bedb31SLionel Sambuc for (len = 0; len <= MAXBITS; len++)
112*44bedb31SLionel Sambuc count[len] = 0;
113*44bedb31SLionel Sambuc for (sym = 0; sym < codes; sym++)
114*44bedb31SLionel Sambuc count[lens[sym]]++;
115*44bedb31SLionel Sambuc
116*44bedb31SLionel Sambuc /* bound code lengths, force root to be within code lengths */
117*44bedb31SLionel Sambuc root = *bits;
118*44bedb31SLionel Sambuc for (max = MAXBITS; max >= 1; max--)
119*44bedb31SLionel Sambuc if (count[max] != 0) break;
120*44bedb31SLionel Sambuc if (root > max) root = max;
121*44bedb31SLionel Sambuc if (max == 0) return -1; /* no codes! */
122*44bedb31SLionel Sambuc for (min = 1; min <= MAXBITS; min++)
123*44bedb31SLionel Sambuc if (count[min] != 0) break;
124*44bedb31SLionel Sambuc if (root < min) root = min;
125*44bedb31SLionel Sambuc
126*44bedb31SLionel Sambuc /* check for an over-subscribed or incomplete set of lengths */
127*44bedb31SLionel Sambuc left = 1;
128*44bedb31SLionel Sambuc for (len = 1; len <= MAXBITS; len++) {
129*44bedb31SLionel Sambuc left <<= 1;
130*44bedb31SLionel Sambuc left -= count[len];
131*44bedb31SLionel Sambuc if (left < 0) return -1; /* over-subscribed */
132*44bedb31SLionel Sambuc }
133*44bedb31SLionel Sambuc if (left > 0 && (type == CODES || max != 1))
134*44bedb31SLionel Sambuc return -1; /* incomplete set */
135*44bedb31SLionel Sambuc
136*44bedb31SLionel Sambuc /* generate offsets into symbol table for each length for sorting */
137*44bedb31SLionel Sambuc offs[1] = 0;
138*44bedb31SLionel Sambuc for (len = 1; len < MAXBITS; len++)
139*44bedb31SLionel Sambuc offs[len + 1] = offs[len] + count[len];
140*44bedb31SLionel Sambuc
141*44bedb31SLionel Sambuc /* sort symbols by length, by symbol order within each length */
142*44bedb31SLionel Sambuc for (sym = 0; sym < codes; sym++)
143*44bedb31SLionel Sambuc if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
144*44bedb31SLionel Sambuc
145*44bedb31SLionel Sambuc /*
146*44bedb31SLionel Sambuc Create and fill in decoding tables. In this loop, the table being
147*44bedb31SLionel Sambuc filled is at next and has curr index bits. The code being used is huff
148*44bedb31SLionel Sambuc with length len. That code is converted to an index by dropping drop
149*44bedb31SLionel Sambuc bits off of the bottom. For codes where len is less than drop + curr,
150*44bedb31SLionel Sambuc those top drop + curr - len bits are incremented through all values to
151*44bedb31SLionel Sambuc fill the table with replicated entries.
152*44bedb31SLionel Sambuc
153*44bedb31SLionel Sambuc root is the number of index bits for the root table. When len exceeds
154*44bedb31SLionel Sambuc root, sub-tables are created pointed to by the root entry with an index
155*44bedb31SLionel Sambuc of the low root bits of huff. This is saved in low to check for when a
156*44bedb31SLionel Sambuc new sub-table should be started. drop is zero when the root table is
157*44bedb31SLionel Sambuc being filled, and drop is root when sub-tables are being filled.
158*44bedb31SLionel Sambuc
159*44bedb31SLionel Sambuc When a new sub-table is needed, it is necessary to look ahead in the
160*44bedb31SLionel Sambuc code lengths to determine what size sub-table is needed. The length
161*44bedb31SLionel Sambuc counts are used for this, and so count[] is decremented as codes are
162*44bedb31SLionel Sambuc entered in the tables.
163*44bedb31SLionel Sambuc
164*44bedb31SLionel Sambuc used keeps track of how many table entries have been allocated from the
165*44bedb31SLionel Sambuc provided *table space. It is checked when a LENS table is being made
166*44bedb31SLionel Sambuc against the space in *table, ENOUGH, minus the maximum space needed by
167*44bedb31SLionel Sambuc the worst case distance code, MAXD. This should never happen, but the
168*44bedb31SLionel Sambuc sufficiency of ENOUGH has not been proven exhaustively, hence the check.
169*44bedb31SLionel Sambuc This assumes that when type == LENS, bits == 9.
170*44bedb31SLionel Sambuc
171*44bedb31SLionel Sambuc sym increments through all symbols, and the loop terminates when
172*44bedb31SLionel Sambuc all codes of length max, i.e. all codes, have been processed. This
173*44bedb31SLionel Sambuc routine permits incomplete codes, so another loop after this one fills
174*44bedb31SLionel Sambuc in the rest of the decoding tables with invalid code markers.
175*44bedb31SLionel Sambuc */
176*44bedb31SLionel Sambuc
177*44bedb31SLionel Sambuc /* set up for code type */
178*44bedb31SLionel Sambuc switch (type) {
179*44bedb31SLionel Sambuc case CODES:
180*44bedb31SLionel Sambuc base = extra = work; /* dummy value--not used */
181*44bedb31SLionel Sambuc end = 19;
182*44bedb31SLionel Sambuc break;
183*44bedb31SLionel Sambuc case LENS:
184*44bedb31SLionel Sambuc base = lbase;
185*44bedb31SLionel Sambuc base -= 257;
186*44bedb31SLionel Sambuc extra = lext;
187*44bedb31SLionel Sambuc extra -= 257;
188*44bedb31SLionel Sambuc end = 256;
189*44bedb31SLionel Sambuc break;
190*44bedb31SLionel Sambuc default: /* DISTS */
191*44bedb31SLionel Sambuc base = dbase;
192*44bedb31SLionel Sambuc extra = dext;
193*44bedb31SLionel Sambuc end = -1;
194*44bedb31SLionel Sambuc }
195*44bedb31SLionel Sambuc
196*44bedb31SLionel Sambuc /* initialize state for loop */
197*44bedb31SLionel Sambuc huff = 0; /* starting code */
198*44bedb31SLionel Sambuc sym = 0; /* starting code symbol */
199*44bedb31SLionel Sambuc len = min; /* starting code length */
200*44bedb31SLionel Sambuc next = *table; /* current table to fill in */
201*44bedb31SLionel Sambuc curr = root; /* current table index bits */
202*44bedb31SLionel Sambuc drop = 0; /* current bits to drop from code for index */
203*44bedb31SLionel Sambuc low = (unsigned)(-1); /* trigger new sub-table when len > root */
204*44bedb31SLionel Sambuc used = 1U << root; /* use root table entries */
205*44bedb31SLionel Sambuc mask = used - 1; /* mask for comparing low */
206*44bedb31SLionel Sambuc
207*44bedb31SLionel Sambuc /* check available table space */
208*44bedb31SLionel Sambuc if (type == LENS && used >= ENOUGH - MAXD)
209*44bedb31SLionel Sambuc return 1;
210*44bedb31SLionel Sambuc
211*44bedb31SLionel Sambuc /* process all codes and make table entries */
212*44bedb31SLionel Sambuc for (;;) {
213*44bedb31SLionel Sambuc /* create table entry */
214*44bedb31SLionel Sambuc this.bits = (unsigned char)(len - drop);
215*44bedb31SLionel Sambuc if ((int)(work[sym]) < end) {
216*44bedb31SLionel Sambuc this.op = (unsigned char)0;
217*44bedb31SLionel Sambuc this.val = work[sym];
218*44bedb31SLionel Sambuc }
219*44bedb31SLionel Sambuc else if ((int)(work[sym]) > end) {
220*44bedb31SLionel Sambuc this.op = (unsigned char)(extra[work[sym]]);
221*44bedb31SLionel Sambuc this.val = base[work[sym]];
222*44bedb31SLionel Sambuc }
223*44bedb31SLionel Sambuc else {
224*44bedb31SLionel Sambuc this.op = (unsigned char)(32 + 64); /* end of block */
225*44bedb31SLionel Sambuc this.val = 0;
226*44bedb31SLionel Sambuc }
227*44bedb31SLionel Sambuc
228*44bedb31SLionel Sambuc /* replicate for those indices with low len bits equal to huff */
229*44bedb31SLionel Sambuc incr = 1U << (len - drop);
230*44bedb31SLionel Sambuc fill = 1U << curr;
231*44bedb31SLionel Sambuc do {
232*44bedb31SLionel Sambuc fill -= incr;
233*44bedb31SLionel Sambuc next[(huff >> drop) + fill] = this;
234*44bedb31SLionel Sambuc } while (fill != 0);
235*44bedb31SLionel Sambuc
236*44bedb31SLionel Sambuc /* backwards increment the len-bit code huff */
237*44bedb31SLionel Sambuc incr = 1U << (len - 1);
238*44bedb31SLionel Sambuc while (huff & incr)
239*44bedb31SLionel Sambuc incr >>= 1;
240*44bedb31SLionel Sambuc if (incr != 0) {
241*44bedb31SLionel Sambuc huff &= incr - 1;
242*44bedb31SLionel Sambuc huff += incr;
243*44bedb31SLionel Sambuc }
244*44bedb31SLionel Sambuc else
245*44bedb31SLionel Sambuc huff = 0;
246*44bedb31SLionel Sambuc
247*44bedb31SLionel Sambuc /* go to next symbol, update count, len */
248*44bedb31SLionel Sambuc sym++;
249*44bedb31SLionel Sambuc if (--(count[len]) == 0) {
250*44bedb31SLionel Sambuc if (len == max) break;
251*44bedb31SLionel Sambuc len = lens[work[sym]];
252*44bedb31SLionel Sambuc }
253*44bedb31SLionel Sambuc
254*44bedb31SLionel Sambuc /* create new sub-table if needed */
255*44bedb31SLionel Sambuc if (len > root && (huff & mask) != low) {
256*44bedb31SLionel Sambuc /* if first time, transition to sub-tables */
257*44bedb31SLionel Sambuc if (drop == 0)
258*44bedb31SLionel Sambuc drop = root;
259*44bedb31SLionel Sambuc
260*44bedb31SLionel Sambuc /* increment past last table */
261*44bedb31SLionel Sambuc next += 1U << curr;
262*44bedb31SLionel Sambuc
263*44bedb31SLionel Sambuc /* determine length of next table */
264*44bedb31SLionel Sambuc curr = len - drop;
265*44bedb31SLionel Sambuc left = (int)(1 << curr);
266*44bedb31SLionel Sambuc while (curr + drop < max) {
267*44bedb31SLionel Sambuc left -= count[curr + drop];
268*44bedb31SLionel Sambuc if (left <= 0) break;
269*44bedb31SLionel Sambuc curr++;
270*44bedb31SLionel Sambuc left <<= 1;
271*44bedb31SLionel Sambuc }
272*44bedb31SLionel Sambuc
273*44bedb31SLionel Sambuc /* check for enough space */
274*44bedb31SLionel Sambuc used += 1U << curr;
275*44bedb31SLionel Sambuc if (type == LENS && used >= ENOUGH - MAXD)
276*44bedb31SLionel Sambuc return 1;
277*44bedb31SLionel Sambuc
278*44bedb31SLionel Sambuc /* point entry in root table to sub-table */
279*44bedb31SLionel Sambuc low = huff & mask;
280*44bedb31SLionel Sambuc (*table)[low].op = (unsigned char)curr;
281*44bedb31SLionel Sambuc (*table)[low].bits = (unsigned char)root;
282*44bedb31SLionel Sambuc (*table)[low].val = (unsigned short)(next - *table);
283*44bedb31SLionel Sambuc }
284*44bedb31SLionel Sambuc }
285*44bedb31SLionel Sambuc
286*44bedb31SLionel Sambuc /*
287*44bedb31SLionel Sambuc Fill in rest of table for incomplete codes. This loop is similar to the
288*44bedb31SLionel Sambuc loop above in incrementing huff for table indices. It is assumed that
289*44bedb31SLionel Sambuc len is equal to curr + drop, so there is no loop needed to increment
290*44bedb31SLionel Sambuc through high index bits. When the current sub-table is filled, the loop
291*44bedb31SLionel Sambuc drops back to the root table to fill in any remaining entries there.
292*44bedb31SLionel Sambuc */
293*44bedb31SLionel Sambuc this.op = (unsigned char)64; /* invalid code marker */
294*44bedb31SLionel Sambuc this.bits = (unsigned char)(len - drop);
295*44bedb31SLionel Sambuc this.val = (unsigned short)0;
296*44bedb31SLionel Sambuc while (huff != 0) {
297*44bedb31SLionel Sambuc /* when done with sub-table, drop back to root table */
298*44bedb31SLionel Sambuc if (drop != 0 && (huff & mask) != low) {
299*44bedb31SLionel Sambuc drop = 0;
300*44bedb31SLionel Sambuc len = root;
301*44bedb31SLionel Sambuc next = *table;
302*44bedb31SLionel Sambuc curr = root;
303*44bedb31SLionel Sambuc this.bits = (unsigned char)len;
304*44bedb31SLionel Sambuc }
305*44bedb31SLionel Sambuc
306*44bedb31SLionel Sambuc /* put invalid code marker in table */
307*44bedb31SLionel Sambuc next[huff >> drop] = this;
308*44bedb31SLionel Sambuc
309*44bedb31SLionel Sambuc /* backwards increment the len-bit code huff */
310*44bedb31SLionel Sambuc incr = 1U << (len - 1);
311*44bedb31SLionel Sambuc while (huff & incr)
312*44bedb31SLionel Sambuc incr >>= 1;
313*44bedb31SLionel Sambuc if (incr != 0) {
314*44bedb31SLionel Sambuc huff &= incr - 1;
315*44bedb31SLionel Sambuc huff += incr;
316*44bedb31SLionel Sambuc }
317*44bedb31SLionel Sambuc else
318*44bedb31SLionel Sambuc huff = 0;
319*44bedb31SLionel Sambuc }
320*44bedb31SLionel Sambuc
321*44bedb31SLionel Sambuc /* set return parameters */
322*44bedb31SLionel Sambuc *table += used;
323*44bedb31SLionel Sambuc *bits = root;
324*44bedb31SLionel Sambuc return 0;
325*44bedb31SLionel Sambuc }
326