135534e26Sdownsj /* inftrees.c -- generate Huffman trees for efficient decoding
2*f0633749Stb * Copyright (C) 1995-2024 Mark Adler
335534e26Sdownsj * For conditions of distribution and use, see copyright notice in zlib.h
435534e26Sdownsj */
535534e26Sdownsj
635534e26Sdownsj #include "zutil.h"
735534e26Sdownsj #include "inftrees.h"
835534e26Sdownsj
9a79de952Smillert #define MAXBITS 15
105dc7a4a5Smillert
1135534e26Sdownsj /*
1235534e26Sdownsj If you use the zlib library in a product, an acknowledgment is welcome
1335534e26Sdownsj in the documentation of your product. If for some reason you cannot
1435534e26Sdownsj include such an acknowledgment, I would appreciate that you keep this
1535534e26Sdownsj copyright string in the executable of your product.
1635534e26Sdownsj */
1735534e26Sdownsj
1835534e26Sdownsj /*
19a79de952Smillert Build a set of tables to decode the provided canonical Huffman code.
20a79de952Smillert The code lengths are lens[0..codes-1]. The result starts at *table,
21a79de952Smillert whose indices are 0..2^bits-1. work is a writable array of at least
22a79de952Smillert lens shorts, which is used as a work area. type is the type of code
23a79de952Smillert to be generated, CODES, LENS, or DISTS. On return, zero is success,
24a79de952Smillert -1 is an invalid code, and +1 means that ENOUGH isn't enough. table
25a79de952Smillert on return points to the next available entry's address. bits is the
26a79de952Smillert requested root table index bits, and on return it is the actual root
27a79de952Smillert table index bits. It will differ if the request is greater than the
28a79de952Smillert longest code or if it is less than the shortest code.
29a79de952Smillert */
inflate_table(codetype type,unsigned short FAR * lens,unsigned codes,code FAR * FAR * table,unsigned FAR * bits,unsigned short FAR * work)30cfac609cStb int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens,
31cfac609cStb unsigned codes, code FAR * FAR *table,
32cfac609cStb unsigned FAR *bits, unsigned short FAR *work) {
33a79de952Smillert unsigned len; /* a code's length in bits */
34a79de952Smillert unsigned sym; /* index of code symbols */
35a79de952Smillert unsigned min, max; /* minimum and maximum code lengths */
36a79de952Smillert unsigned root; /* number of index bits for root table */
37a79de952Smillert unsigned curr; /* number of index bits for current table */
38a79de952Smillert unsigned drop; /* code bits to drop for sub-table */
39a79de952Smillert int left; /* number of prefix codes available */
40a79de952Smillert unsigned used; /* code entries in table used */
41a79de952Smillert unsigned huff; /* Huffman code */
42a79de952Smillert unsigned incr; /* for incrementing code, index */
43a79de952Smillert unsigned fill; /* index for replicating entries */
44a79de952Smillert unsigned low; /* low bits for current root entry */
45a79de952Smillert unsigned mask; /* mask for low root bits */
4636f395ceStb code here; /* table entry for duplication */
47a79de952Smillert code FAR *next; /* next available space in table */
48a79de952Smillert const unsigned short FAR *base; /* base value table to use */
49a79de952Smillert const unsigned short FAR *extra; /* extra bits table to use */
5036f395ceStb unsigned match; /* use base and extra for symbol >= match */
51a79de952Smillert unsigned short count[MAXBITS+1]; /* number of codes of each length */
52a79de952Smillert unsigned short offs[MAXBITS+1]; /* offsets in table for each length */
53a79de952Smillert static const unsigned short lbase[31] = { /* Length codes 257..285 base */
54a79de952Smillert 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
55a79de952Smillert 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
56a79de952Smillert static const unsigned short lext[31] = { /* Length codes 257..285 extra */
57a79de952Smillert 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
58*f0633749Stb 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 73, 200};
59a79de952Smillert static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
60a79de952Smillert 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
61a79de952Smillert 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
62a79de952Smillert 8193, 12289, 16385, 24577, 0, 0};
63a79de952Smillert static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
64a79de952Smillert 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
65a79de952Smillert 23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
66a79de952Smillert 28, 28, 29, 29, 64, 64};
6735534e26Sdownsj
68a79de952Smillert /*
69a79de952Smillert Process a set of code lengths to create a canonical Huffman code. The
70a79de952Smillert code lengths are lens[0..codes-1]. Each length corresponds to the
71a79de952Smillert symbols 0..codes-1. The Huffman code is generated by first sorting the
72a79de952Smillert symbols by length from short to long, and retaining the symbol order
73a79de952Smillert for codes with equal lengths. Then the code starts with all zero bits
74a79de952Smillert for the first code of the shortest length, and the codes are integer
75a79de952Smillert increments for the same length, and zeros are appended as the length
76a79de952Smillert increases. For the deflate format, these bits are stored backwards
77a79de952Smillert from their more natural integer increment ordering, and so when the
78a79de952Smillert decoding tables are built in the large loop below, the integer codes
79a79de952Smillert are incremented backwards.
8035534e26Sdownsj
81a79de952Smillert This routine assumes, but does not check, that all of the entries in
82a79de952Smillert lens[] are in the range 0..MAXBITS. The caller must assure this.
83a79de952Smillert 1..MAXBITS is interpreted as that code length. zero means that that
84a79de952Smillert symbol does not occur in this code.
85a79de952Smillert
86a79de952Smillert The codes are sorted by computing a count of codes for each length,
87a79de952Smillert creating from that a table of starting indices for each length in the
88a79de952Smillert sorted table, and then entering the symbols in order in the sorted
89a79de952Smillert table. The sorted table is work[], with that space being provided by
90a79de952Smillert the caller.
91a79de952Smillert
92a79de952Smillert The length counts are used for other purposes as well, i.e. finding
93a79de952Smillert the minimum and maximum length codes, determining if there are any
94a79de952Smillert codes at all, checking for a valid set of lengths, and looking ahead
95a79de952Smillert at length counts to determine sub-table sizes when building the
96a79de952Smillert decoding tables.
9735534e26Sdownsj */
9835534e26Sdownsj
99a79de952Smillert /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
100a79de952Smillert for (len = 0; len <= MAXBITS; len++)
101a79de952Smillert count[len] = 0;
102a79de952Smillert for (sym = 0; sym < codes; sym++)
103a79de952Smillert count[lens[sym]]++;
10435534e26Sdownsj
105a79de952Smillert /* bound code lengths, force root to be within code lengths */
106a79de952Smillert root = *bits;
107a79de952Smillert for (max = MAXBITS; max >= 1; max--)
108a79de952Smillert if (count[max] != 0) break;
109a79de952Smillert if (root > max) root = max;
110dec6a01cSdjm if (max == 0) { /* no symbols to code at all */
11136f395ceStb here.op = (unsigned char)64; /* invalid code marker */
11236f395ceStb here.bits = (unsigned char)1;
11336f395ceStb here.val = (unsigned short)0;
11436f395ceStb *(*table)++ = here; /* make a table to force an error */
11536f395ceStb *(*table)++ = here;
116dec6a01cSdjm *bits = 1;
117dec6a01cSdjm return 0; /* no symbols, but wait for decoding to report error */
118dec6a01cSdjm }
11936f395ceStb for (min = 1; min < max; min++)
120a79de952Smillert if (count[min] != 0) break;
121a79de952Smillert if (root < min) root = min;
12235534e26Sdownsj
123a79de952Smillert /* check for an over-subscribed or incomplete set of lengths */
124a79de952Smillert left = 1;
125a79de952Smillert for (len = 1; len <= MAXBITS; len++) {
126a79de952Smillert left <<= 1;
127a79de952Smillert left -= count[len];
128a79de952Smillert if (left < 0) return -1; /* over-subscribed */
12935534e26Sdownsj }
13079551740Smillert if (left > 0 && (type == CODES || max != 1))
131a79de952Smillert return -1; /* incomplete set */
13235534e26Sdownsj
133a79de952Smillert /* generate offsets into symbol table for each length for sorting */
134a79de952Smillert offs[1] = 0;
135a79de952Smillert for (len = 1; len < MAXBITS; len++)
136a79de952Smillert offs[len + 1] = offs[len] + count[len];
13735534e26Sdownsj
138a79de952Smillert /* sort symbols by length, by symbol order within each length */
139a79de952Smillert for (sym = 0; sym < codes; sym++)
140a79de952Smillert if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
141a79de952Smillert
142a79de952Smillert /*
143a79de952Smillert Create and fill in decoding tables. In this loop, the table being
144a79de952Smillert filled is at next and has curr index bits. The code being used is huff
145a79de952Smillert with length len. That code is converted to an index by dropping drop
146a79de952Smillert bits off of the bottom. For codes where len is less than drop + curr,
147a79de952Smillert those top drop + curr - len bits are incremented through all values to
148a79de952Smillert fill the table with replicated entries.
149a79de952Smillert
150a79de952Smillert root is the number of index bits for the root table. When len exceeds
151a79de952Smillert root, sub-tables are created pointed to by the root entry with an index
152a79de952Smillert of the low root bits of huff. This is saved in low to check for when a
153a79de952Smillert new sub-table should be started. drop is zero when the root table is
154a79de952Smillert being filled, and drop is root when sub-tables are being filled.
155a79de952Smillert
156a79de952Smillert When a new sub-table is needed, it is necessary to look ahead in the
157a79de952Smillert code lengths to determine what size sub-table is needed. The length
158a79de952Smillert counts are used for this, and so count[] is decremented as codes are
159a79de952Smillert entered in the tables.
160a79de952Smillert
161a79de952Smillert used keeps track of how many table entries have been allocated from the
16236f395ceStb provided *table space. It is checked for LENS and DIST tables against
16336f395ceStb the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
16436f395ceStb the initial root table size constants. See the comments in inftrees.h
16536f395ceStb for more information.
166a79de952Smillert
167a79de952Smillert sym increments through all symbols, and the loop terminates when
168a79de952Smillert all codes of length max, i.e. all codes, have been processed. This
169a79de952Smillert routine permits incomplete codes, so another loop after this one fills
170a79de952Smillert in the rest of the decoding tables with invalid code markers.
171a79de952Smillert */
172a79de952Smillert
173a79de952Smillert /* set up for code type */
174a79de952Smillert switch (type) {
175a79de952Smillert case CODES:
176a79de952Smillert base = extra = work; /* dummy value--not used */
17736f395ceStb match = 20;
17835534e26Sdownsj break;
179a79de952Smillert case LENS:
180a79de952Smillert base = lbase;
181a79de952Smillert extra = lext;
18236f395ceStb match = 257;
18335534e26Sdownsj break;
184a79de952Smillert default: /* DISTS */
185a79de952Smillert base = dbase;
186a79de952Smillert extra = dext;
18736f395ceStb match = 0;
18835534e26Sdownsj }
18935534e26Sdownsj
190a79de952Smillert /* initialize state for loop */
191a79de952Smillert huff = 0; /* starting code */
192a79de952Smillert sym = 0; /* starting code symbol */
193a79de952Smillert len = min; /* starting code length */
194a79de952Smillert next = *table; /* current table to fill in */
195a79de952Smillert curr = root; /* current table index bits */
196a79de952Smillert drop = 0; /* current bits to drop from code for index */
197a79de952Smillert low = (unsigned)(-1); /* trigger new sub-table when len > root */
198a79de952Smillert used = 1U << root; /* use root table entries */
199a79de952Smillert mask = used - 1; /* mask for comparing low */
20035534e26Sdownsj
201a79de952Smillert /* check available table space */
20236f395ceStb if ((type == LENS && used > ENOUGH_LENS) ||
20336f395ceStb (type == DISTS && used > ENOUGH_DISTS))
204a79de952Smillert return 1;
205a79de952Smillert
206a79de952Smillert /* process all codes and make table entries */
207a79de952Smillert for (;;) {
208a79de952Smillert /* create table entry */
20936f395ceStb here.bits = (unsigned char)(len - drop);
21036f395ceStb if (work[sym] + 1U < match) {
21136f395ceStb here.op = (unsigned char)0;
21236f395ceStb here.val = work[sym];
213a79de952Smillert }
21436f395ceStb else if (work[sym] >= match) {
21536f395ceStb here.op = (unsigned char)(extra[work[sym] - match]);
21636f395ceStb here.val = base[work[sym] - match];
217a79de952Smillert }
218a79de952Smillert else {
21936f395ceStb here.op = (unsigned char)(32 + 64); /* end of block */
22036f395ceStb here.val = 0;
221a79de952Smillert }
222a79de952Smillert
223a79de952Smillert /* replicate for those indices with low len bits equal to huff */
224a79de952Smillert incr = 1U << (len - drop);
225a79de952Smillert fill = 1U << curr;
226d76b9bfaSmillert min = fill; /* save offset to next table */
22735534e26Sdownsj do {
228a79de952Smillert fill -= incr;
22936f395ceStb next[(huff >> drop) + fill] = here;
230a79de952Smillert } while (fill != 0);
23135534e26Sdownsj
232a79de952Smillert /* backwards increment the len-bit code huff */
233a79de952Smillert incr = 1U << (len - 1);
234a79de952Smillert while (huff & incr)
235a79de952Smillert incr >>= 1;
236a79de952Smillert if (incr != 0) {
237a79de952Smillert huff &= incr - 1;
238a79de952Smillert huff += incr;
23935534e26Sdownsj }
2405dc7a4a5Smillert else
241a79de952Smillert huff = 0;
242a79de952Smillert
243a79de952Smillert /* go to next symbol, update count, len */
244a79de952Smillert sym++;
245a79de952Smillert if (--(count[len]) == 0) {
246a79de952Smillert if (len == max) break;
247a79de952Smillert len = lens[work[sym]];
24835534e26Sdownsj }
24935534e26Sdownsj
250a79de952Smillert /* create new sub-table if needed */
251a79de952Smillert if (len > root && (huff & mask) != low) {
252a79de952Smillert /* if first time, transition to sub-tables */
253a79de952Smillert if (drop == 0)
254a79de952Smillert drop = root;
255a79de952Smillert
256a79de952Smillert /* increment past last table */
257d76b9bfaSmillert next += min; /* here min is 1 << curr */
258a79de952Smillert
259a79de952Smillert /* determine length of next table */
260a79de952Smillert curr = len - drop;
261a79de952Smillert left = (int)(1 << curr);
262a79de952Smillert while (curr + drop < max) {
263a79de952Smillert left -= count[curr + drop];
264a79de952Smillert if (left <= 0) break;
265a79de952Smillert curr++;
266a79de952Smillert left <<= 1;
267a79de952Smillert }
268a79de952Smillert
269a79de952Smillert /* check for enough space */
270a79de952Smillert used += 1U << curr;
27136f395ceStb if ((type == LENS && used > ENOUGH_LENS) ||
27236f395ceStb (type == DISTS && used > ENOUGH_DISTS))
273a79de952Smillert return 1;
274a79de952Smillert
275a79de952Smillert /* point entry in root table to sub-table */
276a79de952Smillert low = huff & mask;
277a79de952Smillert (*table)[low].op = (unsigned char)curr;
278a79de952Smillert (*table)[low].bits = (unsigned char)root;
279a79de952Smillert (*table)[low].val = (unsigned short)(next - *table);
280a79de952Smillert }
281a79de952Smillert }
282a79de952Smillert
28336f395ceStb /* fill in remaining table entry if code is incomplete (guaranteed to have
28436f395ceStb at most one remaining entry, since if the code is incomplete, the
28536f395ceStb maximum code length that was allowed to get this far is one bit) */
28636f395ceStb if (huff != 0) {
28736f395ceStb here.op = (unsigned char)64; /* invalid code marker */
28836f395ceStb here.bits = (unsigned char)(len - drop);
28936f395ceStb here.val = (unsigned short)0;
29036f395ceStb next[huff] = here;
29135534e26Sdownsj }
29235534e26Sdownsj
293a79de952Smillert /* set return parameters */
294a79de952Smillert *table += used;
295a79de952Smillert *bits = root;
296a79de952Smillert return 0;
29735534e26Sdownsj }
298