175fd0b74Schristos /* inftrees.c -- generate Huffman trees for efficient decoding
2*e992f068Schristos * Copyright (C) 1995-2022 Mark Adler
375fd0b74Schristos * For conditions of distribution and use, see copyright notice in zlib.h
475fd0b74Schristos */
575fd0b74Schristos
675fd0b74Schristos #include "zutil.h"
775fd0b74Schristos #include "inftrees.h"
875fd0b74Schristos
975fd0b74Schristos #define MAXBITS 15
1075fd0b74Schristos
1175fd0b74Schristos const char inflate_copyright[] =
12*e992f068Schristos " inflate 1.2.12 Copyright 1995-2022 Mark Adler ";
1375fd0b74Schristos /*
1475fd0b74Schristos If you use the zlib library in a product, an acknowledgment is welcome
1575fd0b74Schristos in the documentation of your product. If for some reason you cannot
1675fd0b74Schristos include such an acknowledgment, I would appreciate that you keep this
1775fd0b74Schristos copyright string in the executable of your product.
1875fd0b74Schristos */
1975fd0b74Schristos
2075fd0b74Schristos /*
2175fd0b74Schristos Build a set of tables to decode the provided canonical Huffman code.
2275fd0b74Schristos The code lengths are lens[0..codes-1]. The result starts at *table,
2375fd0b74Schristos whose indices are 0..2^bits-1. work is a writable array of at least
2475fd0b74Schristos lens shorts, which is used as a work area. type is the type of code
2575fd0b74Schristos to be generated, CODES, LENS, or DISTS. On return, zero is success,
2675fd0b74Schristos -1 is an invalid code, and +1 means that ENOUGH isn't enough. table
2775fd0b74Schristos on return points to the next available entry's address. bits is the
2875fd0b74Schristos requested root table index bits, and on return it is the actual root
2975fd0b74Schristos table index bits. It will differ if the request is greater than the
3075fd0b74Schristos longest code or if it is less than the shortest code.
3175fd0b74Schristos */
inflate_table(type,lens,codes,table,bits,work)3275fd0b74Schristos int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work)
3375fd0b74Schristos codetype type;
3475fd0b74Schristos unsigned short FAR *lens;
3575fd0b74Schristos unsigned codes;
3675fd0b74Schristos code FAR * FAR *table;
3775fd0b74Schristos unsigned FAR *bits;
3875fd0b74Schristos unsigned short FAR *work;
3975fd0b74Schristos {
4075fd0b74Schristos unsigned len; /* a code's length in bits */
4175fd0b74Schristos unsigned sym; /* index of code symbols */
4275fd0b74Schristos unsigned min, max; /* minimum and maximum code lengths */
4375fd0b74Schristos unsigned root; /* number of index bits for root table */
4475fd0b74Schristos unsigned curr; /* number of index bits for current table */
4575fd0b74Schristos unsigned drop; /* code bits to drop for sub-table */
4675fd0b74Schristos int left; /* number of prefix codes available */
4775fd0b74Schristos unsigned used; /* code entries in table used */
4875fd0b74Schristos unsigned huff; /* Huffman code */
4975fd0b74Schristos unsigned incr; /* for incrementing code, index */
5075fd0b74Schristos unsigned fill; /* index for replicating entries */
5175fd0b74Schristos unsigned low; /* low bits for current root entry */
5275fd0b74Schristos unsigned mask; /* mask for low root bits */
5375fd0b74Schristos code here; /* table entry for duplication */
5475fd0b74Schristos code FAR *next; /* next available space in table */
5575fd0b74Schristos const unsigned short FAR *base; /* base value table to use */
5675fd0b74Schristos const unsigned short FAR *extra; /* extra bits table to use */
57ede78133Schristos unsigned match; /* use base and extra for symbol >= match */
5875fd0b74Schristos unsigned short count[MAXBITS+1]; /* number of codes of each length */
5975fd0b74Schristos unsigned short offs[MAXBITS+1]; /* offsets in table for each length */
6075fd0b74Schristos static const unsigned short lbase[31] = { /* Length codes 257..285 base */
6175fd0b74Schristos 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
6275fd0b74Schristos 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
6375fd0b74Schristos static const unsigned short lext[31] = { /* Length codes 257..285 extra */
6475fd0b74Schristos 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
65*e992f068Schristos 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 199, 202};
6675fd0b74Schristos static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
6775fd0b74Schristos 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
6875fd0b74Schristos 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
6975fd0b74Schristos 8193, 12289, 16385, 24577, 0, 0};
7075fd0b74Schristos static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
7175fd0b74Schristos 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
7275fd0b74Schristos 23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
7375fd0b74Schristos 28, 28, 29, 29, 64, 64};
7475fd0b74Schristos
7575fd0b74Schristos /*
7675fd0b74Schristos Process a set of code lengths to create a canonical Huffman code. The
7775fd0b74Schristos code lengths are lens[0..codes-1]. Each length corresponds to the
7875fd0b74Schristos symbols 0..codes-1. The Huffman code is generated by first sorting the
7975fd0b74Schristos symbols by length from short to long, and retaining the symbol order
8075fd0b74Schristos for codes with equal lengths. Then the code starts with all zero bits
8175fd0b74Schristos for the first code of the shortest length, and the codes are integer
8275fd0b74Schristos increments for the same length, and zeros are appended as the length
8375fd0b74Schristos increases. For the deflate format, these bits are stored backwards
8475fd0b74Schristos from their more natural integer increment ordering, and so when the
8575fd0b74Schristos decoding tables are built in the large loop below, the integer codes
8675fd0b74Schristos are incremented backwards.
8775fd0b74Schristos
8875fd0b74Schristos This routine assumes, but does not check, that all of the entries in
8975fd0b74Schristos lens[] are in the range 0..MAXBITS. The caller must assure this.
9075fd0b74Schristos 1..MAXBITS is interpreted as that code length. zero means that that
9175fd0b74Schristos symbol does not occur in this code.
9275fd0b74Schristos
9375fd0b74Schristos The codes are sorted by computing a count of codes for each length,
9475fd0b74Schristos creating from that a table of starting indices for each length in the
9575fd0b74Schristos sorted table, and then entering the symbols in order in the sorted
9675fd0b74Schristos table. The sorted table is work[], with that space being provided by
9775fd0b74Schristos the caller.
9875fd0b74Schristos
9975fd0b74Schristos The length counts are used for other purposes as well, i.e. finding
10075fd0b74Schristos the minimum and maximum length codes, determining if there are any
10175fd0b74Schristos codes at all, checking for a valid set of lengths, and looking ahead
10275fd0b74Schristos at length counts to determine sub-table sizes when building the
10375fd0b74Schristos decoding tables.
10475fd0b74Schristos */
10575fd0b74Schristos
10675fd0b74Schristos /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
10775fd0b74Schristos for (len = 0; len <= MAXBITS; len++)
10875fd0b74Schristos count[len] = 0;
10975fd0b74Schristos for (sym = 0; sym < codes; sym++)
11075fd0b74Schristos count[lens[sym]]++;
11175fd0b74Schristos
11275fd0b74Schristos /* bound code lengths, force root to be within code lengths */
11375fd0b74Schristos root = *bits;
11475fd0b74Schristos for (max = MAXBITS; max >= 1; max--)
11575fd0b74Schristos if (count[max] != 0) break;
11675fd0b74Schristos if (root > max) root = max;
11775fd0b74Schristos if (max == 0) { /* no symbols to code at all */
11875fd0b74Schristos here.op = (unsigned char)64; /* invalid code marker */
11975fd0b74Schristos here.bits = (unsigned char)1;
12075fd0b74Schristos here.val = (unsigned short)0;
12175fd0b74Schristos *(*table)++ = here; /* make a table to force an error */
12275fd0b74Schristos *(*table)++ = here;
12375fd0b74Schristos *bits = 1;
12475fd0b74Schristos return 0; /* no symbols, but wait for decoding to report error */
12575fd0b74Schristos }
12675fd0b74Schristos for (min = 1; min < max; min++)
12775fd0b74Schristos if (count[min] != 0) break;
12875fd0b74Schristos if (root < min) root = min;
12975fd0b74Schristos
13075fd0b74Schristos /* check for an over-subscribed or incomplete set of lengths */
13175fd0b74Schristos left = 1;
13275fd0b74Schristos for (len = 1; len <= MAXBITS; len++) {
13375fd0b74Schristos left <<= 1;
13475fd0b74Schristos left -= count[len];
13575fd0b74Schristos if (left < 0) return -1; /* over-subscribed */
13675fd0b74Schristos }
13775fd0b74Schristos if (left > 0 && (type == CODES || max != 1))
13875fd0b74Schristos return -1; /* incomplete set */
13975fd0b74Schristos
14075fd0b74Schristos /* generate offsets into symbol table for each length for sorting */
14175fd0b74Schristos offs[1] = 0;
14275fd0b74Schristos for (len = 1; len < MAXBITS; len++)
14375fd0b74Schristos offs[len + 1] = offs[len] + count[len];
14475fd0b74Schristos
14575fd0b74Schristos /* sort symbols by length, by symbol order within each length */
14675fd0b74Schristos for (sym = 0; sym < codes; sym++)
14775fd0b74Schristos if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
14875fd0b74Schristos
14975fd0b74Schristos /*
15075fd0b74Schristos Create and fill in decoding tables. In this loop, the table being
15175fd0b74Schristos filled is at next and has curr index bits. The code being used is huff
15275fd0b74Schristos with length len. That code is converted to an index by dropping drop
15375fd0b74Schristos bits off of the bottom. For codes where len is less than drop + curr,
15475fd0b74Schristos those top drop + curr - len bits are incremented through all values to
15575fd0b74Schristos fill the table with replicated entries.
15675fd0b74Schristos
15775fd0b74Schristos root is the number of index bits for the root table. When len exceeds
15875fd0b74Schristos root, sub-tables are created pointed to by the root entry with an index
15975fd0b74Schristos of the low root bits of huff. This is saved in low to check for when a
16075fd0b74Schristos new sub-table should be started. drop is zero when the root table is
16175fd0b74Schristos being filled, and drop is root when sub-tables are being filled.
16275fd0b74Schristos
16375fd0b74Schristos When a new sub-table is needed, it is necessary to look ahead in the
16475fd0b74Schristos code lengths to determine what size sub-table is needed. The length
16575fd0b74Schristos counts are used for this, and so count[] is decremented as codes are
16675fd0b74Schristos entered in the tables.
16775fd0b74Schristos
16875fd0b74Schristos used keeps track of how many table entries have been allocated from the
16975fd0b74Schristos provided *table space. It is checked for LENS and DIST tables against
17075fd0b74Schristos the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
17175fd0b74Schristos the initial root table size constants. See the comments in inftrees.h
17275fd0b74Schristos for more information.
17375fd0b74Schristos
17475fd0b74Schristos sym increments through all symbols, and the loop terminates when
17575fd0b74Schristos all codes of length max, i.e. all codes, have been processed. This
17675fd0b74Schristos routine permits incomplete codes, so another loop after this one fills
17775fd0b74Schristos in the rest of the decoding tables with invalid code markers.
17875fd0b74Schristos */
17975fd0b74Schristos
18075fd0b74Schristos /* set up for code type */
18175fd0b74Schristos switch (type) {
18275fd0b74Schristos case CODES:
18375fd0b74Schristos base = extra = work; /* dummy value--not used */
184ede78133Schristos match = 20;
18575fd0b74Schristos break;
18675fd0b74Schristos case LENS:
18775fd0b74Schristos base = lbase;
18875fd0b74Schristos extra = lext;
189ede78133Schristos match = 257;
19075fd0b74Schristos break;
19175fd0b74Schristos default: /* DISTS */
19275fd0b74Schristos base = dbase;
19375fd0b74Schristos extra = dext;
194ede78133Schristos match = 0;
19575fd0b74Schristos }
19675fd0b74Schristos
19775fd0b74Schristos /* initialize state for loop */
19875fd0b74Schristos huff = 0; /* starting code */
19975fd0b74Schristos sym = 0; /* starting code symbol */
20075fd0b74Schristos len = min; /* starting code length */
20175fd0b74Schristos next = *table; /* current table to fill in */
20275fd0b74Schristos curr = root; /* current table index bits */
20375fd0b74Schristos drop = 0; /* current bits to drop from code for index */
20475fd0b74Schristos low = (unsigned)(-1); /* trigger new sub-table when len > root */
20575fd0b74Schristos used = 1U << root; /* use root table entries */
20675fd0b74Schristos mask = used - 1; /* mask for comparing low */
20775fd0b74Schristos
20875fd0b74Schristos /* check available table space */
20975fd0b74Schristos if ((type == LENS && used > ENOUGH_LENS) ||
21075fd0b74Schristos (type == DISTS && used > ENOUGH_DISTS))
21175fd0b74Schristos return 1;
21275fd0b74Schristos
21375fd0b74Schristos /* process all codes and make table entries */
21475fd0b74Schristos for (;;) {
21575fd0b74Schristos /* create table entry */
21675fd0b74Schristos here.bits = (unsigned char)(len - drop);
217ede78133Schristos if (work[sym] + 1U < match) {
21875fd0b74Schristos here.op = (unsigned char)0;
21975fd0b74Schristos here.val = work[sym];
22075fd0b74Schristos }
221ede78133Schristos else if (work[sym] >= match) {
222ede78133Schristos here.op = (unsigned char)(extra[work[sym] - match]);
223ede78133Schristos here.val = base[work[sym] - match];
22475fd0b74Schristos }
22575fd0b74Schristos else {
22675fd0b74Schristos here.op = (unsigned char)(32 + 64); /* end of block */
22775fd0b74Schristos here.val = 0;
22875fd0b74Schristos }
22975fd0b74Schristos
23075fd0b74Schristos /* replicate for those indices with low len bits equal to huff */
23175fd0b74Schristos incr = 1U << (len - drop);
23275fd0b74Schristos fill = 1U << curr;
23375fd0b74Schristos min = fill; /* save offset to next table */
23475fd0b74Schristos do {
23575fd0b74Schristos fill -= incr;
23675fd0b74Schristos next[(huff >> drop) + fill] = here;
23775fd0b74Schristos } while (fill != 0);
23875fd0b74Schristos
23975fd0b74Schristos /* backwards increment the len-bit code huff */
24075fd0b74Schristos incr = 1U << (len - 1);
24175fd0b74Schristos while (huff & incr)
24275fd0b74Schristos incr >>= 1;
24375fd0b74Schristos if (incr != 0) {
24475fd0b74Schristos huff &= incr - 1;
24575fd0b74Schristos huff += incr;
24675fd0b74Schristos }
24775fd0b74Schristos else
24875fd0b74Schristos huff = 0;
24975fd0b74Schristos
25075fd0b74Schristos /* go to next symbol, update count, len */
25175fd0b74Schristos sym++;
25275fd0b74Schristos if (--(count[len]) == 0) {
25375fd0b74Schristos if (len == max) break;
25475fd0b74Schristos len = lens[work[sym]];
25575fd0b74Schristos }
25675fd0b74Schristos
25775fd0b74Schristos /* create new sub-table if needed */
25875fd0b74Schristos if (len > root && (huff & mask) != low) {
25975fd0b74Schristos /* if first time, transition to sub-tables */
26075fd0b74Schristos if (drop == 0)
26175fd0b74Schristos drop = root;
26275fd0b74Schristos
26375fd0b74Schristos /* increment past last table */
26475fd0b74Schristos next += min; /* here min is 1 << curr */
26575fd0b74Schristos
26675fd0b74Schristos /* determine length of next table */
26775fd0b74Schristos curr = len - drop;
26875fd0b74Schristos left = (int)(1 << curr);
26975fd0b74Schristos while (curr + drop < max) {
27075fd0b74Schristos left -= count[curr + drop];
27175fd0b74Schristos if (left <= 0) break;
27275fd0b74Schristos curr++;
27375fd0b74Schristos left <<= 1;
27475fd0b74Schristos }
27575fd0b74Schristos
27675fd0b74Schristos /* check for enough space */
27775fd0b74Schristos used += 1U << curr;
27875fd0b74Schristos if ((type == LENS && used > ENOUGH_LENS) ||
27975fd0b74Schristos (type == DISTS && used > ENOUGH_DISTS))
28075fd0b74Schristos return 1;
28175fd0b74Schristos
28275fd0b74Schristos /* point entry in root table to sub-table */
28375fd0b74Schristos low = huff & mask;
28475fd0b74Schristos (*table)[low].op = (unsigned char)curr;
28575fd0b74Schristos (*table)[low].bits = (unsigned char)root;
28675fd0b74Schristos (*table)[low].val = (unsigned short)(next - *table);
28775fd0b74Schristos }
28875fd0b74Schristos }
28975fd0b74Schristos
29075fd0b74Schristos /* fill in remaining table entry if code is incomplete (guaranteed to have
29175fd0b74Schristos at most one remaining entry, since if the code is incomplete, the
29275fd0b74Schristos maximum code length that was allowed to get this far is one bit) */
29375fd0b74Schristos if (huff != 0) {
29475fd0b74Schristos here.op = (unsigned char)64; /* invalid code marker */
29575fd0b74Schristos here.bits = (unsigned char)(len - drop);
29675fd0b74Schristos here.val = (unsigned short)0;
29775fd0b74Schristos next[huff] = here;
29875fd0b74Schristos }
29975fd0b74Schristos
30075fd0b74Schristos /* set return parameters */
30175fd0b74Schristos *table += used;
30275fd0b74Schristos *bits = root;
30375fd0b74Schristos return 0;
30475fd0b74Schristos }
305