xref: /openbsd-src/sys/lib/libz/inftrees.c (revision f06337493c892bd9f15614befd7ae3c84ee58472)
135534e26Sdownsj /* inftrees.c -- generate Huffman trees for efficient decoding
2*f0633749Stb  * Copyright (C) 1995-2024 Mark Adler
335534e26Sdownsj  * For conditions of distribution and use, see copyright notice in zlib.h
435534e26Sdownsj  */
535534e26Sdownsj 
635534e26Sdownsj #include "zutil.h"
735534e26Sdownsj #include "inftrees.h"
835534e26Sdownsj 
9a79de952Smillert #define MAXBITS 15
105dc7a4a5Smillert 
1135534e26Sdownsj /*
1235534e26Sdownsj   If you use the zlib library in a product, an acknowledgment is welcome
1335534e26Sdownsj   in the documentation of your product. If for some reason you cannot
1435534e26Sdownsj   include such an acknowledgment, I would appreciate that you keep this
1535534e26Sdownsj   copyright string in the executable of your product.
1635534e26Sdownsj  */
1735534e26Sdownsj 
1835534e26Sdownsj /*
19a79de952Smillert    Build a set of tables to decode the provided canonical Huffman code.
20a79de952Smillert    The code lengths are lens[0..codes-1].  The result starts at *table,
21a79de952Smillert    whose indices are 0..2^bits-1.  work is a writable array of at least
22a79de952Smillert    lens shorts, which is used as a work area.  type is the type of code
23a79de952Smillert    to be generated, CODES, LENS, or DISTS.  On return, zero is success,
24a79de952Smillert    -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
25a79de952Smillert    on return points to the next available entry's address.  bits is the
26a79de952Smillert    requested root table index bits, and on return it is the actual root
27a79de952Smillert    table index bits.  It will differ if the request is greater than the
28a79de952Smillert    longest code or if it is less than the shortest code.
29a79de952Smillert  */
inflate_table(codetype type,unsigned short FAR * lens,unsigned codes,code FAR * FAR * table,unsigned FAR * bits,unsigned short FAR * work)30cfac609cStb int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens,
31cfac609cStb                                 unsigned codes, code FAR * FAR *table,
32cfac609cStb                                 unsigned FAR *bits, unsigned short FAR *work) {
33a79de952Smillert     unsigned len;               /* a code's length in bits */
34a79de952Smillert     unsigned sym;               /* index of code symbols */
35a79de952Smillert     unsigned min, max;          /* minimum and maximum code lengths */
36a79de952Smillert     unsigned root;              /* number of index bits for root table */
37a79de952Smillert     unsigned curr;              /* number of index bits for current table */
38a79de952Smillert     unsigned drop;              /* code bits to drop for sub-table */
39a79de952Smillert     int left;                   /* number of prefix codes available */
40a79de952Smillert     unsigned used;              /* code entries in table used */
41a79de952Smillert     unsigned huff;              /* Huffman code */
42a79de952Smillert     unsigned incr;              /* for incrementing code, index */
43a79de952Smillert     unsigned fill;              /* index for replicating entries */
44a79de952Smillert     unsigned low;               /* low bits for current root entry */
45a79de952Smillert     unsigned mask;              /* mask for low root bits */
4636f395ceStb     code here;                  /* table entry for duplication */
47a79de952Smillert     code FAR *next;             /* next available space in table */
48a79de952Smillert     const unsigned short FAR *base;     /* base value table to use */
49a79de952Smillert     const unsigned short FAR *extra;    /* extra bits table to use */
5036f395ceStb     unsigned match;             /* use base and extra for symbol >= match */
51a79de952Smillert     unsigned short count[MAXBITS+1];    /* number of codes of each length */
52a79de952Smillert     unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
53a79de952Smillert     static const unsigned short lbase[31] = { /* Length codes 257..285 base */
54a79de952Smillert         3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
55a79de952Smillert         35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
56a79de952Smillert     static const unsigned short lext[31] = { /* Length codes 257..285 extra */
57a79de952Smillert         16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
58*f0633749Stb         19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 73, 200};
59a79de952Smillert     static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
60a79de952Smillert         1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
61a79de952Smillert         257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
62a79de952Smillert         8193, 12289, 16385, 24577, 0, 0};
63a79de952Smillert     static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
64a79de952Smillert         16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
65a79de952Smillert         23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
66a79de952Smillert         28, 28, 29, 29, 64, 64};
6735534e26Sdownsj 
68a79de952Smillert     /*
69a79de952Smillert        Process a set of code lengths to create a canonical Huffman code.  The
70a79de952Smillert        code lengths are lens[0..codes-1].  Each length corresponds to the
71a79de952Smillert        symbols 0..codes-1.  The Huffman code is generated by first sorting the
72a79de952Smillert        symbols by length from short to long, and retaining the symbol order
73a79de952Smillert        for codes with equal lengths.  Then the code starts with all zero bits
74a79de952Smillert        for the first code of the shortest length, and the codes are integer
75a79de952Smillert        increments for the same length, and zeros are appended as the length
76a79de952Smillert        increases.  For the deflate format, these bits are stored backwards
77a79de952Smillert        from their more natural integer increment ordering, and so when the
78a79de952Smillert        decoding tables are built in the large loop below, the integer codes
79a79de952Smillert        are incremented backwards.
8035534e26Sdownsj 
81a79de952Smillert        This routine assumes, but does not check, that all of the entries in
82a79de952Smillert        lens[] are in the range 0..MAXBITS.  The caller must assure this.
83a79de952Smillert        1..MAXBITS is interpreted as that code length.  zero means that that
84a79de952Smillert        symbol does not occur in this code.
85a79de952Smillert 
86a79de952Smillert        The codes are sorted by computing a count of codes for each length,
87a79de952Smillert        creating from that a table of starting indices for each length in the
88a79de952Smillert        sorted table, and then entering the symbols in order in the sorted
89a79de952Smillert        table.  The sorted table is work[], with that space being provided by
90a79de952Smillert        the caller.
91a79de952Smillert 
92a79de952Smillert        The length counts are used for other purposes as well, i.e. finding
93a79de952Smillert        the minimum and maximum length codes, determining if there are any
94a79de952Smillert        codes at all, checking for a valid set of lengths, and looking ahead
95a79de952Smillert        at length counts to determine sub-table sizes when building the
96a79de952Smillert        decoding tables.
9735534e26Sdownsj      */
9835534e26Sdownsj 
99a79de952Smillert     /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
100a79de952Smillert     for (len = 0; len <= MAXBITS; len++)
101a79de952Smillert         count[len] = 0;
102a79de952Smillert     for (sym = 0; sym < codes; sym++)
103a79de952Smillert         count[lens[sym]]++;
10435534e26Sdownsj 
105a79de952Smillert     /* bound code lengths, force root to be within code lengths */
106a79de952Smillert     root = *bits;
107a79de952Smillert     for (max = MAXBITS; max >= 1; max--)
108a79de952Smillert         if (count[max] != 0) break;
109a79de952Smillert     if (root > max) root = max;
110dec6a01cSdjm     if (max == 0) {                     /* no symbols to code at all */
11136f395ceStb         here.op = (unsigned char)64;    /* invalid code marker */
11236f395ceStb         here.bits = (unsigned char)1;
11336f395ceStb         here.val = (unsigned short)0;
11436f395ceStb         *(*table)++ = here;             /* make a table to force an error */
11536f395ceStb         *(*table)++ = here;
116dec6a01cSdjm         *bits = 1;
117dec6a01cSdjm         return 0;     /* no symbols, but wait for decoding to report error */
118dec6a01cSdjm     }
11936f395ceStb     for (min = 1; min < max; min++)
120a79de952Smillert         if (count[min] != 0) break;
121a79de952Smillert     if (root < min) root = min;
12235534e26Sdownsj 
123a79de952Smillert     /* check for an over-subscribed or incomplete set of lengths */
124a79de952Smillert     left = 1;
125a79de952Smillert     for (len = 1; len <= MAXBITS; len++) {
126a79de952Smillert         left <<= 1;
127a79de952Smillert         left -= count[len];
128a79de952Smillert         if (left < 0) return -1;        /* over-subscribed */
12935534e26Sdownsj     }
13079551740Smillert     if (left > 0 && (type == CODES || max != 1))
131a79de952Smillert         return -1;                      /* incomplete set */
13235534e26Sdownsj 
133a79de952Smillert     /* generate offsets into symbol table for each length for sorting */
134a79de952Smillert     offs[1] = 0;
135a79de952Smillert     for (len = 1; len < MAXBITS; len++)
136a79de952Smillert         offs[len + 1] = offs[len] + count[len];
13735534e26Sdownsj 
138a79de952Smillert     /* sort symbols by length, by symbol order within each length */
139a79de952Smillert     for (sym = 0; sym < codes; sym++)
140a79de952Smillert         if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
141a79de952Smillert 
142a79de952Smillert     /*
143a79de952Smillert        Create and fill in decoding tables.  In this loop, the table being
144a79de952Smillert        filled is at next and has curr index bits.  The code being used is huff
145a79de952Smillert        with length len.  That code is converted to an index by dropping drop
146a79de952Smillert        bits off of the bottom.  For codes where len is less than drop + curr,
147a79de952Smillert        those top drop + curr - len bits are incremented through all values to
148a79de952Smillert        fill the table with replicated entries.
149a79de952Smillert 
150a79de952Smillert        root is the number of index bits for the root table.  When len exceeds
151a79de952Smillert        root, sub-tables are created pointed to by the root entry with an index
152a79de952Smillert        of the low root bits of huff.  This is saved in low to check for when a
153a79de952Smillert        new sub-table should be started.  drop is zero when the root table is
154a79de952Smillert        being filled, and drop is root when sub-tables are being filled.
155a79de952Smillert 
156a79de952Smillert        When a new sub-table is needed, it is necessary to look ahead in the
157a79de952Smillert        code lengths to determine what size sub-table is needed.  The length
158a79de952Smillert        counts are used for this, and so count[] is decremented as codes are
159a79de952Smillert        entered in the tables.
160a79de952Smillert 
161a79de952Smillert        used keeps track of how many table entries have been allocated from the
16236f395ceStb        provided *table space.  It is checked for LENS and DIST tables against
16336f395ceStb        the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
16436f395ceStb        the initial root table size constants.  See the comments in inftrees.h
16536f395ceStb        for more information.
166a79de952Smillert 
167a79de952Smillert        sym increments through all symbols, and the loop terminates when
168a79de952Smillert        all codes of length max, i.e. all codes, have been processed.  This
169a79de952Smillert        routine permits incomplete codes, so another loop after this one fills
170a79de952Smillert        in the rest of the decoding tables with invalid code markers.
171a79de952Smillert      */
172a79de952Smillert 
173a79de952Smillert     /* set up for code type */
174a79de952Smillert     switch (type) {
175a79de952Smillert     case CODES:
176a79de952Smillert         base = extra = work;    /* dummy value--not used */
17736f395ceStb         match = 20;
17835534e26Sdownsj         break;
179a79de952Smillert     case LENS:
180a79de952Smillert         base = lbase;
181a79de952Smillert         extra = lext;
18236f395ceStb         match = 257;
18335534e26Sdownsj         break;
184a79de952Smillert     default:    /* DISTS */
185a79de952Smillert         base = dbase;
186a79de952Smillert         extra = dext;
18736f395ceStb         match = 0;
18835534e26Sdownsj     }
18935534e26Sdownsj 
190a79de952Smillert     /* initialize state for loop */
191a79de952Smillert     huff = 0;                   /* starting code */
192a79de952Smillert     sym = 0;                    /* starting code symbol */
193a79de952Smillert     len = min;                  /* starting code length */
194a79de952Smillert     next = *table;              /* current table to fill in */
195a79de952Smillert     curr = root;                /* current table index bits */
196a79de952Smillert     drop = 0;                   /* current bits to drop from code for index */
197a79de952Smillert     low = (unsigned)(-1);       /* trigger new sub-table when len > root */
198a79de952Smillert     used = 1U << root;          /* use root table entries */
199a79de952Smillert     mask = used - 1;            /* mask for comparing low */
20035534e26Sdownsj 
201a79de952Smillert     /* check available table space */
20236f395ceStb     if ((type == LENS && used > ENOUGH_LENS) ||
20336f395ceStb         (type == DISTS && used > ENOUGH_DISTS))
204a79de952Smillert         return 1;
205a79de952Smillert 
206a79de952Smillert     /* process all codes and make table entries */
207a79de952Smillert     for (;;) {
208a79de952Smillert         /* create table entry */
20936f395ceStb         here.bits = (unsigned char)(len - drop);
21036f395ceStb         if (work[sym] + 1U < match) {
21136f395ceStb             here.op = (unsigned char)0;
21236f395ceStb             here.val = work[sym];
213a79de952Smillert         }
21436f395ceStb         else if (work[sym] >= match) {
21536f395ceStb             here.op = (unsigned char)(extra[work[sym] - match]);
21636f395ceStb             here.val = base[work[sym] - match];
217a79de952Smillert         }
218a79de952Smillert         else {
21936f395ceStb             here.op = (unsigned char)(32 + 64);         /* end of block */
22036f395ceStb             here.val = 0;
221a79de952Smillert         }
222a79de952Smillert 
223a79de952Smillert         /* replicate for those indices with low len bits equal to huff */
224a79de952Smillert         incr = 1U << (len - drop);
225a79de952Smillert         fill = 1U << curr;
226d76b9bfaSmillert         min = fill;                 /* save offset to next table */
22735534e26Sdownsj         do {
228a79de952Smillert             fill -= incr;
22936f395ceStb             next[(huff >> drop) + fill] = here;
230a79de952Smillert         } while (fill != 0);
23135534e26Sdownsj 
232a79de952Smillert         /* backwards increment the len-bit code huff */
233a79de952Smillert         incr = 1U << (len - 1);
234a79de952Smillert         while (huff & incr)
235a79de952Smillert             incr >>= 1;
236a79de952Smillert         if (incr != 0) {
237a79de952Smillert             huff &= incr - 1;
238a79de952Smillert             huff += incr;
23935534e26Sdownsj         }
2405dc7a4a5Smillert         else
241a79de952Smillert             huff = 0;
242a79de952Smillert 
243a79de952Smillert         /* go to next symbol, update count, len */
244a79de952Smillert         sym++;
245a79de952Smillert         if (--(count[len]) == 0) {
246a79de952Smillert             if (len == max) break;
247a79de952Smillert             len = lens[work[sym]];
24835534e26Sdownsj         }
24935534e26Sdownsj 
250a79de952Smillert         /* create new sub-table if needed */
251a79de952Smillert         if (len > root && (huff & mask) != low) {
252a79de952Smillert             /* if first time, transition to sub-tables */
253a79de952Smillert             if (drop == 0)
254a79de952Smillert                 drop = root;
255a79de952Smillert 
256a79de952Smillert             /* increment past last table */
257d76b9bfaSmillert             next += min;            /* here min is 1 << curr */
258a79de952Smillert 
259a79de952Smillert             /* determine length of next table */
260a79de952Smillert             curr = len - drop;
261a79de952Smillert             left = (int)(1 << curr);
262a79de952Smillert             while (curr + drop < max) {
263a79de952Smillert                 left -= count[curr + drop];
264a79de952Smillert                 if (left <= 0) break;
265a79de952Smillert                 curr++;
266a79de952Smillert                 left <<= 1;
267a79de952Smillert             }
268a79de952Smillert 
269a79de952Smillert             /* check for enough space */
270a79de952Smillert             used += 1U << curr;
27136f395ceStb             if ((type == LENS && used > ENOUGH_LENS) ||
27236f395ceStb                 (type == DISTS && used > ENOUGH_DISTS))
273a79de952Smillert                 return 1;
274a79de952Smillert 
275a79de952Smillert             /* point entry in root table to sub-table */
276a79de952Smillert             low = huff & mask;
277a79de952Smillert             (*table)[low].op = (unsigned char)curr;
278a79de952Smillert             (*table)[low].bits = (unsigned char)root;
279a79de952Smillert             (*table)[low].val = (unsigned short)(next - *table);
280a79de952Smillert         }
281a79de952Smillert     }
282a79de952Smillert 
28336f395ceStb     /* fill in remaining table entry if code is incomplete (guaranteed to have
28436f395ceStb        at most one remaining entry, since if the code is incomplete, the
28536f395ceStb        maximum code length that was allowed to get this far is one bit) */
28636f395ceStb     if (huff != 0) {
28736f395ceStb         here.op = (unsigned char)64;            /* invalid code marker */
28836f395ceStb         here.bits = (unsigned char)(len - drop);
28936f395ceStb         here.val = (unsigned short)0;
29036f395ceStb         next[huff] = here;
29135534e26Sdownsj     }
29235534e26Sdownsj 
293a79de952Smillert     /* set return parameters */
294a79de952Smillert     *table += used;
295a79de952Smillert     *bits = root;
296a79de952Smillert     return 0;
29735534e26Sdownsj }
298