xref: /netbsd-src/common/dist/zlib/inftrees.c (revision 96c3282121aac2037abbd5952fd638784deb5ab1)
1*96c32821Schristos /*	$NetBSD: inftrees.c,v 1.5 2024/09/22 19:12:27 christos Exp $	*/
2aaf4ece6Schristos 
3aaf4ece6Schristos /* inftrees.c -- generate Huffman trees for efficient decoding
4*96c32821Schristos  * Copyright (C) 1995-2024 Mark Adler
5aaf4ece6Schristos  * For conditions of distribution and use, see copyright notice in zlib.h
6aaf4ece6Schristos  */
7aaf4ece6Schristos 
8aaf4ece6Schristos #include "zutil.h"
9aaf4ece6Schristos #include "inftrees.h"
10aaf4ece6Schristos 
11aaf4ece6Schristos #define MAXBITS 15
12aaf4ece6Schristos 
13aaf4ece6Schristos const char inflate_copyright[] =
14*96c32821Schristos    " inflate 1.3.1 Copyright 1995-2024 Mark Adler ";
15aaf4ece6Schristos /*
16aaf4ece6Schristos   If you use the zlib library in a product, an acknowledgment is welcome
17aaf4ece6Schristos   in the documentation of your product. If for some reason you cannot
18aaf4ece6Schristos   include such an acknowledgment, I would appreciate that you keep this
19aaf4ece6Schristos   copyright string in the executable of your product.
20aaf4ece6Schristos  */
21aaf4ece6Schristos 
22aaf4ece6Schristos /*
23aaf4ece6Schristos    Build a set of tables to decode the provided canonical Huffman code.
24aaf4ece6Schristos    The code lengths are lens[0..codes-1].  The result starts at *table,
25aaf4ece6Schristos    whose indices are 0..2^bits-1.  work is a writable array of at least
26aaf4ece6Schristos    lens shorts, which is used as a work area.  type is the type of code
27aaf4ece6Schristos    to be generated, CODES, LENS, or DISTS.  On return, zero is success,
28aaf4ece6Schristos    -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
29aaf4ece6Schristos    on return points to the next available entry's address.  bits is the
30aaf4ece6Schristos    requested root table index bits, and on return it is the actual root
31aaf4ece6Schristos    table index bits.  It will differ if the request is greater than the
32aaf4ece6Schristos    longest code or if it is less than the shortest code.
33aaf4ece6Schristos  */
34*96c32821Schristos int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens,
35*96c32821Schristos                                 unsigned codes, code FAR * FAR *table,
36*96c32821Schristos                                 unsigned FAR *bits, unsigned short FAR *work) {
37aaf4ece6Schristos     unsigned len;               /* a code's length in bits */
38aaf4ece6Schristos     unsigned sym;               /* index of code symbols */
395a1ae2ecSchristos     unsigned mmin, mmax;        /* minimum and maximum code lengths */
40aaf4ece6Schristos     unsigned root;              /* number of index bits for root table */
41aaf4ece6Schristos     unsigned curr;              /* number of index bits for current table */
42aaf4ece6Schristos     unsigned drop;              /* code bits to drop for sub-table */
43aaf4ece6Schristos     int left;                   /* number of prefix codes available */
44aaf4ece6Schristos     unsigned used;              /* code entries in table used */
45aaf4ece6Schristos     unsigned huff;              /* Huffman code */
46aaf4ece6Schristos     unsigned incr;              /* for incrementing code, index */
47aaf4ece6Schristos     unsigned fill;              /* index for replicating entries */
48aaf4ece6Schristos     unsigned low;               /* low bits for current root entry */
49aaf4ece6Schristos     unsigned mask;              /* mask for low root bits */
506db8c6e9Schristos     code here;                  /* table entry for duplication */
51aaf4ece6Schristos     code FAR *next;             /* next available space in table */
52aaf4ece6Schristos     const unsigned short FAR *base;     /* base value table to use */
53aaf4ece6Schristos     const unsigned short FAR *extra;    /* extra bits table to use */
546db8c6e9Schristos     unsigned match;             /* use base and extra for symbol >= match */
55aaf4ece6Schristos     unsigned short count[MAXBITS+1];    /* number of codes of each length */
56aaf4ece6Schristos     unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
57aaf4ece6Schristos     static const unsigned short lbase[31] = { /* Length codes 257..285 base */
58aaf4ece6Schristos         3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
59aaf4ece6Schristos         35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
60aaf4ece6Schristos     static const unsigned short lext[31] = { /* Length codes 257..285 extra */
61aaf4ece6Schristos         16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
62*96c32821Schristos         19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 203, 77};
63aaf4ece6Schristos     static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
64aaf4ece6Schristos         1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
65aaf4ece6Schristos         257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
66aaf4ece6Schristos         8193, 12289, 16385, 24577, 0, 0};
67aaf4ece6Schristos     static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
68aaf4ece6Schristos         16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
69aaf4ece6Schristos         23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
70aaf4ece6Schristos         28, 28, 29, 29, 64, 64};
71aaf4ece6Schristos 
72aaf4ece6Schristos     /*
73aaf4ece6Schristos        Process a set of code lengths to create a canonical Huffman code.  The
74aaf4ece6Schristos        code lengths are lens[0..codes-1].  Each length corresponds to the
75aaf4ece6Schristos        symbols 0..codes-1.  The Huffman code is generated by first sorting the
76aaf4ece6Schristos        symbols by length from short to long, and retaining the symbol order
77aaf4ece6Schristos        for codes with equal lengths.  Then the code starts with all zero bits
78aaf4ece6Schristos        for the first code of the shortest length, and the codes are integer
79aaf4ece6Schristos        increments for the same length, and zeros are appended as the length
80aaf4ece6Schristos        increases.  For the deflate format, these bits are stored backwards
81aaf4ece6Schristos        from their more natural integer increment ordering, and so when the
82aaf4ece6Schristos        decoding tables are built in the large loop below, the integer codes
83aaf4ece6Schristos        are incremented backwards.
84aaf4ece6Schristos 
85aaf4ece6Schristos        This routine assumes, but does not check, that all of the entries in
86aaf4ece6Schristos        lens[] are in the range 0..MAXBITS.  The caller must assure this.
87aaf4ece6Schristos        1..MAXBITS is interpreted as that code length.  zero means that that
88aaf4ece6Schristos        symbol does not occur in this code.
89aaf4ece6Schristos 
90aaf4ece6Schristos        The codes are sorted by computing a count of codes for each length,
91aaf4ece6Schristos        creating from that a table of starting indices for each length in the
92aaf4ece6Schristos        sorted table, and then entering the symbols in order in the sorted
93aaf4ece6Schristos        table.  The sorted table is work[], with that space being provided by
94aaf4ece6Schristos        the caller.
95aaf4ece6Schristos 
96aaf4ece6Schristos        The length counts are used for other purposes as well, i.e. finding
97aaf4ece6Schristos        the minimum and maximum length codes, determining if there are any
98aaf4ece6Schristos        codes at all, checking for a valid set of lengths, and looking ahead
99aaf4ece6Schristos        at length counts to determine sub-table sizes when building the
100aaf4ece6Schristos        decoding tables.
101aaf4ece6Schristos      */
102aaf4ece6Schristos 
103aaf4ece6Schristos     /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
104aaf4ece6Schristos     for (len = 0; len <= MAXBITS; len++)
105aaf4ece6Schristos         count[len] = 0;
106aaf4ece6Schristos     for (sym = 0; sym < codes; sym++)
107aaf4ece6Schristos         count[lens[sym]]++;
108aaf4ece6Schristos 
109aaf4ece6Schristos     /* bound code lengths, force root to be within code lengths */
110aaf4ece6Schristos     root = *bits;
1115a1ae2ecSchristos     for (mmax = MAXBITS; mmax >= 1; mmax--)
1125a1ae2ecSchristos         if (count[mmax] != 0) break;
1135a1ae2ecSchristos     if (root > mmax) root = mmax;
1145a1ae2ecSchristos     if (mmax == 0) {                     /* no symbols to code at all */
1156db8c6e9Schristos         here.op = (unsigned char)64;    /* invalid code marker */
1166db8c6e9Schristos         here.bits = (unsigned char)1;
1176db8c6e9Schristos         here.val = (unsigned short)0;
1186db8c6e9Schristos         *(*table)++ = here;             /* make a table to force an error */
1196db8c6e9Schristos         *(*table)++ = here;
120aaf4ece6Schristos         *bits = 1;
121aaf4ece6Schristos         return 0;     /* no symbols, but wait for decoding to report error */
122aaf4ece6Schristos     }
1235a1ae2ecSchristos     for (mmin = 1; mmin <= MAXBITS; mmin++)
1245a1ae2ecSchristos         if (count[mmin] != 0) break;
1255a1ae2ecSchristos     if (root < mmin) root = mmin;
126aaf4ece6Schristos 
127aaf4ece6Schristos     /* check for an over-subscribed or incomplete set of lengths */
128aaf4ece6Schristos     left = 1;
129aaf4ece6Schristos     for (len = 1; len <= MAXBITS; len++) {
130aaf4ece6Schristos         left <<= 1;
131aaf4ece6Schristos         left -= count[len];
132aaf4ece6Schristos         if (left < 0) return -1;        /* over-subscribed */
133aaf4ece6Schristos     }
1345a1ae2ecSchristos     if (left > 0 && (type == CODES || mmax != 1))
135aaf4ece6Schristos         return -1;                      /* incomplete set */
136aaf4ece6Schristos 
137aaf4ece6Schristos     /* generate offsets into symbol table for each length for sorting */
138aaf4ece6Schristos     offs[1] = 0;
139aaf4ece6Schristos     for (len = 1; len < MAXBITS; len++)
140aaf4ece6Schristos         offs[len + 1] = offs[len] + count[len];
141aaf4ece6Schristos 
142aaf4ece6Schristos     /* sort symbols by length, by symbol order within each length */
143aaf4ece6Schristos     for (sym = 0; sym < codes; sym++)
144aaf4ece6Schristos         if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
145aaf4ece6Schristos 
146aaf4ece6Schristos     /*
147aaf4ece6Schristos        Create and fill in decoding tables.  In this loop, the table being
148aaf4ece6Schristos        filled is at next and has curr index bits.  The code being used is huff
149aaf4ece6Schristos        with length len.  That code is converted to an index by dropping drop
150aaf4ece6Schristos        bits off of the bottom.  For codes where len is less than drop + curr,
151aaf4ece6Schristos        those top drop + curr - len bits are incremented through all values to
152aaf4ece6Schristos        fill the table with replicated entries.
153aaf4ece6Schristos 
154aaf4ece6Schristos        root is the number of index bits for the root table.  When len exceeds
155aaf4ece6Schristos        root, sub-tables are created pointed to by the root entry with an index
156aaf4ece6Schristos        of the low root bits of huff.  This is saved in low to check for when a
157aaf4ece6Schristos        new sub-table should be started.  drop is zero when the root table is
158aaf4ece6Schristos        being filled, and drop is root when sub-tables are being filled.
159aaf4ece6Schristos 
160aaf4ece6Schristos        When a new sub-table is needed, it is necessary to look ahead in the
161aaf4ece6Schristos        code lengths to determine what size sub-table is needed.  The length
162aaf4ece6Schristos        counts are used for this, and so count[] is decremented as codes are
163aaf4ece6Schristos        entered in the tables.
164aaf4ece6Schristos 
165aaf4ece6Schristos        used keeps track of how many table entries have been allocated from the
1666db8c6e9Schristos        provided *table space.  It is checked for LENS and DIST tables against
1676db8c6e9Schristos        the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
1686db8c6e9Schristos        the initial root table size constants.  See the comments in inftrees.h
1696db8c6e9Schristos        for more information.
170aaf4ece6Schristos 
171aaf4ece6Schristos        sym increments through all symbols, and the loop terminates when
1725a1ae2ecSchristos        all codes of length mmax, i.e. all codes, have been processed.  This
173aaf4ece6Schristos        routine permits incomplete codes, so another loop after this one fills
174aaf4ece6Schristos        in the rest of the decoding tables with invalid code markers.
175aaf4ece6Schristos      */
176aaf4ece6Schristos 
177aaf4ece6Schristos     /* set up for code type */
178aaf4ece6Schristos     switch (type) {
179aaf4ece6Schristos     case CODES:
180aaf4ece6Schristos         base = extra = work;    /* dummy value--not used */
1816db8c6e9Schristos         match = 20;
182aaf4ece6Schristos         break;
183aaf4ece6Schristos     case LENS:
184aaf4ece6Schristos         base = lbase;
185aaf4ece6Schristos         extra = lext;
1866db8c6e9Schristos         match = 257;
187aaf4ece6Schristos         break;
188aaf4ece6Schristos     default:    /* DISTS */
189aaf4ece6Schristos         base = dbase;
190aaf4ece6Schristos         extra = dext;
1916db8c6e9Schristos         match = 0;
192aaf4ece6Schristos     }
193aaf4ece6Schristos 
194aaf4ece6Schristos     /* initialize state for loop */
195aaf4ece6Schristos     huff = 0;                   /* starting code */
196aaf4ece6Schristos     sym = 0;                    /* starting code symbol */
1975a1ae2ecSchristos     len = mmin;                  /* starting code length */
198aaf4ece6Schristos     next = *table;              /* current table to fill in */
199aaf4ece6Schristos     curr = root;                /* current table index bits */
200aaf4ece6Schristos     drop = 0;                   /* current bits to drop from code for index */
201aaf4ece6Schristos     low = (unsigned)(-1);       /* trigger new sub-table when len > root */
202aaf4ece6Schristos     used = 1U << root;          /* use root table entries */
203aaf4ece6Schristos     mask = used - 1;            /* mask for comparing low */
204aaf4ece6Schristos 
205aaf4ece6Schristos     /* check available table space */
2066db8c6e9Schristos     if ((type == LENS && used > ENOUGH_LENS) ||
2076db8c6e9Schristos         (type == DISTS && used > ENOUGH_DISTS))
208aaf4ece6Schristos         return 1;
209aaf4ece6Schristos 
210aaf4ece6Schristos     /* process all codes and make table entries */
211aaf4ece6Schristos     for (;;) {
212aaf4ece6Schristos         /* create table entry */
2136db8c6e9Schristos         here.bits = (unsigned char)(len - drop);
2146db8c6e9Schristos         if (work[sym] + 1U < match) {
2156db8c6e9Schristos             here.op = (unsigned char)0;
2166db8c6e9Schristos             here.val = work[sym];
217aaf4ece6Schristos         }
2186db8c6e9Schristos         else if (work[sym] >= match) {
2196db8c6e9Schristos             here.op = (unsigned char)(extra[work[sym] - match]);
2206db8c6e9Schristos             here.val = base[work[sym] - match];
221aaf4ece6Schristos         }
222aaf4ece6Schristos         else {
2236db8c6e9Schristos             here.op = (unsigned char)(32 + 64);         /* end of block */
2246db8c6e9Schristos             here.val = 0;
225aaf4ece6Schristos         }
226aaf4ece6Schristos 
227aaf4ece6Schristos         /* replicate for those indices with low len bits equal to huff */
228aaf4ece6Schristos         incr = 1U << (len - drop);
229aaf4ece6Schristos         fill = 1U << curr;
2305a1ae2ecSchristos         mmin = fill;                 /* save offset to next table */
231aaf4ece6Schristos         do {
232aaf4ece6Schristos             fill -= incr;
2336db8c6e9Schristos             next[(huff >> drop) + fill] = here;
234aaf4ece6Schristos         } while (fill != 0);
235aaf4ece6Schristos 
236aaf4ece6Schristos         /* backwards increment the len-bit code huff */
237aaf4ece6Schristos         incr = 1U << (len - 1);
238aaf4ece6Schristos         while (huff & incr)
239aaf4ece6Schristos             incr >>= 1;
240aaf4ece6Schristos         if (incr != 0) {
241aaf4ece6Schristos             huff &= incr - 1;
242aaf4ece6Schristos             huff += incr;
243aaf4ece6Schristos         }
244aaf4ece6Schristos         else
245aaf4ece6Schristos             huff = 0;
246aaf4ece6Schristos 
247aaf4ece6Schristos         /* go to next symbol, update count, len */
248aaf4ece6Schristos         sym++;
249aaf4ece6Schristos         if (--(count[len]) == 0) {
2505a1ae2ecSchristos             if (len == mmax) break;
251aaf4ece6Schristos             len = lens[work[sym]];
252aaf4ece6Schristos         }
253aaf4ece6Schristos 
254aaf4ece6Schristos         /* create new sub-table if needed */
255aaf4ece6Schristos         if (len > root && (huff & mask) != low) {
256aaf4ece6Schristos             /* if first time, transition to sub-tables */
257aaf4ece6Schristos             if (drop == 0)
258aaf4ece6Schristos                 drop = root;
259aaf4ece6Schristos 
260aaf4ece6Schristos             /* increment past last table */
2615a1ae2ecSchristos             next += mmin;            /* here mmin is 1 << curr */
262aaf4ece6Schristos 
263aaf4ece6Schristos             /* determine length of next table */
264aaf4ece6Schristos             curr = len - drop;
265aaf4ece6Schristos             left = (int)(1 << curr);
2665a1ae2ecSchristos             while (curr + drop < mmax) {
267aaf4ece6Schristos                 left -= count[curr + drop];
268aaf4ece6Schristos                 if (left <= 0) break;
269aaf4ece6Schristos                 curr++;
270aaf4ece6Schristos                 left <<= 1;
271aaf4ece6Schristos             }
272aaf4ece6Schristos 
273aaf4ece6Schristos             /* check for enough space */
274aaf4ece6Schristos             used += 1U << curr;
2756db8c6e9Schristos             if ((type == LENS && used > ENOUGH_LENS) ||
2766db8c6e9Schristos                 (type == DISTS && used > ENOUGH_DISTS))
277aaf4ece6Schristos                 return 1;
278aaf4ece6Schristos 
279aaf4ece6Schristos             /* point entry in root table to sub-table */
280aaf4ece6Schristos             low = huff & mask;
281aaf4ece6Schristos             (*table)[low].op = (unsigned char)curr;
282aaf4ece6Schristos             (*table)[low].bits = (unsigned char)root;
283aaf4ece6Schristos             (*table)[low].val = (unsigned short)(next - *table);
284aaf4ece6Schristos         }
285aaf4ece6Schristos     }
286aaf4ece6Schristos 
2876db8c6e9Schristos     /* fill in remaining table entry if code is incomplete (guaranteed to have
2886db8c6e9Schristos        at most one remaining entry, since if the code is incomplete, the
2896db8c6e9Schristos        maximum code length that was allowed to get this far is one bit) */
2906db8c6e9Schristos     if (huff != 0) {
2916db8c6e9Schristos         here.op = (unsigned char)64;            /* invalid code marker */
2926db8c6e9Schristos         here.bits = (unsigned char)(len - drop);
2936db8c6e9Schristos         here.val = (unsigned short)0;
2946db8c6e9Schristos         next[huff] = here;
295aaf4ece6Schristos     }
296aaf4ece6Schristos 
297aaf4ece6Schristos     /* set return parameters */
298aaf4ece6Schristos     *table += used;
299aaf4ece6Schristos     *bits = root;
300aaf4ece6Schristos     return 0;
301aaf4ece6Schristos }
302