xref: /netbsd-src/common/dist/zlib/contrib/infback9/inftree9.c (revision b175d1c2a0d8a7ee59df83b5ae5f0bd11632ced6)
1aaf4ece6Schristos /* inftree9.c -- generate Huffman trees for efficient decoding
2*b175d1c2Schristos  * Copyright (C) 1995-2024 Mark Adler
3aaf4ece6Schristos  * For conditions of distribution and use, see copyright notice in zlib.h
4aaf4ece6Schristos  */
5aaf4ece6Schristos 
6aaf4ece6Schristos #include "zutil.h"
7aaf4ece6Schristos #include "inftree9.h"
8aaf4ece6Schristos 
9aaf4ece6Schristos #define MAXBITS 15
10aaf4ece6Schristos 
11aaf4ece6Schristos const char inflate9_copyright[] =
12*b175d1c2Schristos    " inflate9 1.3.1 Copyright 1995-2024 Mark Adler ";
13aaf4ece6Schristos /*
14aaf4ece6Schristos   If you use the zlib library in a product, an acknowledgment is welcome
15aaf4ece6Schristos   in the documentation of your product. If for some reason you cannot
16aaf4ece6Schristos   include such an acknowledgment, I would appreciate that you keep this
17aaf4ece6Schristos   copyright string in the executable of your product.
18aaf4ece6Schristos  */
19aaf4ece6Schristos 
20aaf4ece6Schristos /*
21aaf4ece6Schristos    Build a set of tables to decode the provided canonical Huffman code.
22aaf4ece6Schristos    The code lengths are lens[0..codes-1].  The result starts at *table,
23aaf4ece6Schristos    whose indices are 0..2^bits-1.  work is a writable array of at least
24aaf4ece6Schristos    lens shorts, which is used as a work area.  type is the type of code
25aaf4ece6Schristos    to be generated, CODES, LENS, or DISTS.  On return, zero is success,
26aaf4ece6Schristos    -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
27aaf4ece6Schristos    on return points to the next available entry's address.  bits is the
28aaf4ece6Schristos    requested root table index bits, and on return it is the actual root
29aaf4ece6Schristos    table index bits.  It will differ if the request is greater than the
30aaf4ece6Schristos    longest code or if it is less than the shortest code.
31aaf4ece6Schristos  */
32*b175d1c2Schristos int inflate_table9(codetype type, unsigned short FAR *lens, unsigned codes,
33*b175d1c2Schristos                    code FAR * FAR *table, unsigned FAR *bits,
34*b175d1c2Schristos                    unsigned short FAR *work) {
35aaf4ece6Schristos     unsigned len;               /* a code's length in bits */
36aaf4ece6Schristos     unsigned sym;               /* index of code symbols */
37aaf4ece6Schristos     unsigned min, max;          /* minimum and maximum code lengths */
38aaf4ece6Schristos     unsigned root;              /* number of index bits for root table */
39aaf4ece6Schristos     unsigned curr;              /* number of index bits for current table */
40aaf4ece6Schristos     unsigned drop;              /* code bits to drop for sub-table */
41aaf4ece6Schristos     int left;                   /* number of prefix codes available */
42aaf4ece6Schristos     unsigned used;              /* code entries in table used */
43aaf4ece6Schristos     unsigned huff;              /* Huffman code */
44aaf4ece6Schristos     unsigned incr;              /* for incrementing code, index */
45aaf4ece6Schristos     unsigned fill;              /* index for replicating entries */
46aaf4ece6Schristos     unsigned low;               /* low bits for current root entry */
47aaf4ece6Schristos     unsigned mask;              /* mask for low root bits */
48aaf4ece6Schristos     code this;                  /* table entry for duplication */
49aaf4ece6Schristos     code FAR *next;             /* next available space in table */
50aaf4ece6Schristos     const unsigned short FAR *base;     /* base value table to use */
51aaf4ece6Schristos     const unsigned short FAR *extra;    /* extra bits table to use */
52aaf4ece6Schristos     int end;                    /* use base and extra for symbol > end */
53aaf4ece6Schristos     unsigned short count[MAXBITS+1];    /* number of codes of each length */
54aaf4ece6Schristos     unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
55aaf4ece6Schristos     static const unsigned short lbase[31] = { /* Length codes 257..285 base */
56aaf4ece6Schristos         3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17,
57aaf4ece6Schristos         19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115,
58aaf4ece6Schristos         131, 163, 195, 227, 3, 0, 0};
59aaf4ece6Schristos     static const unsigned short lext[31] = { /* Length codes 257..285 extra */
60aaf4ece6Schristos         128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129,
61aaf4ece6Schristos         130, 130, 130, 130, 131, 131, 131, 131, 132, 132, 132, 132,
62*b175d1c2Schristos         133, 133, 133, 133, 144, 203, 77};
63aaf4ece6Schristos     static const unsigned short dbase[32] = { /* Distance codes 0..31 base */
64aaf4ece6Schristos         1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49,
65aaf4ece6Schristos         65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073,
66aaf4ece6Schristos         4097, 6145, 8193, 12289, 16385, 24577, 32769, 49153};
67aaf4ece6Schristos     static const unsigned short dext[32] = { /* Distance codes 0..31 extra */
68aaf4ece6Schristos         128, 128, 128, 128, 129, 129, 130, 130, 131, 131, 132, 132,
69aaf4ece6Schristos         133, 133, 134, 134, 135, 135, 136, 136, 137, 137, 138, 138,
70aaf4ece6Schristos         139, 139, 140, 140, 141, 141, 142, 142};
71aaf4ece6Schristos 
72aaf4ece6Schristos     /*
73aaf4ece6Schristos        Process a set of code lengths to create a canonical Huffman code.  The
74aaf4ece6Schristos        code lengths are lens[0..codes-1].  Each length corresponds to the
75aaf4ece6Schristos        symbols 0..codes-1.  The Huffman code is generated by first sorting the
76aaf4ece6Schristos        symbols by length from short to long, and retaining the symbol order
77aaf4ece6Schristos        for codes with equal lengths.  Then the code starts with all zero bits
78aaf4ece6Schristos        for the first code of the shortest length, and the codes are integer
79aaf4ece6Schristos        increments for the same length, and zeros are appended as the length
80aaf4ece6Schristos        increases.  For the deflate format, these bits are stored backwards
81aaf4ece6Schristos        from their more natural integer increment ordering, and so when the
82aaf4ece6Schristos        decoding tables are built in the large loop below, the integer codes
83aaf4ece6Schristos        are incremented backwards.
84aaf4ece6Schristos 
85aaf4ece6Schristos        This routine assumes, but does not check, that all of the entries in
86aaf4ece6Schristos        lens[] are in the range 0..MAXBITS.  The caller must assure this.
87aaf4ece6Schristos        1..MAXBITS is interpreted as that code length.  zero means that that
88aaf4ece6Schristos        symbol does not occur in this code.
89aaf4ece6Schristos 
90aaf4ece6Schristos        The codes are sorted by computing a count of codes for each length,
91aaf4ece6Schristos        creating from that a table of starting indices for each length in the
92aaf4ece6Schristos        sorted table, and then entering the symbols in order in the sorted
93aaf4ece6Schristos        table.  The sorted table is work[], with that space being provided by
94aaf4ece6Schristos        the caller.
95aaf4ece6Schristos 
96aaf4ece6Schristos        The length counts are used for other purposes as well, i.e. finding
97aaf4ece6Schristos        the minimum and maximum length codes, determining if there are any
98aaf4ece6Schristos        codes at all, checking for a valid set of lengths, and looking ahead
99aaf4ece6Schristos        at length counts to determine sub-table sizes when building the
100aaf4ece6Schristos        decoding tables.
101aaf4ece6Schristos      */
102aaf4ece6Schristos 
103aaf4ece6Schristos     /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
104aaf4ece6Schristos     for (len = 0; len <= MAXBITS; len++)
105aaf4ece6Schristos         count[len] = 0;
106aaf4ece6Schristos     for (sym = 0; sym < codes; sym++)
107aaf4ece6Schristos         count[lens[sym]]++;
108aaf4ece6Schristos 
109aaf4ece6Schristos     /* bound code lengths, force root to be within code lengths */
110aaf4ece6Schristos     root = *bits;
111aaf4ece6Schristos     for (max = MAXBITS; max >= 1; max--)
112aaf4ece6Schristos         if (count[max] != 0) break;
113aaf4ece6Schristos     if (root > max) root = max;
114aaf4ece6Schristos     if (max == 0) return -1;            /* no codes! */
115aaf4ece6Schristos     for (min = 1; min <= MAXBITS; min++)
116aaf4ece6Schristos         if (count[min] != 0) break;
117aaf4ece6Schristos     if (root < min) root = min;
118aaf4ece6Schristos 
119aaf4ece6Schristos     /* check for an over-subscribed or incomplete set of lengths */
120aaf4ece6Schristos     left = 1;
121aaf4ece6Schristos     for (len = 1; len <= MAXBITS; len++) {
122aaf4ece6Schristos         left <<= 1;
123aaf4ece6Schristos         left -= count[len];
124aaf4ece6Schristos         if (left < 0) return -1;        /* over-subscribed */
125aaf4ece6Schristos     }
126aaf4ece6Schristos     if (left > 0 && (type == CODES || max != 1))
127aaf4ece6Schristos         return -1;                      /* incomplete set */
128aaf4ece6Schristos 
129aaf4ece6Schristos     /* generate offsets into symbol table for each length for sorting */
130aaf4ece6Schristos     offs[1] = 0;
131aaf4ece6Schristos     for (len = 1; len < MAXBITS; len++)
132aaf4ece6Schristos         offs[len + 1] = offs[len] + count[len];
133aaf4ece6Schristos 
134aaf4ece6Schristos     /* sort symbols by length, by symbol order within each length */
135aaf4ece6Schristos     for (sym = 0; sym < codes; sym++)
136aaf4ece6Schristos         if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
137aaf4ece6Schristos 
138aaf4ece6Schristos     /*
139aaf4ece6Schristos        Create and fill in decoding tables.  In this loop, the table being
140aaf4ece6Schristos        filled is at next and has curr index bits.  The code being used is huff
141aaf4ece6Schristos        with length len.  That code is converted to an index by dropping drop
142aaf4ece6Schristos        bits off of the bottom.  For codes where len is less than drop + curr,
143aaf4ece6Schristos        those top drop + curr - len bits are incremented through all values to
144aaf4ece6Schristos        fill the table with replicated entries.
145aaf4ece6Schristos 
146aaf4ece6Schristos        root is the number of index bits for the root table.  When len exceeds
147aaf4ece6Schristos        root, sub-tables are created pointed to by the root entry with an index
148aaf4ece6Schristos        of the low root bits of huff.  This is saved in low to check for when a
149aaf4ece6Schristos        new sub-table should be started.  drop is zero when the root table is
150aaf4ece6Schristos        being filled, and drop is root when sub-tables are being filled.
151aaf4ece6Schristos 
152aaf4ece6Schristos        When a new sub-table is needed, it is necessary to look ahead in the
153aaf4ece6Schristos        code lengths to determine what size sub-table is needed.  The length
154aaf4ece6Schristos        counts are used for this, and so count[] is decremented as codes are
155aaf4ece6Schristos        entered in the tables.
156aaf4ece6Schristos 
157aaf4ece6Schristos        used keeps track of how many table entries have been allocated from the
158c3423655Schristos        provided *table space.  It is checked for LENS and DIST tables against
159c3423655Schristos        the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
160c3423655Schristos        the initial root table size constants.  See the comments in inftree9.h
161c3423655Schristos        for more information.
162aaf4ece6Schristos 
163aaf4ece6Schristos        sym increments through all symbols, and the loop terminates when
164aaf4ece6Schristos        all codes of length max, i.e. all codes, have been processed.  This
165aaf4ece6Schristos        routine permits incomplete codes, so another loop after this one fills
166aaf4ece6Schristos        in the rest of the decoding tables with invalid code markers.
167aaf4ece6Schristos      */
168aaf4ece6Schristos 
169aaf4ece6Schristos     /* set up for code type */
170aaf4ece6Schristos     switch (type) {
171aaf4ece6Schristos     case CODES:
172aaf4ece6Schristos         base = extra = work;    /* dummy value--not used */
173aaf4ece6Schristos         end = 19;
174aaf4ece6Schristos         break;
175aaf4ece6Schristos     case LENS:
176aaf4ece6Schristos         base = lbase;
177aaf4ece6Schristos         base -= 257;
178aaf4ece6Schristos         extra = lext;
179aaf4ece6Schristos         extra -= 257;
180aaf4ece6Schristos         end = 256;
181aaf4ece6Schristos         break;
182aaf4ece6Schristos     default:            /* DISTS */
183aaf4ece6Schristos         base = dbase;
184aaf4ece6Schristos         extra = dext;
185aaf4ece6Schristos         end = -1;
186aaf4ece6Schristos     }
187aaf4ece6Schristos 
188aaf4ece6Schristos     /* initialize state for loop */
189aaf4ece6Schristos     huff = 0;                   /* starting code */
190aaf4ece6Schristos     sym = 0;                    /* starting code symbol */
191aaf4ece6Schristos     len = min;                  /* starting code length */
192aaf4ece6Schristos     next = *table;              /* current table to fill in */
193aaf4ece6Schristos     curr = root;                /* current table index bits */
194aaf4ece6Schristos     drop = 0;                   /* current bits to drop from code for index */
195aaf4ece6Schristos     low = (unsigned)(-1);       /* trigger new sub-table when len > root */
196aaf4ece6Schristos     used = 1U << root;          /* use root table entries */
197aaf4ece6Schristos     mask = used - 1;            /* mask for comparing low */
198aaf4ece6Schristos 
199aaf4ece6Schristos     /* check available table space */
200c3423655Schristos     if ((type == LENS && used >= ENOUGH_LENS) ||
201c3423655Schristos         (type == DISTS && used >= ENOUGH_DISTS))
202aaf4ece6Schristos         return 1;
203aaf4ece6Schristos 
204aaf4ece6Schristos     /* process all codes and make table entries */
205aaf4ece6Schristos     for (;;) {
206aaf4ece6Schristos         /* create table entry */
207aaf4ece6Schristos         this.bits = (unsigned char)(len - drop);
208aaf4ece6Schristos         if ((int)(work[sym]) < end) {
209aaf4ece6Schristos             this.op = (unsigned char)0;
210aaf4ece6Schristos             this.val = work[sym];
211aaf4ece6Schristos         }
212aaf4ece6Schristos         else if ((int)(work[sym]) > end) {
213aaf4ece6Schristos             this.op = (unsigned char)(extra[work[sym]]);
214aaf4ece6Schristos             this.val = base[work[sym]];
215aaf4ece6Schristos         }
216aaf4ece6Schristos         else {
217aaf4ece6Schristos             this.op = (unsigned char)(32 + 64);         /* end of block */
218aaf4ece6Schristos             this.val = 0;
219aaf4ece6Schristos         }
220aaf4ece6Schristos 
221aaf4ece6Schristos         /* replicate for those indices with low len bits equal to huff */
222aaf4ece6Schristos         incr = 1U << (len - drop);
223aaf4ece6Schristos         fill = 1U << curr;
224aaf4ece6Schristos         do {
225aaf4ece6Schristos             fill -= incr;
226aaf4ece6Schristos             next[(huff >> drop) + fill] = this;
227aaf4ece6Schristos         } while (fill != 0);
228aaf4ece6Schristos 
229aaf4ece6Schristos         /* backwards increment the len-bit code huff */
230aaf4ece6Schristos         incr = 1U << (len - 1);
231aaf4ece6Schristos         while (huff & incr)
232aaf4ece6Schristos             incr >>= 1;
233aaf4ece6Schristos         if (incr != 0) {
234aaf4ece6Schristos             huff &= incr - 1;
235aaf4ece6Schristos             huff += incr;
236aaf4ece6Schristos         }
237aaf4ece6Schristos         else
238aaf4ece6Schristos             huff = 0;
239aaf4ece6Schristos 
240aaf4ece6Schristos         /* go to next symbol, update count, len */
241aaf4ece6Schristos         sym++;
242aaf4ece6Schristos         if (--(count[len]) == 0) {
243aaf4ece6Schristos             if (len == max) break;
244aaf4ece6Schristos             len = lens[work[sym]];
245aaf4ece6Schristos         }
246aaf4ece6Schristos 
247aaf4ece6Schristos         /* create new sub-table if needed */
248aaf4ece6Schristos         if (len > root && (huff & mask) != low) {
249aaf4ece6Schristos             /* if first time, transition to sub-tables */
250aaf4ece6Schristos             if (drop == 0)
251aaf4ece6Schristos                 drop = root;
252aaf4ece6Schristos 
253aaf4ece6Schristos             /* increment past last table */
254aaf4ece6Schristos             next += 1U << curr;
255aaf4ece6Schristos 
256aaf4ece6Schristos             /* determine length of next table */
257aaf4ece6Schristos             curr = len - drop;
258aaf4ece6Schristos             left = (int)(1 << curr);
259aaf4ece6Schristos             while (curr + drop < max) {
260aaf4ece6Schristos                 left -= count[curr + drop];
261aaf4ece6Schristos                 if (left <= 0) break;
262aaf4ece6Schristos                 curr++;
263aaf4ece6Schristos                 left <<= 1;
264aaf4ece6Schristos             }
265aaf4ece6Schristos 
266aaf4ece6Schristos             /* check for enough space */
267aaf4ece6Schristos             used += 1U << curr;
268c3423655Schristos             if ((type == LENS && used >= ENOUGH_LENS) ||
269c3423655Schristos                 (type == DISTS && used >= ENOUGH_DISTS))
270aaf4ece6Schristos                 return 1;
271aaf4ece6Schristos 
272aaf4ece6Schristos             /* point entry in root table to sub-table */
273aaf4ece6Schristos             low = huff & mask;
274aaf4ece6Schristos             (*table)[low].op = (unsigned char)curr;
275aaf4ece6Schristos             (*table)[low].bits = (unsigned char)root;
276aaf4ece6Schristos             (*table)[low].val = (unsigned short)(next - *table);
277aaf4ece6Schristos         }
278aaf4ece6Schristos     }
279aaf4ece6Schristos 
280aaf4ece6Schristos     /*
281aaf4ece6Schristos        Fill in rest of table for incomplete codes.  This loop is similar to the
282aaf4ece6Schristos        loop above in incrementing huff for table indices.  It is assumed that
283aaf4ece6Schristos        len is equal to curr + drop, so there is no loop needed to increment
284aaf4ece6Schristos        through high index bits.  When the current sub-table is filled, the loop
285aaf4ece6Schristos        drops back to the root table to fill in any remaining entries there.
286aaf4ece6Schristos      */
287aaf4ece6Schristos     this.op = (unsigned char)64;                /* invalid code marker */
288aaf4ece6Schristos     this.bits = (unsigned char)(len - drop);
289aaf4ece6Schristos     this.val = (unsigned short)0;
290aaf4ece6Schristos     while (huff != 0) {
291aaf4ece6Schristos         /* when done with sub-table, drop back to root table */
292aaf4ece6Schristos         if (drop != 0 && (huff & mask) != low) {
293aaf4ece6Schristos             drop = 0;
294aaf4ece6Schristos             len = root;
295aaf4ece6Schristos             next = *table;
296aaf4ece6Schristos             curr = root;
297aaf4ece6Schristos             this.bits = (unsigned char)len;
298aaf4ece6Schristos         }
299aaf4ece6Schristos 
300aaf4ece6Schristos         /* put invalid code marker in table */
301aaf4ece6Schristos         next[huff >> drop] = this;
302aaf4ece6Schristos 
303aaf4ece6Schristos         /* backwards increment the len-bit code huff */
304aaf4ece6Schristos         incr = 1U << (len - 1);
305aaf4ece6Schristos         while (huff & incr)
306aaf4ece6Schristos             incr >>= 1;
307aaf4ece6Schristos         if (incr != 0) {
308aaf4ece6Schristos             huff &= incr - 1;
309aaf4ece6Schristos             huff += incr;
310aaf4ece6Schristos         }
311aaf4ece6Schristos         else
312aaf4ece6Schristos             huff = 0;
313aaf4ece6Schristos     }
314aaf4ece6Schristos 
315aaf4ece6Schristos     /* set return parameters */
316aaf4ece6Schristos     *table += used;
317aaf4ece6Schristos     *bits = root;
318aaf4ece6Schristos     return 0;
319aaf4ece6Schristos }
320