xref: /netbsd-src/external/gpl3/binutils.old/dist/zlib/inftrees.c (revision e992f068c547fd6e84b3f104dc2340adcc955732)
175fd0b74Schristos /* inftrees.c -- generate Huffman trees for efficient decoding
2*e992f068Schristos  * Copyright (C) 1995-2022 Mark Adler
375fd0b74Schristos  * For conditions of distribution and use, see copyright notice in zlib.h
475fd0b74Schristos  */
575fd0b74Schristos 
675fd0b74Schristos #include "zutil.h"
775fd0b74Schristos #include "inftrees.h"
875fd0b74Schristos 
975fd0b74Schristos #define MAXBITS 15
1075fd0b74Schristos 
1175fd0b74Schristos const char inflate_copyright[] =
12*e992f068Schristos    " inflate 1.2.12 Copyright 1995-2022 Mark Adler ";
1375fd0b74Schristos /*
1475fd0b74Schristos   If you use the zlib library in a product, an acknowledgment is welcome
1575fd0b74Schristos   in the documentation of your product. If for some reason you cannot
1675fd0b74Schristos   include such an acknowledgment, I would appreciate that you keep this
1775fd0b74Schristos   copyright string in the executable of your product.
1875fd0b74Schristos  */
1975fd0b74Schristos 
2075fd0b74Schristos /*
2175fd0b74Schristos    Build a set of tables to decode the provided canonical Huffman code.
2275fd0b74Schristos    The code lengths are lens[0..codes-1].  The result starts at *table,
2375fd0b74Schristos    whose indices are 0..2^bits-1.  work is a writable array of at least
2475fd0b74Schristos    lens shorts, which is used as a work area.  type is the type of code
2575fd0b74Schristos    to be generated, CODES, LENS, or DISTS.  On return, zero is success,
2675fd0b74Schristos    -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
2775fd0b74Schristos    on return points to the next available entry's address.  bits is the
2875fd0b74Schristos    requested root table index bits, and on return it is the actual root
2975fd0b74Schristos    table index bits.  It will differ if the request is greater than the
3075fd0b74Schristos    longest code or if it is less than the shortest code.
3175fd0b74Schristos  */
inflate_table(type,lens,codes,table,bits,work)3275fd0b74Schristos int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work)
3375fd0b74Schristos codetype type;
3475fd0b74Schristos unsigned short FAR *lens;
3575fd0b74Schristos unsigned codes;
3675fd0b74Schristos code FAR * FAR *table;
3775fd0b74Schristos unsigned FAR *bits;
3875fd0b74Schristos unsigned short FAR *work;
3975fd0b74Schristos {
4075fd0b74Schristos     unsigned len;               /* a code's length in bits */
4175fd0b74Schristos     unsigned sym;               /* index of code symbols */
4275fd0b74Schristos     unsigned min, max;          /* minimum and maximum code lengths */
4375fd0b74Schristos     unsigned root;              /* number of index bits for root table */
4475fd0b74Schristos     unsigned curr;              /* number of index bits for current table */
4575fd0b74Schristos     unsigned drop;              /* code bits to drop for sub-table */
4675fd0b74Schristos     int left;                   /* number of prefix codes available */
4775fd0b74Schristos     unsigned used;              /* code entries in table used */
4875fd0b74Schristos     unsigned huff;              /* Huffman code */
4975fd0b74Schristos     unsigned incr;              /* for incrementing code, index */
5075fd0b74Schristos     unsigned fill;              /* index for replicating entries */
5175fd0b74Schristos     unsigned low;               /* low bits for current root entry */
5275fd0b74Schristos     unsigned mask;              /* mask for low root bits */
5375fd0b74Schristos     code here;                  /* table entry for duplication */
5475fd0b74Schristos     code FAR *next;             /* next available space in table */
5575fd0b74Schristos     const unsigned short FAR *base;     /* base value table to use */
5675fd0b74Schristos     const unsigned short FAR *extra;    /* extra bits table to use */
57ede78133Schristos     unsigned match;             /* use base and extra for symbol >= match */
5875fd0b74Schristos     unsigned short count[MAXBITS+1];    /* number of codes of each length */
5975fd0b74Schristos     unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
6075fd0b74Schristos     static const unsigned short lbase[31] = { /* Length codes 257..285 base */
6175fd0b74Schristos         3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
6275fd0b74Schristos         35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
6375fd0b74Schristos     static const unsigned short lext[31] = { /* Length codes 257..285 extra */
6475fd0b74Schristos         16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
65*e992f068Schristos         19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 199, 202};
6675fd0b74Schristos     static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
6775fd0b74Schristos         1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
6875fd0b74Schristos         257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
6975fd0b74Schristos         8193, 12289, 16385, 24577, 0, 0};
7075fd0b74Schristos     static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
7175fd0b74Schristos         16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
7275fd0b74Schristos         23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
7375fd0b74Schristos         28, 28, 29, 29, 64, 64};
7475fd0b74Schristos 
7575fd0b74Schristos     /*
7675fd0b74Schristos        Process a set of code lengths to create a canonical Huffman code.  The
7775fd0b74Schristos        code lengths are lens[0..codes-1].  Each length corresponds to the
7875fd0b74Schristos        symbols 0..codes-1.  The Huffman code is generated by first sorting the
7975fd0b74Schristos        symbols by length from short to long, and retaining the symbol order
8075fd0b74Schristos        for codes with equal lengths.  Then the code starts with all zero bits
8175fd0b74Schristos        for the first code of the shortest length, and the codes are integer
8275fd0b74Schristos        increments for the same length, and zeros are appended as the length
8375fd0b74Schristos        increases.  For the deflate format, these bits are stored backwards
8475fd0b74Schristos        from their more natural integer increment ordering, and so when the
8575fd0b74Schristos        decoding tables are built in the large loop below, the integer codes
8675fd0b74Schristos        are incremented backwards.
8775fd0b74Schristos 
8875fd0b74Schristos        This routine assumes, but does not check, that all of the entries in
8975fd0b74Schristos        lens[] are in the range 0..MAXBITS.  The caller must assure this.
9075fd0b74Schristos        1..MAXBITS is interpreted as that code length.  zero means that that
9175fd0b74Schristos        symbol does not occur in this code.
9275fd0b74Schristos 
9375fd0b74Schristos        The codes are sorted by computing a count of codes for each length,
9475fd0b74Schristos        creating from that a table of starting indices for each length in the
9575fd0b74Schristos        sorted table, and then entering the symbols in order in the sorted
9675fd0b74Schristos        table.  The sorted table is work[], with that space being provided by
9775fd0b74Schristos        the caller.
9875fd0b74Schristos 
9975fd0b74Schristos        The length counts are used for other purposes as well, i.e. finding
10075fd0b74Schristos        the minimum and maximum length codes, determining if there are any
10175fd0b74Schristos        codes at all, checking for a valid set of lengths, and looking ahead
10275fd0b74Schristos        at length counts to determine sub-table sizes when building the
10375fd0b74Schristos        decoding tables.
10475fd0b74Schristos      */
10575fd0b74Schristos 
10675fd0b74Schristos     /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
10775fd0b74Schristos     for (len = 0; len <= MAXBITS; len++)
10875fd0b74Schristos         count[len] = 0;
10975fd0b74Schristos     for (sym = 0; sym < codes; sym++)
11075fd0b74Schristos         count[lens[sym]]++;
11175fd0b74Schristos 
11275fd0b74Schristos     /* bound code lengths, force root to be within code lengths */
11375fd0b74Schristos     root = *bits;
11475fd0b74Schristos     for (max = MAXBITS; max >= 1; max--)
11575fd0b74Schristos         if (count[max] != 0) break;
11675fd0b74Schristos     if (root > max) root = max;
11775fd0b74Schristos     if (max == 0) {                     /* no symbols to code at all */
11875fd0b74Schristos         here.op = (unsigned char)64;    /* invalid code marker */
11975fd0b74Schristos         here.bits = (unsigned char)1;
12075fd0b74Schristos         here.val = (unsigned short)0;
12175fd0b74Schristos         *(*table)++ = here;             /* make a table to force an error */
12275fd0b74Schristos         *(*table)++ = here;
12375fd0b74Schristos         *bits = 1;
12475fd0b74Schristos         return 0;     /* no symbols, but wait for decoding to report error */
12575fd0b74Schristos     }
12675fd0b74Schristos     for (min = 1; min < max; min++)
12775fd0b74Schristos         if (count[min] != 0) break;
12875fd0b74Schristos     if (root < min) root = min;
12975fd0b74Schristos 
13075fd0b74Schristos     /* check for an over-subscribed or incomplete set of lengths */
13175fd0b74Schristos     left = 1;
13275fd0b74Schristos     for (len = 1; len <= MAXBITS; len++) {
13375fd0b74Schristos         left <<= 1;
13475fd0b74Schristos         left -= count[len];
13575fd0b74Schristos         if (left < 0) return -1;        /* over-subscribed */
13675fd0b74Schristos     }
13775fd0b74Schristos     if (left > 0 && (type == CODES || max != 1))
13875fd0b74Schristos         return -1;                      /* incomplete set */
13975fd0b74Schristos 
14075fd0b74Schristos     /* generate offsets into symbol table for each length for sorting */
14175fd0b74Schristos     offs[1] = 0;
14275fd0b74Schristos     for (len = 1; len < MAXBITS; len++)
14375fd0b74Schristos         offs[len + 1] = offs[len] + count[len];
14475fd0b74Schristos 
14575fd0b74Schristos     /* sort symbols by length, by symbol order within each length */
14675fd0b74Schristos     for (sym = 0; sym < codes; sym++)
14775fd0b74Schristos         if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
14875fd0b74Schristos 
14975fd0b74Schristos     /*
15075fd0b74Schristos        Create and fill in decoding tables.  In this loop, the table being
15175fd0b74Schristos        filled is at next and has curr index bits.  The code being used is huff
15275fd0b74Schristos        with length len.  That code is converted to an index by dropping drop
15375fd0b74Schristos        bits off of the bottom.  For codes where len is less than drop + curr,
15475fd0b74Schristos        those top drop + curr - len bits are incremented through all values to
15575fd0b74Schristos        fill the table with replicated entries.
15675fd0b74Schristos 
15775fd0b74Schristos        root is the number of index bits for the root table.  When len exceeds
15875fd0b74Schristos        root, sub-tables are created pointed to by the root entry with an index
15975fd0b74Schristos        of the low root bits of huff.  This is saved in low to check for when a
16075fd0b74Schristos        new sub-table should be started.  drop is zero when the root table is
16175fd0b74Schristos        being filled, and drop is root when sub-tables are being filled.
16275fd0b74Schristos 
16375fd0b74Schristos        When a new sub-table is needed, it is necessary to look ahead in the
16475fd0b74Schristos        code lengths to determine what size sub-table is needed.  The length
16575fd0b74Schristos        counts are used for this, and so count[] is decremented as codes are
16675fd0b74Schristos        entered in the tables.
16775fd0b74Schristos 
16875fd0b74Schristos        used keeps track of how many table entries have been allocated from the
16975fd0b74Schristos        provided *table space.  It is checked for LENS and DIST tables against
17075fd0b74Schristos        the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
17175fd0b74Schristos        the initial root table size constants.  See the comments in inftrees.h
17275fd0b74Schristos        for more information.
17375fd0b74Schristos 
17475fd0b74Schristos        sym increments through all symbols, and the loop terminates when
17575fd0b74Schristos        all codes of length max, i.e. all codes, have been processed.  This
17675fd0b74Schristos        routine permits incomplete codes, so another loop after this one fills
17775fd0b74Schristos        in the rest of the decoding tables with invalid code markers.
17875fd0b74Schristos      */
17975fd0b74Schristos 
18075fd0b74Schristos     /* set up for code type */
18175fd0b74Schristos     switch (type) {
18275fd0b74Schristos     case CODES:
18375fd0b74Schristos         base = extra = work;    /* dummy value--not used */
184ede78133Schristos         match = 20;
18575fd0b74Schristos         break;
18675fd0b74Schristos     case LENS:
18775fd0b74Schristos         base = lbase;
18875fd0b74Schristos         extra = lext;
189ede78133Schristos         match = 257;
19075fd0b74Schristos         break;
19175fd0b74Schristos     default:    /* DISTS */
19275fd0b74Schristos         base = dbase;
19375fd0b74Schristos         extra = dext;
194ede78133Schristos         match = 0;
19575fd0b74Schristos     }
19675fd0b74Schristos 
19775fd0b74Schristos     /* initialize state for loop */
19875fd0b74Schristos     huff = 0;                   /* starting code */
19975fd0b74Schristos     sym = 0;                    /* starting code symbol */
20075fd0b74Schristos     len = min;                  /* starting code length */
20175fd0b74Schristos     next = *table;              /* current table to fill in */
20275fd0b74Schristos     curr = root;                /* current table index bits */
20375fd0b74Schristos     drop = 0;                   /* current bits to drop from code for index */
20475fd0b74Schristos     low = (unsigned)(-1);       /* trigger new sub-table when len > root */
20575fd0b74Schristos     used = 1U << root;          /* use root table entries */
20675fd0b74Schristos     mask = used - 1;            /* mask for comparing low */
20775fd0b74Schristos 
20875fd0b74Schristos     /* check available table space */
20975fd0b74Schristos     if ((type == LENS && used > ENOUGH_LENS) ||
21075fd0b74Schristos         (type == DISTS && used > ENOUGH_DISTS))
21175fd0b74Schristos         return 1;
21275fd0b74Schristos 
21375fd0b74Schristos     /* process all codes and make table entries */
21475fd0b74Schristos     for (;;) {
21575fd0b74Schristos         /* create table entry */
21675fd0b74Schristos         here.bits = (unsigned char)(len - drop);
217ede78133Schristos         if (work[sym] + 1U < match) {
21875fd0b74Schristos             here.op = (unsigned char)0;
21975fd0b74Schristos             here.val = work[sym];
22075fd0b74Schristos         }
221ede78133Schristos         else if (work[sym] >= match) {
222ede78133Schristos             here.op = (unsigned char)(extra[work[sym] - match]);
223ede78133Schristos             here.val = base[work[sym] - match];
22475fd0b74Schristos         }
22575fd0b74Schristos         else {
22675fd0b74Schristos             here.op = (unsigned char)(32 + 64);         /* end of block */
22775fd0b74Schristos             here.val = 0;
22875fd0b74Schristos         }
22975fd0b74Schristos 
23075fd0b74Schristos         /* replicate for those indices with low len bits equal to huff */
23175fd0b74Schristos         incr = 1U << (len - drop);
23275fd0b74Schristos         fill = 1U << curr;
23375fd0b74Schristos         min = fill;                 /* save offset to next table */
23475fd0b74Schristos         do {
23575fd0b74Schristos             fill -= incr;
23675fd0b74Schristos             next[(huff >> drop) + fill] = here;
23775fd0b74Schristos         } while (fill != 0);
23875fd0b74Schristos 
23975fd0b74Schristos         /* backwards increment the len-bit code huff */
24075fd0b74Schristos         incr = 1U << (len - 1);
24175fd0b74Schristos         while (huff & incr)
24275fd0b74Schristos             incr >>= 1;
24375fd0b74Schristos         if (incr != 0) {
24475fd0b74Schristos             huff &= incr - 1;
24575fd0b74Schristos             huff += incr;
24675fd0b74Schristos         }
24775fd0b74Schristos         else
24875fd0b74Schristos             huff = 0;
24975fd0b74Schristos 
25075fd0b74Schristos         /* go to next symbol, update count, len */
25175fd0b74Schristos         sym++;
25275fd0b74Schristos         if (--(count[len]) == 0) {
25375fd0b74Schristos             if (len == max) break;
25475fd0b74Schristos             len = lens[work[sym]];
25575fd0b74Schristos         }
25675fd0b74Schristos 
25775fd0b74Schristos         /* create new sub-table if needed */
25875fd0b74Schristos         if (len > root && (huff & mask) != low) {
25975fd0b74Schristos             /* if first time, transition to sub-tables */
26075fd0b74Schristos             if (drop == 0)
26175fd0b74Schristos                 drop = root;
26275fd0b74Schristos 
26375fd0b74Schristos             /* increment past last table */
26475fd0b74Schristos             next += min;            /* here min is 1 << curr */
26575fd0b74Schristos 
26675fd0b74Schristos             /* determine length of next table */
26775fd0b74Schristos             curr = len - drop;
26875fd0b74Schristos             left = (int)(1 << curr);
26975fd0b74Schristos             while (curr + drop < max) {
27075fd0b74Schristos                 left -= count[curr + drop];
27175fd0b74Schristos                 if (left <= 0) break;
27275fd0b74Schristos                 curr++;
27375fd0b74Schristos                 left <<= 1;
27475fd0b74Schristos             }
27575fd0b74Schristos 
27675fd0b74Schristos             /* check for enough space */
27775fd0b74Schristos             used += 1U << curr;
27875fd0b74Schristos             if ((type == LENS && used > ENOUGH_LENS) ||
27975fd0b74Schristos                 (type == DISTS && used > ENOUGH_DISTS))
28075fd0b74Schristos                 return 1;
28175fd0b74Schristos 
28275fd0b74Schristos             /* point entry in root table to sub-table */
28375fd0b74Schristos             low = huff & mask;
28475fd0b74Schristos             (*table)[low].op = (unsigned char)curr;
28575fd0b74Schristos             (*table)[low].bits = (unsigned char)root;
28675fd0b74Schristos             (*table)[low].val = (unsigned short)(next - *table);
28775fd0b74Schristos         }
28875fd0b74Schristos     }
28975fd0b74Schristos 
29075fd0b74Schristos     /* fill in remaining table entry if code is incomplete (guaranteed to have
29175fd0b74Schristos        at most one remaining entry, since if the code is incomplete, the
29275fd0b74Schristos        maximum code length that was allowed to get this far is one bit) */
29375fd0b74Schristos     if (huff != 0) {
29475fd0b74Schristos         here.op = (unsigned char)64;            /* invalid code marker */
29575fd0b74Schristos         here.bits = (unsigned char)(len - drop);
29675fd0b74Schristos         here.val = (unsigned short)0;
29775fd0b74Schristos         next[huff] = here;
29875fd0b74Schristos     }
29975fd0b74Schristos 
30075fd0b74Schristos     /* set return parameters */
30175fd0b74Schristos     *table += used;
30275fd0b74Schristos     *bits = root;
30375fd0b74Schristos     return 0;
30475fd0b74Schristos }
305