1*96c32821Schristos /* $NetBSD: inftrees.c,v 1.5 2024/09/22 19:12:27 christos Exp $ */ 2aaf4ece6Schristos 3aaf4ece6Schristos /* inftrees.c -- generate Huffman trees for efficient decoding 4*96c32821Schristos * Copyright (C) 1995-2024 Mark Adler 5aaf4ece6Schristos * For conditions of distribution and use, see copyright notice in zlib.h 6aaf4ece6Schristos */ 7aaf4ece6Schristos 8aaf4ece6Schristos #include "zutil.h" 9aaf4ece6Schristos #include "inftrees.h" 10aaf4ece6Schristos 11aaf4ece6Schristos #define MAXBITS 15 12aaf4ece6Schristos 13aaf4ece6Schristos const char inflate_copyright[] = 14*96c32821Schristos " inflate 1.3.1 Copyright 1995-2024 Mark Adler "; 15aaf4ece6Schristos /* 16aaf4ece6Schristos If you use the zlib library in a product, an acknowledgment is welcome 17aaf4ece6Schristos in the documentation of your product. If for some reason you cannot 18aaf4ece6Schristos include such an acknowledgment, I would appreciate that you keep this 19aaf4ece6Schristos copyright string in the executable of your product. 20aaf4ece6Schristos */ 21aaf4ece6Schristos 22aaf4ece6Schristos /* 23aaf4ece6Schristos Build a set of tables to decode the provided canonical Huffman code. 24aaf4ece6Schristos The code lengths are lens[0..codes-1]. The result starts at *table, 25aaf4ece6Schristos whose indices are 0..2^bits-1. work is a writable array of at least 26aaf4ece6Schristos lens shorts, which is used as a work area. type is the type of code 27aaf4ece6Schristos to be generated, CODES, LENS, or DISTS. On return, zero is success, 28aaf4ece6Schristos -1 is an invalid code, and +1 means that ENOUGH isn't enough. table 29aaf4ece6Schristos on return points to the next available entry's address. bits is the 30aaf4ece6Schristos requested root table index bits, and on return it is the actual root 31aaf4ece6Schristos table index bits. It will differ if the request is greater than the 32aaf4ece6Schristos longest code or if it is less than the shortest code. 33aaf4ece6Schristos */ 34*96c32821Schristos int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens, 35*96c32821Schristos unsigned codes, code FAR * FAR *table, 36*96c32821Schristos unsigned FAR *bits, unsigned short FAR *work) { 37aaf4ece6Schristos unsigned len; /* a code's length in bits */ 38aaf4ece6Schristos unsigned sym; /* index of code symbols */ 395a1ae2ecSchristos unsigned mmin, mmax; /* minimum and maximum code lengths */ 40aaf4ece6Schristos unsigned root; /* number of index bits for root table */ 41aaf4ece6Schristos unsigned curr; /* number of index bits for current table */ 42aaf4ece6Schristos unsigned drop; /* code bits to drop for sub-table */ 43aaf4ece6Schristos int left; /* number of prefix codes available */ 44aaf4ece6Schristos unsigned used; /* code entries in table used */ 45aaf4ece6Schristos unsigned huff; /* Huffman code */ 46aaf4ece6Schristos unsigned incr; /* for incrementing code, index */ 47aaf4ece6Schristos unsigned fill; /* index for replicating entries */ 48aaf4ece6Schristos unsigned low; /* low bits for current root entry */ 49aaf4ece6Schristos unsigned mask; /* mask for low root bits */ 506db8c6e9Schristos code here; /* table entry for duplication */ 51aaf4ece6Schristos code FAR *next; /* next available space in table */ 52aaf4ece6Schristos const unsigned short FAR *base; /* base value table to use */ 53aaf4ece6Schristos const unsigned short FAR *extra; /* extra bits table to use */ 546db8c6e9Schristos unsigned match; /* use base and extra for symbol >= match */ 55aaf4ece6Schristos unsigned short count[MAXBITS+1]; /* number of codes of each length */ 56aaf4ece6Schristos unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ 57aaf4ece6Schristos static const unsigned short lbase[31] = { /* Length codes 257..285 base */ 58aaf4ece6Schristos 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 59aaf4ece6Schristos 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; 60aaf4ece6Schristos static const unsigned short lext[31] = { /* Length codes 257..285 extra */ 61aaf4ece6Schristos 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 62*96c32821Schristos 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 203, 77}; 63aaf4ece6Schristos static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ 64aaf4ece6Schristos 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 65aaf4ece6Schristos 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 66aaf4ece6Schristos 8193, 12289, 16385, 24577, 0, 0}; 67aaf4ece6Schristos static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ 68aaf4ece6Schristos 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 69aaf4ece6Schristos 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 70aaf4ece6Schristos 28, 28, 29, 29, 64, 64}; 71aaf4ece6Schristos 72aaf4ece6Schristos /* 73aaf4ece6Schristos Process a set of code lengths to create a canonical Huffman code. The 74aaf4ece6Schristos code lengths are lens[0..codes-1]. Each length corresponds to the 75aaf4ece6Schristos symbols 0..codes-1. The Huffman code is generated by first sorting the 76aaf4ece6Schristos symbols by length from short to long, and retaining the symbol order 77aaf4ece6Schristos for codes with equal lengths. Then the code starts with all zero bits 78aaf4ece6Schristos for the first code of the shortest length, and the codes are integer 79aaf4ece6Schristos increments for the same length, and zeros are appended as the length 80aaf4ece6Schristos increases. For the deflate format, these bits are stored backwards 81aaf4ece6Schristos from their more natural integer increment ordering, and so when the 82aaf4ece6Schristos decoding tables are built in the large loop below, the integer codes 83aaf4ece6Schristos are incremented backwards. 84aaf4ece6Schristos 85aaf4ece6Schristos This routine assumes, but does not check, that all of the entries in 86aaf4ece6Schristos lens[] are in the range 0..MAXBITS. The caller must assure this. 87aaf4ece6Schristos 1..MAXBITS is interpreted as that code length. zero means that that 88aaf4ece6Schristos symbol does not occur in this code. 89aaf4ece6Schristos 90aaf4ece6Schristos The codes are sorted by computing a count of codes for each length, 91aaf4ece6Schristos creating from that a table of starting indices for each length in the 92aaf4ece6Schristos sorted table, and then entering the symbols in order in the sorted 93aaf4ece6Schristos table. The sorted table is work[], with that space being provided by 94aaf4ece6Schristos the caller. 95aaf4ece6Schristos 96aaf4ece6Schristos The length counts are used for other purposes as well, i.e. finding 97aaf4ece6Schristos the minimum and maximum length codes, determining if there are any 98aaf4ece6Schristos codes at all, checking for a valid set of lengths, and looking ahead 99aaf4ece6Schristos at length counts to determine sub-table sizes when building the 100aaf4ece6Schristos decoding tables. 101aaf4ece6Schristos */ 102aaf4ece6Schristos 103aaf4ece6Schristos /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ 104aaf4ece6Schristos for (len = 0; len <= MAXBITS; len++) 105aaf4ece6Schristos count[len] = 0; 106aaf4ece6Schristos for (sym = 0; sym < codes; sym++) 107aaf4ece6Schristos count[lens[sym]]++; 108aaf4ece6Schristos 109aaf4ece6Schristos /* bound code lengths, force root to be within code lengths */ 110aaf4ece6Schristos root = *bits; 1115a1ae2ecSchristos for (mmax = MAXBITS; mmax >= 1; mmax--) 1125a1ae2ecSchristos if (count[mmax] != 0) break; 1135a1ae2ecSchristos if (root > mmax) root = mmax; 1145a1ae2ecSchristos if (mmax == 0) { /* no symbols to code at all */ 1156db8c6e9Schristos here.op = (unsigned char)64; /* invalid code marker */ 1166db8c6e9Schristos here.bits = (unsigned char)1; 1176db8c6e9Schristos here.val = (unsigned short)0; 1186db8c6e9Schristos *(*table)++ = here; /* make a table to force an error */ 1196db8c6e9Schristos *(*table)++ = here; 120aaf4ece6Schristos *bits = 1; 121aaf4ece6Schristos return 0; /* no symbols, but wait for decoding to report error */ 122aaf4ece6Schristos } 1235a1ae2ecSchristos for (mmin = 1; mmin <= MAXBITS; mmin++) 1245a1ae2ecSchristos if (count[mmin] != 0) break; 1255a1ae2ecSchristos if (root < mmin) root = mmin; 126aaf4ece6Schristos 127aaf4ece6Schristos /* check for an over-subscribed or incomplete set of lengths */ 128aaf4ece6Schristos left = 1; 129aaf4ece6Schristos for (len = 1; len <= MAXBITS; len++) { 130aaf4ece6Schristos left <<= 1; 131aaf4ece6Schristos left -= count[len]; 132aaf4ece6Schristos if (left < 0) return -1; /* over-subscribed */ 133aaf4ece6Schristos } 1345a1ae2ecSchristos if (left > 0 && (type == CODES || mmax != 1)) 135aaf4ece6Schristos return -1; /* incomplete set */ 136aaf4ece6Schristos 137aaf4ece6Schristos /* generate offsets into symbol table for each length for sorting */ 138aaf4ece6Schristos offs[1] = 0; 139aaf4ece6Schristos for (len = 1; len < MAXBITS; len++) 140aaf4ece6Schristos offs[len + 1] = offs[len] + count[len]; 141aaf4ece6Schristos 142aaf4ece6Schristos /* sort symbols by length, by symbol order within each length */ 143aaf4ece6Schristos for (sym = 0; sym < codes; sym++) 144aaf4ece6Schristos if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; 145aaf4ece6Schristos 146aaf4ece6Schristos /* 147aaf4ece6Schristos Create and fill in decoding tables. In this loop, the table being 148aaf4ece6Schristos filled is at next and has curr index bits. The code being used is huff 149aaf4ece6Schristos with length len. That code is converted to an index by dropping drop 150aaf4ece6Schristos bits off of the bottom. For codes where len is less than drop + curr, 151aaf4ece6Schristos those top drop + curr - len bits are incremented through all values to 152aaf4ece6Schristos fill the table with replicated entries. 153aaf4ece6Schristos 154aaf4ece6Schristos root is the number of index bits for the root table. When len exceeds 155aaf4ece6Schristos root, sub-tables are created pointed to by the root entry with an index 156aaf4ece6Schristos of the low root bits of huff. This is saved in low to check for when a 157aaf4ece6Schristos new sub-table should be started. drop is zero when the root table is 158aaf4ece6Schristos being filled, and drop is root when sub-tables are being filled. 159aaf4ece6Schristos 160aaf4ece6Schristos When a new sub-table is needed, it is necessary to look ahead in the 161aaf4ece6Schristos code lengths to determine what size sub-table is needed. The length 162aaf4ece6Schristos counts are used for this, and so count[] is decremented as codes are 163aaf4ece6Schristos entered in the tables. 164aaf4ece6Schristos 165aaf4ece6Schristos used keeps track of how many table entries have been allocated from the 1666db8c6e9Schristos provided *table space. It is checked for LENS and DIST tables against 1676db8c6e9Schristos the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in 1686db8c6e9Schristos the initial root table size constants. See the comments in inftrees.h 1696db8c6e9Schristos for more information. 170aaf4ece6Schristos 171aaf4ece6Schristos sym increments through all symbols, and the loop terminates when 1725a1ae2ecSchristos all codes of length mmax, i.e. all codes, have been processed. This 173aaf4ece6Schristos routine permits incomplete codes, so another loop after this one fills 174aaf4ece6Schristos in the rest of the decoding tables with invalid code markers. 175aaf4ece6Schristos */ 176aaf4ece6Schristos 177aaf4ece6Schristos /* set up for code type */ 178aaf4ece6Schristos switch (type) { 179aaf4ece6Schristos case CODES: 180aaf4ece6Schristos base = extra = work; /* dummy value--not used */ 1816db8c6e9Schristos match = 20; 182aaf4ece6Schristos break; 183aaf4ece6Schristos case LENS: 184aaf4ece6Schristos base = lbase; 185aaf4ece6Schristos extra = lext; 1866db8c6e9Schristos match = 257; 187aaf4ece6Schristos break; 188aaf4ece6Schristos default: /* DISTS */ 189aaf4ece6Schristos base = dbase; 190aaf4ece6Schristos extra = dext; 1916db8c6e9Schristos match = 0; 192aaf4ece6Schristos } 193aaf4ece6Schristos 194aaf4ece6Schristos /* initialize state for loop */ 195aaf4ece6Schristos huff = 0; /* starting code */ 196aaf4ece6Schristos sym = 0; /* starting code symbol */ 1975a1ae2ecSchristos len = mmin; /* starting code length */ 198aaf4ece6Schristos next = *table; /* current table to fill in */ 199aaf4ece6Schristos curr = root; /* current table index bits */ 200aaf4ece6Schristos drop = 0; /* current bits to drop from code for index */ 201aaf4ece6Schristos low = (unsigned)(-1); /* trigger new sub-table when len > root */ 202aaf4ece6Schristos used = 1U << root; /* use root table entries */ 203aaf4ece6Schristos mask = used - 1; /* mask for comparing low */ 204aaf4ece6Schristos 205aaf4ece6Schristos /* check available table space */ 2066db8c6e9Schristos if ((type == LENS && used > ENOUGH_LENS) || 2076db8c6e9Schristos (type == DISTS && used > ENOUGH_DISTS)) 208aaf4ece6Schristos return 1; 209aaf4ece6Schristos 210aaf4ece6Schristos /* process all codes and make table entries */ 211aaf4ece6Schristos for (;;) { 212aaf4ece6Schristos /* create table entry */ 2136db8c6e9Schristos here.bits = (unsigned char)(len - drop); 2146db8c6e9Schristos if (work[sym] + 1U < match) { 2156db8c6e9Schristos here.op = (unsigned char)0; 2166db8c6e9Schristos here.val = work[sym]; 217aaf4ece6Schristos } 2186db8c6e9Schristos else if (work[sym] >= match) { 2196db8c6e9Schristos here.op = (unsigned char)(extra[work[sym] - match]); 2206db8c6e9Schristos here.val = base[work[sym] - match]; 221aaf4ece6Schristos } 222aaf4ece6Schristos else { 2236db8c6e9Schristos here.op = (unsigned char)(32 + 64); /* end of block */ 2246db8c6e9Schristos here.val = 0; 225aaf4ece6Schristos } 226aaf4ece6Schristos 227aaf4ece6Schristos /* replicate for those indices with low len bits equal to huff */ 228aaf4ece6Schristos incr = 1U << (len - drop); 229aaf4ece6Schristos fill = 1U << curr; 2305a1ae2ecSchristos mmin = fill; /* save offset to next table */ 231aaf4ece6Schristos do { 232aaf4ece6Schristos fill -= incr; 2336db8c6e9Schristos next[(huff >> drop) + fill] = here; 234aaf4ece6Schristos } while (fill != 0); 235aaf4ece6Schristos 236aaf4ece6Schristos /* backwards increment the len-bit code huff */ 237aaf4ece6Schristos incr = 1U << (len - 1); 238aaf4ece6Schristos while (huff & incr) 239aaf4ece6Schristos incr >>= 1; 240aaf4ece6Schristos if (incr != 0) { 241aaf4ece6Schristos huff &= incr - 1; 242aaf4ece6Schristos huff += incr; 243aaf4ece6Schristos } 244aaf4ece6Schristos else 245aaf4ece6Schristos huff = 0; 246aaf4ece6Schristos 247aaf4ece6Schristos /* go to next symbol, update count, len */ 248aaf4ece6Schristos sym++; 249aaf4ece6Schristos if (--(count[len]) == 0) { 2505a1ae2ecSchristos if (len == mmax) break; 251aaf4ece6Schristos len = lens[work[sym]]; 252aaf4ece6Schristos } 253aaf4ece6Schristos 254aaf4ece6Schristos /* create new sub-table if needed */ 255aaf4ece6Schristos if (len > root && (huff & mask) != low) { 256aaf4ece6Schristos /* if first time, transition to sub-tables */ 257aaf4ece6Schristos if (drop == 0) 258aaf4ece6Schristos drop = root; 259aaf4ece6Schristos 260aaf4ece6Schristos /* increment past last table */ 2615a1ae2ecSchristos next += mmin; /* here mmin is 1 << curr */ 262aaf4ece6Schristos 263aaf4ece6Schristos /* determine length of next table */ 264aaf4ece6Schristos curr = len - drop; 265aaf4ece6Schristos left = (int)(1 << curr); 2665a1ae2ecSchristos while (curr + drop < mmax) { 267aaf4ece6Schristos left -= count[curr + drop]; 268aaf4ece6Schristos if (left <= 0) break; 269aaf4ece6Schristos curr++; 270aaf4ece6Schristos left <<= 1; 271aaf4ece6Schristos } 272aaf4ece6Schristos 273aaf4ece6Schristos /* check for enough space */ 274aaf4ece6Schristos used += 1U << curr; 2756db8c6e9Schristos if ((type == LENS && used > ENOUGH_LENS) || 2766db8c6e9Schristos (type == DISTS && used > ENOUGH_DISTS)) 277aaf4ece6Schristos return 1; 278aaf4ece6Schristos 279aaf4ece6Schristos /* point entry in root table to sub-table */ 280aaf4ece6Schristos low = huff & mask; 281aaf4ece6Schristos (*table)[low].op = (unsigned char)curr; 282aaf4ece6Schristos (*table)[low].bits = (unsigned char)root; 283aaf4ece6Schristos (*table)[low].val = (unsigned short)(next - *table); 284aaf4ece6Schristos } 285aaf4ece6Schristos } 286aaf4ece6Schristos 2876db8c6e9Schristos /* fill in remaining table entry if code is incomplete (guaranteed to have 2886db8c6e9Schristos at most one remaining entry, since if the code is incomplete, the 2896db8c6e9Schristos maximum code length that was allowed to get this far is one bit) */ 2906db8c6e9Schristos if (huff != 0) { 2916db8c6e9Schristos here.op = (unsigned char)64; /* invalid code marker */ 2926db8c6e9Schristos here.bits = (unsigned char)(len - drop); 2936db8c6e9Schristos here.val = (unsigned short)0; 2946db8c6e9Schristos next[huff] = here; 295aaf4ece6Schristos } 296aaf4ece6Schristos 297aaf4ece6Schristos /* set return parameters */ 298aaf4ece6Schristos *table += used; 299aaf4ece6Schristos *bits = root; 300aaf4ece6Schristos return 0; 301aaf4ece6Schristos } 302