1aaf4ece6Schristos /* inftree9.c -- generate Huffman trees for efficient decoding 2*b175d1c2Schristos * Copyright (C) 1995-2024 Mark Adler 3aaf4ece6Schristos * For conditions of distribution and use, see copyright notice in zlib.h 4aaf4ece6Schristos */ 5aaf4ece6Schristos 6aaf4ece6Schristos #include "zutil.h" 7aaf4ece6Schristos #include "inftree9.h" 8aaf4ece6Schristos 9aaf4ece6Schristos #define MAXBITS 15 10aaf4ece6Schristos 11aaf4ece6Schristos const char inflate9_copyright[] = 12*b175d1c2Schristos " inflate9 1.3.1 Copyright 1995-2024 Mark Adler "; 13aaf4ece6Schristos /* 14aaf4ece6Schristos If you use the zlib library in a product, an acknowledgment is welcome 15aaf4ece6Schristos in the documentation of your product. If for some reason you cannot 16aaf4ece6Schristos include such an acknowledgment, I would appreciate that you keep this 17aaf4ece6Schristos copyright string in the executable of your product. 18aaf4ece6Schristos */ 19aaf4ece6Schristos 20aaf4ece6Schristos /* 21aaf4ece6Schristos Build a set of tables to decode the provided canonical Huffman code. 22aaf4ece6Schristos The code lengths are lens[0..codes-1]. The result starts at *table, 23aaf4ece6Schristos whose indices are 0..2^bits-1. work is a writable array of at least 24aaf4ece6Schristos lens shorts, which is used as a work area. type is the type of code 25aaf4ece6Schristos to be generated, CODES, LENS, or DISTS. On return, zero is success, 26aaf4ece6Schristos -1 is an invalid code, and +1 means that ENOUGH isn't enough. table 27aaf4ece6Schristos on return points to the next available entry's address. bits is the 28aaf4ece6Schristos requested root table index bits, and on return it is the actual root 29aaf4ece6Schristos table index bits. It will differ if the request is greater than the 30aaf4ece6Schristos longest code or if it is less than the shortest code. 31aaf4ece6Schristos */ 32*b175d1c2Schristos int inflate_table9(codetype type, unsigned short FAR *lens, unsigned codes, 33*b175d1c2Schristos code FAR * FAR *table, unsigned FAR *bits, 34*b175d1c2Schristos unsigned short FAR *work) { 35aaf4ece6Schristos unsigned len; /* a code's length in bits */ 36aaf4ece6Schristos unsigned sym; /* index of code symbols */ 37aaf4ece6Schristos unsigned min, max; /* minimum and maximum code lengths */ 38aaf4ece6Schristos unsigned root; /* number of index bits for root table */ 39aaf4ece6Schristos unsigned curr; /* number of index bits for current table */ 40aaf4ece6Schristos unsigned drop; /* code bits to drop for sub-table */ 41aaf4ece6Schristos int left; /* number of prefix codes available */ 42aaf4ece6Schristos unsigned used; /* code entries in table used */ 43aaf4ece6Schristos unsigned huff; /* Huffman code */ 44aaf4ece6Schristos unsigned incr; /* for incrementing code, index */ 45aaf4ece6Schristos unsigned fill; /* index for replicating entries */ 46aaf4ece6Schristos unsigned low; /* low bits for current root entry */ 47aaf4ece6Schristos unsigned mask; /* mask for low root bits */ 48aaf4ece6Schristos code this; /* table entry for duplication */ 49aaf4ece6Schristos code FAR *next; /* next available space in table */ 50aaf4ece6Schristos const unsigned short FAR *base; /* base value table to use */ 51aaf4ece6Schristos const unsigned short FAR *extra; /* extra bits table to use */ 52aaf4ece6Schristos int end; /* use base and extra for symbol > end */ 53aaf4ece6Schristos unsigned short count[MAXBITS+1]; /* number of codes of each length */ 54aaf4ece6Schristos unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ 55aaf4ece6Schristos static const unsigned short lbase[31] = { /* Length codes 257..285 base */ 56aaf4ece6Schristos 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 57aaf4ece6Schristos 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 58aaf4ece6Schristos 131, 163, 195, 227, 3, 0, 0}; 59aaf4ece6Schristos static const unsigned short lext[31] = { /* Length codes 257..285 extra */ 60aaf4ece6Schristos 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 61aaf4ece6Schristos 130, 130, 130, 130, 131, 131, 131, 131, 132, 132, 132, 132, 62*b175d1c2Schristos 133, 133, 133, 133, 144, 203, 77}; 63aaf4ece6Schristos static const unsigned short dbase[32] = { /* Distance codes 0..31 base */ 64aaf4ece6Schristos 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65aaf4ece6Schristos 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 66aaf4ece6Schristos 4097, 6145, 8193, 12289, 16385, 24577, 32769, 49153}; 67aaf4ece6Schristos static const unsigned short dext[32] = { /* Distance codes 0..31 extra */ 68aaf4ece6Schristos 128, 128, 128, 128, 129, 129, 130, 130, 131, 131, 132, 132, 69aaf4ece6Schristos 133, 133, 134, 134, 135, 135, 136, 136, 137, 137, 138, 138, 70aaf4ece6Schristos 139, 139, 140, 140, 141, 141, 142, 142}; 71aaf4ece6Schristos 72aaf4ece6Schristos /* 73aaf4ece6Schristos Process a set of code lengths to create a canonical Huffman code. The 74aaf4ece6Schristos code lengths are lens[0..codes-1]. Each length corresponds to the 75aaf4ece6Schristos symbols 0..codes-1. The Huffman code is generated by first sorting the 76aaf4ece6Schristos symbols by length from short to long, and retaining the symbol order 77aaf4ece6Schristos for codes with equal lengths. Then the code starts with all zero bits 78aaf4ece6Schristos for the first code of the shortest length, and the codes are integer 79aaf4ece6Schristos increments for the same length, and zeros are appended as the length 80aaf4ece6Schristos increases. For the deflate format, these bits are stored backwards 81aaf4ece6Schristos from their more natural integer increment ordering, and so when the 82aaf4ece6Schristos decoding tables are built in the large loop below, the integer codes 83aaf4ece6Schristos are incremented backwards. 84aaf4ece6Schristos 85aaf4ece6Schristos This routine assumes, but does not check, that all of the entries in 86aaf4ece6Schristos lens[] are in the range 0..MAXBITS. The caller must assure this. 87aaf4ece6Schristos 1..MAXBITS is interpreted as that code length. zero means that that 88aaf4ece6Schristos symbol does not occur in this code. 89aaf4ece6Schristos 90aaf4ece6Schristos The codes are sorted by computing a count of codes for each length, 91aaf4ece6Schristos creating from that a table of starting indices for each length in the 92aaf4ece6Schristos sorted table, and then entering the symbols in order in the sorted 93aaf4ece6Schristos table. The sorted table is work[], with that space being provided by 94aaf4ece6Schristos the caller. 95aaf4ece6Schristos 96aaf4ece6Schristos The length counts are used for other purposes as well, i.e. finding 97aaf4ece6Schristos the minimum and maximum length codes, determining if there are any 98aaf4ece6Schristos codes at all, checking for a valid set of lengths, and looking ahead 99aaf4ece6Schristos at length counts to determine sub-table sizes when building the 100aaf4ece6Schristos decoding tables. 101aaf4ece6Schristos */ 102aaf4ece6Schristos 103aaf4ece6Schristos /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ 104aaf4ece6Schristos for (len = 0; len <= MAXBITS; len++) 105aaf4ece6Schristos count[len] = 0; 106aaf4ece6Schristos for (sym = 0; sym < codes; sym++) 107aaf4ece6Schristos count[lens[sym]]++; 108aaf4ece6Schristos 109aaf4ece6Schristos /* bound code lengths, force root to be within code lengths */ 110aaf4ece6Schristos root = *bits; 111aaf4ece6Schristos for (max = MAXBITS; max >= 1; max--) 112aaf4ece6Schristos if (count[max] != 0) break; 113aaf4ece6Schristos if (root > max) root = max; 114aaf4ece6Schristos if (max == 0) return -1; /* no codes! */ 115aaf4ece6Schristos for (min = 1; min <= MAXBITS; min++) 116aaf4ece6Schristos if (count[min] != 0) break; 117aaf4ece6Schristos if (root < min) root = min; 118aaf4ece6Schristos 119aaf4ece6Schristos /* check for an over-subscribed or incomplete set of lengths */ 120aaf4ece6Schristos left = 1; 121aaf4ece6Schristos for (len = 1; len <= MAXBITS; len++) { 122aaf4ece6Schristos left <<= 1; 123aaf4ece6Schristos left -= count[len]; 124aaf4ece6Schristos if (left < 0) return -1; /* over-subscribed */ 125aaf4ece6Schristos } 126aaf4ece6Schristos if (left > 0 && (type == CODES || max != 1)) 127aaf4ece6Schristos return -1; /* incomplete set */ 128aaf4ece6Schristos 129aaf4ece6Schristos /* generate offsets into symbol table for each length for sorting */ 130aaf4ece6Schristos offs[1] = 0; 131aaf4ece6Schristos for (len = 1; len < MAXBITS; len++) 132aaf4ece6Schristos offs[len + 1] = offs[len] + count[len]; 133aaf4ece6Schristos 134aaf4ece6Schristos /* sort symbols by length, by symbol order within each length */ 135aaf4ece6Schristos for (sym = 0; sym < codes; sym++) 136aaf4ece6Schristos if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; 137aaf4ece6Schristos 138aaf4ece6Schristos /* 139aaf4ece6Schristos Create and fill in decoding tables. In this loop, the table being 140aaf4ece6Schristos filled is at next and has curr index bits. The code being used is huff 141aaf4ece6Schristos with length len. That code is converted to an index by dropping drop 142aaf4ece6Schristos bits off of the bottom. For codes where len is less than drop + curr, 143aaf4ece6Schristos those top drop + curr - len bits are incremented through all values to 144aaf4ece6Schristos fill the table with replicated entries. 145aaf4ece6Schristos 146aaf4ece6Schristos root is the number of index bits for the root table. When len exceeds 147aaf4ece6Schristos root, sub-tables are created pointed to by the root entry with an index 148aaf4ece6Schristos of the low root bits of huff. This is saved in low to check for when a 149aaf4ece6Schristos new sub-table should be started. drop is zero when the root table is 150aaf4ece6Schristos being filled, and drop is root when sub-tables are being filled. 151aaf4ece6Schristos 152aaf4ece6Schristos When a new sub-table is needed, it is necessary to look ahead in the 153aaf4ece6Schristos code lengths to determine what size sub-table is needed. The length 154aaf4ece6Schristos counts are used for this, and so count[] is decremented as codes are 155aaf4ece6Schristos entered in the tables. 156aaf4ece6Schristos 157aaf4ece6Schristos used keeps track of how many table entries have been allocated from the 158c3423655Schristos provided *table space. It is checked for LENS and DIST tables against 159c3423655Schristos the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in 160c3423655Schristos the initial root table size constants. See the comments in inftree9.h 161c3423655Schristos for more information. 162aaf4ece6Schristos 163aaf4ece6Schristos sym increments through all symbols, and the loop terminates when 164aaf4ece6Schristos all codes of length max, i.e. all codes, have been processed. This 165aaf4ece6Schristos routine permits incomplete codes, so another loop after this one fills 166aaf4ece6Schristos in the rest of the decoding tables with invalid code markers. 167aaf4ece6Schristos */ 168aaf4ece6Schristos 169aaf4ece6Schristos /* set up for code type */ 170aaf4ece6Schristos switch (type) { 171aaf4ece6Schristos case CODES: 172aaf4ece6Schristos base = extra = work; /* dummy value--not used */ 173aaf4ece6Schristos end = 19; 174aaf4ece6Schristos break; 175aaf4ece6Schristos case LENS: 176aaf4ece6Schristos base = lbase; 177aaf4ece6Schristos base -= 257; 178aaf4ece6Schristos extra = lext; 179aaf4ece6Schristos extra -= 257; 180aaf4ece6Schristos end = 256; 181aaf4ece6Schristos break; 182aaf4ece6Schristos default: /* DISTS */ 183aaf4ece6Schristos base = dbase; 184aaf4ece6Schristos extra = dext; 185aaf4ece6Schristos end = -1; 186aaf4ece6Schristos } 187aaf4ece6Schristos 188aaf4ece6Schristos /* initialize state for loop */ 189aaf4ece6Schristos huff = 0; /* starting code */ 190aaf4ece6Schristos sym = 0; /* starting code symbol */ 191aaf4ece6Schristos len = min; /* starting code length */ 192aaf4ece6Schristos next = *table; /* current table to fill in */ 193aaf4ece6Schristos curr = root; /* current table index bits */ 194aaf4ece6Schristos drop = 0; /* current bits to drop from code for index */ 195aaf4ece6Schristos low = (unsigned)(-1); /* trigger new sub-table when len > root */ 196aaf4ece6Schristos used = 1U << root; /* use root table entries */ 197aaf4ece6Schristos mask = used - 1; /* mask for comparing low */ 198aaf4ece6Schristos 199aaf4ece6Schristos /* check available table space */ 200c3423655Schristos if ((type == LENS && used >= ENOUGH_LENS) || 201c3423655Schristos (type == DISTS && used >= ENOUGH_DISTS)) 202aaf4ece6Schristos return 1; 203aaf4ece6Schristos 204aaf4ece6Schristos /* process all codes and make table entries */ 205aaf4ece6Schristos for (;;) { 206aaf4ece6Schristos /* create table entry */ 207aaf4ece6Schristos this.bits = (unsigned char)(len - drop); 208aaf4ece6Schristos if ((int)(work[sym]) < end) { 209aaf4ece6Schristos this.op = (unsigned char)0; 210aaf4ece6Schristos this.val = work[sym]; 211aaf4ece6Schristos } 212aaf4ece6Schristos else if ((int)(work[sym]) > end) { 213aaf4ece6Schristos this.op = (unsigned char)(extra[work[sym]]); 214aaf4ece6Schristos this.val = base[work[sym]]; 215aaf4ece6Schristos } 216aaf4ece6Schristos else { 217aaf4ece6Schristos this.op = (unsigned char)(32 + 64); /* end of block */ 218aaf4ece6Schristos this.val = 0; 219aaf4ece6Schristos } 220aaf4ece6Schristos 221aaf4ece6Schristos /* replicate for those indices with low len bits equal to huff */ 222aaf4ece6Schristos incr = 1U << (len - drop); 223aaf4ece6Schristos fill = 1U << curr; 224aaf4ece6Schristos do { 225aaf4ece6Schristos fill -= incr; 226aaf4ece6Schristos next[(huff >> drop) + fill] = this; 227aaf4ece6Schristos } while (fill != 0); 228aaf4ece6Schristos 229aaf4ece6Schristos /* backwards increment the len-bit code huff */ 230aaf4ece6Schristos incr = 1U << (len - 1); 231aaf4ece6Schristos while (huff & incr) 232aaf4ece6Schristos incr >>= 1; 233aaf4ece6Schristos if (incr != 0) { 234aaf4ece6Schristos huff &= incr - 1; 235aaf4ece6Schristos huff += incr; 236aaf4ece6Schristos } 237aaf4ece6Schristos else 238aaf4ece6Schristos huff = 0; 239aaf4ece6Schristos 240aaf4ece6Schristos /* go to next symbol, update count, len */ 241aaf4ece6Schristos sym++; 242aaf4ece6Schristos if (--(count[len]) == 0) { 243aaf4ece6Schristos if (len == max) break; 244aaf4ece6Schristos len = lens[work[sym]]; 245aaf4ece6Schristos } 246aaf4ece6Schristos 247aaf4ece6Schristos /* create new sub-table if needed */ 248aaf4ece6Schristos if (len > root && (huff & mask) != low) { 249aaf4ece6Schristos /* if first time, transition to sub-tables */ 250aaf4ece6Schristos if (drop == 0) 251aaf4ece6Schristos drop = root; 252aaf4ece6Schristos 253aaf4ece6Schristos /* increment past last table */ 254aaf4ece6Schristos next += 1U << curr; 255aaf4ece6Schristos 256aaf4ece6Schristos /* determine length of next table */ 257aaf4ece6Schristos curr = len - drop; 258aaf4ece6Schristos left = (int)(1 << curr); 259aaf4ece6Schristos while (curr + drop < max) { 260aaf4ece6Schristos left -= count[curr + drop]; 261aaf4ece6Schristos if (left <= 0) break; 262aaf4ece6Schristos curr++; 263aaf4ece6Schristos left <<= 1; 264aaf4ece6Schristos } 265aaf4ece6Schristos 266aaf4ece6Schristos /* check for enough space */ 267aaf4ece6Schristos used += 1U << curr; 268c3423655Schristos if ((type == LENS && used >= ENOUGH_LENS) || 269c3423655Schristos (type == DISTS && used >= ENOUGH_DISTS)) 270aaf4ece6Schristos return 1; 271aaf4ece6Schristos 272aaf4ece6Schristos /* point entry in root table to sub-table */ 273aaf4ece6Schristos low = huff & mask; 274aaf4ece6Schristos (*table)[low].op = (unsigned char)curr; 275aaf4ece6Schristos (*table)[low].bits = (unsigned char)root; 276aaf4ece6Schristos (*table)[low].val = (unsigned short)(next - *table); 277aaf4ece6Schristos } 278aaf4ece6Schristos } 279aaf4ece6Schristos 280aaf4ece6Schristos /* 281aaf4ece6Schristos Fill in rest of table for incomplete codes. This loop is similar to the 282aaf4ece6Schristos loop above in incrementing huff for table indices. It is assumed that 283aaf4ece6Schristos len is equal to curr + drop, so there is no loop needed to increment 284aaf4ece6Schristos through high index bits. When the current sub-table is filled, the loop 285aaf4ece6Schristos drops back to the root table to fill in any remaining entries there. 286aaf4ece6Schristos */ 287aaf4ece6Schristos this.op = (unsigned char)64; /* invalid code marker */ 288aaf4ece6Schristos this.bits = (unsigned char)(len - drop); 289aaf4ece6Schristos this.val = (unsigned short)0; 290aaf4ece6Schristos while (huff != 0) { 291aaf4ece6Schristos /* when done with sub-table, drop back to root table */ 292aaf4ece6Schristos if (drop != 0 && (huff & mask) != low) { 293aaf4ece6Schristos drop = 0; 294aaf4ece6Schristos len = root; 295aaf4ece6Schristos next = *table; 296aaf4ece6Schristos curr = root; 297aaf4ece6Schristos this.bits = (unsigned char)len; 298aaf4ece6Schristos } 299aaf4ece6Schristos 300aaf4ece6Schristos /* put invalid code marker in table */ 301aaf4ece6Schristos next[huff >> drop] = this; 302aaf4ece6Schristos 303aaf4ece6Schristos /* backwards increment the len-bit code huff */ 304aaf4ece6Schristos incr = 1U << (len - 1); 305aaf4ece6Schristos while (huff & incr) 306aaf4ece6Schristos incr >>= 1; 307aaf4ece6Schristos if (incr != 0) { 308aaf4ece6Schristos huff &= incr - 1; 309aaf4ece6Schristos huff += incr; 310aaf4ece6Schristos } 311aaf4ece6Schristos else 312aaf4ece6Schristos huff = 0; 313aaf4ece6Schristos } 314aaf4ece6Schristos 315aaf4ece6Schristos /* set return parameters */ 316aaf4ece6Schristos *table += used; 317aaf4ece6Schristos *bits = root; 318aaf4ece6Schristos return 0; 319aaf4ece6Schristos } 320