1*96c32821Schristos /* $NetBSD: inffast.c,v 1.6 2024/09/22 19:12:27 christos Exp $ */ 2aaf4ece6Schristos 3aaf4ece6Schristos /* inffast.c -- fast decoding 43b1253e8Schristos * Copyright (C) 1995-2017 Mark Adler 5aaf4ece6Schristos * For conditions of distribution and use, see copyright notice in zlib.h 6aaf4ece6Schristos */ 7aaf4ece6Schristos 8aaf4ece6Schristos #include "zutil.h" 9aaf4ece6Schristos #include "inftrees.h" 10aaf4ece6Schristos #include "inflate.h" 11aaf4ece6Schristos #include "inffast.h" 12aaf4ece6Schristos 136db8c6e9Schristos #ifdef ASMINF 146db8c6e9Schristos # pragma message("Assembler code may have bugs -- use at your own risk") 15aaf4ece6Schristos #else 16aaf4ece6Schristos 17aaf4ece6Schristos /* 18aaf4ece6Schristos Decode literal, length, and distance codes and write out the resulting 19aaf4ece6Schristos literal and match bytes until either not enough input or output is 20aaf4ece6Schristos available, an end-of-block is encountered, or a data error is encountered. 21aaf4ece6Schristos When large enough input and output buffers are supplied to inflate(), for 22aaf4ece6Schristos example, a 16K input buffer and a 64K output buffer, more than 95% of the 23aaf4ece6Schristos inflate execution time is spent in this routine. 24aaf4ece6Schristos 25aaf4ece6Schristos Entry assumptions: 26aaf4ece6Schristos 27aaf4ece6Schristos state->mode == LEN 28aaf4ece6Schristos strm->avail_in >= 6 29aaf4ece6Schristos strm->avail_out >= 258 30aaf4ece6Schristos start >= strm->avail_out 31aaf4ece6Schristos state->bits < 8 32aaf4ece6Schristos 33aaf4ece6Schristos On return, state->mode is one of: 34aaf4ece6Schristos 35aaf4ece6Schristos LEN -- ran out of enough output space or enough available input 36aaf4ece6Schristos TYPE -- reached end of block code, inflate() to interpret next block 37aaf4ece6Schristos BAD -- error in block data 38aaf4ece6Schristos 39aaf4ece6Schristos Notes: 40aaf4ece6Schristos 41aaf4ece6Schristos - The maximum input bits used by a length/distance pair is 15 bits for the 42aaf4ece6Schristos length code, 5 bits for the length extra, 15 bits for the distance code, 43aaf4ece6Schristos and 13 bits for the distance extra. This totals 48 bits, or six bytes. 44aaf4ece6Schristos Therefore if strm->avail_in >= 6, then there is enough input to avoid 45aaf4ece6Schristos checking for available input while decoding. 46aaf4ece6Schristos 47aaf4ece6Schristos - The maximum bytes that a single length/distance pair can output is 258 48aaf4ece6Schristos bytes, which is the maximum length that can be coded. inflate_fast() 49aaf4ece6Schristos requires strm->avail_out >= 258 for each loop to avoid checking for 50aaf4ece6Schristos output space. 51aaf4ece6Schristos */ 52*96c32821Schristos void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start) { 53aaf4ece6Schristos struct inflate_state FAR *state; 546db8c6e9Schristos z_const unsigned char FAR *in; /* local strm->next_in */ 556db8c6e9Schristos z_const unsigned char FAR *last; /* have enough input while in < last */ 56aaf4ece6Schristos unsigned char FAR *out; /* local strm->next_out */ 57aaf4ece6Schristos unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ 58aaf4ece6Schristos unsigned char FAR *end; /* while out < end, enough space available */ 59aaf4ece6Schristos #ifdef INFLATE_STRICT 60aaf4ece6Schristos unsigned dmax; /* maximum distance from zlib header */ 61aaf4ece6Schristos #endif 62aaf4ece6Schristos unsigned wsize; /* window size or zero if not using window */ 63aaf4ece6Schristos unsigned whave; /* valid bytes in the window */ 646db8c6e9Schristos unsigned wnext; /* window write index */ 65aaf4ece6Schristos unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ 66aaf4ece6Schristos unsigned long hold; /* local strm->hold */ 67aaf4ece6Schristos unsigned bits; /* local strm->bits */ 68aaf4ece6Schristos code const FAR *lcode; /* local strm->lencode */ 69aaf4ece6Schristos code const FAR *dcode; /* local strm->distcode */ 70aaf4ece6Schristos unsigned lmask; /* mask for first level of length codes */ 71aaf4ece6Schristos unsigned dmask; /* mask for first level of distance codes */ 723b1253e8Schristos code const *here; /* retrieved table entry */ 73aaf4ece6Schristos unsigned op; /* code bits, operation, extra bits, or */ 74aaf4ece6Schristos /* window position, window bytes to copy */ 75aaf4ece6Schristos unsigned len; /* match length, unused bytes */ 76aaf4ece6Schristos unsigned dist; /* match distance */ 77aaf4ece6Schristos unsigned char FAR *from; /* where to copy match from */ 78aaf4ece6Schristos 79aaf4ece6Schristos /* copy state to local variables */ 80aaf4ece6Schristos state = (struct inflate_state FAR *)strm->state; 816db8c6e9Schristos in = strm->next_in; 82aaf4ece6Schristos last = in + (strm->avail_in - 5); 836db8c6e9Schristos out = strm->next_out; 84aaf4ece6Schristos beg = out - (start - strm->avail_out); 85aaf4ece6Schristos end = out + (strm->avail_out - 257); 86aaf4ece6Schristos #ifdef INFLATE_STRICT 87aaf4ece6Schristos dmax = state->dmax; 88aaf4ece6Schristos #endif 89aaf4ece6Schristos wsize = state->wsize; 90aaf4ece6Schristos whave = state->whave; 916db8c6e9Schristos wnext = state->wnext; 92aaf4ece6Schristos window = state->window; 93aaf4ece6Schristos hold = state->hold; 94aaf4ece6Schristos bits = state->bits; 95aaf4ece6Schristos lcode = state->lencode; 96aaf4ece6Schristos dcode = state->distcode; 97aaf4ece6Schristos lmask = (1U << state->lenbits) - 1; 98aaf4ece6Schristos dmask = (1U << state->distbits) - 1; 99aaf4ece6Schristos 100aaf4ece6Schristos /* decode literals and length/distances until end-of-block or not enough 101aaf4ece6Schristos input data or output space */ 102aaf4ece6Schristos do { 103aaf4ece6Schristos if (bits < 15) { 1046db8c6e9Schristos hold += (unsigned long)(*in++) << bits; 105aaf4ece6Schristos bits += 8; 1066db8c6e9Schristos hold += (unsigned long)(*in++) << bits; 107aaf4ece6Schristos bits += 8; 108aaf4ece6Schristos } 1093b1253e8Schristos here = lcode + (hold & lmask); 110aaf4ece6Schristos dolen: 1113b1253e8Schristos op = (unsigned)(here->bits); 112aaf4ece6Schristos hold >>= op; 113aaf4ece6Schristos bits -= op; 1143b1253e8Schristos op = (unsigned)(here->op); 115aaf4ece6Schristos if (op == 0) { /* literal */ 1163b1253e8Schristos Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ? 117aaf4ece6Schristos "inflate: literal '%c'\n" : 1183b1253e8Schristos "inflate: literal 0x%02x\n", here->val)); 1193b1253e8Schristos *out++ = (unsigned char)(here->val); 120aaf4ece6Schristos } 121aaf4ece6Schristos else if (op & 16) { /* length base */ 1223b1253e8Schristos len = (unsigned)(here->val); 123aaf4ece6Schristos op &= 15; /* number of extra bits */ 124aaf4ece6Schristos if (op) { 125aaf4ece6Schristos if (bits < op) { 1266db8c6e9Schristos hold += (unsigned long)(*in++) << bits; 127aaf4ece6Schristos bits += 8; 128aaf4ece6Schristos } 129aaf4ece6Schristos len += (unsigned)hold & ((1U << op) - 1); 130aaf4ece6Schristos hold >>= op; 131aaf4ece6Schristos bits -= op; 132aaf4ece6Schristos } 133aaf4ece6Schristos Tracevv((stderr, "inflate: length %u\n", len)); 134aaf4ece6Schristos if (bits < 15) { 1356db8c6e9Schristos hold += (unsigned long)(*in++) << bits; 136aaf4ece6Schristos bits += 8; 1376db8c6e9Schristos hold += (unsigned long)(*in++) << bits; 138aaf4ece6Schristos bits += 8; 139aaf4ece6Schristos } 1403b1253e8Schristos here = dcode + (hold & dmask); 141aaf4ece6Schristos dodist: 1423b1253e8Schristos op = (unsigned)(here->bits); 143aaf4ece6Schristos hold >>= op; 144aaf4ece6Schristos bits -= op; 1453b1253e8Schristos op = (unsigned)(here->op); 146aaf4ece6Schristos if (op & 16) { /* distance base */ 1473b1253e8Schristos dist = (unsigned)(here->val); 148aaf4ece6Schristos op &= 15; /* number of extra bits */ 149aaf4ece6Schristos if (bits < op) { 1506db8c6e9Schristos hold += (unsigned long)(*in++) << bits; 151aaf4ece6Schristos bits += 8; 152aaf4ece6Schristos if (bits < op) { 1536db8c6e9Schristos hold += (unsigned long)(*in++) << bits; 154aaf4ece6Schristos bits += 8; 155aaf4ece6Schristos } 156aaf4ece6Schristos } 157aaf4ece6Schristos dist += (unsigned)hold & ((1U << op) - 1); 158aaf4ece6Schristos #ifdef INFLATE_STRICT 159aaf4ece6Schristos if (dist > dmax) { 160aaf4ece6Schristos strm->msg = (char *)"invalid distance too far back"; 161aaf4ece6Schristos state->mode = BAD; 162aaf4ece6Schristos break; 163aaf4ece6Schristos } 164aaf4ece6Schristos #endif 165aaf4ece6Schristos hold >>= op; 166aaf4ece6Schristos bits -= op; 167aaf4ece6Schristos Tracevv((stderr, "inflate: distance %u\n", dist)); 168aaf4ece6Schristos op = (unsigned)(out - beg); /* max distance in output */ 169aaf4ece6Schristos if (dist > op) { /* see if copy from window */ 170aaf4ece6Schristos op = dist - op; /* distance back in window */ 171aaf4ece6Schristos if (op > whave) { 1726db8c6e9Schristos if (state->sane) { 1736db8c6e9Schristos strm->msg = 1746db8c6e9Schristos __UNCONST("invalid distance too far back"); 175aaf4ece6Schristos state->mode = BAD; 176aaf4ece6Schristos break; 177aaf4ece6Schristos } 1786db8c6e9Schristos #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR 1796db8c6e9Schristos if (len <= op - whave) { 1806db8c6e9Schristos do { 1816db8c6e9Schristos *out++ = 0; 1826db8c6e9Schristos } while (--len); 1836db8c6e9Schristos continue; 1846db8c6e9Schristos } 1856db8c6e9Schristos len -= op - whave; 1866db8c6e9Schristos do { 1876db8c6e9Schristos *out++ = 0; 1886db8c6e9Schristos } while (--op > whave); 1896db8c6e9Schristos if (op == 0) { 1906db8c6e9Schristos from = out - dist; 1916db8c6e9Schristos do { 1926db8c6e9Schristos *out++ = *from++; 1936db8c6e9Schristos } while (--len); 1946db8c6e9Schristos continue; 1956db8c6e9Schristos } 1966db8c6e9Schristos #endif 1976db8c6e9Schristos } 1986db8c6e9Schristos from = window; 1996db8c6e9Schristos if (wnext == 0) { /* very common case */ 200aaf4ece6Schristos from += wsize - op; 201aaf4ece6Schristos if (op < len) { /* some from window */ 202aaf4ece6Schristos len -= op; 203aaf4ece6Schristos do { 2046db8c6e9Schristos *out++ = *from++; 205aaf4ece6Schristos } while (--op); 206aaf4ece6Schristos from = out - dist; /* rest from output */ 207aaf4ece6Schristos } 208aaf4ece6Schristos } 2096db8c6e9Schristos else if (wnext < op) { /* wrap around window */ 2106db8c6e9Schristos from += wsize + wnext - op; 2116db8c6e9Schristos op -= wnext; 212aaf4ece6Schristos if (op < len) { /* some from end of window */ 213aaf4ece6Schristos len -= op; 214aaf4ece6Schristos do { 2156db8c6e9Schristos *out++ = *from++; 216aaf4ece6Schristos } while (--op); 2176db8c6e9Schristos from = window; 2186db8c6e9Schristos if (wnext < len) { /* some from start of window */ 2196db8c6e9Schristos op = wnext; 220aaf4ece6Schristos len -= op; 221aaf4ece6Schristos do { 2226db8c6e9Schristos *out++ = *from++; 223aaf4ece6Schristos } while (--op); 224aaf4ece6Schristos from = out - dist; /* rest from output */ 225aaf4ece6Schristos } 226aaf4ece6Schristos } 227aaf4ece6Schristos } 228aaf4ece6Schristos else { /* contiguous in window */ 2296db8c6e9Schristos from += wnext - op; 230aaf4ece6Schristos if (op < len) { /* some from window */ 231aaf4ece6Schristos len -= op; 232aaf4ece6Schristos do { 2336db8c6e9Schristos *out++ = *from++; 234aaf4ece6Schristos } while (--op); 235aaf4ece6Schristos from = out - dist; /* rest from output */ 236aaf4ece6Schristos } 237aaf4ece6Schristos } 238aaf4ece6Schristos while (len > 2) { 2396db8c6e9Schristos *out++ = *from++; 2406db8c6e9Schristos *out++ = *from++; 2416db8c6e9Schristos *out++ = *from++; 242aaf4ece6Schristos len -= 3; 243aaf4ece6Schristos } 244aaf4ece6Schristos if (len) { 2456db8c6e9Schristos *out++ = *from++; 246aaf4ece6Schristos if (len > 1) 2476db8c6e9Schristos *out++ = *from++; 248aaf4ece6Schristos } 249aaf4ece6Schristos } 250aaf4ece6Schristos else { 251aaf4ece6Schristos from = out - dist; /* copy direct from output */ 252aaf4ece6Schristos do { /* minimum length is three */ 2536db8c6e9Schristos *out++ = *from++; 2546db8c6e9Schristos *out++ = *from++; 2556db8c6e9Schristos *out++ = *from++; 256aaf4ece6Schristos len -= 3; 257aaf4ece6Schristos } while (len > 2); 258aaf4ece6Schristos if (len) { 2596db8c6e9Schristos *out++ = *from++; 260aaf4ece6Schristos if (len > 1) 2616db8c6e9Schristos *out++ = *from++; 262aaf4ece6Schristos } 263aaf4ece6Schristos } 264aaf4ece6Schristos } 265aaf4ece6Schristos else if ((op & 64) == 0) { /* 2nd level distance code */ 2663b1253e8Schristos here = dcode + here->val + (hold & ((1U << op) - 1)); 267aaf4ece6Schristos goto dodist; 268aaf4ece6Schristos } 269aaf4ece6Schristos else { 270b85ba082Schristos strm->msg = __UNCONST("invalid distance code"); 271aaf4ece6Schristos state->mode = BAD; 272aaf4ece6Schristos break; 273aaf4ece6Schristos } 274aaf4ece6Schristos } 275aaf4ece6Schristos else if ((op & 64) == 0) { /* 2nd level length code */ 2763b1253e8Schristos here = lcode + here->val + (hold & ((1U << op) - 1)); 277aaf4ece6Schristos goto dolen; 278aaf4ece6Schristos } 279aaf4ece6Schristos else if (op & 32) { /* end-of-block */ 280aaf4ece6Schristos Tracevv((stderr, "inflate: end of block\n")); 281aaf4ece6Schristos state->mode = TYPE; 282aaf4ece6Schristos break; 283aaf4ece6Schristos } 284aaf4ece6Schristos else { 285b85ba082Schristos strm->msg = __UNCONST("invalid literal/length code"); 286aaf4ece6Schristos state->mode = BAD; 287aaf4ece6Schristos break; 288aaf4ece6Schristos } 289aaf4ece6Schristos } while (in < last && out < end); 290aaf4ece6Schristos 291aaf4ece6Schristos /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 292aaf4ece6Schristos len = bits >> 3; 293aaf4ece6Schristos in -= len; 294aaf4ece6Schristos bits -= len << 3; 295aaf4ece6Schristos hold &= (1U << bits) - 1; 296aaf4ece6Schristos 297aaf4ece6Schristos /* update state and return */ 2986db8c6e9Schristos strm->next_in = in; 2996db8c6e9Schristos strm->next_out = out; 300aaf4ece6Schristos strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); 301aaf4ece6Schristos strm->avail_out = (unsigned)(out < end ? 302aaf4ece6Schristos 257 + (end - out) : 257 - (out - end)); 303aaf4ece6Schristos state->hold = hold; 304aaf4ece6Schristos state->bits = bits; 305aaf4ece6Schristos return; 306aaf4ece6Schristos } 307aaf4ece6Schristos 308aaf4ece6Schristos /* 309aaf4ece6Schristos inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): 310aaf4ece6Schristos - Using bit fields for code structure 311aaf4ece6Schristos - Different op definition to avoid & for extra bits (do & for table bits) 3126db8c6e9Schristos - Three separate decoding do-loops for direct, window, and wnext == 0 313aaf4ece6Schristos - Special case for distance > 1 copies to do overlapped load and store copy 314aaf4ece6Schristos - Explicit branch predictions (based on measured branch probabilities) 315aaf4ece6Schristos - Deferring match copy and interspersed it with decoding subsequent codes 316aaf4ece6Schristos - Swapping literal/length else 317aaf4ece6Schristos - Swapping window/direct else 318aaf4ece6Schristos - Larger unrolled copy loops (three is about right) 319aaf4ece6Schristos - Moving len -= 3 statement into middle of loop 320aaf4ece6Schristos */ 321aaf4ece6Schristos 322aaf4ece6Schristos #endif /* !ASMINF */ 323