1*3aa7d58aSMatthew Dillon /* inffast.c -- fast decoding
2*3aa7d58aSMatthew Dillon * Copyright (C) 1995-2008, 2010, 2013 Mark Adler
3*3aa7d58aSMatthew Dillon * For conditions of distribution and use, see copyright notice in zlib.h
4*3aa7d58aSMatthew Dillon */
5*3aa7d58aSMatthew Dillon
6*3aa7d58aSMatthew Dillon #include "hammer2_zlib_zutil.h"
7*3aa7d58aSMatthew Dillon #include "hammer2_zlib_inftrees.h"
8*3aa7d58aSMatthew Dillon #include "hammer2_zlib_inflate.h"
9*3aa7d58aSMatthew Dillon #include "hammer2_zlib_inffast.h"
10*3aa7d58aSMatthew Dillon
11*3aa7d58aSMatthew Dillon #ifndef ASMINF
12*3aa7d58aSMatthew Dillon
/* Buffer stepping helpers.

   PUP(a) yields the byte at a and then advances a; OFF is the bias that must
   be subtracted from a buffer pointer before it is first handed to PUP().

   Historically this file selected between post-increment and pre-increment:
     Pre-increment was measured as preferable on:
     - PowerPC G3 (Adler)
     - MIPS R5000 (Randers-Pehrson)
     Post-increment was measured as preferable on:
     - none
     No measurable difference:
     - Pentium III (Anderson)
     - M68060 (Nikl)

   The pre-increment form needs OFF == 1, i.e. a pointer one element *before*
   the start of the caller's buffer.  Merely computing such a pointer is
   undefined behaviour in C, so only the post-increment form is provided.
   Defining POSTINC is still accepted and simply has no further effect.
 */
#define OFF 0
#define PUP(a) (*(a)++)
31*3aa7d58aSMatthew Dillon
32*3aa7d58aSMatthew Dillon /*
33*3aa7d58aSMatthew Dillon Decode literal, length, and distance codes and write out the resulting
34*3aa7d58aSMatthew Dillon literal and match bytes until either not enough input or output is
35*3aa7d58aSMatthew Dillon available, an end-of-block is encountered, or a data error is encountered.
36*3aa7d58aSMatthew Dillon When large enough input and output buffers are supplied to inflate(), for
37*3aa7d58aSMatthew Dillon example, a 16K input buffer and a 64K output buffer, more than 95% of the
38*3aa7d58aSMatthew Dillon inflate execution time is spent in this routine.
39*3aa7d58aSMatthew Dillon
40*3aa7d58aSMatthew Dillon Entry assumptions:
41*3aa7d58aSMatthew Dillon
42*3aa7d58aSMatthew Dillon state->mode == LEN
43*3aa7d58aSMatthew Dillon strm->avail_in >= 6
44*3aa7d58aSMatthew Dillon strm->avail_out >= 258
45*3aa7d58aSMatthew Dillon start >= strm->avail_out
46*3aa7d58aSMatthew Dillon state->bits < 8
47*3aa7d58aSMatthew Dillon
48*3aa7d58aSMatthew Dillon On return, state->mode is one of:
49*3aa7d58aSMatthew Dillon
50*3aa7d58aSMatthew Dillon LEN -- ran out of enough output space or enough available input
51*3aa7d58aSMatthew Dillon TYPE -- reached end of block code, inflate() to interpret next block
52*3aa7d58aSMatthew Dillon BAD -- error in block data
53*3aa7d58aSMatthew Dillon
54*3aa7d58aSMatthew Dillon Notes:
55*3aa7d58aSMatthew Dillon
56*3aa7d58aSMatthew Dillon - The maximum input bits used by a length/distance pair is 15 bits for the
57*3aa7d58aSMatthew Dillon length code, 5 bits for the length extra, 15 bits for the distance code,
58*3aa7d58aSMatthew Dillon and 13 bits for the distance extra. This totals 48 bits, or six bytes.
59*3aa7d58aSMatthew Dillon Therefore if strm->avail_in >= 6, then there is enough input to avoid
60*3aa7d58aSMatthew Dillon checking for available input while decoding.
61*3aa7d58aSMatthew Dillon
62*3aa7d58aSMatthew Dillon - The maximum bytes that a single length/distance pair can output is 258
63*3aa7d58aSMatthew Dillon bytes, which is the maximum length that can be coded. inflate_fast()
64*3aa7d58aSMatthew Dillon requires strm->avail_out >= 258 for each loop to avoid checking for
65*3aa7d58aSMatthew Dillon output space.
66*3aa7d58aSMatthew Dillon */
67*3aa7d58aSMatthew Dillon void
68*3aa7d58aSMatthew Dillon ZLIB_INTERNAL
inflate_fast(z_streamp strm,unsigned start)69*3aa7d58aSMatthew Dillon inflate_fast(z_streamp strm, unsigned start) /* inflate()'s starting value for strm->avail_out */
70*3aa7d58aSMatthew Dillon {
71*3aa7d58aSMatthew Dillon struct inflate_state FAR *state;
72*3aa7d58aSMatthew Dillon z_const unsigned char FAR *in; /* local strm->next_in */
73*3aa7d58aSMatthew Dillon z_const unsigned char FAR *last; /* have enough input while in < last */
74*3aa7d58aSMatthew Dillon unsigned char FAR *out; /* local strm->next_out */
75*3aa7d58aSMatthew Dillon unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
76*3aa7d58aSMatthew Dillon unsigned char FAR *end; /* while out < end, enough space available */
77*3aa7d58aSMatthew Dillon #ifdef INFLATE_STRICT
78*3aa7d58aSMatthew Dillon unsigned dmax; /* maximum distance from zlib header */
79*3aa7d58aSMatthew Dillon #endif
80*3aa7d58aSMatthew Dillon unsigned wsize; /* window size or zero if not using window */
81*3aa7d58aSMatthew Dillon unsigned whave; /* valid bytes in the window */
82*3aa7d58aSMatthew Dillon unsigned wnext; /* window write index */
83*3aa7d58aSMatthew Dillon unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
84*3aa7d58aSMatthew Dillon unsigned long hold; /* local strm->hold */
85*3aa7d58aSMatthew Dillon unsigned bits; /* local strm->bits */
86*3aa7d58aSMatthew Dillon code const FAR *lcode; /* local strm->lencode */
87*3aa7d58aSMatthew Dillon code const FAR *dcode; /* local strm->distcode */
88*3aa7d58aSMatthew Dillon unsigned lmask; /* mask for first level of length codes */
89*3aa7d58aSMatthew Dillon unsigned dmask; /* mask for first level of distance codes */
90*3aa7d58aSMatthew Dillon code here; /* retrieved table entry */
91*3aa7d58aSMatthew Dillon unsigned op; /* code bits, operation, extra bits, or */
92*3aa7d58aSMatthew Dillon /* window position, window bytes to copy */
93*3aa7d58aSMatthew Dillon unsigned len; /* match length, unused bytes */
94*3aa7d58aSMatthew Dillon unsigned dist; /* match distance */
95*3aa7d58aSMatthew Dillon unsigned char FAR *from; /* where to copy match from */
96*3aa7d58aSMatthew Dillon
97*3aa7d58aSMatthew Dillon /* copy state to local variables */
98*3aa7d58aSMatthew Dillon state = (struct inflate_state FAR *)strm->state;
99*3aa7d58aSMatthew Dillon in = strm->next_in - OFF;
100*3aa7d58aSMatthew Dillon last = in + (strm->avail_in - 5);
101*3aa7d58aSMatthew Dillon out = strm->next_out - OFF;
102*3aa7d58aSMatthew Dillon beg = out - (start - strm->avail_out);
103*3aa7d58aSMatthew Dillon end = out + (strm->avail_out - 257);
104*3aa7d58aSMatthew Dillon #ifdef INFLATE_STRICT
105*3aa7d58aSMatthew Dillon dmax = state->dmax;
106*3aa7d58aSMatthew Dillon #endif
107*3aa7d58aSMatthew Dillon wsize = state->wsize;
108*3aa7d58aSMatthew Dillon whave = state->whave;
109*3aa7d58aSMatthew Dillon wnext = state->wnext;
110*3aa7d58aSMatthew Dillon window = state->window;
111*3aa7d58aSMatthew Dillon hold = state->hold;
112*3aa7d58aSMatthew Dillon bits = state->bits;
113*3aa7d58aSMatthew Dillon lcode = state->lencode;
114*3aa7d58aSMatthew Dillon dcode = state->distcode;
115*3aa7d58aSMatthew Dillon lmask = (1U << state->lenbits) - 1;
116*3aa7d58aSMatthew Dillon dmask = (1U << state->distbits) - 1;
117*3aa7d58aSMatthew Dillon
118*3aa7d58aSMatthew Dillon /* decode literals and length/distances until end-of-block or not enough
119*3aa7d58aSMatthew Dillon input data or output space */
120*3aa7d58aSMatthew Dillon do {
121*3aa7d58aSMatthew Dillon if (bits < 15) {
122*3aa7d58aSMatthew Dillon hold += (unsigned long)(PUP(in)) << bits;
123*3aa7d58aSMatthew Dillon bits += 8;
124*3aa7d58aSMatthew Dillon hold += (unsigned long)(PUP(in)) << bits;
125*3aa7d58aSMatthew Dillon bits += 8;
126*3aa7d58aSMatthew Dillon }
127*3aa7d58aSMatthew Dillon here = lcode[hold & lmask];
128*3aa7d58aSMatthew Dillon dolen:
129*3aa7d58aSMatthew Dillon op = (unsigned)(here.bits);
130*3aa7d58aSMatthew Dillon hold >>= op;
131*3aa7d58aSMatthew Dillon bits -= op;
132*3aa7d58aSMatthew Dillon op = (unsigned)(here.op);
133*3aa7d58aSMatthew Dillon if (op == 0) { /* literal */
134*3aa7d58aSMatthew Dillon Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
135*3aa7d58aSMatthew Dillon "inflate: literal '%c'\n" :
136*3aa7d58aSMatthew Dillon "inflate: literal 0x%02x\n", here.val));
137*3aa7d58aSMatthew Dillon PUP(out) = (unsigned char)(here.val);
138*3aa7d58aSMatthew Dillon }
139*3aa7d58aSMatthew Dillon else if (op & 16) { /* length base */
140*3aa7d58aSMatthew Dillon len = (unsigned)(here.val);
141*3aa7d58aSMatthew Dillon op &= 15; /* number of extra bits */
142*3aa7d58aSMatthew Dillon if (op) {
143*3aa7d58aSMatthew Dillon if (bits < op) {
144*3aa7d58aSMatthew Dillon hold += (unsigned long)(PUP(in)) << bits;
145*3aa7d58aSMatthew Dillon bits += 8;
146*3aa7d58aSMatthew Dillon }
147*3aa7d58aSMatthew Dillon len += (unsigned)hold & ((1U << op) - 1);
148*3aa7d58aSMatthew Dillon hold >>= op;
149*3aa7d58aSMatthew Dillon bits -= op;
150*3aa7d58aSMatthew Dillon }
151*3aa7d58aSMatthew Dillon Tracevv((stderr, "inflate: length %u\n", len));
152*3aa7d58aSMatthew Dillon if (bits < 15) {
153*3aa7d58aSMatthew Dillon hold += (unsigned long)(PUP(in)) << bits;
154*3aa7d58aSMatthew Dillon bits += 8;
155*3aa7d58aSMatthew Dillon hold += (unsigned long)(PUP(in)) << bits;
156*3aa7d58aSMatthew Dillon bits += 8;
157*3aa7d58aSMatthew Dillon }
158*3aa7d58aSMatthew Dillon here = dcode[hold & dmask];
159*3aa7d58aSMatthew Dillon dodist:
160*3aa7d58aSMatthew Dillon op = (unsigned)(here.bits);
161*3aa7d58aSMatthew Dillon hold >>= op;
162*3aa7d58aSMatthew Dillon bits -= op;
163*3aa7d58aSMatthew Dillon op = (unsigned)(here.op);
164*3aa7d58aSMatthew Dillon if (op & 16) { /* distance base */
165*3aa7d58aSMatthew Dillon dist = (unsigned)(here.val);
166*3aa7d58aSMatthew Dillon op &= 15; /* number of extra bits */
167*3aa7d58aSMatthew Dillon if (bits < op) {
168*3aa7d58aSMatthew Dillon hold += (unsigned long)(PUP(in)) << bits;
169*3aa7d58aSMatthew Dillon bits += 8;
170*3aa7d58aSMatthew Dillon if (bits < op) {
171*3aa7d58aSMatthew Dillon hold += (unsigned long)(PUP(in)) << bits;
172*3aa7d58aSMatthew Dillon bits += 8;
173*3aa7d58aSMatthew Dillon }
174*3aa7d58aSMatthew Dillon }
175*3aa7d58aSMatthew Dillon dist += (unsigned)hold & ((1U << op) - 1);
176*3aa7d58aSMatthew Dillon #ifdef INFLATE_STRICT
177*3aa7d58aSMatthew Dillon if (dist > dmax) {
178*3aa7d58aSMatthew Dillon strm->msg = (char *)"invalid distance too far back";
179*3aa7d58aSMatthew Dillon state->mode = BAD;
180*3aa7d58aSMatthew Dillon break;
181*3aa7d58aSMatthew Dillon }
182*3aa7d58aSMatthew Dillon #endif
183*3aa7d58aSMatthew Dillon hold >>= op;
184*3aa7d58aSMatthew Dillon bits -= op;
185*3aa7d58aSMatthew Dillon Tracevv((stderr, "inflate: distance %u\n", dist));
186*3aa7d58aSMatthew Dillon op = (unsigned)(out - beg); /* max distance in output */
187*3aa7d58aSMatthew Dillon if (dist > op) { /* see if copy from window */
188*3aa7d58aSMatthew Dillon op = dist - op; /* distance back in window */
189*3aa7d58aSMatthew Dillon if (op > whave) {
190*3aa7d58aSMatthew Dillon if (state->sane) {
191*3aa7d58aSMatthew Dillon strm->msg =
192*3aa7d58aSMatthew Dillon (const char *)"invalid distance too far back";
193*3aa7d58aSMatthew Dillon state->mode = BAD;
194*3aa7d58aSMatthew Dillon break;
195*3aa7d58aSMatthew Dillon }
196*3aa7d58aSMatthew Dillon #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
197*3aa7d58aSMatthew Dillon if (len <= op - whave) {
198*3aa7d58aSMatthew Dillon do {
199*3aa7d58aSMatthew Dillon PUP(out) = 0;
200*3aa7d58aSMatthew Dillon } while (--len);
201*3aa7d58aSMatthew Dillon continue;
202*3aa7d58aSMatthew Dillon }
203*3aa7d58aSMatthew Dillon len -= op - whave;
204*3aa7d58aSMatthew Dillon do {
205*3aa7d58aSMatthew Dillon PUP(out) = 0;
206*3aa7d58aSMatthew Dillon } while (--op > whave);
207*3aa7d58aSMatthew Dillon if (op == 0) {
208*3aa7d58aSMatthew Dillon from = out - dist;
209*3aa7d58aSMatthew Dillon do {
210*3aa7d58aSMatthew Dillon PUP(out) = PUP(from);
211*3aa7d58aSMatthew Dillon } while (--len);
212*3aa7d58aSMatthew Dillon continue;
213*3aa7d58aSMatthew Dillon }
214*3aa7d58aSMatthew Dillon #endif
215*3aa7d58aSMatthew Dillon }
216*3aa7d58aSMatthew Dillon from = window - OFF;
217*3aa7d58aSMatthew Dillon if (wnext == 0) { /* very common case */
218*3aa7d58aSMatthew Dillon from += wsize - op;
219*3aa7d58aSMatthew Dillon if (op < len) { /* some from window */
220*3aa7d58aSMatthew Dillon len -= op;
221*3aa7d58aSMatthew Dillon do {
222*3aa7d58aSMatthew Dillon PUP(out) = PUP(from);
223*3aa7d58aSMatthew Dillon } while (--op);
224*3aa7d58aSMatthew Dillon from = out - dist; /* rest from output */
225*3aa7d58aSMatthew Dillon }
226*3aa7d58aSMatthew Dillon }
227*3aa7d58aSMatthew Dillon else if (wnext < op) { /* wrap around window */
228*3aa7d58aSMatthew Dillon from += wsize + wnext - op;
229*3aa7d58aSMatthew Dillon op -= wnext;
230*3aa7d58aSMatthew Dillon if (op < len) { /* some from end of window */
231*3aa7d58aSMatthew Dillon len -= op;
232*3aa7d58aSMatthew Dillon do {
233*3aa7d58aSMatthew Dillon PUP(out) = PUP(from);
234*3aa7d58aSMatthew Dillon } while (--op);
235*3aa7d58aSMatthew Dillon from = window - OFF;
236*3aa7d58aSMatthew Dillon if (wnext < len) { /* some from start of window */
237*3aa7d58aSMatthew Dillon op = wnext;
238*3aa7d58aSMatthew Dillon len -= op;
239*3aa7d58aSMatthew Dillon do {
240*3aa7d58aSMatthew Dillon PUP(out) = PUP(from);
241*3aa7d58aSMatthew Dillon } while (--op);
242*3aa7d58aSMatthew Dillon from = out - dist; /* rest from output */
243*3aa7d58aSMatthew Dillon }
244*3aa7d58aSMatthew Dillon }
245*3aa7d58aSMatthew Dillon }
246*3aa7d58aSMatthew Dillon else { /* contiguous in window */
247*3aa7d58aSMatthew Dillon from += wnext - op;
248*3aa7d58aSMatthew Dillon if (op < len) { /* some from window */
249*3aa7d58aSMatthew Dillon len -= op;
250*3aa7d58aSMatthew Dillon do {
251*3aa7d58aSMatthew Dillon PUP(out) = PUP(from);
252*3aa7d58aSMatthew Dillon } while (--op);
253*3aa7d58aSMatthew Dillon from = out - dist; /* rest from output */
254*3aa7d58aSMatthew Dillon }
255*3aa7d58aSMatthew Dillon }
256*3aa7d58aSMatthew Dillon while (len > 2) {
257*3aa7d58aSMatthew Dillon PUP(out) = PUP(from);
258*3aa7d58aSMatthew Dillon PUP(out) = PUP(from);
259*3aa7d58aSMatthew Dillon PUP(out) = PUP(from);
260*3aa7d58aSMatthew Dillon len -= 3;
261*3aa7d58aSMatthew Dillon }
262*3aa7d58aSMatthew Dillon if (len) {
263*3aa7d58aSMatthew Dillon PUP(out) = PUP(from);
264*3aa7d58aSMatthew Dillon if (len > 1)
265*3aa7d58aSMatthew Dillon PUP(out) = PUP(from);
266*3aa7d58aSMatthew Dillon }
267*3aa7d58aSMatthew Dillon }
268*3aa7d58aSMatthew Dillon else {
269*3aa7d58aSMatthew Dillon from = out - dist; /* copy direct from output */
270*3aa7d58aSMatthew Dillon do { /* minimum length is three */
271*3aa7d58aSMatthew Dillon PUP(out) = PUP(from);
272*3aa7d58aSMatthew Dillon PUP(out) = PUP(from);
273*3aa7d58aSMatthew Dillon PUP(out) = PUP(from);
274*3aa7d58aSMatthew Dillon len -= 3;
275*3aa7d58aSMatthew Dillon } while (len > 2);
276*3aa7d58aSMatthew Dillon if (len) {
277*3aa7d58aSMatthew Dillon PUP(out) = PUP(from);
278*3aa7d58aSMatthew Dillon if (len > 1)
279*3aa7d58aSMatthew Dillon PUP(out) = PUP(from);
280*3aa7d58aSMatthew Dillon }
281*3aa7d58aSMatthew Dillon }
282*3aa7d58aSMatthew Dillon }
283*3aa7d58aSMatthew Dillon else if ((op & 64) == 0) { /* 2nd level distance code */
284*3aa7d58aSMatthew Dillon here = dcode[here.val + (hold & ((1U << op) - 1))];
285*3aa7d58aSMatthew Dillon goto dodist;
286*3aa7d58aSMatthew Dillon }
287*3aa7d58aSMatthew Dillon else {
288*3aa7d58aSMatthew Dillon strm->msg = (const char *)"invalid distance code";
289*3aa7d58aSMatthew Dillon state->mode = BAD;
290*3aa7d58aSMatthew Dillon break;
291*3aa7d58aSMatthew Dillon }
292*3aa7d58aSMatthew Dillon }
293*3aa7d58aSMatthew Dillon else if ((op & 64) == 0) { /* 2nd level length code */
294*3aa7d58aSMatthew Dillon here = lcode[here.val + (hold & ((1U << op) - 1))];
295*3aa7d58aSMatthew Dillon goto dolen;
296*3aa7d58aSMatthew Dillon }
297*3aa7d58aSMatthew Dillon else if (op & 32) { /* end-of-block */
298*3aa7d58aSMatthew Dillon Tracevv((stderr, "inflate: end of block\n"));
299*3aa7d58aSMatthew Dillon state->mode = TYPE;
300*3aa7d58aSMatthew Dillon break;
301*3aa7d58aSMatthew Dillon }
302*3aa7d58aSMatthew Dillon else {
303*3aa7d58aSMatthew Dillon strm->msg = (const char *)"invalid literal/length code";
304*3aa7d58aSMatthew Dillon state->mode = BAD;
305*3aa7d58aSMatthew Dillon break;
306*3aa7d58aSMatthew Dillon }
307*3aa7d58aSMatthew Dillon } while (in < last && out < end);
308*3aa7d58aSMatthew Dillon
309*3aa7d58aSMatthew Dillon /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
310*3aa7d58aSMatthew Dillon len = bits >> 3;
311*3aa7d58aSMatthew Dillon in -= len;
312*3aa7d58aSMatthew Dillon bits -= len << 3;
313*3aa7d58aSMatthew Dillon hold &= (1U << bits) - 1;
314*3aa7d58aSMatthew Dillon
315*3aa7d58aSMatthew Dillon /* update state and return */
316*3aa7d58aSMatthew Dillon strm->next_in = in + OFF;
317*3aa7d58aSMatthew Dillon strm->next_out = out + OFF;
318*3aa7d58aSMatthew Dillon strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
319*3aa7d58aSMatthew Dillon strm->avail_out = (unsigned)(out < end ?
320*3aa7d58aSMatthew Dillon 257 + (end - out) : 257 - (out - end));
321*3aa7d58aSMatthew Dillon state->hold = hold;
322*3aa7d58aSMatthew Dillon state->bits = bits;
323*3aa7d58aSMatthew Dillon return;
324*3aa7d58aSMatthew Dillon }
325*3aa7d58aSMatthew Dillon
326*3aa7d58aSMatthew Dillon /*
327*3aa7d58aSMatthew Dillon inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
328*3aa7d58aSMatthew Dillon - Using bit fields for code structure
329*3aa7d58aSMatthew Dillon - Different op definition to avoid & for extra bits (do & for table bits)
330*3aa7d58aSMatthew Dillon - Three separate decoding do-loops for direct, window, and wnext == 0
331*3aa7d58aSMatthew Dillon - Special case for distance > 1 copies to do overlapped load and store copy
332*3aa7d58aSMatthew Dillon - Explicit branch predictions (based on measured branch probabilities)
333*3aa7d58aSMatthew Dillon - Deferring match copy and interspersed it with decoding subsequent codes
334*3aa7d58aSMatthew Dillon - Swapping literal/length else
335*3aa7d58aSMatthew Dillon - Swapping window/direct else
336*3aa7d58aSMatthew Dillon - Larger unrolled copy loops (three is about right)
337*3aa7d58aSMatthew Dillon - Moving len -= 3 statement into middle of loop
338*3aa7d58aSMatthew Dillon */
339*3aa7d58aSMatthew Dillon
340*3aa7d58aSMatthew Dillon #endif /* !ASMINF */
341