1*593dc095SDavid du Colombier /* inffast.c -- fast decoding
2*593dc095SDavid du Colombier * Copyright (C) 1995-2004 Mark Adler
37dd7cddfSDavid du Colombier * For conditions of distribution and use, see copyright notice in zlib.h
47dd7cddfSDavid du Colombier */
57dd7cddfSDavid du Colombier
67dd7cddfSDavid du Colombier #include "zutil.h"
77dd7cddfSDavid du Colombier #include "inftrees.h"
8*593dc095SDavid du Colombier #include "inflate.h"
97dd7cddfSDavid du Colombier #include "inffast.h"
107dd7cddfSDavid du Colombier
11*593dc095SDavid du Colombier #ifndef ASMINF
127dd7cddfSDavid du Colombier
13*593dc095SDavid du Colombier /* Allow machine dependent optimization for post-increment or pre-increment.
14*593dc095SDavid du Colombier Based on testing to date,
15*593dc095SDavid du Colombier Pre-increment preferred for:
16*593dc095SDavid du Colombier - PowerPC G3 (Adler)
17*593dc095SDavid du Colombier - MIPS R5000 (Randers-Pehrson)
18*593dc095SDavid du Colombier Post-increment preferred for:
19*593dc095SDavid du Colombier - none
20*593dc095SDavid du Colombier No measurable difference:
21*593dc095SDavid du Colombier - Pentium III (Anderson)
22*593dc095SDavid du Colombier - M68060 (Nikl)
23*593dc095SDavid du Colombier */
24*593dc095SDavid du Colombier #ifdef POSTINC
25*593dc095SDavid du Colombier # define OFF 0
26*593dc095SDavid du Colombier # define PUP(a) *(a)++
27*593dc095SDavid du Colombier #else
28*593dc095SDavid du Colombier # define OFF 1
29*593dc095SDavid du Colombier # define PUP(a) *++(a)
30*593dc095SDavid du Colombier #endif
317dd7cddfSDavid du Colombier
32*593dc095SDavid du Colombier /*
33*593dc095SDavid du Colombier Decode literal, length, and distance codes and write out the resulting
34*593dc095SDavid du Colombier literal and match bytes until either not enough input or output is
35*593dc095SDavid du Colombier available, an end-of-block is encountered, or a data error is encountered.
36*593dc095SDavid du Colombier When large enough input and output buffers are supplied to inflate(), for
37*593dc095SDavid du Colombier example, a 16K input buffer and a 64K output buffer, more than 95% of the
38*593dc095SDavid du Colombier inflate execution time is spent in this routine.
397dd7cddfSDavid du Colombier
40*593dc095SDavid du Colombier Entry assumptions:
417dd7cddfSDavid du Colombier
42*593dc095SDavid du Colombier state->mode == LEN
43*593dc095SDavid du Colombier strm->avail_in >= 6
44*593dc095SDavid du Colombier strm->avail_out >= 258
45*593dc095SDavid du Colombier start >= strm->avail_out
46*593dc095SDavid du Colombier state->bits < 8
47*593dc095SDavid du Colombier
48*593dc095SDavid du Colombier On return, state->mode is one of:
49*593dc095SDavid du Colombier
50*593dc095SDavid du Colombier LEN -- ran out of enough output space or enough available input
51*593dc095SDavid du Colombier TYPE -- reached end of block code, inflate() to interpret next block
52*593dc095SDavid du Colombier BAD -- error in block data
53*593dc095SDavid du Colombier
54*593dc095SDavid du Colombier Notes:
55*593dc095SDavid du Colombier
56*593dc095SDavid du Colombier - The maximum input bits used by a length/distance pair is 15 bits for the
57*593dc095SDavid du Colombier length code, 5 bits for the length extra, 15 bits for the distance code,
58*593dc095SDavid du Colombier and 13 bits for the distance extra. This totals 48 bits, or six bytes.
59*593dc095SDavid du Colombier Therefore if strm->avail_in >= 6, then there is enough input to avoid
60*593dc095SDavid du Colombier checking for available input while decoding.
61*593dc095SDavid du Colombier
62*593dc095SDavid du Colombier - The maximum bytes that a single length/distance pair can output is 258
63*593dc095SDavid du Colombier bytes, which is the maximum length that can be coded. inflate_fast()
64*593dc095SDavid du Colombier requires strm->avail_out >= 258 for each loop to avoid checking for
65*593dc095SDavid du Colombier output space.
66*593dc095SDavid du Colombier */
inflate_fast(strm,start)67*593dc095SDavid du Colombier void inflate_fast(strm, start)
68*593dc095SDavid du Colombier z_streamp strm;
69*593dc095SDavid du Colombier unsigned start; /* inflate()'s starting value for strm->avail_out */
707dd7cddfSDavid du Colombier {
71*593dc095SDavid du Colombier struct inflate_state FAR *state;
72*593dc095SDavid du Colombier unsigned char FAR *in; /* local strm->next_in */
73*593dc095SDavid du Colombier unsigned char FAR *last; /* while in < last, enough input available */
74*593dc095SDavid du Colombier unsigned char FAR *out; /* local strm->next_out */
75*593dc095SDavid du Colombier unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
76*593dc095SDavid du Colombier unsigned char FAR *end; /* while out < end, enough space available */
77*593dc095SDavid du Colombier unsigned wsize; /* window size or zero if not using window */
78*593dc095SDavid du Colombier unsigned whave; /* valid bytes in the window */
79*593dc095SDavid du Colombier unsigned write; /* window write index */
80*593dc095SDavid du Colombier unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
81*593dc095SDavid du Colombier unsigned long hold; /* local strm->hold */
82*593dc095SDavid du Colombier unsigned bits; /* local strm->bits */
83*593dc095SDavid du Colombier code const FAR *lcode; /* local strm->lencode */
84*593dc095SDavid du Colombier code const FAR *dcode; /* local strm->distcode */
85*593dc095SDavid du Colombier unsigned lmask; /* mask for first level of length codes */
86*593dc095SDavid du Colombier unsigned dmask; /* mask for first level of distance codes */
87*593dc095SDavid du Colombier code this; /* retrieved table entry */
88*593dc095SDavid du Colombier unsigned op; /* code bits, operation, extra bits, or */
89*593dc095SDavid du Colombier /* window position, window bytes to copy */
90*593dc095SDavid du Colombier unsigned len; /* match length, unused bytes */
91*593dc095SDavid du Colombier unsigned dist; /* match distance */
92*593dc095SDavid du Colombier unsigned char FAR *from; /* where to copy match from */
937dd7cddfSDavid du Colombier
94*593dc095SDavid du Colombier /* copy state to local variables */
95*593dc095SDavid du Colombier state = (struct inflate_state FAR *)strm->state;
96*593dc095SDavid du Colombier in = strm->next_in - OFF;
97*593dc095SDavid du Colombier last = in + (strm->avail_in - 5);
98*593dc095SDavid du Colombier out = strm->next_out - OFF;
99*593dc095SDavid du Colombier beg = out - (start - strm->avail_out);
100*593dc095SDavid du Colombier end = out + (strm->avail_out - 257);
101*593dc095SDavid du Colombier wsize = state->wsize;
102*593dc095SDavid du Colombier whave = state->whave;
103*593dc095SDavid du Colombier write = state->write;
104*593dc095SDavid du Colombier window = state->window;
105*593dc095SDavid du Colombier hold = state->hold;
106*593dc095SDavid du Colombier bits = state->bits;
107*593dc095SDavid du Colombier lcode = state->lencode;
108*593dc095SDavid du Colombier dcode = state->distcode;
109*593dc095SDavid du Colombier lmask = (1U << state->lenbits) - 1;
110*593dc095SDavid du Colombier dmask = (1U << state->distbits) - 1;
1117dd7cddfSDavid du Colombier
112*593dc095SDavid du Colombier /* decode literals and length/distances until end-of-block or not enough
113*593dc095SDavid du Colombier input data or output space */
1147dd7cddfSDavid du Colombier do {
115*593dc095SDavid du Colombier if (bits < 15) {
116*593dc095SDavid du Colombier hold += (unsigned long)(PUP(in)) << bits;
117*593dc095SDavid du Colombier bits += 8;
118*593dc095SDavid du Colombier hold += (unsigned long)(PUP(in)) << bits;
119*593dc095SDavid du Colombier bits += 8;
1207dd7cddfSDavid du Colombier }
121*593dc095SDavid du Colombier this = lcode[hold & lmask];
122*593dc095SDavid du Colombier dolen:
123*593dc095SDavid du Colombier op = (unsigned)(this.bits);
124*593dc095SDavid du Colombier hold >>= op;
125*593dc095SDavid du Colombier bits -= op;
126*593dc095SDavid du Colombier op = (unsigned)(this.op);
127*593dc095SDavid du Colombier if (op == 0) { /* literal */
128*593dc095SDavid du Colombier Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
129*593dc095SDavid du Colombier "inflate: literal '%c'\n" :
130*593dc095SDavid du Colombier "inflate: literal 0x%02x\n", this.val));
131*593dc095SDavid du Colombier PUP(out) = (unsigned char)(this.val);
132*593dc095SDavid du Colombier }
133*593dc095SDavid du Colombier else if (op & 16) { /* length base */
134*593dc095SDavid du Colombier len = (unsigned)(this.val);
135*593dc095SDavid du Colombier op &= 15; /* number of extra bits */
136*593dc095SDavid du Colombier if (op) {
137*593dc095SDavid du Colombier if (bits < op) {
138*593dc095SDavid du Colombier hold += (unsigned long)(PUP(in)) << bits;
139*593dc095SDavid du Colombier bits += 8;
140*593dc095SDavid du Colombier }
141*593dc095SDavid du Colombier len += (unsigned)hold & ((1U << op) - 1);
142*593dc095SDavid du Colombier hold >>= op;
143*593dc095SDavid du Colombier bits -= op;
144*593dc095SDavid du Colombier }
145*593dc095SDavid du Colombier Tracevv((stderr, "inflate: length %u\n", len));
146*593dc095SDavid du Colombier if (bits < 15) {
147*593dc095SDavid du Colombier hold += (unsigned long)(PUP(in)) << bits;
148*593dc095SDavid du Colombier bits += 8;
149*593dc095SDavid du Colombier hold += (unsigned long)(PUP(in)) << bits;
150*593dc095SDavid du Colombier bits += 8;
151*593dc095SDavid du Colombier }
152*593dc095SDavid du Colombier this = dcode[hold & dmask];
153*593dc095SDavid du Colombier dodist:
154*593dc095SDavid du Colombier op = (unsigned)(this.bits);
155*593dc095SDavid du Colombier hold >>= op;
156*593dc095SDavid du Colombier bits -= op;
157*593dc095SDavid du Colombier op = (unsigned)(this.op);
158*593dc095SDavid du Colombier if (op & 16) { /* distance base */
159*593dc095SDavid du Colombier dist = (unsigned)(this.val);
160*593dc095SDavid du Colombier op &= 15; /* number of extra bits */
161*593dc095SDavid du Colombier if (bits < op) {
162*593dc095SDavid du Colombier hold += (unsigned long)(PUP(in)) << bits;
163*593dc095SDavid du Colombier bits += 8;
164*593dc095SDavid du Colombier if (bits < op) {
165*593dc095SDavid du Colombier hold += (unsigned long)(PUP(in)) << bits;
166*593dc095SDavid du Colombier bits += 8;
1677dd7cddfSDavid du Colombier }
1687dd7cddfSDavid du Colombier }
169*593dc095SDavid du Colombier dist += (unsigned)hold & ((1U << op) - 1);
170*593dc095SDavid du Colombier hold >>= op;
171*593dc095SDavid du Colombier bits -= op;
172*593dc095SDavid du Colombier Tracevv((stderr, "inflate: distance %u\n", dist));
173*593dc095SDavid du Colombier op = (unsigned)(out - beg); /* max distance in output */
174*593dc095SDavid du Colombier if (dist > op) { /* see if copy from window */
175*593dc095SDavid du Colombier op = dist - op; /* distance back in window */
176*593dc095SDavid du Colombier if (op > whave) {
177*593dc095SDavid du Colombier strm->msg = (char *)"invalid distance too far back";
178*593dc095SDavid du Colombier state->mode = BAD;
1797dd7cddfSDavid du Colombier break;
1807dd7cddfSDavid du Colombier }
181*593dc095SDavid du Colombier from = window - OFF;
182*593dc095SDavid du Colombier if (write == 0) { /* very common case */
183*593dc095SDavid du Colombier from += wsize - op;
184*593dc095SDavid du Colombier if (op < len) { /* some from window */
185*593dc095SDavid du Colombier len -= op;
186*593dc095SDavid du Colombier do {
187*593dc095SDavid du Colombier PUP(out) = PUP(from);
188*593dc095SDavid du Colombier } while (--op);
189*593dc095SDavid du Colombier from = out - dist; /* rest from output */
1907dd7cddfSDavid du Colombier }
1917dd7cddfSDavid du Colombier }
192*593dc095SDavid du Colombier else if (write < op) { /* wrap around window */
193*593dc095SDavid du Colombier from += wsize + write - op;
194*593dc095SDavid du Colombier op -= write;
195*593dc095SDavid du Colombier if (op < len) { /* some from end of window */
196*593dc095SDavid du Colombier len -= op;
197*593dc095SDavid du Colombier do {
198*593dc095SDavid du Colombier PUP(out) = PUP(from);
199*593dc095SDavid du Colombier } while (--op);
200*593dc095SDavid du Colombier from = window - OFF;
201*593dc095SDavid du Colombier if (write < len) { /* some from start of window */
202*593dc095SDavid du Colombier op = write;
203*593dc095SDavid du Colombier len -= op;
204*593dc095SDavid du Colombier do {
205*593dc095SDavid du Colombier PUP(out) = PUP(from);
206*593dc095SDavid du Colombier } while (--op);
207*593dc095SDavid du Colombier from = out - dist; /* rest from output */
208*593dc095SDavid du Colombier }
209*593dc095SDavid du Colombier }
210*593dc095SDavid du Colombier }
211*593dc095SDavid du Colombier else { /* contiguous in window */
212*593dc095SDavid du Colombier from += write - op;
213*593dc095SDavid du Colombier if (op < len) { /* some from window */
214*593dc095SDavid du Colombier len -= op;
215*593dc095SDavid du Colombier do {
216*593dc095SDavid du Colombier PUP(out) = PUP(from);
217*593dc095SDavid du Colombier } while (--op);
218*593dc095SDavid du Colombier from = out - dist; /* rest from output */
219*593dc095SDavid du Colombier }
220*593dc095SDavid du Colombier }
221*593dc095SDavid du Colombier while (len > 2) {
222*593dc095SDavid du Colombier PUP(out) = PUP(from);
223*593dc095SDavid du Colombier PUP(out) = PUP(from);
224*593dc095SDavid du Colombier PUP(out) = PUP(from);
225*593dc095SDavid du Colombier len -= 3;
226*593dc095SDavid du Colombier }
227*593dc095SDavid du Colombier if (len) {
228*593dc095SDavid du Colombier PUP(out) = PUP(from);
229*593dc095SDavid du Colombier if (len > 1)
230*593dc095SDavid du Colombier PUP(out) = PUP(from);
231*593dc095SDavid du Colombier }
232*593dc095SDavid du Colombier }
233*593dc095SDavid du Colombier else {
234*593dc095SDavid du Colombier from = out - dist; /* copy direct from output */
235*593dc095SDavid du Colombier do { /* minimum length is three */
236*593dc095SDavid du Colombier PUP(out) = PUP(from);
237*593dc095SDavid du Colombier PUP(out) = PUP(from);
238*593dc095SDavid du Colombier PUP(out) = PUP(from);
239*593dc095SDavid du Colombier len -= 3;
240*593dc095SDavid du Colombier } while (len > 2);
241*593dc095SDavid du Colombier if (len) {
242*593dc095SDavid du Colombier PUP(out) = PUP(from);
243*593dc095SDavid du Colombier if (len > 1)
244*593dc095SDavid du Colombier PUP(out) = PUP(from);
245*593dc095SDavid du Colombier }
246*593dc095SDavid du Colombier }
247*593dc095SDavid du Colombier }
248*593dc095SDavid du Colombier else if ((op & 64) == 0) { /* 2nd level distance code */
249*593dc095SDavid du Colombier this = dcode[this.val + (hold & ((1U << op) - 1))];
250*593dc095SDavid du Colombier goto dodist;
251*593dc095SDavid du Colombier }
252*593dc095SDavid du Colombier else {
253*593dc095SDavid du Colombier strm->msg = (char *)"invalid distance code";
254*593dc095SDavid du Colombier state->mode = BAD;
2557dd7cddfSDavid du Colombier break;
2567dd7cddfSDavid du Colombier }
2577dd7cddfSDavid du Colombier }
258*593dc095SDavid du Colombier else if ((op & 64) == 0) { /* 2nd level length code */
259*593dc095SDavid du Colombier this = lcode[this.val + (hold & ((1U << op) - 1))];
260*593dc095SDavid du Colombier goto dolen;
2617dd7cddfSDavid du Colombier }
262*593dc095SDavid du Colombier else if (op & 32) { /* end-of-block */
263*593dc095SDavid du Colombier Tracevv((stderr, "inflate: end of block\n"));
264*593dc095SDavid du Colombier state->mode = TYPE;
265*593dc095SDavid du Colombier break;
2667dd7cddfSDavid du Colombier }
267*593dc095SDavid du Colombier else {
268*593dc095SDavid du Colombier strm->msg = (char *)"invalid literal/length code";
269*593dc095SDavid du Colombier state->mode = BAD;
270*593dc095SDavid du Colombier break;
271*593dc095SDavid du Colombier }
272*593dc095SDavid du Colombier } while (in < last && out < end);
2737dd7cddfSDavid du Colombier
274*593dc095SDavid du Colombier /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
275*593dc095SDavid du Colombier len = bits >> 3;
276*593dc095SDavid du Colombier in -= len;
277*593dc095SDavid du Colombier bits -= len << 3;
278*593dc095SDavid du Colombier hold &= (1U << bits) - 1;
279*593dc095SDavid du Colombier
280*593dc095SDavid du Colombier /* update state and return */
281*593dc095SDavid du Colombier strm->next_in = in + OFF;
282*593dc095SDavid du Colombier strm->next_out = out + OFF;
283*593dc095SDavid du Colombier strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
284*593dc095SDavid du Colombier strm->avail_out = (unsigned)(out < end ?
285*593dc095SDavid du Colombier 257 + (end - out) : 257 - (out - end));
286*593dc095SDavid du Colombier state->hold = hold;
287*593dc095SDavid du Colombier state->bits = bits;
288*593dc095SDavid du Colombier return;
2897dd7cddfSDavid du Colombier }
290*593dc095SDavid du Colombier
291*593dc095SDavid du Colombier /*
292*593dc095SDavid du Colombier inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
293*593dc095SDavid du Colombier - Using bit fields for code structure
294*593dc095SDavid du Colombier - Different op definition to avoid & for extra bits (do & for table bits)
295*593dc095SDavid du Colombier - Three separate decoding do-loops for direct, window, and write == 0
296*593dc095SDavid du Colombier - Special case for distance > 1 copies to do overlapped load and store copy
297*593dc095SDavid du Colombier - Explicit branch predictions (based on measured branch probabilities)
298*593dc095SDavid du Colombier - Deferring match copy and interspersed it with decoding subsequent codes
299*593dc095SDavid du Colombier - Swapping literal/length else
300*593dc095SDavid du Colombier - Swapping window/direct else
301*593dc095SDavid du Colombier - Larger unrolled copy loops (three is about right)
302*593dc095SDavid du Colombier - Moving len -= 3 statement into middle of loop
303*593dc095SDavid du Colombier */
304*593dc095SDavid du Colombier
305*593dc095SDavid du Colombier #endif /* !ASMINF */
306