xref: /plan9/sys/src/cmd/gs/zlib/inffast.c (revision 593dc095aefb2a85c828727bbfa9da139a49bdf4)
1*593dc095SDavid du Colombier /* inffast.c -- fast decoding
2*593dc095SDavid du Colombier  * Copyright (C) 1995-2004 Mark Adler
37dd7cddfSDavid du Colombier  * For conditions of distribution and use, see copyright notice in zlib.h
47dd7cddfSDavid du Colombier  */
57dd7cddfSDavid du Colombier 
67dd7cddfSDavid du Colombier #include "zutil.h"
77dd7cddfSDavid du Colombier #include "inftrees.h"
8*593dc095SDavid du Colombier #include "inflate.h"
97dd7cddfSDavid du Colombier #include "inffast.h"
107dd7cddfSDavid du Colombier 
11*593dc095SDavid du Colombier #ifndef ASMINF
127dd7cddfSDavid du Colombier 
13*593dc095SDavid du Colombier /* Allow machine dependent optimization for post-increment or pre-increment.
14*593dc095SDavid du Colombier    Based on testing to date,
15*593dc095SDavid du Colombier    Pre-increment preferred for:
16*593dc095SDavid du Colombier    - PowerPC G3 (Adler)
17*593dc095SDavid du Colombier    - MIPS R5000 (Randers-Pehrson)
18*593dc095SDavid du Colombier    Post-increment preferred for:
19*593dc095SDavid du Colombier    - none
20*593dc095SDavid du Colombier    No measurable difference:
21*593dc095SDavid du Colombier    - Pentium III (Anderson)
22*593dc095SDavid du Colombier    - M68060 (Nikl)
23*593dc095SDavid du Colombier  */
24*593dc095SDavid du Colombier #ifdef POSTINC
25*593dc095SDavid du Colombier #  define OFF 0
26*593dc095SDavid du Colombier #  define PUP(a) *(a)++
27*593dc095SDavid du Colombier #else
28*593dc095SDavid du Colombier #  define OFF 1
29*593dc095SDavid du Colombier #  define PUP(a) *++(a)
30*593dc095SDavid du Colombier #endif
317dd7cddfSDavid du Colombier 
32*593dc095SDavid du Colombier /*
33*593dc095SDavid du Colombier    Decode literal, length, and distance codes and write out the resulting
34*593dc095SDavid du Colombier    literal and match bytes until either not enough input or output is
35*593dc095SDavid du Colombier    available, an end-of-block is encountered, or a data error is encountered.
36*593dc095SDavid du Colombier    When large enough input and output buffers are supplied to inflate(), for
37*593dc095SDavid du Colombier    example, a 16K input buffer and a 64K output buffer, more than 95% of the
38*593dc095SDavid du Colombier    inflate execution time is spent in this routine.
397dd7cddfSDavid du Colombier 
40*593dc095SDavid du Colombier    Entry assumptions:
417dd7cddfSDavid du Colombier 
42*593dc095SDavid du Colombier         state->mode == LEN
43*593dc095SDavid du Colombier         strm->avail_in >= 6
44*593dc095SDavid du Colombier         strm->avail_out >= 258
45*593dc095SDavid du Colombier         start >= strm->avail_out
46*593dc095SDavid du Colombier         state->bits < 8
47*593dc095SDavid du Colombier 
48*593dc095SDavid du Colombier    On return, state->mode is one of:
49*593dc095SDavid du Colombier 
50*593dc095SDavid du Colombier         LEN -- ran out of enough output space or enough available input
51*593dc095SDavid du Colombier         TYPE -- reached end of block code, inflate() to interpret next block
52*593dc095SDavid du Colombier         BAD -- error in block data
53*593dc095SDavid du Colombier 
54*593dc095SDavid du Colombier    Notes:
55*593dc095SDavid du Colombier 
56*593dc095SDavid du Colombier     - The maximum input bits used by a length/distance pair is 15 bits for the
57*593dc095SDavid du Colombier       length code, 5 bits for the length extra, 15 bits for the distance code,
58*593dc095SDavid du Colombier       and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
59*593dc095SDavid du Colombier       Therefore if strm->avail_in >= 6, then there is enough input to avoid
60*593dc095SDavid du Colombier       checking for available input while decoding.
61*593dc095SDavid du Colombier 
62*593dc095SDavid du Colombier     - The maximum bytes that a single length/distance pair can output is 258
63*593dc095SDavid du Colombier       bytes, which is the maximum length that can be coded.  inflate_fast()
64*593dc095SDavid du Colombier       requires strm->avail_out >= 258 for each loop to avoid checking for
65*593dc095SDavid du Colombier       output space.
66*593dc095SDavid du Colombier  */
inflate_fast(strm,start)67*593dc095SDavid du Colombier void inflate_fast(strm, start)
68*593dc095SDavid du Colombier z_streamp strm;
69*593dc095SDavid du Colombier unsigned start;         /* inflate()'s starting value for strm->avail_out */
707dd7cddfSDavid du Colombier {
71*593dc095SDavid du Colombier     struct inflate_state FAR *state;
72*593dc095SDavid du Colombier     unsigned char FAR *in;      /* local strm->next_in */
73*593dc095SDavid du Colombier     unsigned char FAR *last;    /* while in < last, enough input available */
74*593dc095SDavid du Colombier     unsigned char FAR *out;     /* local strm->next_out */
75*593dc095SDavid du Colombier     unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
76*593dc095SDavid du Colombier     unsigned char FAR *end;     /* while out < end, enough space available */
77*593dc095SDavid du Colombier     unsigned wsize;             /* window size or zero if not using window */
78*593dc095SDavid du Colombier     unsigned whave;             /* valid bytes in the window */
79*593dc095SDavid du Colombier     unsigned write;             /* window write index */
80*593dc095SDavid du Colombier     unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
81*593dc095SDavid du Colombier     unsigned long hold;         /* local strm->hold */
82*593dc095SDavid du Colombier     unsigned bits;              /* local strm->bits */
83*593dc095SDavid du Colombier     code const FAR *lcode;      /* local strm->lencode */
84*593dc095SDavid du Colombier     code const FAR *dcode;      /* local strm->distcode */
85*593dc095SDavid du Colombier     unsigned lmask;             /* mask for first level of length codes */
86*593dc095SDavid du Colombier     unsigned dmask;             /* mask for first level of distance codes */
87*593dc095SDavid du Colombier     code this;                  /* retrieved table entry */
88*593dc095SDavid du Colombier     unsigned op;                /* code bits, operation, extra bits, or */
89*593dc095SDavid du Colombier                                 /*  window position, window bytes to copy */
90*593dc095SDavid du Colombier     unsigned len;               /* match length, unused bytes */
91*593dc095SDavid du Colombier     unsigned dist;              /* match distance */
92*593dc095SDavid du Colombier     unsigned char FAR *from;    /* where to copy match from */
937dd7cddfSDavid du Colombier 
94*593dc095SDavid du Colombier     /* copy state to local variables */
95*593dc095SDavid du Colombier     state = (struct inflate_state FAR *)strm->state;
96*593dc095SDavid du Colombier     in = strm->next_in - OFF;
97*593dc095SDavid du Colombier     last = in + (strm->avail_in - 5);
98*593dc095SDavid du Colombier     out = strm->next_out - OFF;
99*593dc095SDavid du Colombier     beg = out - (start - strm->avail_out);
100*593dc095SDavid du Colombier     end = out + (strm->avail_out - 257);
101*593dc095SDavid du Colombier     wsize = state->wsize;
102*593dc095SDavid du Colombier     whave = state->whave;
103*593dc095SDavid du Colombier     write = state->write;
104*593dc095SDavid du Colombier     window = state->window;
105*593dc095SDavid du Colombier     hold = state->hold;
106*593dc095SDavid du Colombier     bits = state->bits;
107*593dc095SDavid du Colombier     lcode = state->lencode;
108*593dc095SDavid du Colombier     dcode = state->distcode;
109*593dc095SDavid du Colombier     lmask = (1U << state->lenbits) - 1;
110*593dc095SDavid du Colombier     dmask = (1U << state->distbits) - 1;
1117dd7cddfSDavid du Colombier 
112*593dc095SDavid du Colombier     /* decode literals and length/distances until end-of-block or not enough
113*593dc095SDavid du Colombier        input data or output space */
1147dd7cddfSDavid du Colombier     do {
115*593dc095SDavid du Colombier         if (bits < 15) {
116*593dc095SDavid du Colombier             hold += (unsigned long)(PUP(in)) << bits;
117*593dc095SDavid du Colombier             bits += 8;
118*593dc095SDavid du Colombier             hold += (unsigned long)(PUP(in)) << bits;
119*593dc095SDavid du Colombier             bits += 8;
1207dd7cddfSDavid du Colombier         }
121*593dc095SDavid du Colombier         this = lcode[hold & lmask];
122*593dc095SDavid du Colombier       dolen:
123*593dc095SDavid du Colombier         op = (unsigned)(this.bits);
124*593dc095SDavid du Colombier         hold >>= op;
125*593dc095SDavid du Colombier         bits -= op;
126*593dc095SDavid du Colombier         op = (unsigned)(this.op);
127*593dc095SDavid du Colombier         if (op == 0) {                          /* literal */
128*593dc095SDavid du Colombier             Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
129*593dc095SDavid du Colombier                     "inflate:         literal '%c'\n" :
130*593dc095SDavid du Colombier                     "inflate:         literal 0x%02x\n", this.val));
131*593dc095SDavid du Colombier             PUP(out) = (unsigned char)(this.val);
132*593dc095SDavid du Colombier         }
133*593dc095SDavid du Colombier         else if (op & 16) {                     /* length base */
134*593dc095SDavid du Colombier             len = (unsigned)(this.val);
135*593dc095SDavid du Colombier             op &= 15;                           /* number of extra bits */
136*593dc095SDavid du Colombier             if (op) {
137*593dc095SDavid du Colombier                 if (bits < op) {
138*593dc095SDavid du Colombier                     hold += (unsigned long)(PUP(in)) << bits;
139*593dc095SDavid du Colombier                     bits += 8;
140*593dc095SDavid du Colombier                 }
141*593dc095SDavid du Colombier                 len += (unsigned)hold & ((1U << op) - 1);
142*593dc095SDavid du Colombier                 hold >>= op;
143*593dc095SDavid du Colombier                 bits -= op;
144*593dc095SDavid du Colombier             }
145*593dc095SDavid du Colombier             Tracevv((stderr, "inflate:         length %u\n", len));
146*593dc095SDavid du Colombier             if (bits < 15) {
147*593dc095SDavid du Colombier                 hold += (unsigned long)(PUP(in)) << bits;
148*593dc095SDavid du Colombier                 bits += 8;
149*593dc095SDavid du Colombier                 hold += (unsigned long)(PUP(in)) << bits;
150*593dc095SDavid du Colombier                 bits += 8;
151*593dc095SDavid du Colombier             }
152*593dc095SDavid du Colombier             this = dcode[hold & dmask];
153*593dc095SDavid du Colombier           dodist:
154*593dc095SDavid du Colombier             op = (unsigned)(this.bits);
155*593dc095SDavid du Colombier             hold >>= op;
156*593dc095SDavid du Colombier             bits -= op;
157*593dc095SDavid du Colombier             op = (unsigned)(this.op);
158*593dc095SDavid du Colombier             if (op & 16) {                      /* distance base */
159*593dc095SDavid du Colombier                 dist = (unsigned)(this.val);
160*593dc095SDavid du Colombier                 op &= 15;                       /* number of extra bits */
161*593dc095SDavid du Colombier                 if (bits < op) {
162*593dc095SDavid du Colombier                     hold += (unsigned long)(PUP(in)) << bits;
163*593dc095SDavid du Colombier                     bits += 8;
164*593dc095SDavid du Colombier                     if (bits < op) {
165*593dc095SDavid du Colombier                         hold += (unsigned long)(PUP(in)) << bits;
166*593dc095SDavid du Colombier                         bits += 8;
1677dd7cddfSDavid du Colombier                     }
1687dd7cddfSDavid du Colombier                 }
169*593dc095SDavid du Colombier                 dist += (unsigned)hold & ((1U << op) - 1);
170*593dc095SDavid du Colombier                 hold >>= op;
171*593dc095SDavid du Colombier                 bits -= op;
172*593dc095SDavid du Colombier                 Tracevv((stderr, "inflate:         distance %u\n", dist));
173*593dc095SDavid du Colombier                 op = (unsigned)(out - beg);     /* max distance in output */
174*593dc095SDavid du Colombier                 if (dist > op) {                /* see if copy from window */
175*593dc095SDavid du Colombier                     op = dist - op;             /* distance back in window */
176*593dc095SDavid du Colombier                     if (op > whave) {
177*593dc095SDavid du Colombier                         strm->msg = (char *)"invalid distance too far back";
178*593dc095SDavid du Colombier                         state->mode = BAD;
1797dd7cddfSDavid du Colombier                         break;
1807dd7cddfSDavid du Colombier                     }
181*593dc095SDavid du Colombier                     from = window - OFF;
182*593dc095SDavid du Colombier                     if (write == 0) {           /* very common case */
183*593dc095SDavid du Colombier                         from += wsize - op;
184*593dc095SDavid du Colombier                         if (op < len) {         /* some from window */
185*593dc095SDavid du Colombier                             len -= op;
186*593dc095SDavid du Colombier                             do {
187*593dc095SDavid du Colombier                                 PUP(out) = PUP(from);
188*593dc095SDavid du Colombier                             } while (--op);
189*593dc095SDavid du Colombier                             from = out - dist;  /* rest from output */
1907dd7cddfSDavid du Colombier                         }
1917dd7cddfSDavid du Colombier                     }
192*593dc095SDavid du Colombier                     else if (write < op) {      /* wrap around window */
193*593dc095SDavid du Colombier                         from += wsize + write - op;
194*593dc095SDavid du Colombier                         op -= write;
195*593dc095SDavid du Colombier                         if (op < len) {         /* some from end of window */
196*593dc095SDavid du Colombier                             len -= op;
197*593dc095SDavid du Colombier                             do {
198*593dc095SDavid du Colombier                                 PUP(out) = PUP(from);
199*593dc095SDavid du Colombier                             } while (--op);
200*593dc095SDavid du Colombier                             from = window - OFF;
201*593dc095SDavid du Colombier                             if (write < len) {  /* some from start of window */
202*593dc095SDavid du Colombier                                 op = write;
203*593dc095SDavid du Colombier                                 len -= op;
204*593dc095SDavid du Colombier                                 do {
205*593dc095SDavid du Colombier                                     PUP(out) = PUP(from);
206*593dc095SDavid du Colombier                                 } while (--op);
207*593dc095SDavid du Colombier                                 from = out - dist;      /* rest from output */
208*593dc095SDavid du Colombier                             }
209*593dc095SDavid du Colombier                         }
210*593dc095SDavid du Colombier                     }
211*593dc095SDavid du Colombier                     else {                      /* contiguous in window */
212*593dc095SDavid du Colombier                         from += write - op;
213*593dc095SDavid du Colombier                         if (op < len) {         /* some from window */
214*593dc095SDavid du Colombier                             len -= op;
215*593dc095SDavid du Colombier                             do {
216*593dc095SDavid du Colombier                                 PUP(out) = PUP(from);
217*593dc095SDavid du Colombier                             } while (--op);
218*593dc095SDavid du Colombier                             from = out - dist;  /* rest from output */
219*593dc095SDavid du Colombier                         }
220*593dc095SDavid du Colombier                     }
221*593dc095SDavid du Colombier                     while (len > 2) {
222*593dc095SDavid du Colombier                         PUP(out) = PUP(from);
223*593dc095SDavid du Colombier                         PUP(out) = PUP(from);
224*593dc095SDavid du Colombier                         PUP(out) = PUP(from);
225*593dc095SDavid du Colombier                         len -= 3;
226*593dc095SDavid du Colombier                     }
227*593dc095SDavid du Colombier                     if (len) {
228*593dc095SDavid du Colombier                         PUP(out) = PUP(from);
229*593dc095SDavid du Colombier                         if (len > 1)
230*593dc095SDavid du Colombier                             PUP(out) = PUP(from);
231*593dc095SDavid du Colombier                     }
232*593dc095SDavid du Colombier                 }
233*593dc095SDavid du Colombier                 else {
234*593dc095SDavid du Colombier                     from = out - dist;          /* copy direct from output */
235*593dc095SDavid du Colombier                     do {                        /* minimum length is three */
236*593dc095SDavid du Colombier                         PUP(out) = PUP(from);
237*593dc095SDavid du Colombier                         PUP(out) = PUP(from);
238*593dc095SDavid du Colombier                         PUP(out) = PUP(from);
239*593dc095SDavid du Colombier                         len -= 3;
240*593dc095SDavid du Colombier                     } while (len > 2);
241*593dc095SDavid du Colombier                     if (len) {
242*593dc095SDavid du Colombier                         PUP(out) = PUP(from);
243*593dc095SDavid du Colombier                         if (len > 1)
244*593dc095SDavid du Colombier                             PUP(out) = PUP(from);
245*593dc095SDavid du Colombier                     }
246*593dc095SDavid du Colombier                 }
247*593dc095SDavid du Colombier             }
248*593dc095SDavid du Colombier             else if ((op & 64) == 0) {          /* 2nd level distance code */
249*593dc095SDavid du Colombier                 this = dcode[this.val + (hold & ((1U << op) - 1))];
250*593dc095SDavid du Colombier                 goto dodist;
251*593dc095SDavid du Colombier             }
252*593dc095SDavid du Colombier             else {
253*593dc095SDavid du Colombier                 strm->msg = (char *)"invalid distance code";
254*593dc095SDavid du Colombier                 state->mode = BAD;
2557dd7cddfSDavid du Colombier                 break;
2567dd7cddfSDavid du Colombier             }
2577dd7cddfSDavid du Colombier         }
258*593dc095SDavid du Colombier         else if ((op & 64) == 0) {              /* 2nd level length code */
259*593dc095SDavid du Colombier             this = lcode[this.val + (hold & ((1U << op) - 1))];
260*593dc095SDavid du Colombier             goto dolen;
2617dd7cddfSDavid du Colombier         }
262*593dc095SDavid du Colombier         else if (op & 32) {                     /* end-of-block */
263*593dc095SDavid du Colombier             Tracevv((stderr, "inflate:         end of block\n"));
264*593dc095SDavid du Colombier             state->mode = TYPE;
265*593dc095SDavid du Colombier             break;
2667dd7cddfSDavid du Colombier         }
267*593dc095SDavid du Colombier         else {
268*593dc095SDavid du Colombier             strm->msg = (char *)"invalid literal/length code";
269*593dc095SDavid du Colombier             state->mode = BAD;
270*593dc095SDavid du Colombier             break;
271*593dc095SDavid du Colombier         }
272*593dc095SDavid du Colombier     } while (in < last && out < end);
2737dd7cddfSDavid du Colombier 
274*593dc095SDavid du Colombier     /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
275*593dc095SDavid du Colombier     len = bits >> 3;
276*593dc095SDavid du Colombier     in -= len;
277*593dc095SDavid du Colombier     bits -= len << 3;
278*593dc095SDavid du Colombier     hold &= (1U << bits) - 1;
279*593dc095SDavid du Colombier 
280*593dc095SDavid du Colombier     /* update state and return */
281*593dc095SDavid du Colombier     strm->next_in = in + OFF;
282*593dc095SDavid du Colombier     strm->next_out = out + OFF;
283*593dc095SDavid du Colombier     strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
284*593dc095SDavid du Colombier     strm->avail_out = (unsigned)(out < end ?
285*593dc095SDavid du Colombier                                  257 + (end - out) : 257 - (out - end));
286*593dc095SDavid du Colombier     state->hold = hold;
287*593dc095SDavid du Colombier     state->bits = bits;
288*593dc095SDavid du Colombier     return;
2897dd7cddfSDavid du Colombier }
290*593dc095SDavid du Colombier 
291*593dc095SDavid du Colombier /*
292*593dc095SDavid du Colombier    inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
293*593dc095SDavid du Colombier    - Using bit fields for code structure
294*593dc095SDavid du Colombier    - Different op definition to avoid & for extra bits (do & for table bits)
295*593dc095SDavid du Colombier    - Three separate decoding do-loops for direct, window, and write == 0
296*593dc095SDavid du Colombier    - Special case for distance > 1 copies to do overlapped load and store copy
297*593dc095SDavid du Colombier    - Explicit branch predictions (based on measured branch probabilities)
298*593dc095SDavid du Colombier    - Deferring match copy and interspersed it with decoding subsequent codes
299*593dc095SDavid du Colombier    - Swapping literal/length else
300*593dc095SDavid du Colombier    - Swapping window/direct else
301*593dc095SDavid du Colombier    - Larger unrolled copy loops (three is about right)
302*593dc095SDavid du Colombier    - Moving len -= 3 statement into middle of loop
303*593dc095SDavid du Colombier  */
304*593dc095SDavid du Colombier 
305*593dc095SDavid du Colombier #endif /* !ASMINF */
306