xref: /minix3/common/dist/zlib/contrib/inflate86/inffas86.c (revision 44bedb31d842b4b0444105519bcf929a69fe2dc1)
1 /*	$NetBSD: inffas86.c,v 1.1.1.1 2006/01/14 20:10:53 christos Exp $	*/
2 
3 /* inffas86.c is a hand tuned assembler version of
4  *
5  * inffast.c -- fast decoding
6  * Copyright (C) 1995-2003 Mark Adler
7  * For conditions of distribution and use, see copyright notice in zlib.h
8  *
9  * Copyright (C) 2003 Chris Anderson <christop@charm.net>
10  * Please use the copyright conditions above.
11  *
12  * Dec-29-2003 -- I added AMD64 inflate asm support.  This version is also
13  * slightly quicker on x86 systems because, instead of using rep movsb to copy
14  * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
15  * bytes.  I've tested the AMD64 code on Fedora Core 1 with the x86_64 updates
16  * from http://fedora.linux.duke.edu/fc1_x86_64,
17  * running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
18  * 1GB RAM.  The 64-bit version is about 4% faster than the 32-bit version,
19  * when decompressing mozilla-source-1.3.tar.gz.
20  *
21  * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
22  * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
23  * the moment.  I have successfully compiled and tested this code with gcc2.96,
24  * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
25  * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
26  * enabled.  I will attempt to merge the MMX code into this version.  Newer
27  * versions of this and inffast.S can be found at
28  * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
29  */
30 
31 #include "zutil.h"
32 #include "inftrees.h"
33 #include "inflate.h"
34 #include "inffast.h"
35 
36 /* Mark Adler's comments from inffast.c: */
37 
38 /*
39    Decode literal, length, and distance codes and write out the resulting
40    literal and match bytes until either not enough input or output is
41    available, an end-of-block is encountered, or a data error is encountered.
42    When large enough input and output buffers are supplied to inflate(), for
43    example, a 16K input buffer and a 64K output buffer, more than 95% of the
44    inflate execution time is spent in this routine.
45 
46    Entry assumptions:
47 
48         state->mode == LEN
49         strm->avail_in >= 6
50         strm->avail_out >= 258
51         start >= strm->avail_out
52         state->bits < 8
53 
54    On return, state->mode is one of:
55 
56         LEN -- not enough output space or not enough available input
57         TYPE -- reached end of block code, inflate() to interpret next block
58         BAD -- error in block data
59 
60    Notes:
61 
62     - The maximum input bits used by a length/distance pair is 15 bits for the
63       length code, 5 bits for the length extra, 15 bits for the distance code,
64       and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
65       Therefore if strm->avail_in >= 6, then there is enough input to avoid
66       checking for available input while decoding.
67 
68     - The maximum bytes that a single length/distance pair can output is 258
69       bytes, which is the maximum length that can be coded.  inflate_fast()
70       requires strm->avail_out >= 258 for each loop to avoid checking for
71       output space.
72  */
inflate_fast(strm,start)73 void inflate_fast(strm, start)
74 z_streamp strm;
75 unsigned start;         /* inflate()'s starting value for strm->avail_out */
76 {
77     struct inflate_state FAR *state;
78     struct inffast_ar {
79 /* 64   32                               x86  x86_64 */
80 /* ar offset                              register */
81 /*  0    0 */ void *esp;                /* esp save */
82 /*  8    4 */ void *ebp;                /* ebp save */
83 /* 16    8 */ unsigned char FAR *in;    /* esi rsi  local strm->next_in */
84 /* 24   12 */ unsigned char FAR *last;  /*     r9   while in < last */
85 /* 32   16 */ unsigned char FAR *out;   /* edi rdi  local strm->next_out */
86 /* 40   20 */ unsigned char FAR *beg;   /*          inflate()'s init next_out */
87 /* 48   24 */ unsigned char FAR *end;   /*     r10  while out < end */
88 /* 56   28 */ unsigned char FAR *window;/*          size of window, wsize!=0 */
89 /* 64   32 */ code const FAR *lcode;    /* ebp rbp  local strm->lencode */
90 /* 72   36 */ code const FAR *dcode;    /*     r11  local strm->distcode */
91 /* 80   40 */ unsigned long hold;       /* edx rdx  local strm->hold */
92 /* 88   44 */ unsigned bits;            /* ebx rbx  local strm->bits */
93 /* 92   48 */ unsigned wsize;           /*          window size */
94 /* 96   52 */ unsigned write;           /*          window write index */
95 /*100   56 */ unsigned lmask;           /*     r12  mask for lcode */
96 /*104   60 */ unsigned dmask;           /*     r13  mask for dcode */
97 /*108   64 */ unsigned len;             /*     r14  match length */
98 /*112   68 */ unsigned dist;            /*     r15  match distance */
99 /*116   72 */ unsigned status;          /*          set when state chng*/
100     } ar;
101 
102 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
103 #define PAD_AVAIL_IN 6
104 #define PAD_AVAIL_OUT 258
105 #else
106 #define PAD_AVAIL_IN 5
107 #define PAD_AVAIL_OUT 257
108 #endif
109 
110     /* copy state to local variables */
111     state = (struct inflate_state FAR *)strm->state;
112     ar.in = strm->next_in;
113     ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
114     ar.out = strm->next_out;
115     ar.beg = ar.out - (start - strm->avail_out);
116     ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
117     ar.wsize = state->wsize;
118     ar.write = state->write;
119     ar.window = state->window;
120     ar.hold = state->hold;
121     ar.bits = state->bits;
122     ar.lcode = state->lencode;
123     ar.dcode = state->distcode;
124     ar.lmask = (1U << state->lenbits) - 1;
125     ar.dmask = (1U << state->distbits) - 1;
126 
127     /* decode literals and length/distances until end-of-block or not enough
128        input data or output space */
129 
130     /* align in on 1/2 hold size boundary */
131     while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
132         ar.hold += (unsigned long)*ar.in++ << ar.bits;
133         ar.bits += 8;
134     }
135 
136 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
137     __asm__ __volatile__ (
138 "        leaq    %0, %%rax\n"
139 "        movq    %%rbp, 8(%%rax)\n"       /* save regs rbp and rsp */
140 "        movq    %%rsp, (%%rax)\n"
141 "        movq    %%rax, %%rsp\n"          /* make rsp point to &ar */
142 "        movq    16(%%rsp), %%rsi\n"      /* rsi  = in */
143 "        movq    32(%%rsp), %%rdi\n"      /* rdi  = out */
144 "        movq    24(%%rsp), %%r9\n"       /* r9   = last */
145 "        movq    48(%%rsp), %%r10\n"      /* r10  = end */
146 "        movq    64(%%rsp), %%rbp\n"      /* rbp  = lcode */
147 "        movq    72(%%rsp), %%r11\n"      /* r11  = dcode */
148 "        movq    80(%%rsp), %%rdx\n"      /* rdx  = hold */
149 "        movl    88(%%rsp), %%ebx\n"      /* ebx  = bits */
150 "        movl    100(%%rsp), %%r12d\n"    /* r12d = lmask */
151 "        movl    104(%%rsp), %%r13d\n"    /* r13d = dmask */
152                                           /* r14d = len */
153                                           /* r15d = dist */
154 "        cld\n"
155 "        cmpq    %%rdi, %%r10\n"
156 "        je      .L_one_time\n"           /* if only one decode left */
157 "        cmpq    %%rsi, %%r9\n"
158 "        je      .L_one_time\n"
159 "        jmp     .L_do_loop\n"
160 
161 ".L_one_time:\n"
162 "        movq    %%r12, %%r8\n"           /* r8 = lmask */
163 "        cmpb    $32, %%bl\n"
164 "        ja      .L_get_length_code_one_time\n"
165 
166 "        lodsl\n"                         /* eax = *(uint *)in++ */
167 "        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
168 "        addb    $32, %%bl\n"             /* bits += 32 */
169 "        shlq    %%cl, %%rax\n"
170 "        orq     %%rax, %%rdx\n"          /* hold |= *((uint *)in)++ << bits */
171 "        jmp     .L_get_length_code_one_time\n"
172 
173 ".align 32,0x90\n"
174 ".L_while_test:\n"
175 "        cmpq    %%rdi, %%r10\n"
176 "        jbe     .L_break_loop\n"
177 "        cmpq    %%rsi, %%r9\n"
178 "        jbe     .L_break_loop\n"
179 
180 ".L_do_loop:\n"
181 "        movq    %%r12, %%r8\n"           /* r8 = lmask */
182 "        cmpb    $32, %%bl\n"
183 "        ja      .L_get_length_code\n"    /* if (32 < bits) */
184 
185 "        lodsl\n"                         /* eax = *(uint *)in++ */
186 "        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
187 "        addb    $32, %%bl\n"             /* bits += 32 */
188 "        shlq    %%cl, %%rax\n"
189 "        orq     %%rax, %%rdx\n"          /* hold |= *((uint *)in)++ << bits */
190 
191 ".L_get_length_code:\n"
192 "        andq    %%rdx, %%r8\n"            /* r8 &= hold */
193 "        movl    (%%rbp,%%r8,4), %%eax\n"  /* eax = lcode[hold & lmask] */
194 
195 "        movb    %%ah, %%cl\n"            /* cl = this.bits */
196 "        subb    %%ah, %%bl\n"            /* bits -= this.bits */
197 "        shrq    %%cl, %%rdx\n"           /* hold >>= this.bits */
198 
199 "        testb   %%al, %%al\n"
200 "        jnz     .L_test_for_length_base\n" /* if (op != 0) 45.7% */
201 
202 "        movq    %%r12, %%r8\n"            /* r8 = lmask */
203 "        shrl    $16, %%eax\n"            /* output this.val char */
204 "        stosb\n"
205 
206 ".L_get_length_code_one_time:\n"
207 "        andq    %%rdx, %%r8\n"            /* r8 &= hold */
208 "        movl    (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
209 
210 ".L_dolen:\n"
211 "        movb    %%ah, %%cl\n"            /* cl = this.bits */
212 "        subb    %%ah, %%bl\n"            /* bits -= this.bits */
213 "        shrq    %%cl, %%rdx\n"           /* hold >>= this.bits */
214 
215 "        testb   %%al, %%al\n"
216 "        jnz     .L_test_for_length_base\n" /* if (op != 0) 45.7% */
217 
218 "        shrl    $16, %%eax\n"            /* output this.val char */
219 "        stosb\n"
220 "        jmp     .L_while_test\n"
221 
222 ".align 32,0x90\n"
223 ".L_test_for_length_base:\n"
224 "        movl    %%eax, %%r14d\n"         /* len = this */
225 "        shrl    $16, %%r14d\n"           /* len = this.val */
226 "        movb    %%al, %%cl\n"
227 
228 "        testb   $16, %%al\n"
229 "        jz      .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
230 "        andb    $15, %%cl\n"             /* op &= 15 */
231 "        jz      .L_decode_distance\n"    /* if (!op) */
232 
233 ".L_add_bits_to_len:\n"
234 "        subb    %%cl, %%bl\n"
235 "        xorl    %%eax, %%eax\n"
236 "        incl    %%eax\n"
237 "        shll    %%cl, %%eax\n"
238 "        decl    %%eax\n"
239 "        andl    %%edx, %%eax\n"          /* eax &= hold */
240 "        shrq    %%cl, %%rdx\n"
241 "        addl    %%eax, %%r14d\n"         /* len += hold & mask[op] */
242 
243 ".L_decode_distance:\n"
244 "        movq    %%r13, %%r8\n"           /* r8 = dmask */
245 "        cmpb    $32, %%bl\n"
246 "        ja      .L_get_distance_code\n"  /* if (32 < bits) */
247 
248 "        lodsl\n"                         /* eax = *(uint *)in++ */
249 "        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
250 "        addb    $32, %%bl\n"             /* bits += 32 */
251 "        shlq    %%cl, %%rax\n"
252 "        orq     %%rax, %%rdx\n"          /* hold |= *((uint *)in)++ << bits */
253 
254 ".L_get_distance_code:\n"
255 "        andq    %%rdx, %%r8\n"           /* r8 &= hold */
256 "        movl    (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */
257 
258 ".L_dodist:\n"
259 "        movl    %%eax, %%r15d\n"         /* dist = this */
260 "        shrl    $16, %%r15d\n"           /* dist = this.val */
261 "        movb    %%ah, %%cl\n"
262 "        subb    %%ah, %%bl\n"            /* bits -= this.bits */
263 "        shrq    %%cl, %%rdx\n"           /* hold >>= this.bits */
264 "        movb    %%al, %%cl\n"            /* cl = this.op */
265 
266 "        testb   $16, %%al\n"             /* if ((op & 16) == 0) */
267 "        jz      .L_test_for_second_level_dist\n"
268 "        andb    $15, %%cl\n"             /* op &= 15 */
269 "        jz      .L_check_dist_one\n"
270 
271 ".L_add_bits_to_dist:\n"
272 "        subb    %%cl, %%bl\n"
273 "        xorl    %%eax, %%eax\n"
274 "        incl    %%eax\n"
275 "        shll    %%cl, %%eax\n"
276 "        decl    %%eax\n"                 /* (1 << op) - 1 */
277 "        andl    %%edx, %%eax\n"          /* eax &= hold */
278 "        shrq    %%cl, %%rdx\n"
279 "        addl    %%eax, %%r15d\n"         /* dist += hold & ((1 << op) - 1) */
280 
281 ".L_check_window:\n"
282 "        movq    %%rsi, %%r8\n"           /* save in so from can use it's reg */
283 "        movq    %%rdi, %%rax\n"
284 "        subq    40(%%rsp), %%rax\n"      /* nbytes = out - beg */
285 
286 "        cmpl    %%r15d, %%eax\n"
287 "        jb      .L_clip_window\n"        /* if (dist > nbytes) 4.2% */
288 
289 "        movl    %%r14d, %%ecx\n"         /* ecx = len */
290 "        movq    %%rdi, %%rsi\n"
291 "        subq    %%r15, %%rsi\n"          /* from = out - dist */
292 
293 "        sarl    %%ecx\n"
294 "        jnc     .L_copy_two\n"           /* if len % 2 == 0 */
295 
296 "        rep     movsw\n"
297 "        movb    (%%rsi), %%al\n"
298 "        movb    %%al, (%%rdi)\n"
299 "        incq    %%rdi\n"
300 
301 "        movq    %%r8, %%rsi\n"           /* move in back to %rsi, toss from */
302 "        jmp     .L_while_test\n"
303 
304 ".L_copy_two:\n"
305 "        rep     movsw\n"
306 "        movq    %%r8, %%rsi\n"           /* move in back to %rsi, toss from */
307 "        jmp     .L_while_test\n"
308 
309 ".align 32,0x90\n"
310 ".L_check_dist_one:\n"
311 "        cmpl    $1, %%r15d\n"            /* if dist 1, is a memset */
312 "        jne     .L_check_window\n"
313 "        cmpq    %%rdi, 40(%%rsp)\n"      /* if out == beg, outside window */
314 "        je      .L_check_window\n"
315 
316 "        movl    %%r14d, %%ecx\n"         /* ecx = len */
317 "        movb    -1(%%rdi), %%al\n"
318 "        movb    %%al, %%ah\n"
319 
320 "        sarl    %%ecx\n"
321 "        jnc     .L_set_two\n"
322 "        movb    %%al, (%%rdi)\n"
323 "        incq    %%rdi\n"
324 
325 ".L_set_two:\n"
326 "        rep     stosw\n"
327 "        jmp     .L_while_test\n"
328 
329 ".align 32,0x90\n"
330 ".L_test_for_second_level_length:\n"
331 "        testb   $64, %%al\n"
332 "        jnz     .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
333 
334 "        xorl    %%eax, %%eax\n"
335 "        incl    %%eax\n"
336 "        shll    %%cl, %%eax\n"
337 "        decl    %%eax\n"
338 "        andl    %%edx, %%eax\n"         /* eax &= hold */
339 "        addl    %%r14d, %%eax\n"        /* eax += len */
340 "        movl    (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
341 "        jmp     .L_dolen\n"
342 
343 ".align 32,0x90\n"
344 ".L_test_for_second_level_dist:\n"
345 "        testb   $64, %%al\n"
346 "        jnz     .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
347 
348 "        xorl    %%eax, %%eax\n"
349 "        incl    %%eax\n"
350 "        shll    %%cl, %%eax\n"
351 "        decl    %%eax\n"
352 "        andl    %%edx, %%eax\n"         /* eax &= hold */
353 "        addl    %%r15d, %%eax\n"        /* eax += dist */
354 "        movl    (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
355 "        jmp     .L_dodist\n"
356 
357 ".align 32,0x90\n"
358 ".L_clip_window:\n"
359 "        movl    %%eax, %%ecx\n"         /* ecx = nbytes */
360 "        movl    92(%%rsp), %%eax\n"     /* eax = wsize, prepare for dist cmp */
361 "        negl    %%ecx\n"                /* nbytes = -nbytes */
362 
363 "        cmpl    %%r15d, %%eax\n"
364 "        jb      .L_invalid_distance_too_far\n" /* if (dist > wsize) */
365 
366 "        addl    %%r15d, %%ecx\n"         /* nbytes = dist - nbytes */
367 "        cmpl    $0, 96(%%rsp)\n"
368 "        jne     .L_wrap_around_window\n" /* if (write != 0) */
369 
370 "        movq    56(%%rsp), %%rsi\n"     /* from  = window */
371 "        subl    %%ecx, %%eax\n"         /* eax  -= nbytes */
372 "        addq    %%rax, %%rsi\n"         /* from += wsize - nbytes */
373 
374 "        movl    %%r14d, %%eax\n"        /* eax = len */
375 "        cmpl    %%ecx, %%r14d\n"
376 "        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
377 
378 "        subl    %%ecx, %%eax\n"         /* eax -= nbytes */
379 "        rep     movsb\n"
380 "        movq    %%rdi, %%rsi\n"
381 "        subq    %%r15, %%rsi\n"         /* from = &out[ -dist ] */
382 "        jmp     .L_do_copy\n"
383 
384 ".align 32,0x90\n"
385 ".L_wrap_around_window:\n"
386 "        movl    96(%%rsp), %%eax\n"     /* eax = write */
387 "        cmpl    %%eax, %%ecx\n"
388 "        jbe     .L_contiguous_in_window\n" /* if (write >= nbytes) */
389 
390 "        movl    92(%%rsp), %%esi\n"     /* from  = wsize */
391 "        addq    56(%%rsp), %%rsi\n"     /* from += window */
392 "        addq    %%rax, %%rsi\n"         /* from += write */
393 "        subq    %%rcx, %%rsi\n"         /* from -= nbytes */
394 "        subl    %%eax, %%ecx\n"         /* nbytes -= write */
395 
396 "        movl    %%r14d, %%eax\n"        /* eax = len */
397 "        cmpl    %%ecx, %%eax\n"
398 "        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
399 
400 "        subl    %%ecx, %%eax\n"         /* len -= nbytes */
401 "        rep     movsb\n"
402 "        movq    56(%%rsp), %%rsi\n"     /* from = window */
403 "        movl    96(%%rsp), %%ecx\n"     /* nbytes = write */
404 "        cmpl    %%ecx, %%eax\n"
405 "        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
406 
407 "        subl    %%ecx, %%eax\n"         /* len -= nbytes */
408 "        rep     movsb\n"
409 "        movq    %%rdi, %%rsi\n"
410 "        subq    %%r15, %%rsi\n"         /* from = out - dist */
411 "        jmp     .L_do_copy\n"
412 
413 ".align 32,0x90\n"
414 ".L_contiguous_in_window:\n"
415 "        movq    56(%%rsp), %%rsi\n"     /* rsi = window */
416 "        addq    %%rax, %%rsi\n"
417 "        subq    %%rcx, %%rsi\n"         /* from += write - nbytes */
418 
419 "        movl    %%r14d, %%eax\n"        /* eax = len */
420 "        cmpl    %%ecx, %%eax\n"
421 "        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
422 
423 "        subl    %%ecx, %%eax\n"         /* len -= nbytes */
424 "        rep     movsb\n"
425 "        movq    %%rdi, %%rsi\n"
426 "        subq    %%r15, %%rsi\n"         /* from = out - dist */
427 "        jmp     .L_do_copy\n"           /* if (nbytes >= len) */
428 
429 ".align 32,0x90\n"
430 ".L_do_copy:\n"
431 "        movl    %%eax, %%ecx\n"         /* ecx = len */
432 "        rep     movsb\n"
433 
434 "        movq    %%r8, %%rsi\n"          /* move in back to %esi, toss from */
435 "        jmp     .L_while_test\n"
436 
437 ".L_test_for_end_of_block:\n"
438 "        testb   $32, %%al\n"
439 "        jz      .L_invalid_literal_length_code\n"
440 "        movl    $1, 116(%%rsp)\n"
441 "        jmp     .L_break_loop_with_status\n"
442 
443 ".L_invalid_literal_length_code:\n"
444 "        movl    $2, 116(%%rsp)\n"
445 "        jmp     .L_break_loop_with_status\n"
446 
447 ".L_invalid_distance_code:\n"
448 "        movl    $3, 116(%%rsp)\n"
449 "        jmp     .L_break_loop_with_status\n"
450 
451 ".L_invalid_distance_too_far:\n"
452 "        movl    $4, 116(%%rsp)\n"
453 "        jmp     .L_break_loop_with_status\n"
454 
455 ".L_break_loop:\n"
456 "        movl    $0, 116(%%rsp)\n"
457 
458 ".L_break_loop_with_status:\n"
459 /* put in, out, bits, and hold back into ar and pop esp */
460 "        movq    %%rsi, 16(%%rsp)\n"     /* in */
461 "        movq    %%rdi, 32(%%rsp)\n"     /* out */
462 "        movl    %%ebx, 88(%%rsp)\n"     /* bits */
463 "        movq    %%rdx, 80(%%rsp)\n"     /* hold */
464 "        movq    (%%rsp), %%rax\n"       /* restore rbp and rsp */
465 "        movq    8(%%rsp), %%rbp\n"
466 "        movq    %%rax, %%rsp\n"
467           :
468           : "m" (ar)
469           : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
470             "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
471     );
472 #elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 )
473     __asm__ __volatile__ (
474 "        leal    %0, %%eax\n"
475 "        movl    %%esp, (%%eax)\n"        /* save esp, ebp */
476 "        movl    %%ebp, 4(%%eax)\n"
477 "        movl    %%eax, %%esp\n"
478 "        movl    8(%%esp), %%esi\n"       /* esi = in */
479 "        movl    16(%%esp), %%edi\n"      /* edi = out */
480 "        movl    40(%%esp), %%edx\n"      /* edx = hold */
481 "        movl    44(%%esp), %%ebx\n"      /* ebx = bits */
482 "        movl    32(%%esp), %%ebp\n"      /* ebp = lcode */
483 
484 "        cld\n"
485 "        jmp     .L_do_loop\n"
486 
487 ".align 32,0x90\n"
488 ".L_while_test:\n"
489 "        cmpl    %%edi, 24(%%esp)\n"      /* out < end */
490 "        jbe     .L_break_loop\n"
491 "        cmpl    %%esi, 12(%%esp)\n"      /* in < last */
492 "        jbe     .L_break_loop\n"
493 
494 ".L_do_loop:\n"
495 "        cmpb    $15, %%bl\n"
496 "        ja      .L_get_length_code\n"    /* if (15 < bits) */
497 
498 "        xorl    %%eax, %%eax\n"
499 "        lodsw\n"                         /* al = *(ushort *)in++ */
500 "        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
501 "        addb    $16, %%bl\n"             /* bits += 16 */
502 "        shll    %%cl, %%eax\n"
503 "        orl     %%eax, %%edx\n"        /* hold |= *((ushort *)in)++ << bits */
504 
505 ".L_get_length_code:\n"
506 "        movl    56(%%esp), %%eax\n"      /* eax = lmask */
507 "        andl    %%edx, %%eax\n"          /* eax &= hold */
508 "        movl    (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */
509 
510 ".L_dolen:\n"
511 "        movb    %%ah, %%cl\n"            /* cl = this.bits */
512 "        subb    %%ah, %%bl\n"            /* bits -= this.bits */
513 "        shrl    %%cl, %%edx\n"           /* hold >>= this.bits */
514 
515 "        testb   %%al, %%al\n"
516 "        jnz     .L_test_for_length_base\n" /* if (op != 0) 45.7% */
517 
518 "        shrl    $16, %%eax\n"            /* output this.val char */
519 "        stosb\n"
520 "        jmp     .L_while_test\n"
521 
522 ".align 32,0x90\n"
523 ".L_test_for_length_base:\n"
524 "        movl    %%eax, %%ecx\n"          /* len = this */
525 "        shrl    $16, %%ecx\n"            /* len = this.val */
526 "        movl    %%ecx, 64(%%esp)\n"      /* save len */
527 "        movb    %%al, %%cl\n"
528 
529 "        testb   $16, %%al\n"
530 "        jz      .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
531 "        andb    $15, %%cl\n"             /* op &= 15 */
532 "        jz      .L_decode_distance\n"    /* if (!op) */
533 "        cmpb    %%cl, %%bl\n"
534 "        jae     .L_add_bits_to_len\n"    /* if (op <= bits) */
535 
536 "        movb    %%cl, %%ch\n"            /* stash op in ch, freeing cl */
537 "        xorl    %%eax, %%eax\n"
538 "        lodsw\n"                         /* al = *(ushort *)in++ */
539 "        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
540 "        addb    $16, %%bl\n"             /* bits += 16 */
541 "        shll    %%cl, %%eax\n"
542 "        orl     %%eax, %%edx\n"         /* hold |= *((ushort *)in)++ << bits */
543 "        movb    %%ch, %%cl\n"            /* move op back to ecx */
544 
545 ".L_add_bits_to_len:\n"
546 "        subb    %%cl, %%bl\n"
547 "        xorl    %%eax, %%eax\n"
548 "        incl    %%eax\n"
549 "        shll    %%cl, %%eax\n"
550 "        decl    %%eax\n"
551 "        andl    %%edx, %%eax\n"          /* eax &= hold */
552 "        shrl    %%cl, %%edx\n"
553 "        addl    %%eax, 64(%%esp)\n"      /* len += hold & mask[op] */
554 
555 ".L_decode_distance:\n"
556 "        cmpb    $15, %%bl\n"
557 "        ja      .L_get_distance_code\n"  /* if (15 < bits) */
558 
559 "        xorl    %%eax, %%eax\n"
560 "        lodsw\n"                         /* al = *(ushort *)in++ */
561 "        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
562 "        addb    $16, %%bl\n"             /* bits += 16 */
563 "        shll    %%cl, %%eax\n"
564 "        orl     %%eax, %%edx\n"         /* hold |= *((ushort *)in)++ << bits */
565 
566 ".L_get_distance_code:\n"
567 "        movl    60(%%esp), %%eax\n"      /* eax = dmask */
568 "        movl    36(%%esp), %%ecx\n"      /* ecx = dcode */
569 "        andl    %%edx, %%eax\n"          /* eax &= hold */
570 "        movl    (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */
571 
572 ".L_dodist:\n"
573 "        movl    %%eax, %%ebp\n"          /* dist = this */
574 "        shrl    $16, %%ebp\n"            /* dist = this.val */
575 "        movb    %%ah, %%cl\n"
576 "        subb    %%ah, %%bl\n"            /* bits -= this.bits */
577 "        shrl    %%cl, %%edx\n"           /* hold >>= this.bits */
578 "        movb    %%al, %%cl\n"            /* cl = this.op */
579 
580 "        testb   $16, %%al\n"             /* if ((op & 16) == 0) */
581 "        jz      .L_test_for_second_level_dist\n"
582 "        andb    $15, %%cl\n"             /* op &= 15 */
583 "        jz      .L_check_dist_one\n"
584 "        cmpb    %%cl, %%bl\n"
585 "        jae     .L_add_bits_to_dist\n"   /* if (op <= bits) 97.6% */
586 
587 "        movb    %%cl, %%ch\n"            /* stash op in ch, freeing cl */
588 "        xorl    %%eax, %%eax\n"
589 "        lodsw\n"                         /* al = *(ushort *)in++ */
590 "        movb    %%bl, %%cl\n"            /* cl = bits, needs it for shifting */
591 "        addb    $16, %%bl\n"             /* bits += 16 */
592 "        shll    %%cl, %%eax\n"
593 "        orl     %%eax, %%edx\n"        /* hold |= *((ushort *)in)++ << bits */
594 "        movb    %%ch, %%cl\n"            /* move op back to ecx */
595 
596 ".L_add_bits_to_dist:\n"
597 "        subb    %%cl, %%bl\n"
598 "        xorl    %%eax, %%eax\n"
599 "        incl    %%eax\n"
600 "        shll    %%cl, %%eax\n"
601 "        decl    %%eax\n"                 /* (1 << op) - 1 */
602 "        andl    %%edx, %%eax\n"          /* eax &= hold */
603 "        shrl    %%cl, %%edx\n"
604 "        addl    %%eax, %%ebp\n"          /* dist += hold & ((1 << op) - 1) */
605 
606 ".L_check_window:\n"
607 "        movl    %%esi, 8(%%esp)\n"       /* save in so from can use it's reg */
608 "        movl    %%edi, %%eax\n"
609 "        subl    20(%%esp), %%eax\n"      /* nbytes = out - beg */
610 
611 "        cmpl    %%ebp, %%eax\n"
612 "        jb      .L_clip_window\n"        /* if (dist > nbytes) 4.2% */
613 
614 "        movl    64(%%esp), %%ecx\n"      /* ecx = len */
615 "        movl    %%edi, %%esi\n"
616 "        subl    %%ebp, %%esi\n"          /* from = out - dist */
617 
618 "        sarl    %%ecx\n"
619 "        jnc     .L_copy_two\n"           /* if len % 2 == 0 */
620 
621 "        rep     movsw\n"
622 "        movb    (%%esi), %%al\n"
623 "        movb    %%al, (%%edi)\n"
624 "        incl    %%edi\n"
625 
626 "        movl    8(%%esp), %%esi\n"       /* move in back to %esi, toss from */
627 "        movl    32(%%esp), %%ebp\n"      /* ebp = lcode */
628 "        jmp     .L_while_test\n"
629 
630 ".L_copy_two:\n"
631 "        rep     movsw\n"
632 "        movl    8(%%esp), %%esi\n"       /* move in back to %esi, toss from */
633 "        movl    32(%%esp), %%ebp\n"      /* ebp = lcode */
634 "        jmp     .L_while_test\n"
635 
636 ".align 32,0x90\n"
637 ".L_check_dist_one:\n"
638 "        cmpl    $1, %%ebp\n"            /* if dist 1, is a memset */
639 "        jne     .L_check_window\n"
640 "        cmpl    %%edi, 20(%%esp)\n"
641 "        je      .L_check_window\n"      /* out == beg, if outside window */
642 
643 "        movl    64(%%esp), %%ecx\n"      /* ecx = len */
644 "        movb    -1(%%edi), %%al\n"
645 "        movb    %%al, %%ah\n"
646 
647 "        sarl    %%ecx\n"
648 "        jnc     .L_set_two\n"
649 "        movb    %%al, (%%edi)\n"
650 "        incl    %%edi\n"
651 
652 ".L_set_two:\n"
653 "        rep     stosw\n"
654 "        movl    32(%%esp), %%ebp\n"      /* ebp = lcode */
655 "        jmp     .L_while_test\n"
656 
657 ".align 32,0x90\n"
658 ".L_test_for_second_level_length:\n"
659 "        testb   $64, %%al\n"
660 "        jnz     .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
661 
662 "        xorl    %%eax, %%eax\n"
663 "        incl    %%eax\n"
664 "        shll    %%cl, %%eax\n"
665 "        decl    %%eax\n"
666 "        andl    %%edx, %%eax\n"         /* eax &= hold */
667 "        addl    64(%%esp), %%eax\n"     /* eax += len */
668 "        movl    (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
669 "        jmp     .L_dolen\n"
670 
671 ".align 32,0x90\n"
672 ".L_test_for_second_level_dist:\n"
673 "        testb   $64, %%al\n"
674 "        jnz     .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
675 
676 "        xorl    %%eax, %%eax\n"
677 "        incl    %%eax\n"
678 "        shll    %%cl, %%eax\n"
679 "        decl    %%eax\n"
680 "        andl    %%edx, %%eax\n"         /* eax &= hold */
681 "        addl    %%ebp, %%eax\n"         /* eax += dist */
682 "        movl    36(%%esp), %%ecx\n"     /* ecx = dcode */
683 "        movl    (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
684 "        jmp     .L_dodist\n"
685 
686 ".align 32,0x90\n"
687 ".L_clip_window:\n"
688 "        movl    %%eax, %%ecx\n"
689 "        movl    48(%%esp), %%eax\n"     /* eax = wsize */
690 "        negl    %%ecx\n"                /* nbytes = -nbytes */
691 "        movl    28(%%esp), %%esi\n"     /* from = window */
692 
693 "        cmpl    %%ebp, %%eax\n"
694 "        jb      .L_invalid_distance_too_far\n" /* if (dist > wsize) */
695 
696 "        addl    %%ebp, %%ecx\n"         /* nbytes = dist - nbytes */
697 "        cmpl    $0, 52(%%esp)\n"
698 "        jne     .L_wrap_around_window\n" /* if (write != 0) */
699 
700 "        subl    %%ecx, %%eax\n"
701 "        addl    %%eax, %%esi\n"         /* from += wsize - nbytes */
702 
703 "        movl    64(%%esp), %%eax\n"     /* eax = len */
704 "        cmpl    %%ecx, %%eax\n"
705 "        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
706 
707 "        subl    %%ecx, %%eax\n"         /* len -= nbytes */
708 "        rep     movsb\n"
709 "        movl    %%edi, %%esi\n"
710 "        subl    %%ebp, %%esi\n"         /* from = out - dist */
711 "        jmp     .L_do_copy\n"
712 
713 ".align 32,0x90\n"
714 ".L_wrap_around_window:\n"
715 "        movl    52(%%esp), %%eax\n"     /* eax = write */
716 "        cmpl    %%eax, %%ecx\n"
717 "        jbe     .L_contiguous_in_window\n" /* if (write >= nbytes) */
718 
719 "        addl    48(%%esp), %%esi\n"     /* from += wsize */
720 "        addl    %%eax, %%esi\n"         /* from += write */
721 "        subl    %%ecx, %%esi\n"         /* from -= nbytes */
722 "        subl    %%eax, %%ecx\n"         /* nbytes -= write */
723 
724 "        movl    64(%%esp), %%eax\n"     /* eax = len */
725 "        cmpl    %%ecx, %%eax\n"
726 "        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
727 
728 "        subl    %%ecx, %%eax\n"         /* len -= nbytes */
729 "        rep     movsb\n"
730 "        movl    28(%%esp), %%esi\n"     /* from = window */
731 "        movl    52(%%esp), %%ecx\n"     /* nbytes = write */
732 "        cmpl    %%ecx, %%eax\n"
733 "        jbe     .L_do_copy\n"           /* if (nbytes >= len) */
734 
735 "        subl    %%ecx, %%eax\n"         /* len -= nbytes */
736 "        rep     movsb\n"
737 "        movl    %%edi, %%esi\n"
738 "        subl    %%ebp, %%esi\n"         /* from = out - dist */
739 "        jmp     .L_do_copy\n"
740 
741 ".align 32,0x90\n"
742 ".L_contiguous_in_window:\n"         /* entered with eax = write, ecx = nbytes, esi = window */
743 "        addl    %%eax, %%esi\n"
744 "        subl    %%ecx, %%esi\n"         /* from += write - nbytes */
745 
746 "        movl    64(%%esp), %%eax\n"     /* eax = len */
747 "        cmpl    %%ecx, %%eax\n"
748 "        jbe     .L_do_copy\n"           /* if (nbytes >= len), a single copy of len bytes is enough */
749 
750 "        subl    %%ecx, %%eax\n"         /* len -= nbytes */
751 "        rep     movsb\n"                /* copy nbytes bytes (count in ecx) from the window */
752 "        movl    %%edi, %%esi\n"
753 "        subl    %%ebp, %%esi\n"         /* from = out - dist */
754 "        jmp     .L_do_copy\n"           /* unconditional: copy the remaining len bytes from the output */
755 
756 ".align 32,0x90\n"
757 ".L_do_copy:\n"
758 "        movl    %%eax, %%ecx\n"
759 "        rep     movsb\n"
760 
761 "        movl    8(%%esp), %%esi\n"      /* move in back to %esi, toss from */
762 "        movl    32(%%esp), %%ebp\n"     /* ebp = lcode */
763 "        jmp     .L_while_test\n"
764 
765 ".L_test_for_end_of_block:\n"
766 "        testb   $32, %%al\n"
767 "        jz      .L_invalid_literal_length_code\n"
768 "        movl    $1, 72(%%esp)\n"
769 "        jmp     .L_break_loop_with_status\n"
770 
771 ".L_invalid_literal_length_code:\n"
772 "        movl    $2, 72(%%esp)\n"
773 "        jmp     .L_break_loop_with_status\n"
774 
775 ".L_invalid_distance_code:\n"
776 "        movl    $3, 72(%%esp)\n"
777 "        jmp     .L_break_loop_with_status\n"
778 
779 ".L_invalid_distance_too_far:\n"
780 "        movl    8(%%esp), %%esi\n"
781 "        movl    $4, 72(%%esp)\n"
782 "        jmp     .L_break_loop_with_status\n"
783 
784 ".L_break_loop:\n"
785 "        movl    $0, 72(%%esp)\n"
786 
787 ".L_break_loop_with_status:\n"
788 /* put in, out, bits, and hold back into ar and pop esp */
789 "        movl    %%esi, 8(%%esp)\n"      /* save in */
790 "        movl    %%edi, 16(%%esp)\n"     /* save out */
791 "        movl    %%ebx, 44(%%esp)\n"     /* save bits */
792 "        movl    %%edx, 40(%%esp)\n"     /* save hold */
793 "        movl    4(%%esp), %%ebp\n"      /* restore esp, ebp */
794 "        movl    (%%esp), %%esp\n"
795           :
796           : "m" (ar)
797           : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
798     );
799 #elif defined( _MSC_VER ) && ! defined( _M_AMD64 )
800     __asm {
801 	lea	eax, ar
802 	mov	[eax], esp         /* save esp, ebp */
803 	mov	[eax+4], ebp
804 	mov	esp, eax
805 	mov	esi, [esp+8]       /* esi = in */
806 	mov	edi, [esp+16]      /* edi = out */
807 	mov	edx, [esp+40]      /* edx = hold */
808 	mov	ebx, [esp+44]      /* ebx = bits */
809 	mov	ebp, [esp+32]      /* ebp = lcode */
810 
811 	cld
812 	jmp	L_do_loop
813 
814 ALIGN 4
815 L_while_test:
816 	cmp	[esp+24], edi
817 	jbe	L_break_loop
818 	cmp	[esp+12], esi
819 	jbe	L_break_loop
820 
821 L_do_loop:
822 	cmp	bl, 15
823 	ja	L_get_length_code    /* if (15 < bits) */
824 
825 	xor	eax, eax
826 	lodsw                         /* al = *(ushort *)in++ */
827 	mov	cl, bl            /* cl = bits, needs it for shifting */
828 	add	bl, 16             /* bits += 16 */
829 	shl	eax, cl
830 	or	edx, eax        /* hold |= *((ushort *)in)++ << bits */
831 
832 L_get_length_code:
833 	mov	eax, [esp+56]      /* eax = lmask */
834 	and	eax, edx          /* eax &= hold */
835 	mov	eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */
836 
837 L_dolen:
838 	mov	cl, ah            /* cl = this.bits */
839 	sub	bl, ah            /* bits -= this.bits */
840 	shr	edx, cl           /* hold >>= this.bits */
841 
842 	test	al, al
843 	jnz	L_test_for_length_base /* if (op != 0) 45.7% */
844 
845 	shr	eax, 16            /* output this.val char */
846 	stosb
847 	jmp	L_while_test
848 
849 ALIGN 4
850 L_test_for_length_base:
851 	mov	ecx, eax          /* len = this */
852 	shr	ecx, 16            /* len = this.val */
853 	mov	[esp+64], ecx      /* save len */
854 	mov	cl, al
855 
856 	test	al, 16
857 	jz	L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
858 	and	cl, 15             /* op &= 15 */
859 	jz	L_decode_distance    /* if (!op) */
860 	cmp	bl, cl
861 	jae	L_add_bits_to_len    /* if (op <= bits) */
862 
863 	mov	ch, cl            /* stash op in ch, freeing cl */
864 	xor	eax, eax
865 	lodsw                         /* al = *(ushort *)in++ */
866 	mov	cl, bl            /* cl = bits, needs it for shifting */
867 	add	bl, 16             /* bits += 16 */
868 	shl	eax, cl
869 	or	edx, eax         /* hold |= *((ushort *)in)++ << bits */
870 	mov	cl, ch            /* move op back to ecx */
871 
872 L_add_bits_to_len:
873 	sub	bl, cl
874 	xor	eax, eax
875 	inc	eax
876 	shl	eax, cl
877 	dec	eax
878 	and	eax, edx          /* eax &= hold */
879 	shr	edx, cl
880 	add	[esp+64], eax      /* len += hold & mask[op] */
881 
882 L_decode_distance:
883 	cmp	bl, 15
884 	ja	L_get_distance_code  /* if (15 < bits) */
885 
886 	xor	eax, eax
887 	lodsw                         /* al = *(ushort *)in++ */
888 	mov	cl, bl            /* cl = bits, needs it for shifting */
889 	add	bl, 16             /* bits += 16 */
890 	shl	eax, cl
891 	or	edx, eax         /* hold |= *((ushort *)in)++ << bits */
892 
893 L_get_distance_code:
894 	mov	eax, [esp+60]      /* eax = dmask */
895 	mov	ecx, [esp+36]      /* ecx = dcode */
896 	and	eax, edx          /* eax &= hold */
897 	mov	eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */
898 
899 L_dodist:
900 	mov	ebp, eax          /* dist = this */
901 	shr	ebp, 16            /* dist = this.val */
902 	mov	cl, ah
903 	sub	bl, ah            /* bits -= this.bits */
904 	shr	edx, cl           /* hold >>= this.bits */
905 	mov	cl, al            /* cl = this.op */
906 
907 	test	al, 16             /* if ((op & 16) == 0) */
908 	jz	L_test_for_second_level_dist
909 	and	cl, 15             /* op &= 15 */
910 	jz	L_check_dist_one
911 	cmp	bl, cl
912 	jae	L_add_bits_to_dist   /* if (op <= bits) 97.6% */
913 
914 	mov	ch, cl            /* stash op in ch, freeing cl */
915 	xor	eax, eax
916 	lodsw                         /* al = *(ushort *)in++ */
917 	mov	cl, bl            /* cl = bits, needs it for shifting */
918 	add	bl, 16             /* bits += 16 */
919 	shl	eax, cl
920 	or	edx, eax        /* hold |= *((ushort *)in)++ << bits */
921 	mov	cl, ch            /* move op back to ecx */
922 
923 L_add_bits_to_dist:
924 	sub	bl, cl
925 	xor	eax, eax
926 	inc	eax
927 	shl	eax, cl
928 	dec	eax                 /* (1 << op) - 1 */
929 	and	eax, edx          /* eax &= hold */
930 	shr	edx, cl
931 	add	ebp, eax          /* dist += hold & ((1 << op) - 1) */
932 
933 L_check_window:
934 	mov	[esp+8], esi       /* save in so from can use its reg (esi) */
935 	mov	eax, edi
936 	sub	eax, [esp+20]      /* nbytes = out - beg */
937 
938 	cmp	eax, ebp
939 	jb	L_clip_window        /* if (dist > nbytes) 4.2% */
940 
941 	mov	ecx, [esp+64]      /* ecx = len */
942 	mov	esi, edi
943 	sub	esi, ebp          /* from = out - dist */
944 
945 	sar	ecx, 1             /* ecx = len / 2 (word count), CF = low bit of len */
946 	jnc	L_copy_two         /* even len: the word copy alone is enough */
947 
948 	rep     movsw              /* odd len: copy len / 2 words ... */
949 	mov	al, [esi]          /* ... then the one remaining byte */
950 	mov	[edi], al
951 	inc	edi
952 
953 	mov	esi, [esp+8]      /* move in back to esi, toss from */
954 	mov	ebp, [esp+32]     /* ebp = lcode */
955 	jmp	L_while_test
956 
957 L_copy_two:
958 	rep     movsw              /* even len: copy len / 2 words */
959 	mov	esi, [esp+8]      /* move in back to esi, toss from */
960 	mov	ebp, [esp+32]     /* ebp = lcode */
961 	jmp	L_while_test
962 
963 ALIGN 4
964 L_check_dist_one:
965 	cmp	ebp, 1            /* if dist 1, is a memset */
966 	jne	L_check_window
967 	cmp	[esp+20], edi
968 	je	L_check_window    /* out == beg, if outside window */
969 
970 	mov	ecx, [esp+64]     /* ecx = len */
971 	mov	al, [edi-1]
972 	mov	ah, al
973 
974 	sar	ecx, 1
975 	jnc	L_set_two
976 	mov	[edi], al         /* memset out with from[-1] */
977 	inc	edi
978 
979 L_set_two:
980 	rep     stosw
981 	mov	ebp, [esp+32]     /* ebp = lcode */
982 	jmp	L_while_test
983 
984 ALIGN 4
985 L_test_for_second_level_length:
986 	test	al, 64
987 	jnz	L_test_for_end_of_block /* if ((op & 64) != 0) */
988 
989 	xor	eax, eax
990 	inc	eax
991 	shl	eax, cl
992 	dec	eax
993 	and	eax, edx         /* eax &= hold */
994 	add	eax, [esp+64]     /* eax += len */
995 	mov	eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/
996 	jmp	L_dolen
997 
998 ALIGN 4
999 L_test_for_second_level_dist:
1000 	test	al, 64
1001 	jnz	L_invalid_distance_code /* if ((op & 64) != 0) */
1002 
1003 	xor	eax, eax
1004 	inc	eax
1005 	shl	eax, cl
1006 	dec	eax
1007 	and	eax, edx         /* eax &= hold */
1008 	add	eax, ebp         /* eax += dist */
1009 	mov	ecx, [esp+36]     /* ecx = dcode */
1010 	mov	eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/
1011 	jmp	L_dodist
1012 
1013 ALIGN 4
1014 L_clip_window:
1015 	mov	ecx, eax
1016 	mov	eax, [esp+48]     /* eax = wsize */
1017 	neg	ecx                /* nbytes = -nbytes */
1018 	mov	esi, [esp+28]     /* from = window */
1019 
1020 	cmp	eax, ebp
1021 	jb	L_invalid_distance_too_far /* if (dist > wsize) */
1022 
1023 	add	ecx, ebp         /* nbytes = dist - nbytes */
1024 	cmp	dword ptr [esp+52], 0
1025 	jne	L_wrap_around_window /* if (write != 0) */
1026 
1027 	sub	eax, ecx
1028 	add	esi, eax         /* from += wsize - nbytes */
1029 
1030 	mov	eax, [esp+64]    /* eax = len */
1031 	cmp	eax, ecx
1032 	jbe	L_do_copy          /* if (nbytes >= len) */
1033 
1034 	sub	eax, ecx         /* len -= nbytes */
1035 	rep     movsb
1036 	mov	esi, edi
1037 	sub	esi, ebp         /* from = out - dist */
1038 	jmp	L_do_copy
1039 
1040 ALIGN 4
1041 L_wrap_around_window:
1042 	mov	eax, [esp+52]    /* eax = write */
1043 	cmp	ecx, eax
1044 	jbe	L_contiguous_in_window /* if (write >= nbytes) */
1045 
1046 	add	esi, [esp+48]    /* from += wsize */
1047 	add	esi, eax         /* from += write */
1048 	sub	esi, ecx         /* from -= nbytes */
1049 	sub	ecx, eax         /* nbytes -= write */
1050 
1051 	mov	eax, [esp+64]    /* eax = len */
1052 	cmp	eax, ecx
1053 	jbe	L_do_copy          /* if (nbytes >= len) */
1054 
1055 	sub	eax, ecx         /* len -= nbytes */
1056 	rep     movsb
1057 	mov	esi, [esp+28]     /* from = window */
1058 	mov	ecx, [esp+52]     /* nbytes = write */
1059 	cmp	eax, ecx
1060 	jbe	L_do_copy          /* if (nbytes >= len) */
1061 
1062 	sub	eax, ecx         /* len -= nbytes */
1063 	rep     movsb
1064 	mov	esi, edi
1065 	sub	esi, ebp         /* from = out - dist */
1066 	jmp	L_do_copy
1067 
1068 ALIGN 4
1069 L_contiguous_in_window:
1070 	add	esi, eax
1071 	sub	esi, ecx         /* from += write - nbytes */
1072 
1073 	mov	eax, [esp+64]    /* eax = len */
1074 	cmp	eax, ecx
1075 	jbe	L_do_copy          /* if (nbytes >= len) */
1076 
1077 	sub	eax, ecx         /* len -= nbytes */
1078 	rep     movsb
1079 	mov	esi, edi
1080 	sub	esi, ebp         /* from = out - dist */
1081 	jmp	L_do_copy
1082 
1083 ALIGN 4
1084 L_do_copy:
1085 	mov	ecx, eax
1086 	rep     movsb
1087 
1088 	mov	esi, [esp+8]      /* move in back to %esi, toss from */
1089 	mov	ebp, [esp+32]     /* ebp = lcode */
1090 	jmp	L_while_test
1091 
1092 L_test_for_end_of_block:
1093 	test	al, 32
1094 	jz	L_invalid_literal_length_code
1095 	mov	dword ptr [esp+72], 1
1096 	jmp	L_break_loop_with_status
1097 
1098 L_invalid_literal_length_code:
1099 	mov	dword ptr [esp+72], 2
1100 	jmp	L_break_loop_with_status
1101 
1102 L_invalid_distance_code:
1103 	mov	dword ptr [esp+72], 3
1104 	jmp	L_break_loop_with_status
1105 
1106 L_invalid_distance_too_far:       /* reached from L_clip_window when dist > wsize */
1107 	mov	esi, [esp+8]      /* restore in (saved at [esp+8]); esi was reused for from. [esp+4] is the saved ebp */
1108 	mov	dword ptr [esp+72], 4   /* status 4: reported as "invalid distance too far back" */
1109 	jmp	L_break_loop_with_status  /* exit path stores esi back as in */
1110 
1111 L_break_loop:
1112 	mov	dword ptr [esp+72], 0
1113 
1114 L_break_loop_with_status:
1115 /* put in, out, bits, and hold back into ar and pop esp */
1116 	mov	[esp+8], esi     /* save in */
1117 	mov	[esp+16], edi    /* save out */
1118 	mov	[esp+44], ebx    /* save bits */
1119 	mov	[esp+40], edx    /* save hold */
1120 	mov	ebp, [esp+4]     /* restore esp, ebp */
1121 	mov	esp, [esp]
1122     }
1123 #else
1124 #error "x86 architecture not defined"
1125 #endif
1126 
1127     if (ar.status > 1) {
1128         if (ar.status == 2)
1129             strm->msg = "invalid literal/length code";
1130         else if (ar.status == 3)
1131             strm->msg = "invalid distance code";
1132         else
1133             strm->msg = "invalid distance too far back";
1134         state->mode = BAD;
1135     }
1136     else if ( ar.status == 1 ) {
1137         state->mode = TYPE;
1138     }
1139 
1140     /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
1141     ar.len = ar.bits >> 3;
1142     ar.in -= ar.len;
1143     ar.bits -= ar.len << 3;
1144     ar.hold &= (1U << ar.bits) - 1;
1145 
1146     /* update state and return */
1147     strm->next_in = ar.in;
1148     strm->next_out = ar.out;
1149     strm->avail_in = (unsigned)(ar.in < ar.last ?
1150                                 PAD_AVAIL_IN + (ar.last - ar.in) :
1151                                 PAD_AVAIL_IN - (ar.in - ar.last));
1152     strm->avail_out = (unsigned)(ar.out < ar.end ?
1153                                  PAD_AVAIL_OUT + (ar.end - ar.out) :
1154                                  PAD_AVAIL_OUT - (ar.out - ar.end));
1155     state->hold = ar.hold;
1156     state->bits = ar.bits;
1157     return;
1158 }
1159 
1160