xref: /minix3/common/dist/zlib/contrib/inflate86/inffast.S (revision 44bedb31d842b4b0444105519bcf929a69fe2dc1)
1*44bedb31SLionel Sambuc/*
2*44bedb31SLionel Sambuc * inffast.S is a hand tuned assembler version of:
3*44bedb31SLionel Sambuc *
4*44bedb31SLionel Sambuc * inffast.c -- fast decoding
5*44bedb31SLionel Sambuc * Copyright (C) 1995-2003 Mark Adler
6*44bedb31SLionel Sambuc * For conditions of distribution and use, see copyright notice in zlib.h
7*44bedb31SLionel Sambuc *
8*44bedb31SLionel Sambuc * Copyright (C) 2003 Chris Anderson <christop@charm.net>
9*44bedb31SLionel Sambuc * Please use the copyright conditions above.
10*44bedb31SLionel Sambuc *
11*44bedb31SLionel Sambuc * This version (Jan-23-2003) of inflate_fast was coded and tested under
12*44bedb31SLionel Sambuc * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution.  On that
13*44bedb31SLionel Sambuc * machine, I found that gzip style archives decompressed about 20% faster than
14*44bedb31SLionel Sambuc * the gcc-3.2 -O3 -fomit-frame-pointer compiled version.  Your results will
15*44bedb31SLionel Sambuc * depend on how large of a buffer is used for z_stream.next_in & next_out
16*44bedb31SLionel Sambuc * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in
17*44bedb31SLionel Sambuc * stream processing I/O and crc32/adler32.  In my case, this routine used
18*44bedb31SLionel Sambuc * 70% of the cpu time and crc32 used 20%.
19*44bedb31SLionel Sambuc *
20*44bedb31SLionel Sambuc * I am confident that this version will work in the general case, but I have
21*44bedb31SLionel Sambuc * not tested a wide variety of datasets or a wide variety of platforms.
22*44bedb31SLionel Sambuc *
23*44bedb31SLionel Sambuc * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating.
24*44bedb31SLionel Sambuc * It should be a runtime flag instead of compile time flag...
25*44bedb31SLionel Sambuc *
26*44bedb31SLionel Sambuc * Jan-26-2003 -- Added runtime check for MMX support with cpuid instruction.
27*44bedb31SLionel Sambuc * With -DUSE_MMX, only MMX code is compiled.  With -DNO_MMX, only non-MMX code
28*44bedb31SLionel Sambuc * is compiled.  Without either option, runtime detection is enabled.  Runtime
29*44bedb31SLionel Sambuc * detection should work on all modern cpus and the recommended algorithm (flip
30*44bedb31SLionel Sambuc * ID bit on eflags and then use the cpuid instruction) is used in many
31*44bedb31SLionel Sambuc * multimedia applications.  Tested under win2k with gcc-2.95 and gas-2.12
32*44bedb31SLionel Sambuc * distributed with cygwin3.  Compiling with gcc-2.95 -c inffast.S -o
33*44bedb31SLionel Sambuc * inffast.obj generates a COFF object which can then be linked with MSVC++
34*44bedb31SLionel Sambuc * compiled code.  Tested under FreeBSD 4.7 with gcc-2.95.
35*44bedb31SLionel Sambuc *
36*44bedb31SLionel Sambuc * Jan-28-2003 -- Tested Athlon XP... MMX mode is slower than no MMX (and
37*44bedb31SLionel Sambuc * slower than compiler generated code).  Adjusted cpuid check to use the MMX
38*44bedb31SLionel Sambuc * code only for Pentiums < P4 until I have more data on the P4.  Speed
39*44bedb31SLionel Sambuc * improvement is only about 15% on the Athlon when compared with code generated
40*44bedb31SLionel Sambuc * with MSVC++.  Not sure yet, but I think the P4 will also be slower using the
41*44bedb31SLionel Sambuc * MMX mode because many of its x86 ALU instructions execute in .5 cycles and
42*44bedb31SLionel Sambuc * have less latency than MMX ops.  Added code to buffer the last 11 bytes of
43*44bedb31SLionel Sambuc * the input stream since the MMX code grabs bits in chunks of 32, which
44*44bedb31SLionel Sambuc * differs from the inffast.c algorithm.  I don't think there would have been
45*44bedb31SLionel Sambuc * read overruns where a page boundary was crossed (a segfault), but there
46*44bedb31SLionel Sambuc * could have been overruns when next_in ends on unaligned memory (uninitialized
47*44bedb31SLionel Sambuc * memory read).
48*44bedb31SLionel Sambuc *
49*44bedb31SLionel Sambuc * Mar-13-2003 -- P4 MMX is slightly slower than P4 NO_MMX.  I created a C
50*44bedb31SLionel Sambuc * version of the non-MMX code so that it doesn't depend on zstrm and zstate
51*44bedb31SLionel Sambuc * structure offsets which are hard coded in this file.  This was last tested
52*44bedb31SLionel Sambuc * with zlib-1.2.0 which is currently in beta testing, newer versions of this
53*44bedb31SLionel Sambuc * and inffas86.c can be found at http://www.eetbeetee.com/zlib/ and
54*44bedb31SLionel Sambuc * http://www.charm.net/~christop/zlib/
55*44bedb31SLionel Sambuc */
56*44bedb31SLionel Sambuc
57*44bedb31SLionel Sambuc
58*44bedb31SLionel Sambuc/*
59*44bedb31SLionel Sambuc * if you have underscore linking problems (_inflate_fast undefined), try
60*44bedb31SLionel Sambuc * using -DGAS_COFF
61*44bedb31SLionel Sambuc */
/*
 * Object-format selection.  If the builder passed neither -DGAS_COFF nor
 * -DGAS_ELF, choose COFF when WIN32 or __CYGWIN__ is defined and ELF
 * otherwise.
 */
62*44bedb31SLionel Sambuc#if ! defined( GAS_COFF ) && ! defined( GAS_ELF )
63*44bedb31SLionel Sambuc
64*44bedb31SLionel Sambuc#if defined( WIN32 ) || defined( __CYGWIN__ )
65*44bedb31SLionel Sambuc#define GAS_COFF /* windows object format */
66*44bedb31SLionel Sambuc#else
67*44bedb31SLionel Sambuc#define GAS_ELF
68*44bedb31SLionel Sambuc#endif
69*44bedb31SLionel Sambuc
70*44bedb31SLionel Sambuc#endif /* ! GAS_COFF && ! GAS_ELF */
71*44bedb31SLionel Sambuc
72*44bedb31SLionel Sambuc
73*44bedb31SLionel Sambuc#if defined( GAS_COFF )
74*44bedb31SLionel Sambuc
75*44bedb31SLionel Sambuc/* coff externals have underscores: rename both exported symbols so every
 * later use of the plain names in this file resolves to the prefixed ones */
76*44bedb31SLionel Sambuc#define inflate_fast _inflate_fast
77*44bedb31SLionel Sambuc#define inflate_fast_use_mmx _inflate_fast_use_mmx
78*44bedb31SLionel Sambuc
79*44bedb31SLionel Sambuc#endif /* GAS_COFF */
80*44bedb31SLionel Sambuc
81*44bedb31SLionel Sambuc
82*44bedb31SLionel Sambuc.file "inffast.S"
83*44bedb31SLionel Sambuc
84*44bedb31SLionel Sambuc.globl inflate_fast
85*44bedb31SLionel Sambuc
/*
 * Constant message strings.  Note that they are emitted into .text, next to
 * the code, not into a separate read-only data section.  Each one is a
 * NUL-terminated .string behind a file-local (.L) label, preceded by
 * ".align 4,0" (padding bytes are 0).
 * NOTE(review): the code that references these labels is not in the part of
 * the file shown here; presumably they are stored in strm->msg (msg_strm)
 * when a bad code is decoded -- confirm against the error paths.
 */
86*44bedb31SLionel Sambuc.text
87*44bedb31SLionel Sambuc.align 4,0
88*44bedb31SLionel Sambuc.L_invalid_literal_length_code_msg:
89*44bedb31SLionel Sambuc.string "invalid literal/length code"
90*44bedb31SLionel Sambuc
91*44bedb31SLionel Sambuc.align 4,0
92*44bedb31SLionel Sambuc.L_invalid_distance_code_msg:
93*44bedb31SLionel Sambuc.string "invalid distance code"
94*44bedb31SLionel Sambuc
95*44bedb31SLionel Sambuc.align 4,0
96*44bedb31SLionel Sambuc.L_invalid_distance_too_far_msg:
97*44bedb31SLionel Sambuc.string "invalid distance too far back"
98*44bedb31SLionel Sambuc
/*
 * Bit-mask lookup table, assembled only when MMX code may be built (i.e. not
 * with -DNO_MMX).  It holds 33 .long entries, one for each N in 0..32, and
 * like the message strings above it lives in .text.  The non-MMX path in
 * this file does not use it: it builds masks on the fly with shll/decl.
 */
99*44bedb31SLionel Sambuc#if ! defined( NO_MMX )
100*44bedb31SLionel Sambuc.align 4,0
101*44bedb31SLionel Sambuc.L_mask: /* mask[N] = ( 1 << N ) - 1, for N = 0..32 */
102*44bedb31SLionel Sambuc.long 0
103*44bedb31SLionel Sambuc.long 1
104*44bedb31SLionel Sambuc.long 3
105*44bedb31SLionel Sambuc.long 7
106*44bedb31SLionel Sambuc.long 15
107*44bedb31SLionel Sambuc.long 31
108*44bedb31SLionel Sambuc.long 63
109*44bedb31SLionel Sambuc.long 127
110*44bedb31SLionel Sambuc.long 255
111*44bedb31SLionel Sambuc.long 511
112*44bedb31SLionel Sambuc.long 1023
113*44bedb31SLionel Sambuc.long 2047
114*44bedb31SLionel Sambuc.long 4095
115*44bedb31SLionel Sambuc.long 8191
116*44bedb31SLionel Sambuc.long 16383
117*44bedb31SLionel Sambuc.long 32767
118*44bedb31SLionel Sambuc.long 65535
119*44bedb31SLionel Sambuc.long 131071
120*44bedb31SLionel Sambuc.long 262143
121*44bedb31SLionel Sambuc.long 524287
122*44bedb31SLionel Sambuc.long 1048575
123*44bedb31SLionel Sambuc.long 2097151
124*44bedb31SLionel Sambuc.long 4194303
125*44bedb31SLionel Sambuc.long 8388607
126*44bedb31SLionel Sambuc.long 16777215
127*44bedb31SLionel Sambuc.long 33554431
128*44bedb31SLionel Sambuc.long 67108863
129*44bedb31SLionel Sambuc.long 134217727
130*44bedb31SLionel Sambuc.long 268435455
131*44bedb31SLionel Sambuc.long 536870911
132*44bedb31SLionel Sambuc.long 1073741823
133*44bedb31SLionel Sambuc.long 2147483647
134*44bedb31SLionel Sambuc.long 4294967295
135*44bedb31SLionel Sambuc#endif /* ! NO_MMX */
136*44bedb31SLionel Sambuc
137*44bedb31SLionel Sambuc.text
138*44bedb31SLionel Sambuc
139*44bedb31SLionel Sambuc/*
140*44bedb31SLionel Sambuc * struct z_stream offsets, in zlib.h
 *
 * These are hard-coded byte offsets (see the Mar-13-2003 note in the file
 * header, which records zlib-1.2.0 as the last version tested).  They are
 * used as displacements in 32-bit movl accesses and must match the field
 * layout of the zlib.h this file is built against.
141*44bedb31SLionel Sambuc */
142*44bedb31SLionel Sambuc#define next_in_strm   0   /* strm->next_in */
143*44bedb31SLionel Sambuc#define avail_in_strm  4   /* strm->avail_in */
144*44bedb31SLionel Sambuc#define next_out_strm  12  /* strm->next_out */
145*44bedb31SLionel Sambuc#define avail_out_strm 16  /* strm->avail_out */
146*44bedb31SLionel Sambuc#define msg_strm       24  /* strm->msg */
147*44bedb31SLionel Sambuc#define state_strm     28  /* strm->state */
148*44bedb31SLionel Sambuc
149*44bedb31SLionel Sambuc/*
150*44bedb31SLionel Sambuc * struct inflate_state offsets, in inflate.h
 *
 * Hard-coded in the same way as the z_stream offsets; they must match the
 * field layout of the inflate.h in use.
151*44bedb31SLionel Sambuc */
152*44bedb31SLionel Sambuc#define mode_state     0   /* state->mode */
153*44bedb31SLionel Sambuc#define wsize_state    32  /* state->wsize */
154*44bedb31SLionel Sambuc#define write_state    40  /* state->write */
155*44bedb31SLionel Sambuc#define window_state   44  /* state->window */
156*44bedb31SLionel Sambuc#define hold_state     48  /* state->hold */
157*44bedb31SLionel Sambuc#define bits_state     52  /* state->bits */
158*44bedb31SLionel Sambuc#define lencode_state  68  /* state->lencode */
159*44bedb31SLionel Sambuc#define distcode_state 72  /* state->distcode */
160*44bedb31SLionel Sambuc#define lenbits_state  76  /* state->lenbits */
161*44bedb31SLionel Sambuc#define distbits_state 80  /* state->distbits */
162*44bedb31SLionel Sambuc
163*44bedb31SLionel Sambuc/*
164*44bedb31SLionel Sambuc * inflate_fast's activation record
 *
 * All offsets are relative to %esp after the prologue.  The prologue pushes
 * %edi, %esi, %ebp, %ebx and eflags (5 * 4 = 20 bytes, with pushf assumed to
 * push 32 bits) and then subtracts local_var_size.  Together with the 4-byte
 * return address this places the first argument at local_var_size + 24 and
 * the second at local_var_size + 28.  Changing the number of pushes or
 * local_var_size requires updating strm_sp and start_sp by hand.
165*44bedb31SLionel Sambuc */
166*44bedb31SLionel Sambuc#define local_var_size 64 /* how much local space for vars */
167*44bedb31SLionel Sambuc#define strm_sp        88 /* first arg: z_stream * (local_var_size + 24) */
168*44bedb31SLionel Sambuc#define start_sp       92 /* second arg: unsigned int (local_var_size + 28) */
169*44bedb31SLionel Sambuc
170*44bedb31SLionel Sambuc/*
171*44bedb31SLionel Sambuc * offsets for local vars on stack
 *
 * The slots below cover bytes 0..63 of the frame, i.e. exactly
 * local_var_size.  Every slot is 4 bytes except buf, which is the 12-byte
 * scratch buffer at bytes 28..39 that inflate_fast fills with a zero-padded
 * copy of the input when "in" is not below "last" on entry.
172*44bedb31SLionel Sambuc */
173*44bedb31SLionel Sambuc#define out            60  /* unsigned char* */
174*44bedb31SLionel Sambuc#define window         56  /* unsigned char* */
175*44bedb31SLionel Sambuc#define wsize          52  /* unsigned int */
176*44bedb31SLionel Sambuc#define write          48  /* unsigned int */
177*44bedb31SLionel Sambuc#define in             44  /* unsigned char* */
178*44bedb31SLionel Sambuc#define beg            40  /* unsigned char* */
179*44bedb31SLionel Sambuc#define buf            28  /* char[ 12 ] */
180*44bedb31SLionel Sambuc#define len            24  /* unsigned int */
181*44bedb31SLionel Sambuc#define last           20  /* unsigned char* */
182*44bedb31SLionel Sambuc#define end            16  /* unsigned char* */
183*44bedb31SLionel Sambuc#define dcode          12  /* code* */
184*44bedb31SLionel Sambuc#define lcode           8  /* code* */
185*44bedb31SLionel Sambuc#define dmask           4  /* unsigned int */
186*44bedb31SLionel Sambuc#define lmask           0  /* unsigned int */
187*44bedb31SLionel Sambuc
188*44bedb31SLionel Sambuc/*
189*44bedb31SLionel Sambuc * typedef enum inflate_mode consts, in inflate.h
 *
 * Hard-coded numeric copies of enum values; the enum itself is not visible
 * from this file.
 * NOTE(review): confirm that 11 and 26 still equal the corresponding
 * enumerators in the inflate.h being built against, since enum values shift
 * when modes are added or removed.
190*44bedb31SLionel Sambuc */
191*44bedb31SLionel Sambuc#define INFLATE_MODE_TYPE 11  /* state->mode flags enum-ed in inflate.h */
192*44bedb31SLionel Sambuc#define INFLATE_MODE_BAD  26
193*44bedb31SLionel Sambuc
194*44bedb31SLionel Sambuc
/*
 * Runtime MMX selection, enabled when neither -DUSE_MMX nor -DNO_MMX is
 * given.  inflate_fast_use_mmx is an exported 4-byte variable in .data that
 * .L_check_mmx in inflate_fast compares against DO_USE_MMX:
 *   CHECK_MMX    (1) - below DO_USE_MMX: run the eflags-ID/cpuid probe, store
 *                      DO_USE_MMX or DONT_USE_MMX in the variable, re-check
 *   DO_USE_MMX   (2) - equal: jump to the MMX code
 *   DONT_USE_MMX (3) - above: jump to the non-MMX loop
 * The probe stores DO_USE_MMX only for a "GenuineIntel" cpu of family 6 with
 * the MMX feature bit set.
 */
195*44bedb31SLionel Sambuc#if ! defined( USE_MMX ) && ! defined( NO_MMX )
196*44bedb31SLionel Sambuc
197*44bedb31SLionel Sambuc#define RUN_TIME_MMX
198*44bedb31SLionel Sambuc
199*44bedb31SLionel Sambuc#define CHECK_MMX    1
200*44bedb31SLionel Sambuc#define DO_USE_MMX   2
201*44bedb31SLionel Sambuc#define DONT_USE_MMX 3
202*44bedb31SLionel Sambuc
203*44bedb31SLionel Sambuc.globl inflate_fast_use_mmx
204*44bedb31SLionel Sambuc
205*44bedb31SLionel Sambuc.data
206*44bedb31SLionel Sambuc
207*44bedb31SLionel Sambuc.align 4,0
208*44bedb31SLionel Sambucinflate_fast_use_mmx: /* integer flag for run time control 1=check,2=mmx,3=no */
209*44bedb31SLionel Sambuc.long CHECK_MMX
210*44bedb31SLionel Sambuc
211*44bedb31SLionel Sambuc#if defined( GAS_ELF )
212*44bedb31SLionel Sambuc/* elf info: mark the flag as a 4-byte data object */
213*44bedb31SLionel Sambuc.type   inflate_fast_use_mmx,@object
214*44bedb31SLionel Sambuc.size   inflate_fast_use_mmx,4
215*44bedb31SLionel Sambuc#endif
216*44bedb31SLionel Sambuc
217*44bedb31SLionel Sambuc#endif /* ! USE_MMX && ! NO_MMX (RUN_TIME_MMX) */
218*44bedb31SLionel Sambuc
219*44bedb31SLionel Sambuc#if defined( GAS_COFF )
220*44bedb31SLionel Sambuc/* coff info: scl 2 = extern, type 32 = function
 * (.def/.endef record this debugging information for the inflate_fast symbol;
 * under GAS_COFF the name has already been #defined to _inflate_fast) */
221*44bedb31SLionel Sambuc.def inflate_fast; .scl 2; .type 32; .endef
222*44bedb31SLionel Sambuc#endif
223*44bedb31SLionel Sambuc
224*44bedb31SLionel Sambuc.text
225*44bedb31SLionel Sambuc
226*44bedb31SLionel Sambuc.align 32,0x90
227*44bedb31SLionel Sambucinflate_fast:
228*44bedb31SLionel Sambuc        pushl   %edi
229*44bedb31SLionel Sambuc        pushl   %esi
230*44bedb31SLionel Sambuc        pushl   %ebp
231*44bedb31SLionel Sambuc        pushl   %ebx
232*44bedb31SLionel Sambuc        pushf   /* save eflags (strm_sp, state_sp assumes this is 32 bits) */
233*44bedb31SLionel Sambuc        subl    $local_var_size, %esp
234*44bedb31SLionel Sambuc        cld
235*44bedb31SLionel Sambuc
236*44bedb31SLionel Sambuc#define strm_r  %esi
237*44bedb31SLionel Sambuc#define state_r %edi
238*44bedb31SLionel Sambuc
239*44bedb31SLionel Sambuc        movl    strm_sp(%esp), strm_r
240*44bedb31SLionel Sambuc        movl    state_strm(strm_r), state_r
241*44bedb31SLionel Sambuc
242*44bedb31SLionel Sambuc        /* in = strm->next_in;
243*44bedb31SLionel Sambuc         * out = strm->next_out;
244*44bedb31SLionel Sambuc         * last = in + strm->avail_in - 11;
245*44bedb31SLionel Sambuc         * beg = out - (start - strm->avail_out);
246*44bedb31SLionel Sambuc         * end = out + (strm->avail_out - 257);
247*44bedb31SLionel Sambuc         */
248*44bedb31SLionel Sambuc        movl    avail_in_strm(strm_r), %edx
249*44bedb31SLionel Sambuc        movl    next_in_strm(strm_r), %eax
250*44bedb31SLionel Sambuc
251*44bedb31SLionel Sambuc        addl    %eax, %edx      /* avail_in += next_in */
252*44bedb31SLionel Sambuc        subl    $11, %edx       /* avail_in -= 11 */
253*44bedb31SLionel Sambuc
254*44bedb31SLionel Sambuc        movl    %eax, in(%esp)
255*44bedb31SLionel Sambuc        movl    %edx, last(%esp)
256*44bedb31SLionel Sambuc
257*44bedb31SLionel Sambuc        movl    start_sp(%esp), %ebp
258*44bedb31SLionel Sambuc        movl    avail_out_strm(strm_r), %ecx
259*44bedb31SLionel Sambuc        movl    next_out_strm(strm_r), %ebx
260*44bedb31SLionel Sambuc
261*44bedb31SLionel Sambuc        subl    %ecx, %ebp      /* start -= avail_out */
262*44bedb31SLionel Sambuc        negl    %ebp            /* start = -start */
263*44bedb31SLionel Sambuc        addl    %ebx, %ebp      /* start += next_out */
264*44bedb31SLionel Sambuc
265*44bedb31SLionel Sambuc        subl    $257, %ecx      /* avail_out -= 257 */
266*44bedb31SLionel Sambuc        addl    %ebx, %ecx      /* avail_out += out */
267*44bedb31SLionel Sambuc
268*44bedb31SLionel Sambuc        movl    %ebx, out(%esp)
269*44bedb31SLionel Sambuc        movl    %ebp, beg(%esp)
270*44bedb31SLionel Sambuc        movl    %ecx, end(%esp)
271*44bedb31SLionel Sambuc
272*44bedb31SLionel Sambuc        /* wsize = state->wsize;
273*44bedb31SLionel Sambuc         * write = state->write;
274*44bedb31SLionel Sambuc         * window = state->window;
275*44bedb31SLionel Sambuc         * hold = state->hold;
276*44bedb31SLionel Sambuc         * bits = state->bits;
277*44bedb31SLionel Sambuc         * lcode = state->lencode;
278*44bedb31SLionel Sambuc         * dcode = state->distcode;
279*44bedb31SLionel Sambuc         * lmask = ( 1 << state->lenbits ) - 1;
280*44bedb31SLionel Sambuc         * dmask = ( 1 << state->distbits ) - 1;
281*44bedb31SLionel Sambuc         */
282*44bedb31SLionel Sambuc
283*44bedb31SLionel Sambuc        movl    lencode_state(state_r), %eax
284*44bedb31SLionel Sambuc        movl    distcode_state(state_r), %ecx
285*44bedb31SLionel Sambuc
286*44bedb31SLionel Sambuc        movl    %eax, lcode(%esp)
287*44bedb31SLionel Sambuc        movl    %ecx, dcode(%esp)
288*44bedb31SLionel Sambuc
289*44bedb31SLionel Sambuc        movl    $1, %eax
290*44bedb31SLionel Sambuc        movl    lenbits_state(state_r), %ecx
291*44bedb31SLionel Sambuc        shll    %cl, %eax
292*44bedb31SLionel Sambuc        decl    %eax
293*44bedb31SLionel Sambuc        movl    %eax, lmask(%esp)
294*44bedb31SLionel Sambuc
295*44bedb31SLionel Sambuc        movl    $1, %eax
296*44bedb31SLionel Sambuc        movl    distbits_state(state_r), %ecx
297*44bedb31SLionel Sambuc        shll    %cl, %eax
298*44bedb31SLionel Sambuc        decl    %eax
299*44bedb31SLionel Sambuc        movl    %eax, dmask(%esp)
300*44bedb31SLionel Sambuc
301*44bedb31SLionel Sambuc        movl    wsize_state(state_r), %eax
302*44bedb31SLionel Sambuc        movl    write_state(state_r), %ecx
303*44bedb31SLionel Sambuc        movl    window_state(state_r), %edx
304*44bedb31SLionel Sambuc
305*44bedb31SLionel Sambuc        movl    %eax, wsize(%esp)
306*44bedb31SLionel Sambuc        movl    %ecx, write(%esp)
307*44bedb31SLionel Sambuc        movl    %edx, window(%esp)
308*44bedb31SLionel Sambuc
309*44bedb31SLionel Sambuc        movl    hold_state(state_r), %ebp
310*44bedb31SLionel Sambuc        movl    bits_state(state_r), %ebx
311*44bedb31SLionel Sambuc
312*44bedb31SLionel Sambuc#undef strm_r
313*44bedb31SLionel Sambuc#undef state_r
314*44bedb31SLionel Sambuc
315*44bedb31SLionel Sambuc#define in_r       %esi
316*44bedb31SLionel Sambuc#define from_r     %esi
317*44bedb31SLionel Sambuc#define out_r      %edi
318*44bedb31SLionel Sambuc
319*44bedb31SLionel Sambuc        movl    in(%esp), in_r
320*44bedb31SLionel Sambuc        movl    last(%esp), %ecx
321*44bedb31SLionel Sambuc        cmpl    in_r, %ecx
322*44bedb31SLionel Sambuc        ja      .L_align_long           /* if in < last */
323*44bedb31SLionel Sambuc
324*44bedb31SLionel Sambuc        addl    $11, %ecx               /* ecx = &in[ avail_in ] */
325*44bedb31SLionel Sambuc        subl    in_r, %ecx              /* ecx = avail_in */
326*44bedb31SLionel Sambuc        movl    $12, %eax
327*44bedb31SLionel Sambuc        subl    %ecx, %eax              /* eax = 12 - avail_in */
328*44bedb31SLionel Sambuc        leal    buf(%esp), %edi
329*44bedb31SLionel Sambuc        rep     movsb                   /* memcpy( buf, in, avail_in ) */
330*44bedb31SLionel Sambuc        movl    %eax, %ecx
331*44bedb31SLionel Sambuc        xorl    %eax, %eax
332*44bedb31SLionel Sambuc        rep     stosb         /* memset( &buf[ avail_in ], 0, 12 - avail_in ) */
333*44bedb31SLionel Sambuc        leal    buf(%esp), in_r         /* in = buf */
334*44bedb31SLionel Sambuc        movl    in_r, last(%esp)        /* last = in, do just one iteration */
335*44bedb31SLionel Sambuc        jmp     .L_is_aligned
336*44bedb31SLionel Sambuc
337*44bedb31SLionel Sambuc        /* align in_r on long boundary */
338*44bedb31SLionel Sambuc.L_align_long:
339*44bedb31SLionel Sambuc        testl   $3, in_r
340*44bedb31SLionel Sambuc        jz      .L_is_aligned
341*44bedb31SLionel Sambuc        xorl    %eax, %eax
342*44bedb31SLionel Sambuc        movb    (in_r), %al
343*44bedb31SLionel Sambuc        incl    in_r
344*44bedb31SLionel Sambuc        movl    %ebx, %ecx
345*44bedb31SLionel Sambuc        addl    $8, %ebx
346*44bedb31SLionel Sambuc        shll    %cl, %eax
347*44bedb31SLionel Sambuc        orl     %eax, %ebp
348*44bedb31SLionel Sambuc        jmp     .L_align_long
349*44bedb31SLionel Sambuc
350*44bedb31SLionel Sambuc.L_is_aligned:
351*44bedb31SLionel Sambuc        movl    out(%esp), out_r
352*44bedb31SLionel Sambuc
353*44bedb31SLionel Sambuc#if defined( NO_MMX )
354*44bedb31SLionel Sambuc        jmp     .L_do_loop
355*44bedb31SLionel Sambuc#endif
356*44bedb31SLionel Sambuc
357*44bedb31SLionel Sambuc#if defined( USE_MMX )
358*44bedb31SLionel Sambuc        jmp     .L_init_mmx
359*44bedb31SLionel Sambuc#endif
360*44bedb31SLionel Sambuc
361*44bedb31SLionel Sambuc/*** Runtime MMX check ***/
362*44bedb31SLionel Sambuc
363*44bedb31SLionel Sambuc#if defined( RUN_TIME_MMX )
364*44bedb31SLionel Sambuc.L_check_mmx:
365*44bedb31SLionel Sambuc        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
366*44bedb31SLionel Sambuc        je      .L_init_mmx
367*44bedb31SLionel Sambuc        ja      .L_do_loop /* > 2 */
368*44bedb31SLionel Sambuc
369*44bedb31SLionel Sambuc        pushl   %eax
370*44bedb31SLionel Sambuc        pushl   %ebx
371*44bedb31SLionel Sambuc        pushl   %ecx
372*44bedb31SLionel Sambuc        pushl   %edx
373*44bedb31SLionel Sambuc        pushf
374*44bedb31SLionel Sambuc        movl    (%esp), %eax      /* copy eflags to eax */
375*44bedb31SLionel Sambuc        xorl    $0x200000, (%esp) /* try toggling ID bit of eflags (bit 21)
376*44bedb31SLionel Sambuc                                   * to see if cpu supports cpuid...
377*44bedb31SLionel Sambuc                                   * ID bit method not supported by NexGen but
378*44bedb31SLionel Sambuc                                   * bios may load a cpuid instruction and
379*44bedb31SLionel Sambuc                                   * cpuid may be disabled on Cyrix 5-6x86 */
380*44bedb31SLionel Sambuc        popf
381*44bedb31SLionel Sambuc        pushf
382*44bedb31SLionel Sambuc        popl    %edx              /* copy new eflags to edx */
383*44bedb31SLionel Sambuc        xorl    %eax, %edx        /* test if ID bit is flipped */
384*44bedb31SLionel Sambuc        jz      .L_dont_use_mmx   /* not flipped if zero */
385*44bedb31SLionel Sambuc        xorl    %eax, %eax
386*44bedb31SLionel Sambuc        cpuid
387*44bedb31SLionel Sambuc        cmpl    $0x756e6547, %ebx /* check for GenuineIntel in ebx,ecx,edx */
388*44bedb31SLionel Sambuc        jne     .L_dont_use_mmx
389*44bedb31SLionel Sambuc        cmpl    $0x6c65746e, %ecx
390*44bedb31SLionel Sambuc        jne     .L_dont_use_mmx
391*44bedb31SLionel Sambuc        cmpl    $0x49656e69, %edx
392*44bedb31SLionel Sambuc        jne     .L_dont_use_mmx
393*44bedb31SLionel Sambuc        movl    $1, %eax
394*44bedb31SLionel Sambuc        cpuid                     /* get cpu features */
395*44bedb31SLionel Sambuc        shrl    $8, %eax
396*44bedb31SLionel Sambuc        andl    $15, %eax
397*44bedb31SLionel Sambuc        cmpl    $6, %eax          /* check for Pentium family, is 0xf for P4 */
398*44bedb31SLionel Sambuc        jne     .L_dont_use_mmx
399*44bedb31SLionel Sambuc        testl   $0x800000, %edx   /* test if MMX feature is set (bit 23) */
400*44bedb31SLionel Sambuc        jnz     .L_use_mmx
401*44bedb31SLionel Sambuc        jmp     .L_dont_use_mmx
402*44bedb31SLionel Sambuc.L_use_mmx:
403*44bedb31SLionel Sambuc        movl    $DO_USE_MMX, inflate_fast_use_mmx
404*44bedb31SLionel Sambuc        jmp     .L_check_mmx_pop
405*44bedb31SLionel Sambuc.L_dont_use_mmx:
406*44bedb31SLionel Sambuc        movl    $DONT_USE_MMX, inflate_fast_use_mmx
407*44bedb31SLionel Sambuc.L_check_mmx_pop:
408*44bedb31SLionel Sambuc        popl    %edx
409*44bedb31SLionel Sambuc        popl    %ecx
410*44bedb31SLionel Sambuc        popl    %ebx
411*44bedb31SLionel Sambuc        popl    %eax
412*44bedb31SLionel Sambuc        jmp     .L_check_mmx
413*44bedb31SLionel Sambuc#endif
414*44bedb31SLionel Sambuc
415*44bedb31SLionel Sambuc
416*44bedb31SLionel Sambuc/*** Non-MMX code ***/
417*44bedb31SLionel Sambuc
418*44bedb31SLionel Sambuc#if defined ( NO_MMX ) || defined( RUN_TIME_MMX )
419*44bedb31SLionel Sambuc
420*44bedb31SLionel Sambuc#define hold_r     %ebp
421*44bedb31SLionel Sambuc#define bits_r     %bl
422*44bedb31SLionel Sambuc#define bitslong_r %ebx
423*44bedb31SLionel Sambuc
424*44bedb31SLionel Sambuc.align 32,0x90
425*44bedb31SLionel Sambuc.L_while_test:
426*44bedb31SLionel Sambuc        /* while (in < last && out < end)
427*44bedb31SLionel Sambuc         */
428*44bedb31SLionel Sambuc        cmpl    out_r, end(%esp)
429*44bedb31SLionel Sambuc        jbe     .L_break_loop           /* if (out >= end) */
430*44bedb31SLionel Sambuc
431*44bedb31SLionel Sambuc        cmpl    in_r, last(%esp)
432*44bedb31SLionel Sambuc        jbe     .L_break_loop
433*44bedb31SLionel Sambuc
434*44bedb31SLionel Sambuc.L_do_loop:
435*44bedb31SLionel Sambuc        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
436*44bedb31SLionel Sambuc         *
437*44bedb31SLionel Sambuc         * do {
438*44bedb31SLionel Sambuc         *   if (bits < 15) {
439*44bedb31SLionel Sambuc         *     hold |= *((unsigned short *)in)++ << bits;
440*44bedb31SLionel Sambuc         *     bits += 16
441*44bedb31SLionel Sambuc         *   }
442*44bedb31SLionel Sambuc         *   this = lcode[hold & lmask]
443*44bedb31SLionel Sambuc         */
444*44bedb31SLionel Sambuc        cmpb    $15, bits_r
445*44bedb31SLionel Sambuc        ja      .L_get_length_code      /* if (15 < bits) */
446*44bedb31SLionel Sambuc
447*44bedb31SLionel Sambuc        xorl    %eax, %eax
448*44bedb31SLionel Sambuc        lodsw                           /* al = *(ushort *)in++ */
449*44bedb31SLionel Sambuc        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
450*44bedb31SLionel Sambuc        addb    $16, bits_r             /* bits += 16 */
451*44bedb31SLionel Sambuc        shll    %cl, %eax
452*44bedb31SLionel Sambuc        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
453*44bedb31SLionel Sambuc
454*44bedb31SLionel Sambuc.L_get_length_code:
455*44bedb31SLionel Sambuc        movl    lmask(%esp), %edx       /* edx = lmask */
456*44bedb31SLionel Sambuc        movl    lcode(%esp), %ecx       /* ecx = lcode */
457*44bedb31SLionel Sambuc        andl    hold_r, %edx            /* edx &= hold */
458*44bedb31SLionel Sambuc        movl    (%ecx,%edx,4), %eax     /* eax = lcode[hold & lmask] */
459*44bedb31SLionel Sambuc
460*44bedb31SLionel Sambuc.L_dolen:
461*44bedb31SLionel Sambuc        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
462*44bedb31SLionel Sambuc         *
463*44bedb31SLionel Sambuc         * dolen:
464*44bedb31SLionel Sambuc         *    bits -= this.bits;
465*44bedb31SLionel Sambuc         *    hold >>= this.bits
466*44bedb31SLionel Sambuc         */
467*44bedb31SLionel Sambuc        movb    %ah, %cl                /* cl = this.bits */
468*44bedb31SLionel Sambuc        subb    %ah, bits_r             /* bits -= this.bits */
469*44bedb31SLionel Sambuc        shrl    %cl, hold_r             /* hold >>= this.bits */
470*44bedb31SLionel Sambuc
471*44bedb31SLionel Sambuc        /* check if op is a literal
472*44bedb31SLionel Sambuc         * if (op == 0) {
473*44bedb31SLionel Sambuc         *    PUP(out) = this.val;
474*44bedb31SLionel Sambuc         *  }
475*44bedb31SLionel Sambuc         */
476*44bedb31SLionel Sambuc        testb   %al, %al
477*44bedb31SLionel Sambuc        jnz     .L_test_for_length_base /* if (op != 0) 45.7% */
478*44bedb31SLionel Sambuc
479*44bedb31SLionel Sambuc        shrl    $16, %eax               /* output this.val char */
480*44bedb31SLionel Sambuc        stosb
481*44bedb31SLionel Sambuc        jmp     .L_while_test
482*44bedb31SLionel Sambuc
483*44bedb31SLionel Sambuc.L_test_for_length_base:
484*44bedb31SLionel Sambuc        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len
485*44bedb31SLionel Sambuc         *
486*44bedb31SLionel Sambuc         * else if (op & 16) {
487*44bedb31SLionel Sambuc         *   len = this.val
488*44bedb31SLionel Sambuc         *   op &= 15
489*44bedb31SLionel Sambuc         *   if (op) {
490*44bedb31SLionel Sambuc         *     if (op > bits) {
491*44bedb31SLionel Sambuc         *       hold |= *((unsigned short *)in)++ << bits;
492*44bedb31SLionel Sambuc         *       bits += 16
493*44bedb31SLionel Sambuc         *     }
494*44bedb31SLionel Sambuc         *     len += hold & mask[op];
495*44bedb31SLionel Sambuc         *     bits -= op;
496*44bedb31SLionel Sambuc         *     hold >>= op;
497*44bedb31SLionel Sambuc         *   }
498*44bedb31SLionel Sambuc         */
499*44bedb31SLionel Sambuc#define len_r %edx
500*44bedb31SLionel Sambuc        movl    %eax, len_r             /* len = this */
501*44bedb31SLionel Sambuc        shrl    $16, len_r              /* len = this.val */
502*44bedb31SLionel Sambuc        movb    %al, %cl
503*44bedb31SLionel Sambuc
504*44bedb31SLionel Sambuc        testb   $16, %al
505*44bedb31SLionel Sambuc        jz      .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
506*44bedb31SLionel Sambuc        andb    $15, %cl                /* op &= 15 */
507*44bedb31SLionel Sambuc        jz      .L_save_len             /* if (!op) */
508*44bedb31SLionel Sambuc        cmpb    %cl, bits_r
509*44bedb31SLionel Sambuc        jae     .L_add_bits_to_len      /* if (op <= bits) */
510*44bedb31SLionel Sambuc
511*44bedb31SLionel Sambuc        movb    %cl, %ch                /* stash op in ch, freeing cl */
512*44bedb31SLionel Sambuc        xorl    %eax, %eax
513*44bedb31SLionel Sambuc        lodsw                           /* al = *(ushort *)in++ */
514*44bedb31SLionel Sambuc        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
515*44bedb31SLionel Sambuc        addb    $16, bits_r             /* bits += 16 */
516*44bedb31SLionel Sambuc        shll    %cl, %eax
517*44bedb31SLionel Sambuc        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
518*44bedb31SLionel Sambuc        movb    %ch, %cl                /* move op back to ecx */
519*44bedb31SLionel Sambuc
520*44bedb31SLionel Sambuc.L_add_bits_to_len:
521*44bedb31SLionel Sambuc        movl    $1, %eax
522*44bedb31SLionel Sambuc        shll    %cl, %eax
523*44bedb31SLionel Sambuc        decl    %eax
524*44bedb31SLionel Sambuc        subb    %cl, bits_r
525*44bedb31SLionel Sambuc        andl    hold_r, %eax            /* eax &= hold */
526*44bedb31SLionel Sambuc        shrl    %cl, hold_r
527*44bedb31SLionel Sambuc        addl    %eax, len_r             /* len += hold & mask[op] */
528*44bedb31SLionel Sambuc
529*44bedb31SLionel Sambuc.L_save_len:
530*44bedb31SLionel Sambuc        movl    len_r, len(%esp)        /* save len */
531*44bedb31SLionel Sambuc#undef  len_r
532*44bedb31SLionel Sambuc
533*44bedb31SLionel Sambuc.L_decode_distance:
534*44bedb31SLionel Sambuc        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist
535*44bedb31SLionel Sambuc         * Refill hold when 15 or fewer bits remain, then fetch the distance entry.
536*44bedb31SLionel Sambuc         *   if (bits <= 15) {
537*44bedb31SLionel Sambuc         *     hold |= *((unsigned short *)in)++ << bits;
538*44bedb31SLionel Sambuc         *     bits += 16
539*44bedb31SLionel Sambuc         *   }
540*44bedb31SLionel Sambuc         *   this = dcode[hold & dmask];
541*44bedb31SLionel Sambuc         * dodist:
542*44bedb31SLionel Sambuc         *   bits -= this.bits;
543*44bedb31SLionel Sambuc         *   hold >>= this.bits;
544*44bedb31SLionel Sambuc         *   op = this.op;
545*44bedb31SLionel Sambuc         */
546*44bedb31SLionel Sambuc
547*44bedb31SLionel Sambuc        cmpb    $15, bits_r             /* more than 15 bits already buffered? */
548*44bedb31SLionel Sambuc        ja      .L_get_distance_code    /* if (15 < bits) skip the refill */
549*44bedb31SLionel Sambuc
550*44bedb31SLionel Sambuc        xorl    %eax, %eax              /* clear eax so the 16-bit load below is zero-extended */
551*44bedb31SLionel Sambuc        lodsw                           /* ax = *(ushort *)in++ */
552*44bedb31SLionel Sambuc        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
553*44bedb31SLionel Sambuc        addb    $16, bits_r             /* bits += 16 */
554*44bedb31SLionel Sambuc        shll    %cl, %eax               /* line the new 16 bits up above the bits still in hold */
555*44bedb31SLionel Sambuc        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
556*44bedb31SLionel Sambuc
557*44bedb31SLionel Sambuc.L_get_distance_code:                   /* first-level distance table lookup */
558*44bedb31SLionel Sambuc        movl    dmask(%esp), %edx       /* edx = dmask */
559*44bedb31SLionel Sambuc        movl    dcode(%esp), %ecx       /* ecx = dcode */
560*44bedb31SLionel Sambuc        andl    hold_r, %edx            /* edx &= hold */
561*44bedb31SLionel Sambuc        movl    (%ecx,%edx,4), %eax     /* eax = dcode[hold & dmask] */
562*44bedb31SLionel Sambuc
563*44bedb31SLionel Sambuc#define dist_r %edx
564*44bedb31SLionel Sambuc.L_dodist:                              /* in: eax = table entry (al = op, ah = bits, upper 16 bits = val) */
565*44bedb31SLionel Sambuc        movl    %eax, dist_r            /* dist = this */
566*44bedb31SLionel Sambuc        shrl    $16, dist_r             /* dist = this.val */
567*44bedb31SLionel Sambuc        movb    %ah, %cl                /* cl = this.bits, used as the shift count below */
568*44bedb31SLionel Sambuc        subb    %ah, bits_r             /* bits -= this.bits */
569*44bedb31SLionel Sambuc        shrl    %cl, hold_r             /* hold >>= this.bits */
570*44bedb31SLionel Sambuc
571*44bedb31SLionel Sambuc        /* if (op & 16) {
572*44bedb31SLionel Sambuc         *   dist = this.val
573*44bedb31SLionel Sambuc         *   op &= 15
574*44bedb31SLionel Sambuc         *   if (op > bits) {
575*44bedb31SLionel Sambuc         *     hold |= *((unsigned short *)in)++ << bits;
576*44bedb31SLionel Sambuc         *     bits += 16
577*44bedb31SLionel Sambuc         *   }
578*44bedb31SLionel Sambuc         *   dist += hold & mask[op];
579*44bedb31SLionel Sambuc         *   bits -= op;
580*44bedb31SLionel Sambuc         *   hold >>= op;
581*44bedb31SLionel Sambuc         */
582*44bedb31SLionel Sambuc        movb    %al, %cl                /* cl = this.op */
583*44bedb31SLionel Sambuc
584*44bedb31SLionel Sambuc        testb   $16, %al                /* if ((op & 16) == 0) */
585*44bedb31SLionel Sambuc        jz      .L_test_for_second_level_dist   /* not a distance base entry */
586*44bedb31SLionel Sambuc        andb    $15, %cl                /* op &= 15 */
587*44bedb31SLionel Sambuc        jz      .L_check_dist_one       /* no extra bits: dist is final, try the dist == 1 fast path */
588*44bedb31SLionel Sambuc        cmpb    %cl, bits_r             /* are op extra bits already buffered? */
589*44bedb31SLionel Sambuc        jae     .L_add_bits_to_dist     /* if (op <= bits) 97.6% */
590*44bedb31SLionel Sambuc
591*44bedb31SLionel Sambuc        movb    %cl, %ch                /* stash op in ch, freeing cl */
592*44bedb31SLionel Sambuc        xorl    %eax, %eax              /* clear eax so the 16-bit load below is zero-extended */
593*44bedb31SLionel Sambuc        lodsw                           /* ax = *(ushort *)in++ */
594*44bedb31SLionel Sambuc        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
595*44bedb31SLionel Sambuc        addb    $16, bits_r             /* bits += 16 */
596*44bedb31SLionel Sambuc        shll    %cl, %eax               /* line the new 16 bits up above the bits still in hold */
597*44bedb31SLionel Sambuc        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
598*44bedb31SLionel Sambuc        movb    %ch, %cl                /* move op back to cl */
599*44bedb31SLionel Sambuc
600*44bedb31SLionel Sambuc.L_add_bits_to_dist:                    /* in: cl = op (extra-bit count), dist_r = base; falls through to .L_check_window */
601*44bedb31SLionel Sambuc        movl    $1, %eax
602*44bedb31SLionel Sambuc        shll    %cl, %eax
603*44bedb31SLionel Sambuc        decl    %eax                    /* (1 << op) - 1 */
604*44bedb31SLionel Sambuc        subb    %cl, bits_r             /* bits -= op */
605*44bedb31SLionel Sambuc        andl    hold_r, %eax            /* eax &= hold */
606*44bedb31SLionel Sambuc        shrl    %cl, hold_r             /* hold >>= op */
607*44bedb31SLionel Sambuc        addl    %eax, dist_r            /* dist += hold & ((1 << op) - 1) */
609*44bedb31SLionel Sambuc
610*44bedb31SLionel Sambuc.L_check_window:
611*44bedb31SLionel Sambuc        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
612*44bedb31SLionel Sambuc         *       %ecx = nbytes
613*44bedb31SLionel Sambuc         * Fast path: the whole match source lies in the output already written.
614*44bedb31SLionel Sambuc         * nbytes = out - beg;
615*44bedb31SLionel Sambuc         * if (dist <= nbytes) {
616*44bedb31SLionel Sambuc         *   from = out - dist;
617*44bedb31SLionel Sambuc         *   do {
618*44bedb31SLionel Sambuc         *     PUP(out) = PUP(from);
619*44bedb31SLionel Sambuc         *   } while (--len > 0);
620*44bedb31SLionel Sambuc         * }
621*44bedb31SLionel Sambuc         */
622*44bedb31SLionel Sambuc
623*44bedb31SLionel Sambuc        movl    in_r, in(%esp)          /* save in so from can use its reg */
624*44bedb31SLionel Sambuc        movl    out_r, %eax
625*44bedb31SLionel Sambuc        subl    beg(%esp), %eax         /* nbytes = out - beg */
626*44bedb31SLionel Sambuc
627*44bedb31SLionel Sambuc        cmpl    dist_r, %eax
628*44bedb31SLionel Sambuc        jb      .L_clip_window          /* if (dist > nbytes) 4.2% */
629*44bedb31SLionel Sambuc
630*44bedb31SLionel Sambuc        movl    len(%esp), %ecx         /* ecx = len, reloaded from its stack slot */
631*44bedb31SLionel Sambuc        movl    out_r, from_r
632*44bedb31SLionel Sambuc        subl    dist_r, from_r          /* from = out - dist */
633*44bedb31SLionel Sambuc
634*44bedb31SLionel Sambuc        subl    $3, %ecx                /* first three bytes are copied by hand below */
635*44bedb31SLionel Sambuc        movb    (from_r), %al
636*44bedb31SLionel Sambuc        movb    %al, (out_r)            /* out[0] = from[0], stored before from[1] is read */
637*44bedb31SLionel Sambuc        movb    1(from_r), %al
638*44bedb31SLionel Sambuc        movb    2(from_r), %dl          /* reuses %dl: dist is not read again on this path */
639*44bedb31SLionel Sambuc        addl    $3, from_r
640*44bedb31SLionel Sambuc        movb    %al, 1(out_r)           /* out[1] = from[1] */
641*44bedb31SLionel Sambuc        movb    %dl, 2(out_r)           /* out[2] = from[2] */
642*44bedb31SLionel Sambuc        addl    $3, out_r
643*44bedb31SLionel Sambuc        rep     movsb                   /* copy the remaining len - 3 bytes */
644*44bedb31SLionel Sambuc
645*44bedb31SLionel Sambuc        movl    in(%esp), in_r          /* move in back to %esi, toss from */
646*44bedb31SLionel Sambuc        jmp     .L_while_test
647*44bedb31SLionel Sambuc
648*44bedb31SLionel Sambuc.align 16,0x90
649*44bedb31SLionel Sambuc.L_check_dist_one:                      /* dist == 1 fast path: replicate the previous output byte */
650*44bedb31SLionel Sambuc        cmpl    $1, dist_r
651*44bedb31SLionel Sambuc        jne     .L_check_window         /* if (dist != 1) use the general copy */
652*44bedb31SLionel Sambuc        cmpl    out_r, beg(%esp)
653*44bedb31SLionel Sambuc        je      .L_check_window         /* if (out == beg) there is no previous byte in the output */
654*44bedb31SLionel Sambuc
655*44bedb31SLionel Sambuc        decl    out_r                   /* point at the previous output byte */
656*44bedb31SLionel Sambuc        movl    len(%esp), %ecx         /* ecx = len */
657*44bedb31SLionel Sambuc        movb    (out_r), %al            /* al = byte to replicate */
658*44bedb31SLionel Sambuc        subl    $3, %ecx                /* three copies are stored by hand below */
659*44bedb31SLionel Sambuc
660*44bedb31SLionel Sambuc        movb    %al, 1(out_r)
661*44bedb31SLionel Sambuc        movb    %al, 2(out_r)
662*44bedb31SLionel Sambuc        movb    %al, 3(out_r)
663*44bedb31SLionel Sambuc        addl    $4, out_r               /* skip the source byte and the three copies */
664*44bedb31SLionel Sambuc        rep     stosb                   /* store the remaining len - 3 copies of al */
665*44bedb31SLionel Sambuc
666*44bedb31SLionel Sambuc        jmp     .L_while_test
667*44bedb31SLionel Sambuc
668*44bedb31SLionel Sambuc.align 16,0x90
669*44bedb31SLionel Sambuc.L_test_for_second_level_length:
670*44bedb31SLionel Sambuc        /* else if ((op & 64) == 0) {
671*44bedb31SLionel Sambuc         *   this = lcode[this.val + (hold & mask[op])];
672*44bedb31SLionel Sambuc         * }
673*44bedb31SLionel Sambuc         */
674*44bedb31SLionel Sambuc        testb   $64, %al
675*44bedb31SLionel Sambuc        jnz     .L_test_for_end_of_block  /* if ((op & 64) != 0) */
676*44bedb31SLionel Sambuc
677*44bedb31SLionel Sambuc        movl    $1, %eax
678*44bedb31SLionel Sambuc        shll    %cl, %eax               /* cl is used as op here; it is set before this label is reached */
679*44bedb31SLionel Sambuc        decl    %eax                    /* (1 << op) - 1 */
680*44bedb31SLionel Sambuc        andl    hold_r, %eax            /* eax &= hold */
681*44bedb31SLionel Sambuc        addl    %edx, %eax              /* eax += this.val */
682*44bedb31SLionel Sambuc        movl    lcode(%esp), %edx       /* edx = lcode */
683*44bedb31SLionel Sambuc        movl    (%edx,%eax,4), %eax     /* eax = lcode[val + (hold&mask[op])] */
684*44bedb31SLionel Sambuc        jmp     .L_dolen                /* decode the second-level entry like a first-level one */
685*44bedb31SLionel Sambuc
686*44bedb31SLionel Sambuc.align 16,0x90
687*44bedb31SLionel Sambuc.L_test_for_second_level_dist:
688*44bedb31SLionel Sambuc        /* else if ((op & 64) == 0) {
689*44bedb31SLionel Sambuc         *   this = dcode[this.val + (hold & mask[op])];
690*44bedb31SLionel Sambuc         * }
691*44bedb31SLionel Sambuc         */
692*44bedb31SLionel Sambuc        testb   $64, %al
693*44bedb31SLionel Sambuc        jnz     .L_invalid_distance_code  /* if ((op & 64) != 0) */
694*44bedb31SLionel Sambuc
695*44bedb31SLionel Sambuc        movl    $1, %eax
696*44bedb31SLionel Sambuc        shll    %cl, %eax               /* cl = this.op, loaded in .L_dodist */
697*44bedb31SLionel Sambuc        decl    %eax                    /* (1 << op) - 1 */
698*44bedb31SLionel Sambuc        andl    hold_r, %eax            /* eax &= hold */
699*44bedb31SLionel Sambuc        addl    %edx, %eax              /* eax += this.val */
700*44bedb31SLionel Sambuc        movl    dcode(%esp), %edx       /* edx = dcode */
701*44bedb31SLionel Sambuc        movl    (%edx,%eax,4), %eax     /* eax = dcode[val + (hold&mask[op])] */
702*44bedb31SLionel Sambuc        jmp     .L_dodist               /* decode the second-level entry like a first-level one */
703*44bedb31SLionel Sambuc
704*44bedb31SLionel Sambuc.align 16,0x90
705*44bedb31SLionel Sambuc.L_clip_window:
706*44bedb31SLionel Sambuc        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
707*44bedb31SLionel Sambuc         *       %eax = nbytes (out - beg) on entry, moved to %ecx below
708*44bedb31SLionel Sambuc         * The match starts before beg, so part of it comes from the window.
709*44bedb31SLionel Sambuc         * else {
710*44bedb31SLionel Sambuc         *   if (dist > wsize) {
711*44bedb31SLionel Sambuc         *     invalid distance
712*44bedb31SLionel Sambuc         *   }
713*44bedb31SLionel Sambuc         *   from = window;
714*44bedb31SLionel Sambuc         *   nbytes = dist - nbytes;
715*44bedb31SLionel Sambuc         *   if (write == 0) {
716*44bedb31SLionel Sambuc         *     from += wsize - nbytes;
717*44bedb31SLionel Sambuc         */
718*44bedb31SLionel Sambuc#define nbytes_r %ecx
719*44bedb31SLionel Sambuc        movl    %eax, nbytes_r
720*44bedb31SLionel Sambuc        movl    wsize(%esp), %eax       /* prepare for dist compare */
721*44bedb31SLionel Sambuc        negl    nbytes_r                /* nbytes = -nbytes */
722*44bedb31SLionel Sambuc        movl    window(%esp), from_r    /* from = window */
723*44bedb31SLionel Sambuc
724*44bedb31SLionel Sambuc        cmpl    dist_r, %eax
725*44bedb31SLionel Sambuc        jb      .L_invalid_distance_too_far /* if (dist > wsize) */
726*44bedb31SLionel Sambuc
727*44bedb31SLionel Sambuc        addl    dist_r, nbytes_r        /* nbytes = dist - nbytes */
728*44bedb31SLionel Sambuc        cmpl    $0, write(%esp)
729*44bedb31SLionel Sambuc        jne     .L_wrap_around_window   /* if (write != 0) */
730*44bedb31SLionel Sambuc
731*44bedb31SLionel Sambuc        subl    nbytes_r, %eax          /* eax = wsize - nbytes */
732*44bedb31SLionel Sambuc        addl    %eax, from_r            /* from += wsize - nbytes */
733*44bedb31SLionel Sambuc
734*44bedb31SLionel Sambuc        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
735*44bedb31SLionel Sambuc         *       %ecx = nbytes, %eax = len
736*44bedb31SLionel Sambuc         *
737*44bedb31SLionel Sambuc         *     if (nbytes < len) {
738*44bedb31SLionel Sambuc         *       len -= nbytes;
739*44bedb31SLionel Sambuc         *       do {
740*44bedb31SLionel Sambuc         *         PUP(out) = PUP(from);
741*44bedb31SLionel Sambuc         *       } while (--nbytes);
742*44bedb31SLionel Sambuc         *       from = out - dist;
743*44bedb31SLionel Sambuc         *     }
744*44bedb31SLionel Sambuc         *   }
745*44bedb31SLionel Sambuc         */
746*44bedb31SLionel Sambuc#define len_r %eax
747*44bedb31SLionel Sambuc        movl    len(%esp), len_r        /* len = saved match length */
748*44bedb31SLionel Sambuc        cmpl    nbytes_r, len_r
749*44bedb31SLionel Sambuc        jbe     .L_do_copy1             /* if (nbytes >= len) */
750*44bedb31SLionel Sambuc
751*44bedb31SLionel Sambuc        subl    nbytes_r, len_r         /* len -= nbytes */
752*44bedb31SLionel Sambuc        rep     movsb                   /* copy nbytes bytes out of the window */
753*44bedb31SLionel Sambuc        movl    out_r, from_r
754*44bedb31SLionel Sambuc        subl    dist_r, from_r          /* from = out - dist */
755*44bedb31SLionel Sambuc        jmp     .L_do_copy1             /* copy the remaining len bytes from the output */
765*44bedb31SLionel Sambuc
766*44bedb31SLionel Sambuc.L_wrap_around_window:
767*44bedb31SLionel Sambuc        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
768*44bedb31SLionel Sambuc         *       %ecx = nbytes, %eax = write, %eax = len
769*44bedb31SLionel Sambuc         * write != 0: the match source may wrap past the end of the window.
770*44bedb31SLionel Sambuc         *   else if (write < nbytes) {
771*44bedb31SLionel Sambuc         *     from += wsize + write - nbytes;
772*44bedb31SLionel Sambuc         *     nbytes -= write;
773*44bedb31SLionel Sambuc         *     if (nbytes < len) {
774*44bedb31SLionel Sambuc         *       len -= nbytes;
775*44bedb31SLionel Sambuc         *       do {
776*44bedb31SLionel Sambuc         *         PUP(out) = PUP(from);
777*44bedb31SLionel Sambuc         *       } while (--nbytes);
778*44bedb31SLionel Sambuc         *       from = window;
779*44bedb31SLionel Sambuc         *       nbytes = write;
780*44bedb31SLionel Sambuc         *       if (nbytes < len) {
781*44bedb31SLionel Sambuc         *         len -= nbytes;
782*44bedb31SLionel Sambuc         *         do {
783*44bedb31SLionel Sambuc         *           PUP(out) = PUP(from);
784*44bedb31SLionel Sambuc         *         } while(--nbytes);
785*44bedb31SLionel Sambuc         *         from = out - dist;
786*44bedb31SLionel Sambuc         *       }
787*44bedb31SLionel Sambuc         *     }
788*44bedb31SLionel Sambuc         *   }
789*44bedb31SLionel Sambuc         */
790*44bedb31SLionel Sambuc#define write_r %eax
791*44bedb31SLionel Sambuc        movl    write(%esp), write_r    /* eax = write; still live in .L_contiguous_in_window */
792*44bedb31SLionel Sambuc        cmpl    write_r, nbytes_r
793*44bedb31SLionel Sambuc        jbe     .L_contiguous_in_window /* if (write >= nbytes) */
794*44bedb31SLionel Sambuc
795*44bedb31SLionel Sambuc        addl    wsize(%esp), from_r
796*44bedb31SLionel Sambuc        addl    write_r, from_r
797*44bedb31SLionel Sambuc        subl    nbytes_r, from_r        /* from += wsize + write - nbytes */
798*44bedb31SLionel Sambuc        subl    write_r, nbytes_r       /* nbytes -= write */
799*44bedb31SLionel Sambuc#undef write_r
800*44bedb31SLionel Sambuc
801*44bedb31SLionel Sambuc        movl    len(%esp), len_r        /* eax = len (write is no longer needed in eax) */
802*44bedb31SLionel Sambuc        cmpl    nbytes_r, len_r
803*44bedb31SLionel Sambuc        jbe     .L_do_copy1             /* if (nbytes >= len) */
804*44bedb31SLionel Sambuc
805*44bedb31SLionel Sambuc        subl    nbytes_r, len_r         /* len -= nbytes */
806*44bedb31SLionel Sambuc        rep     movsb                   /* copy the nbytes bytes at the end of the window */
807*44bedb31SLionel Sambuc        movl    window(%esp), from_r    /* from = window */
808*44bedb31SLionel Sambuc        movl    write(%esp), nbytes_r   /* nbytes = write */
809*44bedb31SLionel Sambuc        cmpl    nbytes_r, len_r
810*44bedb31SLionel Sambuc        jbe     .L_do_copy1             /* if (nbytes >= len) */
811*44bedb31SLionel Sambuc
812*44bedb31SLionel Sambuc        subl    nbytes_r, len_r         /* len -= nbytes */
813*44bedb31SLionel Sambuc        rep     movsb                   /* copy the write bytes at the start of the window */
814*44bedb31SLionel Sambuc        movl    out_r, from_r
815*44bedb31SLionel Sambuc        subl    dist_r, from_r          /* from = out - dist */
816*44bedb31SLionel Sambuc        jmp     .L_do_copy1             /* copy the remaining len bytes from the output */
817*44bedb31SLionel Sambuc
818*44bedb31SLionel Sambuc.L_contiguous_in_window:
819*44bedb31SLionel Sambuc        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
820*44bedb31SLionel Sambuc         *       %ecx = nbytes, %eax = write, %eax = len
821*44bedb31SLionel Sambuc         * write >= nbytes: the window part of the match is one contiguous run.
822*44bedb31SLionel Sambuc         *   else {
823*44bedb31SLionel Sambuc         *     from += write - nbytes;
824*44bedb31SLionel Sambuc         *     if (nbytes < len) {
825*44bedb31SLionel Sambuc         *       len -= nbytes;
826*44bedb31SLionel Sambuc         *       do {
827*44bedb31SLionel Sambuc         *         PUP(out) = PUP(from);
828*44bedb31SLionel Sambuc         *       } while (--nbytes);
829*44bedb31SLionel Sambuc         *       from = out - dist;
830*44bedb31SLionel Sambuc         *     }
831*44bedb31SLionel Sambuc         *   }
832*44bedb31SLionel Sambuc         */
833*44bedb31SLionel Sambuc#define write_r %eax
834*44bedb31SLionel Sambuc        addl    write_r, from_r         /* eax still holds write, loaded in .L_wrap_around_window */
835*44bedb31SLionel Sambuc        subl    nbytes_r, from_r        /* from += write - nbytes */
836*44bedb31SLionel Sambuc#undef write_r
837*44bedb31SLionel Sambuc
838*44bedb31SLionel Sambuc        movl    len(%esp), len_r        /* eax = len (write is no longer needed in eax) */
839*44bedb31SLionel Sambuc        cmpl    nbytes_r, len_r
840*44bedb31SLionel Sambuc        jbe     .L_do_copy1             /* if (nbytes >= len) */
841*44bedb31SLionel Sambuc
842*44bedb31SLionel Sambuc        subl    nbytes_r, len_r         /* len -= nbytes */
843*44bedb31SLionel Sambuc        rep     movsb                   /* copy nbytes bytes out of the window */
844*44bedb31SLionel Sambuc        movl    out_r, from_r
845*44bedb31SLionel Sambuc        subl    dist_r, from_r          /* from = out - dist; falls through to .L_do_copy1 */
846*44bedb31SLionel Sambuc
847*44bedb31SLionel Sambuc.L_do_copy1:
848*44bedb31SLionel Sambuc        /* regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out
849*44bedb31SLionel Sambuc         *       %eax = len
850*44bedb31SLionel Sambuc         * Final copy of the len bytes still owed, then back to the loop test.
851*44bedb31SLionel Sambuc         *     while (len > 0) {
852*44bedb31SLionel Sambuc         *       PUP(out) = PUP(from);
853*44bedb31SLionel Sambuc         *       len--;
854*44bedb31SLionel Sambuc         *     }
855*44bedb31SLionel Sambuc         *   }
856*44bedb31SLionel Sambuc         * } while (in < last && out < end);
857*44bedb31SLionel Sambuc         */
858*44bedb31SLionel Sambuc#undef nbytes_r
859*44bedb31SLionel Sambuc#define in_r %esi
860*44bedb31SLionel Sambuc        movl    len_r, %ecx             /* ecx = len, the rep count */
861*44bedb31SLionel Sambuc        rep     movsb                   /* copy len bytes from from to out */
862*44bedb31SLionel Sambuc
863*44bedb31SLionel Sambuc        movl    in(%esp), in_r          /* move in back to %esi, toss from */
864*44bedb31SLionel Sambuc        jmp     .L_while_test
865*44bedb31SLionel Sambuc
866*44bedb31SLionel Sambuc#undef len_r
867*44bedb31SLionel Sambuc#undef dist_r
868*44bedb31SLionel Sambuc
869*44bedb31SLionel Sambuc#endif /* NO_MMX || RUN_TIME_MMX */
870*44bedb31SLionel Sambuc
871*44bedb31SLionel Sambuc
872*44bedb31SLionel Sambuc/*** MMX code ***/
873*44bedb31SLionel Sambuc
874*44bedb31SLionel Sambuc#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
875*44bedb31SLionel Sambuc
876*44bedb31SLionel Sambuc.align 32,0x90
877*44bedb31SLionel Sambuc.L_init_mmx:                            /* set up register roles for the MMX decode loop */
878*44bedb31SLionel Sambuc        emms                            /* empty the MMX state before the mm registers are used */
879*44bedb31SLionel Sambuc
880*44bedb31SLionel Sambuc#undef  bits_r
881*44bedb31SLionel Sambuc#undef  bitslong_r
882*44bedb31SLionel Sambuc#define bitslong_r %ebp
883*44bedb31SLionel Sambuc#define hold_mm    %mm0
884*44bedb31SLionel Sambuc        movd    %ebp, hold_mm           /* hold_mm = %ebp, which holds hold in the non-MMX code */
885*44bedb31SLionel Sambuc        movl    %ebx, bitslong_r        /* bits moves from %ebx to %ebp; NOTE(review): assumes the upper bits of %ebx are zero */
886*44bedb31SLionel Sambuc
887*44bedb31SLionel Sambuc#define used_mm   %mm1
888*44bedb31SLionel Sambuc#define dmask2_mm %mm2
889*44bedb31SLionel Sambuc#define lmask2_mm %mm3
890*44bedb31SLionel Sambuc#define lmask_mm  %mm4
891*44bedb31SLionel Sambuc#define dmask_mm  %mm5
892*44bedb31SLionel Sambuc#define tmp_mm    %mm6
893*44bedb31SLionel Sambuc
894*44bedb31SLionel Sambuc        movd    lmask(%esp), lmask_mm   /* lmask_mm = lmask */
895*44bedb31SLionel Sambuc        movq    lmask_mm, lmask2_mm     /* keep a spare copy: pand overwrites lmask_mm on every lookup */
896*44bedb31SLionel Sambuc        movd    dmask(%esp), dmask_mm   /* dmask_mm = dmask */
897*44bedb31SLionel Sambuc        movq    dmask_mm, dmask2_mm     /* keep a spare copy: pand overwrites dmask_mm on every lookup */
898*44bedb31SLionel Sambuc        pxor    used_mm, used_mm        /* used_mm = 0, so the first deferred shift of hold_mm is a no-op */
899*44bedb31SLionel Sambuc        movl    lcode(%esp), %ebx       /* ebx = lcode */
900*44bedb31SLionel Sambuc        jmp     .L_do_loop_mmx          /* enter the loop body without running the loop test */
901*44bedb31SLionel Sambuc
902*44bedb31SLionel Sambuc.align 32,0x90
903*44bedb31SLionel Sambuc.L_while_test_mmx:
904*44bedb31SLionel Sambuc        /* while (in < last && out < end)
905*44bedb31SLionel Sambuc         */
906*44bedb31SLionel Sambuc        cmpl    out_r, end(%esp)
907*44bedb31SLionel Sambuc        jbe     .L_break_loop           /* if (out >= end) */
908*44bedb31SLionel Sambuc
909*44bedb31SLionel Sambuc        cmpl    in_r, last(%esp)
910*44bedb31SLionel Sambuc        jbe     .L_break_loop           /* if (in >= last) */
911*44bedb31SLionel Sambuc
912*44bedb31SLionel Sambuc.L_do_loop_mmx:                         /* top of the MMX loop: refill hold_mm, then decode a literal/length code */
913*44bedb31SLionel Sambuc        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
914*44bedb31SLionel Sambuc
915*44bedb31SLionel Sambuc        cmpl    $32, bitslong_r
916*44bedb31SLionel Sambuc        ja      .L_get_length_code_mmx  /* if (32 < bits) */
917*44bedb31SLionel Sambuc
918*44bedb31SLionel Sambuc        movd    bitslong_r, tmp_mm      /* tmp_mm = bits, the shift count */
919*44bedb31SLionel Sambuc        movd    (in_r), %mm7            /* mm7 = next 32 bits of input */
920*44bedb31SLionel Sambuc        addl    $4, in_r                /* in += 4 bytes */
921*44bedb31SLionel Sambuc        psllq   tmp_mm, %mm7            /* line the new bits up above the bits still in hold_mm */
922*44bedb31SLionel Sambuc        addl    $32, bitslong_r         /* bits += 32 */
923*44bedb31SLionel Sambuc        por     %mm7, hold_mm           /* hold_mm |= *((uint *)in)++ << bits */
924*44bedb31SLionel Sambuc
925*44bedb31SLionel Sambuc.L_get_length_code_mmx:                 /* first-level literal/length table lookup */
926*44bedb31SLionel Sambuc        pand    hold_mm, lmask_mm       /* lmask_mm = hold & lmask */
927*44bedb31SLionel Sambuc        movd    lmask_mm, %eax          /* eax = table index */
928*44bedb31SLionel Sambuc        movq    lmask2_mm, lmask_mm     /* restore lmask_mm from the spare copy */
929*44bedb31SLionel Sambuc        movl    (%ebx,%eax,4), %eax     /* eax = lcode[hold & lmask] */
930*44bedb31SLionel Sambuc
931*44bedb31SLionel Sambuc.L_dolen_mmx:                           /* in: eax = table entry (al = op, ah = bits, upper 16 bits = val) */
932*44bedb31SLionel Sambuc        movzbl  %ah, %ecx               /* ecx = this.bits */
933*44bedb31SLionel Sambuc        movd    %ecx, used_mm           /* defer the shift: hold_mm >>= this.bits happens at the next psrlq */
934*44bedb31SLionel Sambuc        subl    %ecx, bitslong_r        /* bits -= this.bits */
935*44bedb31SLionel Sambuc
936*44bedb31SLionel Sambuc        testb   %al, %al
937*44bedb31SLionel Sambuc        jnz     .L_test_for_length_base_mmx /* if (op != 0) 45.7% */
938*44bedb31SLionel Sambuc
939*44bedb31SLionel Sambuc        shrl    $16, %eax               /* output this.val char */
940*44bedb31SLionel Sambuc        stosb                           /* store al (the literal) through %edi */
941*44bedb31SLionel Sambuc        jmp     .L_while_test_mmx
942*44bedb31SLionel Sambuc
943*44bedb31SLionel Sambuc.L_test_for_length_base_mmx:            /* op != 0: length base, second-level link, or end of block */
944*44bedb31SLionel Sambuc#define len_r  %edx
945*44bedb31SLionel Sambuc        movl    %eax, len_r             /* len = this */
946*44bedb31SLionel Sambuc        shrl    $16, len_r              /* len = this.val */
947*44bedb31SLionel Sambuc
948*44bedb31SLionel Sambuc        testb   $16, %al
949*44bedb31SLionel Sambuc        jz      .L_test_for_second_level_length_mmx /* if ((op & 16) == 0) 8% */
950*44bedb31SLionel Sambuc        andl    $15, %eax               /* op &= 15 */
951*44bedb31SLionel Sambuc        jz      .L_decode_distance_mmx  /* if (!op) */
952*44bedb31SLionel Sambuc
953*44bedb31SLionel Sambuc        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
954*44bedb31SLionel Sambuc        movd    %eax, used_mm           /* defer the shift by op extra bits */
955*44bedb31SLionel Sambuc        movd    hold_mm, %ecx           /* ecx = low 32 bits of hold */
956*44bedb31SLionel Sambuc        subl    %eax, bitslong_r        /* bits -= op */
957*44bedb31SLionel Sambuc        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
958*44bedb31SLionel Sambuc        addl    %ecx, len_r             /* len += hold & mask[op] */
959*44bedb31SLionel Sambuc
960*44bedb31SLionel Sambuc.L_decode_distance_mmx:                 /* refill hold_mm if needed, then look up the distance code */
961*44bedb31SLionel Sambuc        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
962*44bedb31SLionel Sambuc
963*44bedb31SLionel Sambuc        cmpl    $32, bitslong_r
964*44bedb31SLionel Sambuc        ja      .L_get_dist_code_mmx    /* if (32 < bits) */
965*44bedb31SLionel Sambuc
966*44bedb31SLionel Sambuc        movd    bitslong_r, tmp_mm      /* tmp_mm = bits, the shift count */
967*44bedb31SLionel Sambuc        movd    (in_r), %mm7            /* mm7 = next 32 bits of input */
968*44bedb31SLionel Sambuc        addl    $4, in_r                /* in += 4 bytes */
969*44bedb31SLionel Sambuc        psllq   tmp_mm, %mm7            /* line the new bits up above the bits still in hold_mm */
970*44bedb31SLionel Sambuc        addl    $32, bitslong_r         /* bits += 32 */
971*44bedb31SLionel Sambuc        por     %mm7, hold_mm           /* hold_mm |= *((uint *)in)++ << bits */
972*44bedb31SLionel Sambuc
973*44bedb31SLionel Sambuc.L_get_dist_code_mmx:                   /* first-level distance table lookup */
974*44bedb31SLionel Sambuc        movl    dcode(%esp), %ebx       /* ebx = dcode */
975*44bedb31SLionel Sambuc        pand    hold_mm, dmask_mm       /* dmask_mm = hold & dmask */
976*44bedb31SLionel Sambuc        movd    dmask_mm, %eax          /* eax = table index */
977*44bedb31SLionel Sambuc        movq    dmask2_mm, dmask_mm     /* restore dmask_mm from the spare copy */
978*44bedb31SLionel Sambuc        movl    (%ebx,%eax,4), %eax     /* eax = dcode[hold & dmask] */
979*44bedb31SLionel Sambuc
980*44bedb31SLionel Sambuc.L_dodist_mmx:                          /* in: eax = table entry (al = op, ah = bits, upper 16 bits = val) */
981*44bedb31SLionel Sambuc#define dist_r %ebx
982*44bedb31SLionel Sambuc        movzbl  %ah, %ecx               /* ecx = this.bits */
983*44bedb31SLionel Sambuc        movl    %eax, dist_r            /* dist = this; %ebx no longer holds a table pointer */
984*44bedb31SLionel Sambuc        shrl    $16, dist_r             /* dist  = this.val */
985*44bedb31SLionel Sambuc        subl    %ecx, bitslong_r        /* bits -= this.bits */
986*44bedb31SLionel Sambuc        movd    %ecx, used_mm           /* defer the shift: hold_mm >>= this.bits happens at the next psrlq */
987*44bedb31SLionel Sambuc
988*44bedb31SLionel Sambuc        testb   $16, %al                /* if ((op & 16) == 0) */
989*44bedb31SLionel Sambuc        jz      .L_test_for_second_level_dist_mmx
990*44bedb31SLionel Sambuc        andl    $15, %eax               /* op &= 15 */
991*44bedb31SLionel Sambuc        jz      .L_check_dist_one_mmx   /* no extra bits: dist is final, try the dist == 1 fast path */
992*44bedb31SLionel Sambuc
993*44bedb31SLionel Sambuc.L_add_bits_to_dist_mmx:                /* in: eax = op (extra-bit count); falls through to .L_check_window_mmx */
994*44bedb31SLionel Sambuc        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
995*44bedb31SLionel Sambuc        movd    %eax, used_mm           /* save bit length of current op */
996*44bedb31SLionel Sambuc        movd    hold_mm, %ecx           /* get the next bits on input stream */
997*44bedb31SLionel Sambuc        subl    %eax, bitslong_r        /* bits -= op bits */
998*44bedb31SLionel Sambuc        andl    .L_mask(,%eax,4), %ecx  /* ecx   = hold & mask[op] */
999*44bedb31SLionel Sambuc        addl    %ecx, dist_r            /* dist += hold & mask[op] */
1000*44bedb31SLionel Sambuc
1001*44bedb31SLionel Sambuc.L_check_window_mmx:                    /* fast path when the whole match source is in the output already written */
1002*44bedb31SLionel Sambuc        movl    in_r, in(%esp)          /* save in so from can use its reg */
1003*44bedb31SLionel Sambuc        movl    out_r, %eax
1004*44bedb31SLionel Sambuc        subl    beg(%esp), %eax         /* nbytes = out - beg */
1005*44bedb31SLionel Sambuc
1006*44bedb31SLionel Sambuc        cmpl    dist_r, %eax
1007*44bedb31SLionel Sambuc        jb      .L_clip_window_mmx      /* if (dist > nbytes) 4.2% */
1008*44bedb31SLionel Sambuc
1009*44bedb31SLionel Sambuc        movl    len_r, %ecx             /* ecx = len; frees %edx (len_r) for use as scratch below */
1010*44bedb31SLionel Sambuc        movl    out_r, from_r
1011*44bedb31SLionel Sambuc        subl    dist_r, from_r          /* from = out - dist */
1012*44bedb31SLionel Sambuc
1013*44bedb31SLionel Sambuc        subl    $3, %ecx                /* first three bytes are copied by hand below */
1014*44bedb31SLionel Sambuc        movb    (from_r), %al
1015*44bedb31SLionel Sambuc        movb    %al, (out_r)            /* out[0] = from[0], stored before from[1] is read */
1016*44bedb31SLionel Sambuc        movb    1(from_r), %al
1017*44bedb31SLionel Sambuc        movb    2(from_r), %dl          /* %dl is scratch here: len was already copied to %ecx */
1018*44bedb31SLionel Sambuc        addl    $3, from_r
1019*44bedb31SLionel Sambuc        movb    %al, 1(out_r)           /* out[1] = from[1] */
1020*44bedb31SLionel Sambuc        movb    %dl, 2(out_r)           /* out[2] = from[2] */
1021*44bedb31SLionel Sambuc        addl    $3, out_r
1022*44bedb31SLionel Sambuc        rep     movsb                   /* copy the remaining len - 3 bytes */
1023*44bedb31SLionel Sambuc
1024*44bedb31SLionel Sambuc        movl    in(%esp), in_r          /* move in back to %esi, toss from */
1025*44bedb31SLionel Sambuc        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
1026*44bedb31SLionel Sambuc        jmp     .L_while_test_mmx
1027*44bedb31SLionel Sambuc
1028*44bedb31SLionel Sambuc.align 16,0x90
1029*44bedb31SLionel Sambuc.L_check_dist_one_mmx:                  /* dist == 1 fast path: replicate the previous output byte */
1030*44bedb31SLionel Sambuc        cmpl    $1, dist_r
1031*44bedb31SLionel Sambuc        jne     .L_check_window_mmx     /* if (dist != 1) use the general copy */
1032*44bedb31SLionel Sambuc        cmpl    out_r, beg(%esp)
1033*44bedb31SLionel Sambuc        je      .L_check_window_mmx     /* if (out == beg) there is no previous byte in the output */
1034*44bedb31SLionel Sambuc
1035*44bedb31SLionel Sambuc        decl    out_r                   /* point at the previous output byte */
1036*44bedb31SLionel Sambuc        movl    len_r, %ecx             /* ecx = len */
1037*44bedb31SLionel Sambuc        movb    (out_r), %al            /* al = byte to replicate */
1038*44bedb31SLionel Sambuc        subl    $3, %ecx                /* three copies are stored by hand below */
1039*44bedb31SLionel Sambuc
1040*44bedb31SLionel Sambuc        movb    %al, 1(out_r)
1041*44bedb31SLionel Sambuc        movb    %al, 2(out_r)
1042*44bedb31SLionel Sambuc        movb    %al, 3(out_r)
1043*44bedb31SLionel Sambuc        addl    $4, out_r               /* skip the source byte and the three copies */
1044*44bedb31SLionel Sambuc        rep     stosb                   /* store the remaining len - 3 copies of al */
1045*44bedb31SLionel Sambuc
1046*44bedb31SLionel Sambuc        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
1047*44bedb31SLionel Sambuc        jmp     .L_while_test_mmx
1048*44bedb31SLionel Sambuc
1049*44bedb31SLionel Sambuc.align 16,0x90
1050*44bedb31SLionel Sambuc.L_test_for_second_level_length_mmx:    /* in: al = op, len_r = this.val, %ebx = lcode */
1051*44bedb31SLionel Sambuc        testb   $64, %al
1052*44bedb31SLionel Sambuc        jnz     .L_test_for_end_of_block  /* if ((op & 64) != 0) */
1053*44bedb31SLionel Sambuc
1054*44bedb31SLionel Sambuc        andl    $15, %eax               /* op &= 15 */
1055*44bedb31SLionel Sambuc        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
1056*44bedb31SLionel Sambuc        movd    hold_mm, %ecx           /* ecx = low 32 bits of hold */
1057*44bedb31SLionel Sambuc        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
1058*44bedb31SLionel Sambuc        addl    len_r, %ecx             /* ecx += this.val */
1059*44bedb31SLionel Sambuc        movl    (%ebx,%ecx,4), %eax     /* eax = lcode[val + (hold & mask[op])] */
1060*44bedb31SLionel Sambuc        jmp     .L_dolen_mmx            /* decode the second-level entry like a first-level one */
1061*44bedb31SLionel Sambuc
1062*44bedb31SLionel Sambuc.align 16,0x90
1063*44bedb31SLionel Sambuc.L_test_for_second_level_dist_mmx:      /* in: al = op, dist_r = this.val */
1064*44bedb31SLionel Sambuc        testb   $64, %al
1065*44bedb31SLionel Sambuc        jnz     .L_invalid_distance_code  /* if ((op & 64) != 0) */
1066*44bedb31SLionel Sambuc
1067*44bedb31SLionel Sambuc        andl    $15, %eax               /* op &= 15 */
1068*44bedb31SLionel Sambuc        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
1069*44bedb31SLionel Sambuc        movd    hold_mm, %ecx           /* ecx = low 32 bits of hold */
1070*44bedb31SLionel Sambuc        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
1071*44bedb31SLionel Sambuc        movl    dcode(%esp), %eax       /* eax = dcode */
1072*44bedb31SLionel Sambuc        addl    dist_r, %ecx            /* ecx += this.val */
1073*44bedb31SLionel Sambuc        movl    (%eax,%ecx,4), %eax     /* eax = dcode[val + (hold & mask[op])] */
1074*44bedb31SLionel Sambuc        jmp     .L_dodist_mmx           /* decode the second-level entry like a first-level one */
1075*44bedb31SLionel Sambuc
1076*44bedb31SLionel Sambuc.align 16,0x90
1077*44bedb31SLionel Sambuc.L_clip_window_mmx:                     /* in: eax = nbytes (out - beg) < dist; part of the match comes from the window */
1078*44bedb31SLionel Sambuc#define nbytes_r %ecx
1079*44bedb31SLionel Sambuc        movl    %eax, nbytes_r
1080*44bedb31SLionel Sambuc        movl    wsize(%esp), %eax       /* prepare for dist compare */
1081*44bedb31SLionel Sambuc        negl    nbytes_r                /* nbytes = -nbytes */
1082*44bedb31SLionel Sambuc        movl    window(%esp), from_r    /* from = window */
1083*44bedb31SLionel Sambuc
1084*44bedb31SLionel Sambuc        cmpl    dist_r, %eax
1085*44bedb31SLionel Sambuc        jb      .L_invalid_distance_too_far /* if (dist > wsize) */
1086*44bedb31SLionel Sambuc
1087*44bedb31SLionel Sambuc        addl    dist_r, nbytes_r        /* nbytes = dist - nbytes */
1088*44bedb31SLionel Sambuc        cmpl    $0, write(%esp)
1089*44bedb31SLionel Sambuc        jne     .L_wrap_around_window_mmx /* if (write != 0) */
1090*44bedb31SLionel Sambuc
1091*44bedb31SLionel Sambuc        subl    nbytes_r, %eax          /* eax = wsize - nbytes */
1092*44bedb31SLionel Sambuc        addl    %eax, from_r            /* from += wsize - nbytes */
1093*44bedb31SLionel Sambuc
1094*44bedb31SLionel Sambuc        cmpl    nbytes_r, len_r
1095*44bedb31SLionel Sambuc        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
1096*44bedb31SLionel Sambuc
1097*44bedb31SLionel Sambuc        subl    nbytes_r, len_r         /* len -= nbytes */
1098*44bedb31SLionel Sambuc        rep     movsb                   /* copy nbytes bytes out of the window */
1099*44bedb31SLionel Sambuc        movl    out_r, from_r
1100*44bedb31SLionel Sambuc        subl    dist_r, from_r          /* from = out - dist */
1101*44bedb31SLionel Sambuc        jmp     .L_do_copy1_mmx         /* copy the remaining len bytes from the output */
1111*44bedb31SLionel Sambuc
.L_wrap_around_window_mmx:
        /*
         * write != 0.  nbytes_r bytes are wanted from the window and "write"
         * is loaded into %eax.
         *
         * write >= nbytes: the bytes form one run ending at window + write;
         *                  handled by .L_contiguous_in_window_mmx.
         * write <  nbytes: the first (nbytes - write) bytes are the last
         *                  bytes of the window (ending at window + wsize),
         *                  the remaining "write" bytes start at window.
         *
         * After each segment, if the bytes left in the segment cover the rest
         * of the match (nbytes >= len) control goes to .L_do_copy1_mmx, which
         * copies len bytes from from_r.
         */
#define write_r %eax
        movl    write(%esp), write_r
        cmpl    write_r, nbytes_r
        jbe     .L_contiguous_in_window_mmx /* if (write >= nbytes) */

        addl    wsize(%esp), from_r
        addl    write_r, from_r
        subl    nbytes_r, from_r        /* from += wsize + write - nbytes */
        subl    write_r, nbytes_r       /* nbytes -= write */
#undef write_r

        cmpl    nbytes_r, len_r
        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */

        /* first segment: tail of the window */
        subl    nbytes_r, len_r         /* len -= nbytes */
        rep     movsb
        movl    window(%esp), from_r    /* from = window */
        movl    write(%esp), nbytes_r   /* nbytes = write */
        cmpl    nbytes_r, len_r
        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */

        /* second segment: start of the window, then on to the output */
        subl    nbytes_r, len_r         /* len -= nbytes */
        rep     movsb
        movl    out_r, from_r
        subl    dist_r, from_r          /* from = out - dist */
        jmp     .L_do_copy1_mmx
1139*44bedb31SLionel Sambuc
.L_contiguous_in_window_mmx:
        /*
         * write >= nbytes: the window bytes are one run ending at
         * window + write.  On entry %eax still holds "write" (loaded by
         * .L_wrap_around_window_mmx just before it branches here) and
         * from_r holds window.  Falls through into .L_do_copy1_mmx.
         */
#define write_r %eax
        addl    write_r, from_r
        subl    nbytes_r, from_r        /* from += write - nbytes */
#undef write_r

        cmpl    nbytes_r, len_r
        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */

        /* copy the window bytes, then continue from the output itself */
        subl    nbytes_r, len_r         /* len -= nbytes */
        rep     movsb
        movl    out_r, from_r
        subl    dist_r, from_r          /* from = out - dist */
1153*44bedb31SLionel Sambuc
.L_do_copy1_mmx:
        /*
         * Final copy of the match: len_r bytes from from_r to out_r.
         * Afterwards the registers that were borrowed for the copy are
         * reloaded from their stack slots and the main MMX loop test is
         * re-entered.
         */
#undef nbytes_r
#define in_r %esi
        movl    len_r, %ecx             /* %ecx = byte count for rep movsb */
        rep     movsb

        movl    in(%esp), in_r          /* move in back to %esi, toss from */
        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
        jmp     .L_while_test_mmx
1163*44bedb31SLionel Sambuc
1164*44bedb31SLionel Sambuc#undef hold_r
1165*44bedb31SLionel Sambuc#undef bitslong_r
1166*44bedb31SLionel Sambuc
1167*44bedb31SLionel Sambuc#endif /* USE_MMX || RUN_TIME_MMX */
1168*44bedb31SLionel Sambuc
1169*44bedb31SLionel Sambuc
/*** USE_MMX, NO_MMX, and RUN_TIME_MMX from here on ***/
1171*44bedb31SLionel Sambuc
.L_invalid_distance_code:
        /* else {
         *   strm->msg = "invalid distance code";
         *   state->mode = BAD;
         * }
         *
         * Error exit: hands the message address in %ecx and the new mode in
         * %edx to .L_update_stream_state.
         */
        movl    $.L_invalid_distance_code_msg, %ecx
        movl    $INFLATE_MODE_BAD, %edx
        jmp     .L_update_stream_state
1181*44bedb31SLionel Sambuc
.L_test_for_end_of_block:
        /* else if (op & 32) {
         *   state->mode = TYPE;
         *   break;
         * }
         *
         * %al is tested as op.  With bit 5 set the mode becomes TYPE and no
         * message is stored; otherwise the code is reported as invalid.
         */
        testb   $32, %al
        jz      .L_invalid_literal_length_code  /* if ((op & 32) == 0) */

        /* msg = NULL, so .L_update_stream_state leaves strm->msg alone.
         * The flags written by xorl are dead: the next consumer is the
         * testl in .L_update_stream_state, which sets them itself. */
        xorl    %ecx, %ecx
        movl    $INFLATE_MODE_TYPE, %edx
        jmp     .L_update_stream_state
1194*44bedb31SLionel Sambuc
.L_invalid_literal_length_code:
        /* else {
         *   strm->msg = "invalid literal/length code";
         *   state->mode = BAD;
         * }
         *
         * Error exit: hands the message address in %ecx and the new mode in
         * %edx to .L_update_stream_state.
         */
        movl    $.L_invalid_literal_length_code_msg, %ecx
        movl    $INFLATE_MODE_BAD, %edx
        jmp     .L_update_stream_state
1204*44bedb31SLionel Sambuc
.L_invalid_distance_too_far:
        /* strm->msg = "invalid distance too far back";
         * state->mode = BAD;
         *
         * Reached from the window-clipping code, where in's register is in
         * use as from_r; in is reloaded from its stack slot first so that
         * the loop-exit code sees the real input pointer.
         */
        movl    in(%esp), in_r          /* from_r has in's reg, put in back */
        movl    $.L_invalid_distance_too_far_msg, %ecx
        movl    $INFLATE_MODE_BAD, %edx
        jmp     .L_update_stream_state
1213*44bedb31SLionel Sambuc
.L_update_stream_state:
        /* set strm->msg = %ecx, strm->state->mode = %edx
         *
         * In:  %ecx = message address, or 0 to leave strm->msg unchanged
         *      %edx = value to store in state->mode
         * Uses %eax as scratch (strm, then strm->state).
         */
        movl    strm_sp(%esp), %eax
        testl   %ecx, %ecx              /* if (msg != NULL) */
        jz      .L_skip_msg
        movl    %ecx, msg_strm(%eax)    /* strm->msg = msg */
.L_skip_msg:
        movl    state_strm(%eax), %eax  /* state = strm->state */
        movl    %edx, mode_state(%eax)  /* state->mode = edx (BAD | TYPE) */
        jmp     .L_break_loop           /* jump over the alignment padding */
1224*44bedb31SLionel Sambuc
.align 32,0x90
.L_break_loop:

/*
 * Common loop exit for the MMX and non-MMX decoders.
 *
 * Regs:
 *
 * bits = %ebp when mmx, and in %ebx when non-mmx
 * hold = %hold_mm when mmx, and in %ebp when non-mmx
 * in   = %esi
 * out  = %edi
 *
 * The code that follows expects bits in %ebx, so the MMX case copies it
 * there from %ebp.  With RUN_TIME_MMX the copy is skipped unless
 * inflate_fast_use_mmx equals DO_USE_MMX.
 */

#if defined( USE_MMX ) || defined( RUN_TIME_MMX )

#if defined( RUN_TIME_MMX )

        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
        jne     .L_update_next_in

#endif /* RUN_TIME_MMX */

        movl    %ebp, %ebx              /* bits: %ebp (mmx) -> %ebx */

.L_update_next_in:

#endif
1251*44bedb31SLionel Sambuc
#define strm_r  %eax
#define state_r %edx

        /* len = bits >> 3;
         * in -= len;
         * bits -= len << 3;
         * hold &= (1U << bits) - 1;
         * state->hold = hold;
         * state->bits = bits;
         * strm->next_in = in;
         * strm->next_out = out;
         *
         * Whole bytes still sitting in the bit buffer are handed back to the
         * input, leaving fewer than 8 bits, and the decoder state is written
         * back to *strm and *state.  %ebx carries bits on entry and is reused
         * as scratch afterwards; %ecx keeps the final bit count for the mask
         * shift.
         */
        movl    strm_sp(%esp), strm_r
        movl    %ebx, %ecx
        movl    state_strm(strm_r), state_r
        shrl    $3, %ecx                /* len = bits >> 3 */
        subl    %ecx, in_r              /* in -= len */
        shll    $3, %ecx
        subl    %ecx, %ebx              /* bits -= len << 3 */
        movl    out_r, next_out_strm(strm_r)
        movl    %ebx, bits_state(state_r)
        movl    %ebx, %ecx              /* %cl = bits, for the shll below */

        /* If last points at the on-stack buf, the input was being read from
         * that copy: translate in back into the caller's buffer and rebuild
         * last from strm->next_in and strm->avail_in (both still hold their
         * old values here; next_in is only stored below). */
        leal    buf(%esp), %ebx
        cmpl    %ebx, last(%esp)
        jne     .L_buf_not_used         /* if buf != last */

        subl    %ebx, in_r              /* in -= buf */
        movl    next_in_strm(strm_r), %ebx
        movl    %ebx, last(%esp)        /* last = strm->next_in */
        addl    %ebx, in_r              /* in += strm->next_in */
        movl    avail_in_strm(strm_r), %ebx
        subl    $11, %ebx
        addl    %ebx, last(%esp)    /* last = &strm->next_in[ avail_in - 11 ] */

.L_buf_not_used:
        movl    in_r, next_in_strm(strm_r)

        /* %ebx = (1 << bits) - 1, the mask for the bits that remain valid */
        movl    $1, %ebx
        shll    %cl, %ebx
        decl    %ebx

#if defined( USE_MMX ) || defined( RUN_TIME_MMX )

#if defined( RUN_TIME_MMX )

        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
        jne     .L_update_hold

#endif /* RUN_TIME_MMX */

        /* MMX case: bring hold back into %ebp, then leave MMX state */
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
        movd    hold_mm, %ebp

        emms

.L_update_hold:

#endif /* USE_MMX || RUN_TIME_MMX */

        andl    %ebx, %ebp              /* hold &= (1U << bits) - 1 */
        movl    %ebp, hold_state(state_r)
1314*44bedb31SLionel Sambuc
#define last_r %ebx

        /* strm->avail_in = in < last ? 11 + (last - in) : 11 - (in - last)
         *
         * The comparison is unsigned (jbe).  Each branch computes the
         * difference in place, so either last_r or in_r is overwritten;
         * in has already been stored to strm->next_in by this point.
         */
        movl    last(%esp), last_r
        cmpl    in_r, last_r
        jbe     .L_last_is_smaller     /* if (in >= last) */

        subl    in_r, last_r           /* last -= in */
        addl    $11, last_r            /* last += 11 */
        movl    last_r, avail_in_strm(strm_r)
        jmp     .L_fixup_out
.L_last_is_smaller:
        subl    last_r, in_r           /* in -= last */
        negl    in_r                   /* in = -in */
        addl    $11, in_r              /* in += 11 */
        movl    in_r, avail_in_strm(strm_r)
1331*44bedb31SLionel Sambuc
#undef last_r
#define end_r %ebx

.L_fixup_out:
        /* strm->avail_out = out < end ? 257 + (end - out) : 257 - (out - end)
         *
         * Same shape as the avail_in computation: unsigned compare, and the
         * difference is built in end_r or out_r, whichever branch is taken.
         * out has already been stored to strm->next_out by this point.
         */
        movl    end(%esp), end_r
        cmpl    out_r, end_r
        jbe     .L_end_is_smaller      /* if (out >= end) */

        subl    out_r, end_r           /* end -= out */
        addl    $257, end_r            /* end += 257 */
        movl    end_r, avail_out_strm(strm_r)
        jmp     .L_done
.L_end_is_smaller:
        subl    end_r, out_r           /* out -= end */
        negl    out_r                  /* out = -out */
        addl    $257, out_r            /* out += 257 */
        movl    out_r, avail_out_strm(strm_r)

#undef end_r
#undef strm_r
#undef state_r
1354*44bedb31SLionel Sambuc
.L_done:
        /* Function exit: release the local-variable area, then pop the
         * flags and %ebx, %ebp, %esi, %edi.  This presumably mirrors the
         * pushes made by the prologue, which is outside this excerpt --
         * TODO confirm the order matches. */
        addl    $local_var_size, %esp
        popf
        popl    %ebx
        popl    %ebp
        popl    %esi
        popl    %edi
        ret

#if defined( GAS_ELF )
/* elf info: mark inflate_fast as a function symbol and record its size */
.type inflate_fast,@function
.size inflate_fast,.-inflate_fast
#endif
1369