xref: /minix3/common/dist/zlib/contrib/masmx86/inffas32.asm (revision 44bedb31d842b4b0444105519bcf929a69fe2dc1)
1*44bedb31SLionel Sambuc;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
2*44bedb31SLionel Sambuc; *
3*44bedb31SLionel Sambuc; * inffas32.asm is derived from inffas86.c, with translation of assembly code
4*44bedb31SLionel Sambuc; *
5*44bedb31SLionel Sambuc; * Copyright (C) 1995-2003 Mark Adler
6*44bedb31SLionel Sambuc; * For conditions of distribution and use, see copyright notice in zlib.h
7*44bedb31SLionel Sambuc; *
8*44bedb31SLionel Sambuc; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
9*44bedb31SLionel Sambuc; * Please use the copyright conditions above.
10*44bedb31SLionel Sambuc; *
11*44bedb31SLionel Sambuc; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
12*44bedb31SLionel Sambuc; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
13*44bedb31SLionel Sambuc; * the moment.  I have successfully compiled and tested this code with gcc2.96,
14*44bedb31SLionel Sambuc; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
15*44bedb31SLionel Sambuc; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
16*44bedb31SLionel Sambuc; * enabled.  I will attempt to merge the MMX code into this version.  Newer
17*44bedb31SLionel Sambuc; * versions of this and inffast.S can be found at
18*44bedb31SLionel Sambuc; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
19*44bedb31SLionel Sambuc; *
20*44bedb31SLionel Sambuc; * 2005 : modification by Gilles Vollant
21*44bedb31SLionel Sambuc; */
22*44bedb31SLionel Sambuc; For Visual C++ 4.x and higher and ML 6.x and higher
23*44bedb31SLionel Sambuc;   ml.exe is in directory \MASM611C of Win95 DDK
24*44bedb31SLionel Sambuc;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
25*44bedb31SLionel Sambuc;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
26*44bedb31SLionel Sambuc;
27*44bedb31SLionel Sambuc;
28*44bedb31SLionel Sambuc;   compile with command line option
29*44bedb31SLionel Sambuc;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm
30*44bedb31SLionel Sambuc
31*44bedb31SLionel Sambuc;   if you define NO_GZIP (see inflate.h), compile with
32*44bedb31SLionel Sambuc;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
33*44bedb31SLionel Sambuc
34*44bedb31SLionel Sambuc
35*44bedb31SLionel Sambuc; zlib1222sup is 0 for zlib 1.2.2.1 and lower
36*44bedb31SLionel Sambuc; zlib1222sup is 8 for zlib 1.2.2.2 and higher (with the addition of dmax and head
37*44bedb31SLionel Sambuc;        in inflate_state in inflate.h)
38*44bedb31SLionel Sambuczlib1222sup      equ    8
39*44bedb31SLionel Sambuc
40*44bedb31SLionel Sambuc
41*44bedb31SLionel SambucIFDEF GUNZIP
; Numeric values for the two inflate-mode constants used by this file.
; Two value sets exist:
;   GUNZIP defined, or NO_GUNZIP not defined  -> TYPE = 11, BAD = 26
;   NO_GUNZIP defined (and GUNZIP not)        -> TYPE = 3,  BAD = 17
; NOTE(review): presumably these must match the inflate_mode enum in the
; inflate.h of the zlib version being built -- confirm when updating zlib.
42*44bedb31SLionel Sambuc  INFLATE_MODE_TYPE    equ 11
43*44bedb31SLionel Sambuc  INFLATE_MODE_BAD     equ 26
44*44bedb31SLionel SambucELSE
45*44bedb31SLionel Sambuc  IFNDEF NO_GUNZIP
46*44bedb31SLionel Sambuc    INFLATE_MODE_TYPE    equ 11
47*44bedb31SLionel Sambuc    INFLATE_MODE_BAD     equ 26
48*44bedb31SLionel Sambuc  ELSE
49*44bedb31SLionel Sambuc    INFLATE_MODE_TYPE    equ 3
50*44bedb31SLionel Sambuc    INFLATE_MODE_BAD     equ 17
51*44bedb31SLionel Sambuc  ENDIF
52*44bedb31SLionel SambucENDIF
53*44bedb31SLionel Sambuc
54*44bedb31SLionel Sambuc
55*44bedb31SLionel Sambuc; 75 "inffast.S"
56*44bedb31SLionel Sambuc;FILE "inffast.S"
57*44bedb31SLionel Sambuc
58*44bedb31SLionel Sambuc;;;GLOBAL _inflate_fast
59*44bedb31SLionel Sambuc
60*44bedb31SLionel Sambuc;;;SECTION .text
61*44bedb31SLionel Sambuc
62*44bedb31SLionel Sambuc
63*44bedb31SLionel Sambuc
64*44bedb31SLionel Sambuc	.586p
65*44bedb31SLionel Sambuc	.mmx
66*44bedb31SLionel Sambuc
67*44bedb31SLionel Sambuc	name	inflate_fast_x86
68*44bedb31SLionel Sambuc	.MODEL	FLAT
69*44bedb31SLionel Sambuc
70*44bedb31SLionel Sambuc_DATA			segment
71*44bedb31SLionel Sambucinflate_fast_use_mmx:
; Decoder-selection flag examined at L_check_mmx:
;   below 2 (initial value 1) : not probed yet, run the CPUID probe
;   2                         : use the MMX decode loop
;   above 2 (probe stores 3)  : use the plain integer decode loop
; The probe writes 2 or 3 back here, so it runs once and later calls
; dispatch directly.
72*44bedb31SLionel Sambuc	dd	1
73*44bedb31SLionel Sambuc
74*44bedb31SLionel Sambuc
75*44bedb31SLionel Sambuc_TEXT			segment
76*44bedb31SLionel SambucPUBLIC _inflate_fast
77*44bedb31SLionel Sambuc
78*44bedb31SLionel SambucALIGN 4
79*44bedb31SLionel Sambuc_inflate_fast:
80*44bedb31SLionel Sambuc	jmp inflate_fast_entry
81*44bedb31SLionel Sambuc
82*44bedb31SLionel Sambuc
83*44bedb31SLionel Sambuc
84*44bedb31SLionel SambucALIGN 4
85*44bedb31SLionel Sambuc	db	'Fast decoding Code from Chris Anderson'
86*44bedb31SLionel Sambuc	db	0
87*44bedb31SLionel Sambuc
88*44bedb31SLionel SambucALIGN 4
89*44bedb31SLionel Sambucinvalid_literal_length_code_msg:
90*44bedb31SLionel Sambuc	db	'invalid literal/length code'
91*44bedb31SLionel Sambuc	db	0
92*44bedb31SLionel Sambuc
93*44bedb31SLionel SambucALIGN 4
94*44bedb31SLionel Sambucinvalid_distance_code_msg:
95*44bedb31SLionel Sambuc	db	'invalid distance code'
96*44bedb31SLionel Sambuc	db	0
97*44bedb31SLionel Sambuc
98*44bedb31SLionel SambucALIGN 4
99*44bedb31SLionel Sambucinvalid_distance_too_far_msg:
100*44bedb31SLionel Sambuc	db	'invalid distance too far back'
101*44bedb31SLionel Sambuc	db	0
102*44bedb31SLionel Sambuc
103*44bedb31SLionel Sambuc
104*44bedb31SLionel SambucALIGN 4
105*44bedb31SLionel Sambucinflate_fast_mask:
; Table of 33 dwords: entry n holds (2^n - 1), i.e. a mask that keeps the
; low n bits, for n = 0..32.  The MMX decode path indexes it as
; [inflate_fast_mask+eax*4] with eax = number of extra bits to extract.
106*44bedb31SLionel Sambucdd	0
107*44bedb31SLionel Sambucdd	1
108*44bedb31SLionel Sambucdd	3
109*44bedb31SLionel Sambucdd	7
110*44bedb31SLionel Sambucdd	15
111*44bedb31SLionel Sambucdd	31
112*44bedb31SLionel Sambucdd	63
113*44bedb31SLionel Sambucdd	127
114*44bedb31SLionel Sambucdd	255
115*44bedb31SLionel Sambucdd	511
116*44bedb31SLionel Sambucdd	1023
117*44bedb31SLionel Sambucdd	2047
118*44bedb31SLionel Sambucdd	4095
119*44bedb31SLionel Sambucdd	8191
120*44bedb31SLionel Sambucdd	16383
121*44bedb31SLionel Sambucdd	32767
122*44bedb31SLionel Sambucdd	65535
123*44bedb31SLionel Sambucdd	131071
124*44bedb31SLionel Sambucdd	262143
125*44bedb31SLionel Sambucdd	524287
126*44bedb31SLionel Sambucdd	1048575
127*44bedb31SLionel Sambucdd	2097151
128*44bedb31SLionel Sambucdd	4194303
129*44bedb31SLionel Sambucdd	8388607
130*44bedb31SLionel Sambucdd	16777215
131*44bedb31SLionel Sambucdd	33554431
132*44bedb31SLionel Sambucdd	67108863
133*44bedb31SLionel Sambucdd	134217727
134*44bedb31SLionel Sambucdd	268435455
135*44bedb31SLionel Sambucdd	536870911
136*44bedb31SLionel Sambucdd	1073741823
137*44bedb31SLionel Sambucdd	2147483647
138*44bedb31SLionel Sambucdd	4294967295
139*44bedb31SLionel Sambuc
140*44bedb31SLionel Sambuc
; Byte offsets of the inflate-state fields this routine touches; the field
; each one addresses is named in its trailing comment.  Every offset except
; mode_state is shifted by zlib1222sup, and every offset after wsize_state
; carries an additional +4.
; NOTE(review): the literal bases assume the 32-bit inflate_state layout of
; the matching inflate.h -- re-verify whenever that structure changes.
141*44bedb31SLionel Sambucmode_state	 equ	0	;/* state->mode	*/
142*44bedb31SLionel Sambucwsize_state	 equ	(32+zlib1222sup)	;/* state->wsize */
143*44bedb31SLionel Sambucwrite_state	 equ	(36+4+zlib1222sup)	;/* state->write */
144*44bedb31SLionel Sambucwindow_state	 equ	(40+4+zlib1222sup)	;/* state->window */
145*44bedb31SLionel Sambuchold_state	 equ	(44+4+zlib1222sup)	;/* state->hold	*/
146*44bedb31SLionel Sambucbits_state	 equ	(48+4+zlib1222sup)	;/* state->bits	*/
147*44bedb31SLionel Sambuclencode_state	 equ	(64+4+zlib1222sup)	;/* state->lencode */
148*44bedb31SLionel Sambucdistcode_state	 equ	(68+4+zlib1222sup)	;/* state->distcode */
149*44bedb31SLionel Sambuclenbits_state	 equ	(72+4+zlib1222sup)	;/* state->lenbits */
150*44bedb31SLionel Sambucdistbits_state	 equ	(76+4+zlib1222sup)	;/* state->distbits */
151*44bedb31SLionel Sambuc
152*44bedb31SLionel Sambuc
153*44bedb31SLionel Sambuc;;SECTION .text
154*44bedb31SLionel Sambuc; 205 "inffast.S"
155*44bedb31SLionel Sambuc;GLOBAL	inflate_fast_use_mmx
156*44bedb31SLionel Sambuc
157*44bedb31SLionel Sambuc;SECTION .data
158*44bedb31SLionel Sambuc
159*44bedb31SLionel Sambuc
160*44bedb31SLionel Sambuc; GLOBAL inflate_fast_use_mmx:object
161*44bedb31SLionel Sambuc;.size inflate_fast_use_mmx, 4
162*44bedb31SLionel Sambuc; 226 "inffast.S"
163*44bedb31SLionel Sambuc;SECTION .text
164*44bedb31SLionel Sambuc
165*44bedb31SLionel SambucALIGN 4
166*44bedb31SLionel Sambucinflate_fast_entry:
; Prologue and local-frame setup.
;
; Saves edi, esi, ebp, ebx and EFLAGS, reserves a 64-byte local frame and
; clears the direction flag so the string instructions used by the decoder
; (lodsw / stosb / movsb) step upward through memory.
;
; Five dwords are pushed and the return address sits above them, so with
; the 64-byte frame the first stack argument is at [esp+88] and the second
; at [esp+92]  (64 + 5*4 + 4 = 88).
;
; Frame slots filled in by this section (arg1 = first argument):
;   [esp+0]   (1 << state->lenbits)  - 1   index mask for the length table
;   [esp+4]   (1 << state->distbits) - 1   index mask for the distance table
;   [esp+8]   state->lencode
;   [esp+12]  state->distcode
;   [esp+16]  output limit  = [arg1+12] + [arg1+16] - 257
;   [esp+20]  input limit   = [arg1+0]  + [arg1+4]  - 11
;   [esp+40]  [arg1+12] - (arg2 - [arg1+16])
;   [esp+44]  input pointer  ([arg1+0])
;   [esp+48]  state->write
;   [esp+52]  state->wsize
;   [esp+56]  state->window
;   [esp+60]  output pointer ([arg1+12])
; Register state when this section finishes:
;   edi = state pointer ([arg1+28]), ebp = state->hold, ebx = state->bits.
;
; NOTE(review): the arg1 offsets 0/4/12/16/28 look like the 32-bit z_stream
; fields next_in/avail_in/next_out/avail_out/state, and arg2 like the
; "start" argument of inflate_fast -- confirm against zlib.h / inffast.c.
167*44bedb31SLionel Sambuc	push  edi
168*44bedb31SLionel Sambuc	push  esi
169*44bedb31SLionel Sambuc	push  ebp
170*44bedb31SLionel Sambuc	push  ebx
171*44bedb31SLionel Sambuc	pushfd
172*44bedb31SLionel Sambuc	sub  esp,64
173*44bedb31SLionel Sambuc	cld
174*44bedb31SLionel Sambuc
175*44bedb31SLionel Sambuc
176*44bedb31SLionel Sambuc
177*44bedb31SLionel Sambuc
; esi = arg1, edi = state pointer stored at [arg1+28]
178*44bedb31SLionel Sambuc	mov  esi, [esp+88]
179*44bedb31SLionel Sambuc	mov  edi, [esi+28]
180*44bedb31SLionel Sambuc
181*44bedb31SLionel Sambuc
182*44bedb31SLionel Sambuc
183*44bedb31SLionel Sambuc
184*44bedb31SLionel Sambuc
185*44bedb31SLionel Sambuc
186*44bedb31SLionel Sambuc
; eax = input pointer, edx = input byte count -> edx = input end - 11
187*44bedb31SLionel Sambuc	mov  edx, [esi+4]
188*44bedb31SLionel Sambuc	mov  eax, [esi+0]
189*44bedb31SLionel Sambuc
190*44bedb31SLionel Sambuc	add  edx,eax
191*44bedb31SLionel Sambuc	sub  edx,11
192*44bedb31SLionel Sambuc
193*44bedb31SLionel Sambuc	mov  [esp+44],eax
194*44bedb31SLionel Sambuc	mov  [esp+20],edx
195*44bedb31SLionel Sambuc
; ebp = arg2, ecx = output byte count, ebx = output pointer
196*44bedb31SLionel Sambuc	mov  ebp, [esp+92]
197*44bedb31SLionel Sambuc	mov  ecx, [esi+16]
198*44bedb31SLionel Sambuc	mov  ebx, [esi+12]
199*44bedb31SLionel Sambuc
; ebp = output pointer - (arg2 - output count)
200*44bedb31SLionel Sambuc	sub  ebp,ecx
201*44bedb31SLionel Sambuc	neg  ebp
202*44bedb31SLionel Sambuc	add  ebp,ebx
203*44bedb31SLionel Sambuc
; ecx = output pointer + output count - 257
204*44bedb31SLionel Sambuc	sub  ecx,257
205*44bedb31SLionel Sambuc	add  ecx,ebx
206*44bedb31SLionel Sambuc
207*44bedb31SLionel Sambuc	mov  [esp+60],ebx
208*44bedb31SLionel Sambuc	mov  [esp+40],ebp
209*44bedb31SLionel Sambuc	mov  [esp+16],ecx
210*44bedb31SLionel Sambuc; 285 "inffast.S"
; cache the two decode-table pointers
211*44bedb31SLionel Sambuc	mov  eax, [edi+lencode_state]
212*44bedb31SLionel Sambuc	mov  ecx, [edi+distcode_state]
213*44bedb31SLionel Sambuc
214*44bedb31SLionel Sambuc	mov  [esp+8],eax
215*44bedb31SLionel Sambuc	mov  [esp+12],ecx
216*44bedb31SLionel Sambuc
; length-table index mask = (1 << lenbits) - 1
217*44bedb31SLionel Sambuc	mov  eax,1
218*44bedb31SLionel Sambuc	mov  ecx, [edi+lenbits_state]
219*44bedb31SLionel Sambuc	shl  eax,cl
220*44bedb31SLionel Sambuc	dec  eax
221*44bedb31SLionel Sambuc	mov  [esp+0],eax
222*44bedb31SLionel Sambuc
; distance-table index mask = (1 << distbits) - 1
223*44bedb31SLionel Sambuc	mov  eax,1
224*44bedb31SLionel Sambuc	mov  ecx, [edi+distbits_state]
225*44bedb31SLionel Sambuc	shl  eax,cl
226*44bedb31SLionel Sambuc	dec  eax
227*44bedb31SLionel Sambuc	mov  [esp+4],eax
228*44bedb31SLionel Sambuc
; cache the sliding-window description
229*44bedb31SLionel Sambuc	mov  eax, [edi+wsize_state]
230*44bedb31SLionel Sambuc	mov  ecx, [edi+write_state]
231*44bedb31SLionel Sambuc	mov  edx, [edi+window_state]
232*44bedb31SLionel Sambuc
233*44bedb31SLionel Sambuc	mov  [esp+52],eax
234*44bedb31SLionel Sambuc	mov  [esp+48],ecx
235*44bedb31SLionel Sambuc	mov  [esp+56],edx
236*44bedb31SLionel Sambuc
; bit accumulator and its current bit count live in ebp / ebx from here on
237*44bedb31SLionel Sambuc	mov  ebp, [edi+hold_state]
238*44bedb31SLionel Sambuc	mov  ebx, [edi+bits_state]
239*44bedb31SLionel Sambuc; 321 "inffast.S"
; Input preparation.
;   esi = input pointer ([esp+44]), ecx = input limit ([esp+20], which is
;   input end - 11).
; If the limit is above the pointer there are more than 11 input bytes and
; the code only has to align the pointer (L_align_long).
; Otherwise the remaining input (ecx = limit + 11 - pointer bytes) is copied
; into the 12-byte scratch area at [esp+28], the rest of that area is
; zero-filled, and both esi and the input limit [esp+20] are redirected to
; the scratch area so the decoder reads from it instead of the caller's
; buffer.
240*44bedb31SLionel Sambuc	mov  esi, [esp+44]
241*44bedb31SLionel Sambuc	mov  ecx, [esp+20]
242*44bedb31SLionel Sambuc	cmp  ecx,esi
243*44bedb31SLionel Sambuc	ja   L_align_long
244*44bedb31SLionel Sambuc
; ecx = bytes of input left, eax = 12 - ecx = bytes of zero padding
245*44bedb31SLionel Sambuc	add  ecx,11
246*44bedb31SLionel Sambuc	sub  ecx,esi
247*44bedb31SLionel Sambuc	mov  eax,12
248*44bedb31SLionel Sambuc	sub  eax,ecx
249*44bedb31SLionel Sambuc	lea  edi, [esp+28]
250*44bedb31SLionel Sambuc	rep movsb
251*44bedb31SLionel Sambuc	mov  ecx,eax
252*44bedb31SLionel Sambuc	xor  eax,eax
253*44bedb31SLionel Sambuc	rep stosb
254*44bedb31SLionel Sambuc	lea  esi, [esp+28]
255*44bedb31SLionel Sambuc	mov  [esp+20],esi
256*44bedb31SLionel Sambuc	jmp  L_is_aligned
257*44bedb31SLionel Sambuc
258*44bedb31SLionel Sambuc
259*44bedb31SLionel SambucL_align_long:
; Until esi is a multiple of 4: fetch one input byte, shift it left by the
; current bit count (ebx) and OR it into the bit accumulator (ebp), then
; add 8 to the bit count.
260*44bedb31SLionel Sambuc	test  esi,3
261*44bedb31SLionel Sambuc	jz   L_is_aligned
262*44bedb31SLionel Sambuc	xor  eax,eax
263*44bedb31SLionel Sambuc	mov  al, [esi]
264*44bedb31SLionel Sambuc	inc  esi
265*44bedb31SLionel Sambuc	mov  ecx,ebx
266*44bedb31SLionel Sambuc	add  ebx,8
267*44bedb31SLionel Sambuc	shl  eax,cl
268*44bedb31SLionel Sambuc	or  ebp,eax
269*44bedb31SLionel Sambuc	jmp L_align_long
270*44bedb31SLionel Sambuc
271*44bedb31SLionel SambucL_is_aligned:
; edi was used as a scratch/state pointer above; reload it with the output
; pointer saved in [esp+60].
272*44bedb31SLionel Sambuc	mov  edi, [esp+60]
273*44bedb31SLionel Sambuc; 366 "inffast.S"
274*44bedb31SLionel SambucL_check_mmx:
; Decoder dispatch / one-time CPU probe.
;   inflate_fast_use_mmx == 2 -> L_init_mmx (MMX decode loop)
;   inflate_fast_use_mmx  > 2 -> L_do_loop  (integer decode loop)
;   otherwise                 -> probe the CPU below, store 2 or 3 into
;                                inflate_fast_use_mmx, and re-dispatch.
275*44bedb31SLionel Sambuc	cmp  dword ptr [inflate_fast_use_mmx],2
276*44bedb31SLionel Sambuc	je   L_init_mmx
277*44bedb31SLionel Sambuc	ja   L_do_loop
278*44bedb31SLionel Sambuc
; cpuid overwrites eax/ebx/ecx/edx, and ebx currently holds the live bit
; count, so all four are saved around the probe.
279*44bedb31SLionel Sambuc	push  eax
280*44bedb31SLionel Sambuc	push  ebx
281*44bedb31SLionel Sambuc	push  ecx
282*44bedb31SLionel Sambuc	push  edx
; CPUID availability test: flip bit 21 (0200000h, the ID flag) in a copy of
; EFLAGS, load it, read EFLAGS back and compare with the original value.
; If the bit did not change, CPUID is not available -> no MMX.
283*44bedb31SLionel Sambuc	pushfd
284*44bedb31SLionel Sambuc	mov  eax, [esp]
285*44bedb31SLionel Sambuc	xor  dword ptr [esp],0200000h
286*44bedb31SLionel Sambuc
287*44bedb31SLionel Sambuc
288*44bedb31SLionel Sambuc
289*44bedb31SLionel Sambuc
290*44bedb31SLionel Sambuc	popfd
291*44bedb31SLionel Sambuc	pushfd
292*44bedb31SLionel Sambuc	pop  edx
293*44bedb31SLionel Sambuc	xor  edx,eax
294*44bedb31SLionel Sambuc	jz   L_dont_use_mmx
; CPUID leaf 0: vendor string in ebx:edx:ecx must read "GenuineIntel"
; (0756e6547h = "Genu", 049656e69h = "ineI", 06c65746eh = "ntel").
295*44bedb31SLionel Sambuc	xor  eax,eax
296*44bedb31SLionel Sambuc	cpuid
297*44bedb31SLionel Sambuc	cmp  ebx,0756e6547h
298*44bedb31SLionel Sambuc	jne  L_dont_use_mmx
299*44bedb31SLionel Sambuc	cmp  ecx,06c65746eh
300*44bedb31SLionel Sambuc	jne  L_dont_use_mmx
301*44bedb31SLionel Sambuc	cmp  edx,049656e69h
302*44bedb31SLionel Sambuc	jne  L_dont_use_mmx
; CPUID leaf 1: bits 8..11 of eax (family field) must equal 6, and bit 23
; of edx (0800000h, the MMX feature flag) must be set.
303*44bedb31SLionel Sambuc	mov  eax,1
304*44bedb31SLionel Sambuc	cpuid
305*44bedb31SLionel Sambuc	shr  eax,8
306*44bedb31SLionel Sambuc	and  eax,15
307*44bedb31SLionel Sambuc	cmp  eax,6
308*44bedb31SLionel Sambuc	jne  L_dont_use_mmx
309*44bedb31SLionel Sambuc	test  edx,0800000h
310*44bedb31SLionel Sambuc	jnz  L_use_mmx
311*44bedb31SLionel Sambuc	jmp  L_dont_use_mmx
312*44bedb31SLionel SambucL_use_mmx:
313*44bedb31SLionel Sambuc	mov  dword ptr [inflate_fast_use_mmx],2
314*44bedb31SLionel Sambuc	jmp  L_check_mmx_pop
315*44bedb31SLionel SambucL_dont_use_mmx:
316*44bedb31SLionel Sambuc	mov  dword ptr [inflate_fast_use_mmx],3
317*44bedb31SLionel SambucL_check_mmx_pop:
; restore the registers saved above and dispatch again with the result
318*44bedb31SLionel Sambuc	pop  edx
319*44bedb31SLionel Sambuc	pop  ecx
320*44bedb31SLionel Sambuc	pop  ebx
321*44bedb31SLionel Sambuc	pop  eax
322*44bedb31SLionel Sambuc	jmp  L_check_mmx
323*44bedb31SLionel Sambuc; 426 "inffast.S"
324*44bedb31SLionel SambucALIGN 4
325*44bedb31SLionel SambucL_do_loop:
326*44bedb31SLionel Sambuc; 437 "inffast.S"
327*44bedb31SLionel Sambuc	cmp  bl,15
328*44bedb31SLionel Sambuc	ja   L_get_length_code
329*44bedb31SLionel Sambuc
330*44bedb31SLionel Sambuc	xor  eax,eax
331*44bedb31SLionel Sambuc	lodsw
332*44bedb31SLionel Sambuc	mov  cl,bl
333*44bedb31SLionel Sambuc	add  bl,16
334*44bedb31SLionel Sambuc	shl  eax,cl
335*44bedb31SLionel Sambuc	or  ebp,eax
336*44bedb31SLionel Sambuc
337*44bedb31SLionel SambucL_get_length_code:
338*44bedb31SLionel Sambuc	mov  edx, [esp+0]
339*44bedb31SLionel Sambuc	mov  ecx, [esp+8]
340*44bedb31SLionel Sambuc	and  edx,ebp
341*44bedb31SLionel Sambuc	mov  eax, [ecx+edx*4]
342*44bedb31SLionel Sambuc
343*44bedb31SLionel SambucL_dolen:
344*44bedb31SLionel Sambuc
345*44bedb31SLionel Sambuc
346*44bedb31SLionel Sambuc
347*44bedb31SLionel Sambuc
348*44bedb31SLionel Sambuc
349*44bedb31SLionel Sambuc
350*44bedb31SLionel Sambuc	mov  cl,ah
351*44bedb31SLionel Sambuc	sub  bl,ah
352*44bedb31SLionel Sambuc	shr  ebp,cl
353*44bedb31SLionel Sambuc
354*44bedb31SLionel Sambuc
355*44bedb31SLionel Sambuc
356*44bedb31SLionel Sambuc
357*44bedb31SLionel Sambuc
358*44bedb31SLionel Sambuc
359*44bedb31SLionel Sambuc	test  al,al
360*44bedb31SLionel Sambuc	jnz   L_test_for_length_base
361*44bedb31SLionel Sambuc
362*44bedb31SLionel Sambuc	shr  eax,16
363*44bedb31SLionel Sambuc	stosb
364*44bedb31SLionel Sambuc
365*44bedb31SLionel SambucL_while_test:
366*44bedb31SLionel Sambuc
367*44bedb31SLionel Sambuc
368*44bedb31SLionel Sambuc	cmp  [esp+16],edi
369*44bedb31SLionel Sambuc	jbe  L_break_loop
370*44bedb31SLionel Sambuc
371*44bedb31SLionel Sambuc	cmp  [esp+20],esi
372*44bedb31SLionel Sambuc	ja   L_do_loop
373*44bedb31SLionel Sambuc	jmp  L_break_loop
374*44bedb31SLionel Sambuc
375*44bedb31SLionel SambucL_test_for_length_base:
376*44bedb31SLionel Sambuc; 502 "inffast.S"
377*44bedb31SLionel Sambuc	mov  edx,eax
378*44bedb31SLionel Sambuc	shr  edx,16
379*44bedb31SLionel Sambuc	mov  cl,al
380*44bedb31SLionel Sambuc
381*44bedb31SLionel Sambuc	test  al,16
382*44bedb31SLionel Sambuc	jz   L_test_for_second_level_length
383*44bedb31SLionel Sambuc	and  cl,15
384*44bedb31SLionel Sambuc	jz   L_save_len
385*44bedb31SLionel Sambuc	cmp  bl,cl
386*44bedb31SLionel Sambuc	jae  L_add_bits_to_len
387*44bedb31SLionel Sambuc
388*44bedb31SLionel Sambuc	mov  ch,cl
389*44bedb31SLionel Sambuc	xor  eax,eax
390*44bedb31SLionel Sambuc	lodsw
391*44bedb31SLionel Sambuc	mov  cl,bl
392*44bedb31SLionel Sambuc	add  bl,16
393*44bedb31SLionel Sambuc	shl  eax,cl
394*44bedb31SLionel Sambuc	or  ebp,eax
395*44bedb31SLionel Sambuc	mov  cl,ch
396*44bedb31SLionel Sambuc
397*44bedb31SLionel SambucL_add_bits_to_len:
398*44bedb31SLionel Sambuc	mov  eax,1
399*44bedb31SLionel Sambuc	shl  eax,cl
400*44bedb31SLionel Sambuc	dec  eax
401*44bedb31SLionel Sambuc	sub  bl,cl
402*44bedb31SLionel Sambuc	and  eax,ebp
403*44bedb31SLionel Sambuc	shr  ebp,cl
404*44bedb31SLionel Sambuc	add  edx,eax
405*44bedb31SLionel Sambuc
406*44bedb31SLionel SambucL_save_len:
407*44bedb31SLionel Sambuc	mov  [esp+24],edx
408*44bedb31SLionel Sambuc
409*44bedb31SLionel Sambuc
410*44bedb31SLionel SambucL_decode_distance:
411*44bedb31SLionel Sambuc; 549 "inffast.S"
412*44bedb31SLionel Sambuc	cmp  bl,15
413*44bedb31SLionel Sambuc	ja   L_get_distance_code
414*44bedb31SLionel Sambuc
415*44bedb31SLionel Sambuc	xor  eax,eax
416*44bedb31SLionel Sambuc	lodsw
417*44bedb31SLionel Sambuc	mov  cl,bl
418*44bedb31SLionel Sambuc	add  bl,16
419*44bedb31SLionel Sambuc	shl  eax,cl
420*44bedb31SLionel Sambuc	or  ebp,eax
421*44bedb31SLionel Sambuc
422*44bedb31SLionel SambucL_get_distance_code:
423*44bedb31SLionel Sambuc	mov  edx, [esp+4]
424*44bedb31SLionel Sambuc	mov  ecx, [esp+12]
425*44bedb31SLionel Sambuc	and  edx,ebp
426*44bedb31SLionel Sambuc	mov  eax, [ecx+edx*4]
427*44bedb31SLionel Sambuc
428*44bedb31SLionel Sambuc
429*44bedb31SLionel SambucL_dodist:
430*44bedb31SLionel Sambuc	mov  edx,eax
431*44bedb31SLionel Sambuc	shr  edx,16
432*44bedb31SLionel Sambuc	mov  cl,ah
433*44bedb31SLionel Sambuc	sub  bl,ah
434*44bedb31SLionel Sambuc	shr  ebp,cl
435*44bedb31SLionel Sambuc; 584 "inffast.S"
436*44bedb31SLionel Sambuc	mov  cl,al
437*44bedb31SLionel Sambuc
438*44bedb31SLionel Sambuc	test  al,16
439*44bedb31SLionel Sambuc	jz  L_test_for_second_level_dist
440*44bedb31SLionel Sambuc	and  cl,15
441*44bedb31SLionel Sambuc	jz  L_check_dist_one
442*44bedb31SLionel Sambuc	cmp  bl,cl
443*44bedb31SLionel Sambuc	jae  L_add_bits_to_dist
444*44bedb31SLionel Sambuc
445*44bedb31SLionel Sambuc	mov  ch,cl
446*44bedb31SLionel Sambuc	xor  eax,eax
447*44bedb31SLionel Sambuc	lodsw
448*44bedb31SLionel Sambuc	mov  cl,bl
449*44bedb31SLionel Sambuc	add  bl,16
450*44bedb31SLionel Sambuc	shl  eax,cl
451*44bedb31SLionel Sambuc	or  ebp,eax
452*44bedb31SLionel Sambuc	mov  cl,ch
453*44bedb31SLionel Sambuc
454*44bedb31SLionel SambucL_add_bits_to_dist:
455*44bedb31SLionel Sambuc	mov  eax,1
456*44bedb31SLionel Sambuc	shl  eax,cl
457*44bedb31SLionel Sambuc	dec  eax
458*44bedb31SLionel Sambuc	sub  bl,cl
459*44bedb31SLionel Sambuc	and  eax,ebp
460*44bedb31SLionel Sambuc	shr  ebp,cl
461*44bedb31SLionel Sambuc	add  edx,eax
462*44bedb31SLionel Sambuc	jmp  L_check_window
463*44bedb31SLionel Sambuc
464*44bedb31SLionel SambucL_check_window:
465*44bedb31SLionel Sambuc; 625 "inffast.S"
466*44bedb31SLionel Sambuc	mov  [esp+44],esi
467*44bedb31SLionel Sambuc	mov  eax,edi
468*44bedb31SLionel Sambuc	sub  eax, [esp+40]
469*44bedb31SLionel Sambuc
470*44bedb31SLionel Sambuc	cmp  eax,edx
471*44bedb31SLionel Sambuc	jb   L_clip_window
472*44bedb31SLionel Sambuc
473*44bedb31SLionel Sambuc	mov  ecx, [esp+24]
474*44bedb31SLionel Sambuc	mov  esi,edi
475*44bedb31SLionel Sambuc	sub  esi,edx
476*44bedb31SLionel Sambuc
477*44bedb31SLionel Sambuc	sub  ecx,3
478*44bedb31SLionel Sambuc	mov  al, [esi]
479*44bedb31SLionel Sambuc	mov  [edi],al
480*44bedb31SLionel Sambuc	mov  al, [esi+1]
481*44bedb31SLionel Sambuc	mov  dl, [esi+2]
482*44bedb31SLionel Sambuc	add  esi,3
483*44bedb31SLionel Sambuc	mov  [edi+1],al
484*44bedb31SLionel Sambuc	mov  [edi+2],dl
485*44bedb31SLionel Sambuc	add  edi,3
486*44bedb31SLionel Sambuc	rep movsb
487*44bedb31SLionel Sambuc
488*44bedb31SLionel Sambuc	mov  esi, [esp+44]
489*44bedb31SLionel Sambuc	jmp  L_while_test
490*44bedb31SLionel Sambuc
491*44bedb31SLionel SambucALIGN 4
492*44bedb31SLionel SambucL_check_dist_one:
493*44bedb31SLionel Sambuc	cmp  edx,1
494*44bedb31SLionel Sambuc	jne  L_check_window
495*44bedb31SLionel Sambuc	cmp  [esp+40],edi
496*44bedb31SLionel Sambuc	je  L_check_window
497*44bedb31SLionel Sambuc
498*44bedb31SLionel Sambuc	dec  edi
499*44bedb31SLionel Sambuc	mov  ecx, [esp+24]
500*44bedb31SLionel Sambuc	mov  al, [edi]
501*44bedb31SLionel Sambuc	sub  ecx,3
502*44bedb31SLionel Sambuc
503*44bedb31SLionel Sambuc	mov  [edi+1],al
504*44bedb31SLionel Sambuc	mov  [edi+2],al
505*44bedb31SLionel Sambuc	mov  [edi+3],al
506*44bedb31SLionel Sambuc	add  edi,4
507*44bedb31SLionel Sambuc	rep stosb
508*44bedb31SLionel Sambuc
509*44bedb31SLionel Sambuc	jmp  L_while_test
510*44bedb31SLionel Sambuc
511*44bedb31SLionel SambucALIGN 4
512*44bedb31SLionel SambucL_test_for_second_level_length:
513*44bedb31SLionel Sambuc
514*44bedb31SLionel Sambuc
515*44bedb31SLionel Sambuc
516*44bedb31SLionel Sambuc
517*44bedb31SLionel Sambuc	test  al,64
518*44bedb31SLionel Sambuc	jnz   L_test_for_end_of_block
519*44bedb31SLionel Sambuc
520*44bedb31SLionel Sambuc	mov  eax,1
521*44bedb31SLionel Sambuc	shl  eax,cl
522*44bedb31SLionel Sambuc	dec  eax
523*44bedb31SLionel Sambuc	and  eax,ebp
524*44bedb31SLionel Sambuc	add  eax,edx
525*44bedb31SLionel Sambuc	mov  edx, [esp+8]
526*44bedb31SLionel Sambuc	mov  eax, [edx+eax*4]
527*44bedb31SLionel Sambuc	jmp  L_dolen
528*44bedb31SLionel Sambuc
529*44bedb31SLionel SambucALIGN 4
530*44bedb31SLionel SambucL_test_for_second_level_dist:
531*44bedb31SLionel Sambuc
532*44bedb31SLionel Sambuc
533*44bedb31SLionel Sambuc
534*44bedb31SLionel Sambuc
535*44bedb31SLionel Sambuc	test  al,64
536*44bedb31SLionel Sambuc	jnz   L_invalid_distance_code
537*44bedb31SLionel Sambuc
538*44bedb31SLionel Sambuc	mov  eax,1
539*44bedb31SLionel Sambuc	shl  eax,cl
540*44bedb31SLionel Sambuc	dec  eax
541*44bedb31SLionel Sambuc	and  eax,ebp
542*44bedb31SLionel Sambuc	add  eax,edx
543*44bedb31SLionel Sambuc	mov  edx, [esp+12]
544*44bedb31SLionel Sambuc	mov  eax, [edx+eax*4]
545*44bedb31SLionel Sambuc	jmp  L_dodist
546*44bedb31SLionel Sambuc
547*44bedb31SLionel SambucALIGN 4
548*44bedb31SLionel SambucL_clip_window:
549*44bedb31SLionel Sambuc; 721 "inffast.S"
; Reached from L_check_window when the match distance (edx) is larger than
; the number of bytes already produced in the output buffer (eax), so the
; start of the match lies in the sliding window.
;   In : eax = bytes produced so far, edx = distance, edi = output pointer,
;        [esp+24] = match length, [esp+52] = wsize, [esp+56] = window,
;        [esp+48] = write.
;   ecx becomes distance - produced = bytes that must come from the window.
; A distance larger than wsize is rejected (L_invalid_distance_too_far).
; When write != 0 control passes to L_wrap_around_window.  Otherwise the
; source is window + wsize - ecx; if the match is longer than ecx the
; window part is copied here and the source is switched to the output
; buffer (edi - distance); L_do_copy1 then copies the remaining eax bytes.
;
; An unlabeled duplicate of the final compare/copy sequence used to follow
; the unconditional "jmp L_do_copy1" below.  It could never execute and has
; been removed.
550*44bedb31SLionel Sambuc	mov  ecx,eax
551*44bedb31SLionel Sambuc	mov  eax, [esp+52]
552*44bedb31SLionel Sambuc	neg  ecx
553*44bedb31SLionel Sambuc	mov  esi, [esp+56]
554*44bedb31SLionel Sambuc
; distance beyond the window size -> error path
555*44bedb31SLionel Sambuc	cmp  eax,edx
556*44bedb31SLionel Sambuc	jb   L_invalid_distance_too_far
557*44bedb31SLionel Sambuc
; ecx = distance - produced
558*44bedb31SLionel Sambuc	add  ecx,edx
559*44bedb31SLionel Sambuc	cmp  dword ptr [esp+48],0
560*44bedb31SLionel Sambuc	jne  L_wrap_around_window
561*44bedb31SLionel Sambuc
; write == 0: source = window + wsize - ecx
562*44bedb31SLionel Sambuc	sub  eax,ecx
563*44bedb31SLionel Sambuc	add  esi,eax
564*44bedb31SLionel Sambuc; 749 "inffast.S"
565*44bedb31SLionel Sambuc	mov  eax, [esp+24]
566*44bedb31SLionel Sambuc	cmp  eax,ecx
567*44bedb31SLionel Sambuc	jbe  L_do_copy1
568*44bedb31SLionel Sambuc
; match is longer than the window part: copy ecx bytes from the window,
; then continue from the output buffer with the remaining eax bytes
569*44bedb31SLionel Sambuc	sub  eax,ecx
570*44bedb31SLionel Sambuc	rep movsb
571*44bedb31SLionel Sambuc	mov  esi,edi
572*44bedb31SLionel Sambuc	sub  esi,edx
573*44bedb31SLionel Sambuc	jmp  L_do_copy1
583*44bedb31SLionel Sambuc
584*44bedb31SLionel SambucL_wrap_around_window:
585*44bedb31SLionel Sambuc; 793 "inffast.S"
586*44bedb31SLionel Sambuc	mov  eax, [esp+48]
587*44bedb31SLionel Sambuc	cmp  ecx,eax
588*44bedb31SLionel Sambuc	jbe  L_contiguous_in_window
589*44bedb31SLionel Sambuc
590*44bedb31SLionel Sambuc	add  esi, [esp+52]
591*44bedb31SLionel Sambuc	add  esi,eax
592*44bedb31SLionel Sambuc	sub  esi,ecx
593*44bedb31SLionel Sambuc	sub  ecx,eax
594*44bedb31SLionel Sambuc
595*44bedb31SLionel Sambuc
596*44bedb31SLionel Sambuc	mov  eax, [esp+24]
597*44bedb31SLionel Sambuc	cmp  eax,ecx
598*44bedb31SLionel Sambuc	jbe  L_do_copy1
599*44bedb31SLionel Sambuc
600*44bedb31SLionel Sambuc	sub  eax,ecx
601*44bedb31SLionel Sambuc	rep movsb
602*44bedb31SLionel Sambuc	mov  esi, [esp+56]
603*44bedb31SLionel Sambuc	mov  ecx, [esp+48]
604*44bedb31SLionel Sambuc	cmp  eax,ecx
605*44bedb31SLionel Sambuc	jbe  L_do_copy1
606*44bedb31SLionel Sambuc
607*44bedb31SLionel Sambuc	sub  eax,ecx
608*44bedb31SLionel Sambuc	rep movsb
609*44bedb31SLionel Sambuc	mov  esi,edi
610*44bedb31SLionel Sambuc	sub  esi,edx
611*44bedb31SLionel Sambuc	jmp  L_do_copy1
612*44bedb31SLionel Sambuc
613*44bedb31SLionel SambucL_contiguous_in_window:
614*44bedb31SLionel Sambuc; 836 "inffast.S"
615*44bedb31SLionel Sambuc	add  esi,eax
616*44bedb31SLionel Sambuc	sub  esi,ecx
617*44bedb31SLionel Sambuc
618*44bedb31SLionel Sambuc
619*44bedb31SLionel Sambuc	mov  eax, [esp+24]
620*44bedb31SLionel Sambuc	cmp  eax,ecx
621*44bedb31SLionel Sambuc	jbe  L_do_copy1
622*44bedb31SLionel Sambuc
623*44bedb31SLionel Sambuc	sub  eax,ecx
624*44bedb31SLionel Sambuc	rep movsb
625*44bedb31SLionel Sambuc	mov  esi,edi
626*44bedb31SLionel Sambuc	sub  esi,edx
627*44bedb31SLionel Sambuc
628*44bedb31SLionel SambucL_do_copy1:
629*44bedb31SLionel Sambuc; 862 "inffast.S"
630*44bedb31SLionel Sambuc	mov  ecx,eax
631*44bedb31SLionel Sambuc	rep movsb
632*44bedb31SLionel Sambuc
633*44bedb31SLionel Sambuc	mov  esi, [esp+44]
634*44bedb31SLionel Sambuc	jmp  L_while_test
635*44bedb31SLionel Sambuc; 878 "inffast.S"
636*44bedb31SLionel SambucALIGN 4
637*44bedb31SLionel SambucL_init_mmx:
638*44bedb31SLionel Sambuc	emms
639*44bedb31SLionel Sambuc
640*44bedb31SLionel Sambuc
641*44bedb31SLionel Sambuc
642*44bedb31SLionel Sambuc
643*44bedb31SLionel Sambuc
644*44bedb31SLionel Sambuc	movd mm0,ebp
645*44bedb31SLionel Sambuc	mov  ebp,ebx
646*44bedb31SLionel Sambuc; 896 "inffast.S"
647*44bedb31SLionel Sambuc	movd mm4,[esp+0]
648*44bedb31SLionel Sambuc	movq mm3,mm4
649*44bedb31SLionel Sambuc	movd mm5,[esp+4]
650*44bedb31SLionel Sambuc	movq mm2,mm5
651*44bedb31SLionel Sambuc	pxor mm1,mm1
652*44bedb31SLionel Sambuc	mov  ebx, [esp+8]
653*44bedb31SLionel Sambuc	jmp  L_do_loop_mmx
654*44bedb31SLionel Sambuc
655*44bedb31SLionel SambucALIGN 4
656*44bedb31SLionel SambucL_do_loop_mmx:
657*44bedb31SLionel Sambuc	psrlq mm0,mm1
658*44bedb31SLionel Sambuc
659*44bedb31SLionel Sambuc	cmp  ebp,32
660*44bedb31SLionel Sambuc	ja  L_get_length_code_mmx
661*44bedb31SLionel Sambuc
662*44bedb31SLionel Sambuc	movd mm6,ebp
663*44bedb31SLionel Sambuc	movd mm7,[esi]
664*44bedb31SLionel Sambuc	add  esi,4
665*44bedb31SLionel Sambuc	psllq mm7,mm6
666*44bedb31SLionel Sambuc	add  ebp,32
667*44bedb31SLionel Sambuc	por mm0,mm7
668*44bedb31SLionel Sambuc
669*44bedb31SLionel SambucL_get_length_code_mmx:
670*44bedb31SLionel Sambuc	pand mm4,mm0
671*44bedb31SLionel Sambuc	movd eax,mm4
672*44bedb31SLionel Sambuc	movq mm4,mm3
673*44bedb31SLionel Sambuc	mov  eax, [ebx+eax*4]
674*44bedb31SLionel Sambuc
675*44bedb31SLionel SambucL_dolen_mmx:
676*44bedb31SLionel Sambuc	movzx  ecx,ah
677*44bedb31SLionel Sambuc	movd mm1,ecx
678*44bedb31SLionel Sambuc	sub  ebp,ecx
679*44bedb31SLionel Sambuc
680*44bedb31SLionel Sambuc	test  al,al
681*44bedb31SLionel Sambuc	jnz L_test_for_length_base_mmx
682*44bedb31SLionel Sambuc
683*44bedb31SLionel Sambuc	shr  eax,16
684*44bedb31SLionel Sambuc	stosb
685*44bedb31SLionel Sambuc
686*44bedb31SLionel SambucL_while_test_mmx:
687*44bedb31SLionel Sambuc
688*44bedb31SLionel Sambuc
689*44bedb31SLionel Sambuc	cmp  [esp+16],edi
690*44bedb31SLionel Sambuc	jbe L_break_loop
691*44bedb31SLionel Sambuc
692*44bedb31SLionel Sambuc	cmp  [esp+20],esi
693*44bedb31SLionel Sambuc	ja L_do_loop_mmx
694*44bedb31SLionel Sambuc	jmp L_break_loop
695*44bedb31SLionel Sambuc
696*44bedb31SLionel SambucL_test_for_length_base_mmx:
697*44bedb31SLionel Sambuc
698*44bedb31SLionel Sambuc	mov  edx,eax
699*44bedb31SLionel Sambuc	shr  edx,16
700*44bedb31SLionel Sambuc
701*44bedb31SLionel Sambuc	test  al,16
702*44bedb31SLionel Sambuc	jz  L_test_for_second_level_length_mmx
703*44bedb31SLionel Sambuc	and  eax,15
704*44bedb31SLionel Sambuc	jz L_decode_distance_mmx
705*44bedb31SLionel Sambuc
706*44bedb31SLionel Sambuc	psrlq mm0,mm1
707*44bedb31SLionel Sambuc	movd mm1,eax
708*44bedb31SLionel Sambuc	movd ecx,mm0
709*44bedb31SLionel Sambuc	sub  ebp,eax
710*44bedb31SLionel Sambuc	and  ecx, [inflate_fast_mask+eax*4]
711*44bedb31SLionel Sambuc	add  edx,ecx
712*44bedb31SLionel Sambuc
713*44bedb31SLionel SambucL_decode_distance_mmx:
714*44bedb31SLionel Sambuc	psrlq mm0,mm1
715*44bedb31SLionel Sambuc
716*44bedb31SLionel Sambuc	cmp  ebp,32
717*44bedb31SLionel Sambuc	ja L_get_dist_code_mmx
718*44bedb31SLionel Sambuc
719*44bedb31SLionel Sambuc	movd mm6,ebp
720*44bedb31SLionel Sambuc	movd mm7,[esi]
721*44bedb31SLionel Sambuc	add  esi,4
722*44bedb31SLionel Sambuc	psllq mm7,mm6
723*44bedb31SLionel Sambuc	add  ebp,32
724*44bedb31SLionel Sambuc	por mm0,mm7
725*44bedb31SLionel Sambuc
726*44bedb31SLionel SambucL_get_dist_code_mmx:
727*44bedb31SLionel Sambuc	mov  ebx, [esp+12]
728*44bedb31SLionel Sambuc	pand mm5,mm0
729*44bedb31SLionel Sambuc	movd eax,mm5
730*44bedb31SLionel Sambuc	movq mm5,mm2
731*44bedb31SLionel Sambuc	mov  eax, [ebx+eax*4]
732*44bedb31SLionel Sambuc
733*44bedb31SLionel SambucL_dodist_mmx:
734*44bedb31SLionel Sambuc
735*44bedb31SLionel Sambuc	movzx  ecx,ah
736*44bedb31SLionel Sambuc	mov  ebx,eax
737*44bedb31SLionel Sambuc	shr  ebx,16
738*44bedb31SLionel Sambuc	sub  ebp,ecx
739*44bedb31SLionel Sambuc	movd mm1,ecx
740*44bedb31SLionel Sambuc
741*44bedb31SLionel Sambuc	test  al,16
742*44bedb31SLionel Sambuc	jz L_test_for_second_level_dist_mmx
743*44bedb31SLionel Sambuc	and  eax,15
744*44bedb31SLionel Sambuc	jz L_check_dist_one_mmx
745*44bedb31SLionel Sambuc
746*44bedb31SLionel SambucL_add_bits_to_dist_mmx:
747*44bedb31SLionel Sambuc	psrlq mm0,mm1
748*44bedb31SLionel Sambuc	movd mm1,eax
749*44bedb31SLionel Sambuc	movd ecx,mm0
750*44bedb31SLionel Sambuc	sub  ebp,eax
751*44bedb31SLionel Sambuc	and  ecx, [inflate_fast_mask+eax*4]
752*44bedb31SLionel Sambuc	add  ebx,ecx
753*44bedb31SLionel Sambuc
754*44bedb31SLionel SambucL_check_window_mmx:
755*44bedb31SLionel Sambuc	mov  [esp+44],esi
756*44bedb31SLionel Sambuc	mov  eax,edi
757*44bedb31SLionel Sambuc	sub  eax, [esp+40]
758*44bedb31SLionel Sambuc
759*44bedb31SLionel Sambuc	cmp  eax,ebx
760*44bedb31SLionel Sambuc	jb L_clip_window_mmx
761*44bedb31SLionel Sambuc
762*44bedb31SLionel Sambuc	mov  ecx,edx
763*44bedb31SLionel Sambuc	mov  esi,edi
764*44bedb31SLionel Sambuc	sub  esi,ebx
765*44bedb31SLionel Sambuc
766*44bedb31SLionel Sambuc	sub  ecx,3
767*44bedb31SLionel Sambuc	mov  al, [esi]
768*44bedb31SLionel Sambuc	mov  [edi],al
769*44bedb31SLionel Sambuc	mov  al, [esi+1]
770*44bedb31SLionel Sambuc	mov  dl, [esi+2]
771*44bedb31SLionel Sambuc	add  esi,3
772*44bedb31SLionel Sambuc	mov  [edi+1],al
773*44bedb31SLionel Sambuc	mov  [edi+2],dl
774*44bedb31SLionel Sambuc	add  edi,3
775*44bedb31SLionel Sambuc	rep movsb
776*44bedb31SLionel Sambuc
777*44bedb31SLionel Sambuc	mov  esi, [esp+44]
778*44bedb31SLionel Sambuc	mov  ebx, [esp+8]
779*44bedb31SLionel Sambuc	jmp  L_while_test_mmx
780*44bedb31SLionel Sambuc
781*44bedb31SLionel SambucALIGN 4
782*44bedb31SLionel SambucL_check_dist_one_mmx:
; Special case ebx == 1 with edi != [esp+40]: replicate the byte just before
; edi edx times using rep stosb. Any other case goes to L_check_window_mmx.
; Reached when (eax & 15) == 0 in the code above, i.e. no extra bits to add.
783*44bedb31SLionel Sambuc	cmp  ebx,1
784*44bedb31SLionel Sambuc	jne  L_check_window_mmx
785*44bedb31SLionel Sambuc	cmp  [esp+40],edi
786*44bedb31SLionel Sambuc	je   L_check_window_mmx	; nothing precedes edi at [esp+40]; use the general path
787*44bedb31SLionel Sambuc
788*44bedb31SLionel Sambuc	dec  edi	; point at the previous byte
789*44bedb31SLionel Sambuc	mov  ecx,edx
790*44bedb31SLionel Sambuc	mov  al, [edi]	; al = byte to replicate
791*44bedb31SLionel Sambuc	sub  ecx,3	; three copies are stored by hand; assumes edx >= 3 -- TODO confirm
792*44bedb31SLionel Sambuc
793*44bedb31SLionel Sambuc	mov  [edi+1],al
794*44bedb31SLionel Sambuc	mov  [edi+2],al
795*44bedb31SLionel Sambuc	mov  [edi+3],al
796*44bedb31SLionel Sambuc	add  edi,4	; net effect: original edi + 3
797*44bedb31SLionel Sambuc	rep stosb	; store al ecx more times (assumes DF is clear; not set in this chunk)
798*44bedb31SLionel Sambuc
799*44bedb31SLionel Sambuc	mov  ebx, [esp+8]	; reload ebx from [esp+8]; esi was not touched on this path
800*44bedb31SLionel Sambuc	jmp  L_while_test_mmx
801*44bedb31SLionel Sambuc
802*44bedb31SLionel SambucALIGN 4
803*44bedb31SLionel SambucL_test_for_second_level_length_mmx:
; If bit 64 of al is set, continue at L_test_for_end_of_block. Otherwise load
; a further 4-byte table entry into eax and retry at L_dolen_mmx:
;   index = edx + (low bits of mm0 masked by inflate_fast_mask[eax & 15])
;   eax   = dword at [ebx + index*4]
; NOTE(review): ebx is presumably the length-code table base -- confirm.
804*44bedb31SLionel Sambuc	test  al,64
805*44bedb31SLionel Sambuc	jnz L_test_for_end_of_block
806*44bedb31SLionel Sambuc
807*44bedb31SLionel Sambuc	and  eax,15
808*44bedb31SLionel Sambuc	psrlq mm0,mm1	; apply the pending shift count held in mm1
809*44bedb31SLionel Sambuc	movd ecx,mm0	; ecx = low 32 bits of mm0
810*44bedb31SLionel Sambuc	and  ecx, [inflate_fast_mask+eax*4]
811*44bedb31SLionel Sambuc	add  ecx,edx
812*44bedb31SLionel Sambuc	mov  eax, [ebx+ecx*4]
813*44bedb31SLionel Sambuc	jmp L_dolen_mmx
814*44bedb31SLionel Sambuc
815*44bedb31SLionel SambucALIGN 4
816*44bedb31SLionel SambucL_test_for_second_level_dist_mmx:
; If bit 64 of al is set, report an invalid distance code. Otherwise load a
; further 4-byte table entry into eax and retry at L_dodist_mmx:
;   index = ebx + (low bits of mm0 masked by inflate_fast_mask[eax & 15])
;   eax   = dword at [[esp+12] + index*4]
; NOTE(review): [esp+12] is presumably the distance-code table base -- confirm.
817*44bedb31SLionel Sambuc	test  al,64
818*44bedb31SLionel Sambuc	jnz L_invalid_distance_code
819*44bedb31SLionel Sambuc
820*44bedb31SLionel Sambuc	and  eax,15
821*44bedb31SLionel Sambuc	psrlq mm0,mm1	; apply the pending shift count held in mm1
822*44bedb31SLionel Sambuc	movd ecx,mm0	; ecx = low 32 bits of mm0
823*44bedb31SLionel Sambuc	and  ecx, [inflate_fast_mask+eax*4]
824*44bedb31SLionel Sambuc	mov  eax, [esp+12]	; table base
825*44bedb31SLionel Sambuc	add  ecx,ebx
826*44bedb31SLionel Sambuc	mov  eax, [eax+ecx*4]
827*44bedb31SLionel Sambuc	jmp  L_dodist_mmx
828*44bedb31SLionel Sambuc
829*44bedb31SLionel SambucALIGN 4
830*44bedb31SLionel SambucL_clip_window_mmx:
; Entered from L_check_window_mmx with eax = edi - [esp+40] and eax < ebx, so
; the first (ebx - eax) source bytes come from the buffer at [esp+56] rather
; than from behind edi. esi was already saved to [esp+44] by the caller path.
; NOTE(review): [esp+56] / [esp+52] / [esp+48] look like window base / window
; size / window write index -- they are set before this chunk; confirm.
831*44bedb31SLionel Sambuc
832*44bedb31SLionel Sambuc	mov  ecx,eax
833*44bedb31SLionel Sambuc	mov  eax, [esp+52]
834*44bedb31SLionel Sambuc	neg  ecx	; ecx = -(edi - [esp+40])
835*44bedb31SLionel Sambuc	mov  esi, [esp+56]	; esi = buffer base
836*44bedb31SLionel Sambuc
837*44bedb31SLionel Sambuc	cmp  eax,ebx
838*44bedb31SLionel Sambuc	jb  L_invalid_distance_too_far	; unsigned [esp+52] < ebx
839*44bedb31SLionel Sambuc
840*44bedb31SLionel Sambuc	add  ecx,ebx	; ecx = ebx - (edi - [esp+40]): bytes to take from the buffer
841*44bedb31SLionel Sambuc	cmp  dword ptr [esp+48],0
842*44bedb31SLionel Sambuc	jne  L_wrap_around_window_mmx
843*44bedb31SLionel Sambuc
844*44bedb31SLionel Sambuc	sub  eax,ecx
845*44bedb31SLionel Sambuc	add  esi,eax	; esi = [esp+56] + [esp+52] - ecx
846*44bedb31SLionel Sambuc
847*44bedb31SLionel Sambuc	cmp  edx,ecx
848*44bedb31SLionel Sambuc	jbe  L_do_copy1_mmx	; edx <= ecx: the whole copy comes from the buffer
849*44bedb31SLionel Sambuc
850*44bedb31SLionel Sambuc	sub  edx,ecx
851*44bedb31SLionel Sambuc	rep movsb	; copy the ecx buffer bytes
852*44bedb31SLionel Sambuc	mov  esi,edi
853*44bedb31SLionel Sambuc	sub  esi,ebx	; remaining edx bytes come from ebx bytes behind edi
854*44bedb31SLionel Sambuc	jmp  L_do_copy1_mmx
864*44bedb31SLionel Sambuc
865*44bedb31SLionel SambucL_wrap_around_window_mmx:
; Reached from L_clip_window_mmx when [esp+48] != 0. On entry esi = [esp+56],
; ecx = number of bytes to take from that buffer, edx = total bytes to copy.
; If ecx <= [esp+48] the source is one contiguous run; otherwise copy the
; part at the end of the buffer first, then continue from the buffer base.
; NOTE(review): [esp+48] is presumably the window write index -- confirm.
866*44bedb31SLionel Sambuc
867*44bedb31SLionel Sambuc	mov  eax, [esp+48]
868*44bedb31SLionel Sambuc	cmp  ecx,eax
869*44bedb31SLionel Sambuc	jbe  L_contiguous_in_window_mmx
870*44bedb31SLionel Sambuc
871*44bedb31SLionel Sambuc	add  esi, [esp+52]
872*44bedb31SLionel Sambuc	add  esi,eax
873*44bedb31SLionel Sambuc	sub  esi,ecx	; esi = [esp+56] + [esp+52] + [esp+48] - ecx
874*44bedb31SLionel Sambuc	sub  ecx,eax	; ecx -= [esp+48]: bytes in the first run
875*44bedb31SLionel Sambuc
876*44bedb31SLionel Sambuc
877*44bedb31SLionel Sambuc	cmp  edx,ecx
878*44bedb31SLionel Sambuc	jbe  L_do_copy1_mmx	; edx <= ecx: the first run covers the whole copy
879*44bedb31SLionel Sambuc
880*44bedb31SLionel Sambuc	sub  edx,ecx
881*44bedb31SLionel Sambuc	rep movsb	; copy the first run
882*44bedb31SLionel Sambuc	mov  esi, [esp+56]	; continue from the buffer base
883*44bedb31SLionel Sambuc	mov  ecx, [esp+48]
884*44bedb31SLionel Sambuc	cmp  edx,ecx
885*44bedb31SLionel Sambuc	jbe  L_do_copy1_mmx	; edx <= [esp+48]: the second run finishes the copy
886*44bedb31SLionel Sambuc
887*44bedb31SLionel Sambuc	sub  edx,ecx
888*44bedb31SLionel Sambuc	rep movsb	; copy the second run
889*44bedb31SLionel Sambuc	mov  esi,edi
890*44bedb31SLionel Sambuc	sub  esi,ebx	; remaining edx bytes come from ebx bytes behind edi
891*44bedb31SLionel Sambuc	jmp  L_do_copy1_mmx
892*44bedb31SLionel Sambuc
893*44bedb31SLionel SambucL_contiguous_in_window_mmx:
; Reached from L_wrap_around_window_mmx with eax = [esp+48], esi = [esp+56]
; and ecx <= eax: the ecx buffer bytes form a single run ending at offset eax.
; Falls through into L_do_copy1_mmx.
894*44bedb31SLionel Sambuc
895*44bedb31SLionel Sambuc	add  esi,eax
896*44bedb31SLionel Sambuc	sub  esi,ecx	; esi = [esp+56] + [esp+48] - ecx
897*44bedb31SLionel Sambuc
898*44bedb31SLionel Sambuc
899*44bedb31SLionel Sambuc	cmp  edx,ecx
900*44bedb31SLionel Sambuc	jbe  L_do_copy1_mmx	; edx <= ecx: the run covers the whole copy
901*44bedb31SLionel Sambuc
902*44bedb31SLionel Sambuc	sub  edx,ecx
903*44bedb31SLionel Sambuc	rep movsb	; copy the run
904*44bedb31SLionel Sambuc	mov  esi,edi
905*44bedb31SLionel Sambuc	sub  esi,ebx	; remaining edx bytes come from ebx bytes behind edi
906*44bedb31SLionel Sambuc
907*44bedb31SLionel SambucL_do_copy1_mmx:
; Common tail of the clipped-copy paths: copy the remaining edx bytes from
; [esi] to [edi], then restore the registers the main loop expects.
908*44bedb31SLionel Sambuc
909*44bedb31SLionel Sambuc
910*44bedb31SLionel Sambuc	mov  ecx,edx
911*44bedb31SLionel Sambuc	rep movsb	; assumes DF is clear; not set in this chunk
912*44bedb31SLionel Sambuc
913*44bedb31SLionel Sambuc	mov  esi, [esp+44]	; restore esi saved at L_check_window_mmx
914*44bedb31SLionel Sambuc	mov  ebx, [esp+8]	; reload ebx from [esp+8]
915*44bedb31SLionel Sambuc	jmp  L_while_test_mmx
916*44bedb31SLionel Sambuc; 1174 "inffast.S"
916*44bedb31SLionel Sambuc; 1174 "inffast.S"
917*44bedb31SLionel SambucL_invalid_distance_code:
; Error exit: ecx = invalid_distance_code_msg, edx = INFLATE_MODE_BAD, then
; the shared L_update_stream_state stores both.
; NOTE(review): ecx is presumably the address of the message text; that depends
; on how the symbol is declared outside this chunk -- confirm.
918*44bedb31SLionel Sambuc
919*44bedb31SLionel Sambuc
920*44bedb31SLionel Sambuc
921*44bedb31SLionel Sambuc
922*44bedb31SLionel Sambuc
923*44bedb31SLionel Sambuc	mov  ecx, invalid_distance_code_msg
924*44bedb31SLionel Sambuc	mov  edx,INFLATE_MODE_BAD
925*44bedb31SLionel Sambuc	jmp  L_update_stream_state
926*44bedb31SLionel Sambuc
927*44bedb31SLionel SambucL_test_for_end_of_block:
; Reached when bit 64 of al is set in a length-table entry. If bit 32 is also
; set, leave the loop with no message (ecx = 0) and edx = INFLATE_MODE_TYPE;
; otherwise the code is invalid.
928*44bedb31SLionel Sambuc
929*44bedb31SLionel Sambuc
930*44bedb31SLionel Sambuc
931*44bedb31SLionel Sambuc
932*44bedb31SLionel Sambuc
933*44bedb31SLionel Sambuc	test  al,32
934*44bedb31SLionel Sambuc	jz  L_invalid_literal_length_code
935*44bedb31SLionel Sambuc
936*44bedb31SLionel Sambuc	xor  ecx,ecx	; ecx = 0: no message; flags are dead here (next user re-tests ecx)
937*44bedb31SLionel Sambuc	mov  edx,INFLATE_MODE_TYPE
938*44bedb31SLionel Sambuc	jmp  L_update_stream_state
939*44bedb31SLionel Sambuc
940*44bedb31SLionel SambucL_invalid_literal_length_code:
; Error exit: ecx = invalid_literal_length_code_msg, edx = INFLATE_MODE_BAD,
; then the shared L_update_stream_state stores both.
; NOTE(review): ecx is presumably the address of the message text -- confirm
; against the symbol's declaration outside this chunk.
941*44bedb31SLionel Sambuc
942*44bedb31SLionel Sambuc
943*44bedb31SLionel Sambuc
944*44bedb31SLionel Sambuc
945*44bedb31SLionel Sambuc
946*44bedb31SLionel Sambuc	mov  ecx, invalid_literal_length_code_msg
947*44bedb31SLionel Sambuc	mov  edx,INFLATE_MODE_BAD
948*44bedb31SLionel Sambuc	jmp  L_update_stream_state
949*44bedb31SLionel Sambuc
950*44bedb31SLionel SambucL_invalid_distance_too_far:
; Error exit taken from L_clip_window_mmx when [esp+52] < ebx (unsigned).
; esi was overwritten with [esp+56] there, so it is restored first.
951*44bedb31SLionel Sambuc
952*44bedb31SLionel Sambuc
953*44bedb31SLionel Sambuc
954*44bedb31SLionel Sambuc	mov  esi, [esp+44]	; restore esi saved at L_check_window_mmx
955*44bedb31SLionel Sambuc	mov  ecx, invalid_distance_too_far_msg
956*44bedb31SLionel Sambuc	mov  edx,INFLATE_MODE_BAD
957*44bedb31SLionel Sambuc	jmp  L_update_stream_state	; target is the very next label
958*44bedb31SLionel Sambuc
959*44bedb31SLionel SambucL_update_stream_state:
; In: ecx = message value or 0, edx = new mode value.
; Stores ecx at offset 24 of the structure pointed to by [esp+88] (only when
; non-zero) and edx at mode_state inside the structure pointed to by offset 28.
; [esp+88] is the first stack argument: the epilogue at L_done releases
; 64 + 4 + 16 bytes before ret.
; NOTE(review): offsets 24 / 28 are presumably z_stream msg / state -- confirm.
960*44bedb31SLionel Sambuc
961*44bedb31SLionel Sambuc	mov  eax, [esp+88]
962*44bedb31SLionel Sambuc	test  ecx,ecx
963*44bedb31SLionel Sambuc	jz  L_skip_msg	; no message to record
964*44bedb31SLionel Sambuc	mov  [eax+24],ecx
965*44bedb31SLionel SambucL_skip_msg:
966*44bedb31SLionel Sambuc	mov  eax, [eax+28]
967*44bedb31SLionel Sambuc	mov  [eax+mode_state],edx
968*44bedb31SLionel Sambuc	jmp  L_break_loop
969*44bedb31SLionel Sambuc
970*44bedb31SLionel SambucALIGN 4
971*44bedb31SLionel SambucL_break_loop:
; Start of the common exit sequence. When inflate_fast_use_mmx == 2 the value
; that L_update_next_in expects in ebx lives in ebp, so copy it over.
972*44bedb31SLionel Sambuc; 1243 "inffast.S"
973*44bedb31SLionel Sambuc	cmp  dword ptr [inflate_fast_use_mmx],2
974*44bedb31SLionel Sambuc	jne  L_update_next_in
975*44bedb31SLionel Sambuc
976*44bedb31SLionel Sambuc
977*44bedb31SLionel Sambuc
978*44bedb31SLionel Sambuc	mov  ebx,ebp
979*44bedb31SLionel Sambuc
980*44bedb31SLionel SambucL_update_next_in:
; ebx is treated as a bit count: esi is moved back by ebx/8 bytes and the
; remaining ebx mod 8 is stored to bits_state. edi is stored at offset 12 of
; the structure pointed to by [esp+88].
; If [esp+20] equals the address esp+28, esi is rebased from that stack area
; onto the pointer at offset 0 of the structure, and [esp+20] is recomputed
; from offsets 0 and 4.
; Out: eax = [esp+88], edx = [eax+28], ecx = remaining bit count.
; NOTE(review): offsets 0 / 4 / 12 are presumably z_stream next_in / avail_in /
; next_out, and esp+28 a local input buffer set up before this chunk -- confirm.
981*44bedb31SLionel Sambuc; 1266 "inffast.S"
982*44bedb31SLionel Sambuc	mov  eax, [esp+88]
983*44bedb31SLionel Sambuc	mov  ecx,ebx
984*44bedb31SLionel Sambuc	mov  edx, [eax+28]
985*44bedb31SLionel Sambuc	shr  ecx,3	; ecx = ebx / 8
986*44bedb31SLionel Sambuc	sub  esi,ecx	; move esi back by that many bytes
987*44bedb31SLionel Sambuc	shl  ecx,3
988*44bedb31SLionel Sambuc	sub  ebx,ecx	; ebx = ebx mod 8
989*44bedb31SLionel Sambuc	mov  [eax+12],edi
990*44bedb31SLionel Sambuc	mov  [edx+bits_state],ebx
991*44bedb31SLionel Sambuc	mov  ecx,ebx	; kept in cl for the mask built at L_buf_not_used
992*44bedb31SLionel Sambuc
993*44bedb31SLionel Sambuc	lea  ebx, [esp+28]
994*44bedb31SLionel Sambuc	cmp  [esp+20],ebx
995*44bedb31SLionel Sambuc	jne  L_buf_not_used
996*44bedb31SLionel Sambuc
997*44bedb31SLionel Sambuc	sub  esi,ebx	; esi = offset from esp+28
998*44bedb31SLionel Sambuc	mov  ebx, [eax+0]
999*44bedb31SLionel Sambuc	mov  [esp+20],ebx
1000*44bedb31SLionel Sambuc	add  esi,ebx	; esi = [eax+0] + offset
1001*44bedb31SLionel Sambuc	mov  ebx, [eax+4]
1002*44bedb31SLionel Sambuc	sub  ebx,11
1003*44bedb31SLionel Sambuc	add  [esp+20],ebx	; [esp+20] = [eax+0] + [eax+4] - 11
1004*44bedb31SLionel Sambuc
1005*44bedb31SLionel SambucL_buf_not_used:
; Store esi at offset 0 of the structure in eax, build the mask (1 << cl) - 1
; in ebx, and in MMX mode (inflate_fast_use_mmx == 2) move the bit buffer from
; mm0 back into ebp and leave MMX state with emms.
1006*44bedb31SLionel Sambuc	mov  [eax+0],esi
1007*44bedb31SLionel Sambuc
1008*44bedb31SLionel Sambuc	mov  ebx,1
1009*44bedb31SLionel Sambuc	shl  ebx,cl
1010*44bedb31SLionel Sambuc	dec  ebx	; ebx = (1 << cl) - 1, cl = remaining bit count (0..7)
1011*44bedb31SLionel Sambuc
1012*44bedb31SLionel Sambuc
1013*44bedb31SLionel Sambuc
1014*44bedb31SLionel Sambuc
1015*44bedb31SLionel Sambuc
1016*44bedb31SLionel Sambuc	cmp  dword ptr [inflate_fast_use_mmx],2
1017*44bedb31SLionel Sambuc	jne  L_update_hold
1018*44bedb31SLionel Sambuc
1019*44bedb31SLionel Sambuc
1020*44bedb31SLionel Sambuc
1021*44bedb31SLionel Sambuc	psrlq mm0,mm1	; apply the pending shift count held in mm1
1022*44bedb31SLionel Sambuc	movd ebp,mm0	; ebp = low 32 bits of mm0
1023*44bedb31SLionel Sambuc
1024*44bedb31SLionel Sambuc	emms	; clear MMX state before returning
1025*44bedb31SLionel Sambuc
1026*44bedb31SLionel SambucL_update_hold:
; Store ebp masked by ebx to hold_state, then write [esp+20] - esi + 11 to
; offset 4 of the structure in eax. Both arms below produce that same 32-bit
; value; the second computes it as 11 - (esi - [esp+20]).
; NOTE(review): offset 4 is presumably z_stream avail_in and [esp+20] the
; "last" input pointer -- confirm against the prologue.
1027*44bedb31SLionel Sambuc
1028*44bedb31SLionel Sambuc
1029*44bedb31SLionel Sambuc
1030*44bedb31SLionel Sambuc	and  ebp,ebx	; keep only the low cl bits (ebx = (1 << cl) - 1)
1031*44bedb31SLionel Sambuc	mov  [edx+hold_state],ebp
1032*44bedb31SLionel Sambuc
1033*44bedb31SLionel Sambuc
1034*44bedb31SLionel Sambuc
1035*44bedb31SLionel Sambuc
1036*44bedb31SLionel Sambuc	mov  ebx, [esp+20]
1037*44bedb31SLionel Sambuc	cmp  ebx,esi
1038*44bedb31SLionel Sambuc	jbe  L_last_is_smaller
1039*44bedb31SLionel Sambuc
1040*44bedb31SLionel Sambuc	sub  ebx,esi
1041*44bedb31SLionel Sambuc	add  ebx,11	; ebx = [esp+20] - esi + 11
1042*44bedb31SLionel Sambuc	mov  [eax+4],ebx
1043*44bedb31SLionel Sambuc	jmp  L_fixup_out
1044*44bedb31SLionel SambucL_last_is_smaller:
1045*44bedb31SLionel Sambuc	sub  esi,ebx
1046*44bedb31SLionel Sambuc	neg  esi
1047*44bedb31SLionel Sambuc	add  esi,11	; esi = 11 - (esi - [esp+20])
1048*44bedb31SLionel Sambuc	mov  [eax+4],esi
1049*44bedb31SLionel Sambuc
1050*44bedb31SLionel Sambuc
1051*44bedb31SLionel Sambuc
1052*44bedb31SLionel Sambuc
1053*44bedb31SLionel SambucL_fixup_out:
; Write [esp+16] - edi + 257 to offset 16 of the structure in eax. Both arms
; produce that same 32-bit value; the second computes it as
; 257 - (edi - [esp+16]).
; NOTE(review): offset 16 is presumably z_stream avail_out and [esp+16] the
; "end" output pointer -- confirm against the prologue.
1054*44bedb31SLionel Sambuc
1055*44bedb31SLionel Sambuc	mov  ebx, [esp+16]
1056*44bedb31SLionel Sambuc	cmp  ebx,edi
1057*44bedb31SLionel Sambuc	jbe  L_end_is_smaller
1058*44bedb31SLionel Sambuc
1059*44bedb31SLionel Sambuc	sub  ebx,edi
1060*44bedb31SLionel Sambuc	add  ebx,257	; ebx = [esp+16] - edi + 257
1061*44bedb31SLionel Sambuc	mov  [eax+16],ebx
1062*44bedb31SLionel Sambuc	jmp  L_done
1063*44bedb31SLionel SambucL_end_is_smaller:
1064*44bedb31SLionel Sambuc	sub  edi,ebx
1065*44bedb31SLionel Sambuc	neg  edi
1066*44bedb31SLionel Sambuc	add  edi,257	; edi = 257 - (edi - [esp+16])
1067*44bedb31SLionel Sambuc	mov  [eax+16],edi
1068*44bedb31SLionel Sambuc
1069*44bedb31SLionel Sambuc
1070*44bedb31SLionel Sambuc
1071*44bedb31SLionel Sambuc
1072*44bedb31SLionel Sambuc
1073*44bedb31SLionel SambucL_done:
; Epilogue: release the 64-byte local area, restore EFLAGS and the four saved
; registers, and return. The pop order implies the prologue (outside this
; chunk) pushed edi, esi, ebp, ebx and then the flags -- TODO confirm.
1074*44bedb31SLionel Sambuc	add  esp,64	; drop locals
1075*44bedb31SLionel Sambuc	popfd	; restore EFLAGS
1076*44bedb31SLionel Sambuc	pop  ebx
1077*44bedb31SLionel Sambuc	pop  ebp
1078*44bedb31SLionel Sambuc	pop  esi
1079*44bedb31SLionel Sambuc	pop  edi
1080*44bedb31SLionel Sambuc	ret
1081*44bedb31SLionel Sambuc
1082*44bedb31SLionel Sambuc_TEXT	ends
1083*44bedb31SLionel Sambucend
1084