xref: /isa-l/igzip/igzip_compare_types.asm (revision ba1a0006802c8f857e536282e77a9b4ca34f43e8)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29660f49b0SGreg Tucker
30660f49b0SGreg Tucker%include "options.asm"
31f97de75fSRoy Oursler%include "stdmac.asm"
32f97de75fSRoy Oursler
33660f49b0SGreg Tucker%ifndef UTILS_ASM
34660f49b0SGreg Tucker%define UTILS_ASM
; compare macros

;; sttni2 is faster, but it can't be debugged,
;; so the following code is based on "mine5"
39660f49b0SGreg Tucker
;; compare_r src1, src2, result, result_max, tmp
;;
;; Extends a match between the bytes at src1 and src2, comparing 8 bytes at a
;; time with xor on general purpose registers.
;;
;;   src1, src2  - base addresses of the two byte streams (only read)
;;   result      - in:  offset at which comparison starts
;;                 out: offset of the first differing byte found, or
;;                      result_max when no difference is found
;;   result_max  - offset one past the last byte that may be compared.
;;                 Assumes the input buffer has size at least 8: the tail
;;                 step re-reads the 8 bytes ending at result_max.
;;                 Clobbered (it is adjusted while scanning).
;;   tmp         - 64-bit scratch register, clobbered
;;
;; Flags are clobbered. Offsets are compared with signed branches because
;; result_max is temporarily lowered by 16 and may become negative.
;; NOTE(review): the tail step can overlap bytes located before the incoming
;; result; those are presumably already known to match - confirm with callers.
%macro compare_r 5
%define %%src1		%1
%define %%src2		%2
%define %%result	%3
%define %%result_max	%4
%define %%tmp		%5

	; The unrolled loop needs 16 readable bytes starting at result
	sub	%%result_max, 16
	cmp	%%result, %%result_max
	jg	%%_by_8

%%loop1:
	; Two 8-byte steps per iteration; xor leaves the differing bits in tmp
	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare_reg
	add	%%result, 8

	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare_reg
	add	%%result, 8
	cmp	%%result, %%result_max
	jle	%%loop1

%%_by_8:
	; result_max is now (original - 8): one more full 8-byte step if it fits
	add	%%result_max, 8
	cmp	%%result, %%result_max
	jg	%%_cmp_last

	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare_reg
	add	%%result, 8

%%_cmp_last:
	; result_max is back to its original value; done if everything matched
	add	%%result_max, 8
	cmp	%%result, %%result_max
	je	%%end

	; Fewer than 8 bytes remain: compare the last 8 bytes of the range,
	; overlapping bytes that were already examined
	lea	%%result, [%%result_max - 8]

	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare_reg
	add	%%result, 8
	jmp	%%end

%%miscompare_reg:
	; Lowest set bit index / 8 = index of the first differing byte
	; (little-endian load: lowest address is the lowest byte)
	bsf	%%tmp, %%tmp
	shr	%%tmp, 3
	add	%%result, %%tmp
%%end:
%endm
98660f49b0SGreg Tucker
;; compare_x src1, src2, result, result_max, tmp, xtmp1, xtmp2
;;
;; Extends a match between the bytes at src1 and src2, comparing 16 bytes at
;; a time with pcmpeqb/pmovmskb, then finishing with 8-byte xor compares.
;;
;;   src1, src2   - base addresses of the two byte streams (only read)
;;   result       - in:  offset at which comparison starts
;;                  out: offset of the first differing byte found, or
;;                       result_max when no difference is found
;;   result_max   - offset one past the last byte that may be compared.
;;                  Assumes the input buffer has size at least 8: the tail
;;                  step re-reads the 8 bytes ending at result_max.
;;                  Clobbered (it is adjusted while scanning).
;;   tmp          - 64-bit scratch register, clobbered
;;   xtmp1, xtmp2 - 128-bit vector scratch registers, clobbered
;;
;; Flags are clobbered. Offsets are compared with signed branches because
;; result_max is temporarily lowered by 32 and may become negative.
;; MOVDQU/PCMPEQB/PMOVMSKB are uppercase wrapper macros defined outside this
;; file (presumably in stdmac.asm - TODO confirm).
%macro compare_x 7
%define %%src1		%1
%define %%src2		%2
%define %%result	%3	; Accumulator for match_length
%define %%result_max	%4
%define %%tmp		%5
%define %%tmp32		%5d	; tmp as a 32-bit register
%define %%xtmp		%6
%define %%xtmp2		%7

	; The unrolled loop needs 32 readable bytes starting at result
	sub	%%result_max, 32
	cmp	%%result, %%result_max
	jg	%%_by_16

%%loop1:
	; pcmpeqb sets a byte to 0xFF where the inputs are equal; pmovmskb packs
	; one bit per byte, so a full match is 0xFFFF. xor flips the mask so a
	; set bit marks a mismatch and ZF reports "all equal".
	MOVDQU		%%xtmp, [%%src1 + %%result]
	MOVDQU		%%xtmp2, [%%src2 + %%result]
	PCMPEQB		%%xtmp, %%xtmp, %%xtmp2
	PMOVMSKB	%%tmp32, %%xtmp
	xor		%%tmp, 0xFFFF
	jnz		%%miscompare_vect
	add		%%result, 16

	MOVDQU		%%xtmp, [%%src1 + %%result]
	MOVDQU		%%xtmp2, [%%src2 + %%result]
	PCMPEQB		%%xtmp, %%xtmp, %%xtmp2
	PMOVMSKB	%%tmp32, %%xtmp
	xor		%%tmp, 0xFFFF
	jnz		%%miscompare_vect
	add		%%result, 16

	cmp	%%result, %%result_max
	jle	%%loop1

%%_by_16:
	; result_max is now (original - 16): one more 16-byte step if it fits
	add	%%result_max, 16
	cmp	%%result, %%result_max
	jg	%%_by_8

	MOVDQU		%%xtmp, [%%src1 + %%result]
	MOVDQU		%%xtmp2, [%%src2 + %%result]
	PCMPEQB		%%xtmp, %%xtmp, %%xtmp2
	PMOVMSKB	%%tmp32, %%xtmp
	xor		%%tmp, 0xFFFF
	jnz		%%miscompare_vect
	add		%%result, 16

%%_by_8:
	; result_max is now (original - 8): one 8-byte step if it fits
	add	%%result_max, 8
	cmp	%%result, %%result_max
	jg	%%_cmp_last

	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare_reg
	add	%%result, 8

%%_cmp_last:
	; result_max is back to its original value; done if everything matched
	add	%%result_max, 8
	cmp	%%result, %%result_max
	je	%%end

	; Fewer than 8 bytes remain: compare the last 8 bytes of the range,
	; overlapping bytes that were already examined
	lea	%%result, [%%result_max - 8]

	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare_reg
	add	%%result, 8
	jmp	%%end

%%miscompare_reg:
	; tmp holds xor'ed qwords: lowest set bit index / 8 = differing byte
	bsf	%%tmp, %%tmp
	shr	%%tmp, 3
	add	%%result, %%tmp
	jmp	%%end

%%miscompare_vect:
	; tmp holds one bit per byte: lowest set bit index = differing byte
	bsf	%%tmp, %%tmp
	add	%%result, %%tmp
%%end:
%endm
185660f49b0SGreg Tucker
;; compare_y src1, src2, result, result_max, tmp, ytmp1, ytmp2
;;
;; Extends a match between the bytes at src1 and src2, comparing 32 bytes at
;; a time with vpcmpeqb/vpmovmskb, then finishing with one 16-byte vector
;; step and 8-byte xor compares.
;;
;;   src1, src2   - base addresses of the two byte streams (only read)
;;   result       - in:  offset at which comparison starts
;;                  out: offset of the first differing byte found, or
;;                       result_max when no difference is found
;;   result_max   - offset one past the last byte that may be compared.
;;                  Assumes the input buffer has size at least 8: the tail
;;                  step re-reads the 8 bytes ending at result_max.
;;                  Clobbered (it is adjusted while scanning).
;;   tmp          - 64-bit scratch register, clobbered
;;   ytmp1, ytmp2 - 256-bit vector scratch registers, clobbered
;;
;; Flags are clobbered. Offsets are compared with signed branches because
;; result_max is temporarily lowered by 64 and may become negative.
;; "%%ytmp %+ x" builds the name <ymm register>x, which is expected to be an
;; alias for the matching xmm register defined outside this file (TODO confirm
;; which include provides it).
%macro compare_y 7
%define %%src1		%1
%define %%src2		%2
%define %%result	%3	; Accumulator for match_length
%define %%result_max	%4
%define %%tmp		%5
%define %%tmp32		%5d	; tmp as a 32-bit register
%define %%ytmp		%6
%define %%ytmp2		%7

	; The unrolled loop needs 64 readable bytes starting at result
	sub	%%result_max, 64
	cmp	%%result, %%result_max
	jg	%%_by_32

%%loop1:
	; vpmovmskb yields one bit per byte, all ones on a full match. xor (not
	; "not") is used to invert the mask because it also sets ZF for jnz.
	vmovdqu		%%ytmp, [%%src1 + %%result]
	vmovdqu		%%ytmp2, [%%src2 + %%result]
	vpcmpeqb	%%ytmp, %%ytmp, %%ytmp2
	vpmovmskb	%%tmp, %%ytmp
	xor		%%tmp32, 0xFFFFFFFF
	jnz		%%miscompare_vect
	add		%%result, 32

	vmovdqu		%%ytmp, [%%src1 + %%result]
	vmovdqu		%%ytmp2, [%%src2 + %%result]
	vpcmpeqb	%%ytmp, %%ytmp, %%ytmp2
	vpmovmskb	%%tmp, %%ytmp
	xor		%%tmp32, 0xFFFFFFFF
	jnz		%%miscompare_vect
	add		%%result, 32

	cmp	%%result, %%result_max
	jle	%%loop1

%%_by_32:
	; result_max is now (original - 32): one more 32-byte step if it fits
	add	%%result_max, 32
	cmp	%%result, %%result_max
	jg	%%_by_16

	vmovdqu		%%ytmp, [%%src1 + %%result]
	vmovdqu		%%ytmp2, [%%src2 + %%result]
	vpcmpeqb	%%ytmp, %%ytmp, %%ytmp2
	vpmovmskb	%%tmp, %%ytmp
	xor		%%tmp32, 0xFFFFFFFF
	jnz		%%miscompare_vect
	add		%%result, 32

%%_by_16:
	; result_max is now (original - 16): one 16-byte step on the xmm halves
	add	%%result_max, 16
	cmp	%%result, %%result_max
	jg	%%_by_8

	vmovdqu		%%ytmp %+ x, [%%src1 + %%result]
	vmovdqu		%%ytmp2 %+ x, [%%src2 + %%result]
	vpcmpeqb	%%ytmp %+ x, %%ytmp %+ x, %%ytmp2 %+ x
	vpmovmskb	%%tmp, %%ytmp %+ x
	xor		%%tmp32, 0xFFFF
	jnz		%%miscompare_vect
	add		%%result, 16

%%_by_8:
	; result_max is now (original - 8): one 8-byte step if it fits
	add	%%result_max, 8
	cmp	%%result, %%result_max
	jg	%%_cmp_last

	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare_reg
	add	%%result, 8

%%_cmp_last:
	; result_max is back to its original value; done if everything matched
	add	%%result_max, 8
	cmp	%%result, %%result_max
	je	%%end

	; Fewer than 8 bytes remain: compare the last 8 bytes of the range,
	; overlapping bytes that were already examined
	lea	%%result, [%%result_max - 8]

	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare_reg
	add	%%result, 8
	jmp	%%end

%%miscompare_reg:
	; tmp holds xor'ed qwords: lowest set bit index / 8 = differing byte
	bsf	%%tmp, %%tmp
	shr	%%tmp, 3
	add	%%result, %%tmp
	jmp	%%end

%%miscompare_vect:
	; tmp holds one bit per byte (non-zero here): trailing zero count is
	; the index of the first differing byte
	tzcnt	%%tmp, %%tmp
	add	%%result, %%tmp
%%end:
%endm
285*ba1a0006SRoy Oursler
;; compare_z src1, src2, result, result_max, tmp, ktmp, ztmp1, ztmp2
;;
;; Extends a match between the bytes at src1 and src2, comparing 64 bytes at
;; a time with AVX-512 byte compares into a mask register. The final partial
;; block (fewer than 64 bytes) is handled with masked loads, so no bytes at or
;; beyond result_max are loaded.
;;
;;   src1, src2   - base addresses of the two byte streams (only read)
;;   result       - in:  offset at which comparison starts
;;                  out: offset of the first differing byte found, or the
;;                       original result_max when no difference is found
;;   result_max   - offset one past the last byte that may be compared.
;;                  Clobbered: on the tail path it ends up holding the number
;;                  of bytes that were left to compare.
;;   tmp          - 64-bit scratch register (used with kmovq/bzhi), clobbered
;;   ktmp         - opmask scratch register, clobbered
;;   ztmp1, ztmp2 - 512-bit vector scratch registers, clobbered
;;
;; Flags are clobbered. Offsets are compared with signed branches because
;; result_max is temporarily lowered by 128 and may become negative.
;; NEQ is the vpcmpb predicate constant, defined outside this file
;; (presumably in stdmac.asm - TODO confirm).
%macro compare_z 8
%define %%src1		%1
%define %%src2		%2
%define %%result	%3	; Accumulator for match_length
%define %%result_max	%4
%define %%tmp		%5	; tmp as a 64-bit register
%define %%ktmp		%6
%define %%ztmp		%7
%define %%ztmp2		%8

	; The unrolled loop needs 128 readable bytes starting at result
	sub	%%result_max, 128
	cmp	%%result, %%result_max
	jg	%%_by_64

%%loop1:
	; ktmp gets one bit per differing byte; ktestq sets ZF when none differ
	vmovdqu8	%%ztmp, [%%src1 + %%result]
	vmovdqu8	%%ztmp2, [%%src2 + %%result]
	vpcmpb		%%ktmp, %%ztmp, %%ztmp2, NEQ
	ktestq		%%ktmp, %%ktmp
	jnz		%%miscompare
	add		%%result, 64

	vmovdqu8	%%ztmp, [%%src1 + %%result]
	vmovdqu8	%%ztmp2, [%%src2 + %%result]
	vpcmpb		%%ktmp, %%ztmp, %%ztmp2, NEQ
	ktestq		%%ktmp, %%ktmp
	jnz		%%miscompare
	add		%%result, 64

	cmp	%%result, %%result_max
	jle	%%loop1

%%_by_64:
	; result_max is now (original - 64): one more 64-byte step if it fits
	add	%%result_max, 64
	cmp	%%result, %%result_max
	jg	%%_less_than_64

	vmovdqu8	%%ztmp, [%%src1 + %%result]
	vmovdqu8	%%ztmp2, [%%src2 + %%result]
	vpcmpb		%%ktmp, %%ztmp, %%ztmp2, NEQ
	ktestq		%%ktmp, %%ktmp
	jnz		%%miscompare
	add		%%result, 64

%%_less_than_64:
	; result_max = original result_max - result = bytes left to compare
	add	%%result_max, 64
	sub	%%result_max, %%result
	jle	%%end

	; Build a mask with the low result_max bits set: one bit per byte left
	mov	%%tmp, -1
	bzhi	%%tmp, %%tmp, %%result_max
	kmovq	%%ktmp, %%tmp

	; Zero-masked loads: bytes outside the mask are zero in both registers,
	; so they can never be reported as a mismatch
	vmovdqu8	%%ztmp {%%ktmp}{z}, [%%src1 + %%result]
	vmovdqu8	%%ztmp2 {%%ktmp}{z}, [%%src2 + %%result]
	vpcmpb		%%ktmp, %%ztmp, %%ztmp2, NEQ
	ktestq		%%ktmp, %%ktmp
	jnz		%%miscompare
	add		%%result, %%result_max

	jmp	%%end
%%miscompare:
	; Trailing zero count of the mismatch mask = first differing byte index
	kmovq	%%tmp, %%ktmp
	tzcnt	%%tmp, %%tmp
	add	%%result, %%tmp
%%end:
%endm
356660f49b0SGreg Tucker
;; compare250 src1, src2, result, result_max, tmp, ytmp0, ytmp1
;;
;; Clamps result_max to at most 250 (signed compare), then runs the compare
;; macro selected at assembly time by COMPARE_TYPE:
;;   1 - compare_r (general purpose registers)
;;   2 - compare_x (128-bit vectors)
;;   3 - compare_y (256-bit vectors)
;; Any other value stops assembly with an error.
;;
;;   src1, src2   - base addresses of the two byte streams
;;   result       - in/out match length accumulator (see the selected macro)
;;   result_max   - upper bound for result; clobbered
;;   tmp          - 64-bit scratch register, clobbered
;;   ytmp0, ytmp1 - vector scratch registers. For COMPARE_TYPE 2 the names
;;                  <arg>x are used, which are expected to be aliases for the
;;                  xmm registers defined outside this file (TODO confirm).
%macro compare250 7
%define %%src1		%1
%define %%src2		%2
%define %%result	%3
%define %%result_max	%4
%define %%tmp		%5
%define %%xtmp0		%6x
%define %%xtmp1		%7x
%define %%ytmp0		%6
%define %%ytmp1		%7

	; result_max = min(result_max, 250); cmov needs a register source
	mov	%%tmp, 250
	cmp	%%result_max, 250
	cmovg	%%result_max, %%tmp

%if (COMPARE_TYPE == 1)
	compare_r	%%src1, %%src2, %%result, %%result_max, %%tmp
%elif (COMPARE_TYPE == 2)
	compare_x	%%src1, %%src2, %%result, %%result_max, %%tmp, %%xtmp0, %%xtmp1
%elif (COMPARE_TYPE == 3)
	compare_y	%%src1, %%src2, %%result, %%result_max, %%tmp, %%ytmp0, %%ytmp1
%else
	; The malformed "% error" line below is kept on purpose; presumably it
	; forces a failure on assemblers where %error is not fatal (TODO confirm)
%error Unknown Compare type COMPARE_TYPE
 % error
%endif
%endmacro
383d389b8d6SRoy Oursler
;; compare_large src1, src2, result, result_max, tmp, ytmp0, ytmp1
;;
;; Runs the compare macro selected at assembly time by COMPARE_TYPE, without
;; limiting result_max:
;;   1 - compare_r (general purpose registers)
;;   2 - compare_x (128-bit vectors)
;;   3 - compare_y (256-bit vectors)
;; Any other value stops assembly with an error.
;;
;; Assumes the buffer has at least 8 bytes.
;; Accumulates match length onto result.
;;
;;   src1, src2   - base addresses of the two byte streams
;;   result       - in/out match length accumulator (see the selected macro)
;;   result_max   - upper bound for result; clobbered
;;   tmp          - 64-bit scratch register, clobbered
;;   ytmp0, ytmp1 - vector scratch registers. For COMPARE_TYPE 2 the names
;;                  <arg>x are used, which are expected to be aliases for the
;;                  xmm registers defined outside this file (TODO confirm).
%macro compare_large 7
%define %%src1		%1
%define %%src2		%2
%define %%result	%3
%define %%result_max	%4
%define %%tmp		%5
%define %%xtmp0		%6x
%define %%xtmp1		%7x
%define %%ytmp0		%6
%define %%ytmp1		%7

%if (COMPARE_TYPE == 1)
	compare_r	%%src1, %%src2, %%result, %%result_max, %%tmp
%elif (COMPARE_TYPE == 2)
	compare_x	%%src1, %%src2, %%result, %%result_max, %%tmp, %%xtmp0, %%xtmp1
%elif (COMPARE_TYPE == 3)
	compare_y	%%src1, %%src2, %%result, %%result_max, %%tmp, %%ytmp0, %%ytmp1
%else
	; The malformed "% error" line below is kept on purpose; presumably it
	; forces a failure on assemblers where %error is not fatal (TODO confirm)
%error Unknown Compare type COMPARE_TYPE
 % error
%endif
%endmacro
408d389b8d6SRoy Oursler
409660f49b0SGreg Tucker;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
410660f49b0SGreg Tucker;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
411660f49b0SGreg Tucker
;; compare size, src1, src2, result, tmp
;;
;; Counts how many leading bytes of src1 and src2 are equal, looking at no
;; more than size bytes. Compares 8 bytes at a time while at least 8 bytes
;; remain, then one byte at a time, so it never reads past size bytes.
;;
;;   size        - number of bytes that may be compared; clobbered
;;   src1, src2  - base addresses of the two byte streams (only read)
;;   result      - out: number of matching leading bytes (zeroed on entry)
;;   tmp         - 64-bit scratch register, clobbered. The name <tmp>b must
;;                 resolve to its 8-bit form (alias presumably defined outside
;;                 this file - TODO confirm).
;;
;; Flags are clobbered.
%macro compare 5
%define %%size		%1
%define %%src1		%2
%define %%src2		%3
%define %%result	%4
%define %%tmp		%5
%define %%tmp8		%5b	; tmp as a 8-bit register

	xor	%%result, %%result
	; Bias size by 7 so "size > 0" means at least 8 bytes are left
	sub	%%size, 7
	jle	%%lab2
%%loop1:
	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare
	add	%%result, 8
	sub	%%size, 8
	jg	%%loop1
%%lab2:
	;; If we get here no mismatch has been found so far, %%size+7 is the
	;; number of bytes left to look at (fewer than 8), and %%result is the
	;; number of bytes that have matched
	add	%%size, 7
	jle	%%end
%%loop3:
	; Byte-wise tail: stop at the first difference or when size runs out
	mov	%%tmp8, [%%src1 + %%result]
	cmp	%%tmp8, [%%src2 + %%result]
	jne	%%end
	inc	%%result
	dec	%%size
	jg	%%loop3
	jmp	%%end
%%miscompare:
	; Lowest set bit index / 8 = index of the first differing byte
	bsf	%%tmp, %%tmp
	shr	%%tmp, 3
	add	%%result, %%tmp
%%end:
%endm
451660f49b0SGreg Tucker
452660f49b0SGreg Tucker%endif	;UTILS_ASM
453