;; xref: /isa-l/igzip/stdmac.asm (revision fce71b0670fd92bacdea22e11fcd4e233d9b5c20)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%ifndef STDMAC_ASM
%define STDMAC_ASM
;; push_all_ reg1, reg2, ...
;; Internal helper for PUSH_ALL. Records the number of arguments in
;; _PUSH_ALL_REGS_COUNT_ and emits one push per argument, walking the
;; argument list from left to right.
%macro push_all_ 1-*
	%xdefine _PUSH_ALL_REGS_COUNT_ %0
	%rep %0
		push	%1		;; current head of the argument list
		%rotate 1		;; next argument becomes %1
	%endrep
%endmacro

;; pop_all_ reg1, reg2, ...
;; Internal helper for POP_ALL. Emits one pop per argument, walking the
;; argument list from right to left (the reverse of the push order).
%macro pop_all_ 1-*
	%rep %0
		%rotate -1		;; previous argument becomes %1
		pop	%1
	%endrep
%endmacro

;; Bookkeeping used by the stack macros in this file; both start at zero
;; (nothing pushed, nothing allocated).
%xdefine _ALLOC_STACK_VAL_     0
%xdefine _PUSH_ALL_REGS_COUNT_ 0
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; STACK_OFFSET
;; Number of bytes subtracted from the stack pointer by PUSH_ALL
;; (8 bytes per recorded register) and ALLOC_STACK. It is a plain %define,
;; so it is re-evaluated from the two counters at every point of use.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%define STACK_OFFSET ((_PUSH_ALL_REGS_COUNT_ * 8) + _ALLOC_STACK_VAL_)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; PUSH_ALL reg1, reg2, ...
;; Push the listed registers from left to right and remember the list in
;; _PUSH_ALL_REGS_ so that a later POP_ALL can restore them.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro PUSH_ALL 1+
	%xdefine _PUSH_ALL_REGS_ %1	;; whole list kept as one token string
	push_all_ %1
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; POP_ALL
;; Pop the registers recorded by the previous PUSH_ALL, from right to left,
;; then reset the pushed-register count to zero.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro POP_ALL 0
	pop_all_ _PUSH_ALL_REGS_
	%xdefine _PUSH_ALL_REGS_COUNT_ 0	;; STACK_OFFSET no longer counts the pushes
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALLOC_STACK n
;; Subtract n from the stack pointer and record n in _ALLOC_STACK_VAL_ so
;; that RESTORE_STACK can undo it. The recorded value is overwritten, not
;; accumulated, by each use.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro ALLOC_STACK 1
	sub	rsp, %1
	%xdefine _ALLOC_STACK_VAL_ %1
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; RESTORE_STACK
;; Add back to the stack pointer the amount recorded by the previous
;; ALLOC_STACK, then reset the recorded amount to zero.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro RESTORE_STACK 0
	add	rsp, _ALLOC_STACK_VAL_
	%xdefine _ALLOC_STACK_VAL_     0	;; must follow the add, which reads the old value
%endmacro


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NOPN n
;; Create n bytes of NOP, using nops of up to 8 bytes each.
;; Emits floor(n / 8) eight-byte nops followed by one shorter nop for the
;; remainder, if any. The repeat count is computed from n, so the padding
;; is never cut short for large n. A value of n below 1 is passed to nopn
;; unchanged, which rejects it with its %error.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro NOPN 1

 %assign %%total %1
 %if (%%total < 1)
	nopn %%total
 %else
  %assign %%eights (%%total / 8)
  %assign %%tail   (%%total - (%%eights * 8))
  %rep %%eights
	nopn 8
  %endrep
  %if (%%tail != 0)
	nopn %%tail
  %endif
 %endif
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; nopn n
;; Create n bytes of NOP as a single instruction, where n is between 1 and 9.
;; Each size is emitted as a fixed byte sequence; any other value of n
;; raises an assembly-time error.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro nopn 1
%if (%1 == 1)
	nop
%elif (%1 == 2)
	db	0x66
	nop
%elif (%1 == 3)
	db	0x0F, 0x1F, 0x00
%elif (%1 == 4)
	db	0x0F, 0x1F, 0x40, 0x00
%elif (%1 == 5)
	db	0x0F, 0x1F, 0x44, 0x00, 0x00
%elif (%1 == 6)
	db	0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00
%elif (%1 == 7)
	db	0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00
%elif (%1 == 8)
	db	0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
%elif (%1 == 9)
	db	0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
%else
%error Invalid value to nopn
%endif
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rolx64 dst, src, amount
;; Emulate a rolx instruction using rorx, assuming data 64 bits wide:
;; rotating left by amount is emitted as rotating right by 64 - amount.
;; amount is parenthesised so that an expression argument (e.g. "a+b")
;; is subtracted as a whole.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro rolx64 3
	rorx %1, %2, (64 - (%3))
%endm

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rolx32 dst, src, amount
;; Emulate a rolx instruction using rorx, assuming data 32 bits wide:
;; rotating left by amount is emitted as rotating right by 32 - amount.
;; amount is parenthesised so that an expression argument (e.g. "a+b")
;; is subtracted as a whole.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro rolx32 3
	rorx %1, %2, (32 - (%3))
%endm


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; DEF_SSC
;; Define a global function void ssc(uint64_t x).
;; The body loads rcx into rbx, emits the raw bytes 0x64 0x67 followed by
;; a nop, and then restores rbx. rax is used as the save slot for rbx and
;; is therefore overwritten.
;; NOTE(review): x is read from rcx -- confirm this matches the calling
;; convention of the intended callers.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro DEF_SSC 0
global ssc
ssc:
	mov	rax, rbx		;; keep the caller's rbx in rax
	mov	rbx, rcx		;; rbx = rcx
	db	0x64, 0x67		;; two raw prefix bytes in front of the nop
	nop
	mov	rbx, rax		;; put the caller's rbx back
	ret
%endmacro

;; MOVDQU dest, src
;; Emit vmovdqu when ARCH is 02, 03 or 04, and movdqu for any other ARCH.
%macro	MOVDQU	2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vmovdqu	%1, %2
%else
	movdqu	%1, %2
%endif
%endmacro

;; MOVDQA dest, src
;; Emit vmovdqa when ARCH is 02, 03 or 04, and movdqa for any other ARCH.
%macro	MOVDQA	2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vmovdqa	%1, %2
%else
	movdqa	%1, %2
%endif
%endmacro

;; MOVD dest, src
;; Emit vmovd when ARCH is 02, 03 or 04, and movd for any other ARCH.
%macro	MOVD	2
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vmovd	%1, %2
%else
	movd	%1, %2
%endif
%endmacro

;; MOVQ dest, src
;; Emit vmovq when ARCH is 02, 03 or 04, and movq for any other ARCH.
%macro	MOVQ	2
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vmovq	%1, %2
%else
	movq	%1, %2
%endif
%endmacro

;; MOVNIDN dest, src
;; Move register if the src and dest are not equal.
;; The comparison is textual (%ifnidn, case-sensitive): "mov dest, src" is
;; emitted unless both operands expand to the same text.
;; The operands are used directly rather than through %define aliases, so
;; the macro defines no names in the including file.
%macro MOVNIDN 2
%ifnidn %1, %2
	mov	%1, %2
%endif
%endm

;; MOVDQANIDN dest, src
;; Emit "MOVDQA dest, src" unless both operands expand to the same text
;; (%ifnidn, case-sensitive).
;; The operands are used directly rather than through %define aliases, so
;; the macro defines no names in the including file.
%macro MOVDQANIDN 2
%ifnidn %1, %2
	MOVDQA	%1, %2
%endif
%endm

;; PSHUFD dest, src1, imm8
;; Emit vpshufd when ARCH is 02, 03 or 04, and pshufd for any other ARCH.
%macro PSHUFD	3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpshufd	%1, %2, %3
%else
	pshufd	%1, %2, %3
%endif
%endmacro

;; PSHUFB dest, src1, shuf
;; Three-operand byte shuffle. Emits vpshufb when ARCH is 02, 03 or 04.
;; For any other ARCH it copies src1 into dest (skipped when they are the
;; same operand) and then emits the two-operand pshufb.
;; In that second path dest is overwritten before shuf is read, so shuf
;; must not be the same register as dest unless src1 is as well.
%macro PSHUFB	3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpshufb	%1, %2, %3
%else
	MOVDQANIDN %1, %2
	pshufb	%1, %3
%endif
%endmacro

;; PBROADCASTD dest, src
;; Emits vpbroadcastd when ARCH is 04. For any other ARCH it moves src into
;; dest with MOVD and then applies PSHUFD with an immediate of 0 to dest.
%macro PBROADCASTD 2
%if (ARCH == 04)
	vpbroadcastd %1, %2
%else
	MOVD	%1, %2
	PSHUFD	%1, %1, 0
%endif
%endmacro

;; BZHI dest, src, index, tmp1
;; Implement BZHI instruction on older architectures.
;; With USE_HSWNI defined this is the bzhi instruction itself and tmp1 is
;; unused. Otherwise the fallback builds the mask (1 << cl) - 1 in tmp1
;; and ANDs it with a copy of src in dest.
;; Fallback clobbers tmp1, and rcx unless rcx is index.
;; Fallback ordering: rcx is loaded from index first, tmp1 is written
;; next, and src is copied into dest last -- so src must not be rcx
;; (unless index is too) and tmp1 must not be src, dest or rcx.
%macro	BZHI	4

%ifndef USE_HSWNI
	MOVNIDN	rcx, %3			;; shift count has to live in cl
	mov	%4, 1
	shl	%4, cl
	sub	%4, 1			;; tmp1 = (1 << cl) - 1

	MOVNIDN	%1, %2

	and	%1, %4
%else
	bzhi	%1, %2, %3
%endif
%endmacro

;; SHRX dest, src, index
;; Implement shrx instruction on older architectures.
;; With USE_HSWNI defined this is the shrx instruction itself. Otherwise
;; index is moved to rcx, src is copied to dest, and dest is shifted right
;; by cl.
;; Fallback clobbers rcx, unless rcx is index.
;; Fallback ordering: rcx is loaded before src is copied, so src must not
;; be rcx unless index is rcx as well.
%macro	SHRX	3

%ifndef USE_HSWNI
	MOVNIDN	rcx, %3			;; shift count has to live in cl
	MOVNIDN	%1, %2
	shr	%1, cl
%else
	shrx	%1, %2, %3
%endif
%endmacro

;; SHLX dest, src, index
;; Implement shlx instruction on older architectures.
;; With USE_HSWNI defined this is the shlx instruction itself. Otherwise
;; src is copied to dest, index is moved to rcx, and dest is shifted left
;; by cl.
;; Fallback clobbers rcx, unless rcx is index.
;; Fallback ordering: dest is written before index is read, so index must
;; not be the same register as dest unless src is as well.
%macro	SHLX	3

%ifndef USE_HSWNI
	MOVNIDN	%1, %2
	MOVNIDN	rcx, %3			;; shift count has to live in cl
	shl	%1, cl
%else
	shlx	%1, %2, %3
%endif
%endmacro

;; PINSRD dest, src, offset
;; Emit vpinsrd when ARCH is 02, 03 or 04, and pinsrd for any other ARCH.
%macro	PINSRD	3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpinsrd	%1, %2, %3
%else
	pinsrd	%1, %2, %3
%endif
%endmacro

;; PEXTRD dest, src, offset
;; Emit vpextrd when ARCH is 02, 03 or 04, and pextrd for any other ARCH.
%macro	PEXTRD	3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpextrd	%1, %2, %3
%else
	pextrd	%1, %2, %3
%endif
%endmacro

;; PSRLDQ dest, offset
;; Emit vpsrldq when ARCH is 02, 03 or 04, and psrldq for any other ARCH.
;; Both forms are written with two operands (dest, offset).
%macro	PSRLDQ	2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpsrldq	%1, %2
%else
	psrldq	%1, %2
%endif
%endmacro

;; PSLLD dest, src, offset
;; Three-operand shift. Emits vpslld when ARCH is 02, 03 or 04. For any
;; other ARCH it copies src into dest (skipped when they are the same
;; operand) and then emits the two-operand pslld.
;; In that second path dest is overwritten before offset is read, so a
;; register offset must not be the same register as dest unless src is
;; as well.
%macro	PSLLD	3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpslld	%1, %2, %3
%else
	MOVDQANIDN %1, %2
	pslld	%1, %3
%endif
%endmacro

;; PAND dest, src1, src2
;; Three-operand AND: dest = src1 & src2.
;; Emits vpand when ARCH is 02, 03 or 04. For any other ARCH it is built
;; from the two-operand pand:
;;   - dest is the same operand as src2: emit "pand dest, src1". Copying
;;     src1 into dest first would destroy src2, and AND is commutative.
;;   - otherwise: copy src1 into dest (skipped when identical), then
;;     "pand dest, src2".
%macro	PAND	3
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vpand	%1, %2, %3
%else
 %ifidn %1, %3
	pand	%1, %2
 %else
	MOVDQANIDN %1, %2
	pand	%1, %3
 %endif
%endif
%endm

;; POR dest, src1, src2
;; Three-operand OR: dest = src1 | src2.
;; Emits vpor when ARCH is 02, 03 or 04. For any other ARCH it is built
;; from the two-operand por:
;;   - dest is the same operand as src2: emit "por dest, src1". Copying
;;     src1 into dest first would destroy src2, and OR is commutative.
;;   - otherwise: copy src1 into dest (skipped when identical), then
;;     "por dest, src2".
%macro	POR	3
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vpor	%1, %2, %3
%else
 %ifidn %1, %3
	por	%1, %2
 %else
	MOVDQANIDN %1, %2
	por	%1, %3
 %endif
%endif
%endm

;; PXOR dest, src1, src2
;; Three-operand XOR: dest = src1 ^ src2.
;; Emits vpxor when ARCH is 02, 03 or 04. For any other ARCH it is built
;; from the two-operand pxor:
;;   - dest is the same operand as src2: emit "pxor dest, src1". Copying
;;     src1 into dest first would destroy src2, and XOR is commutative.
;;   - otherwise: copy src1 into dest (skipped when identical), then
;;     "pxor dest, src2".
%macro PXOR	3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpxor	%1, %2, %3
%else
 %ifidn %1, %3
	pxor	%1, %2
 %else
	MOVDQANIDN %1, %2
	pxor	%1, %3
 %endif
%endif
%endm

;; PADDD dest, src1, src2
;; Three-operand add: dest = src1 + src2.
;; Emits vpaddd when ARCH is 02, 03 or 04. For any other ARCH it is built
;; from the two-operand paddd:
;;   - dest is the same operand as src2: emit "paddd dest, src1". Copying
;;     src1 into dest first would destroy src2, and addition is
;;     commutative.
;;   - otherwise: copy src1 into dest (skipped when identical), then
;;     "paddd dest, src2".
%macro PADDD 3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpaddd	%1, %2, %3
%else
 %ifidn %1, %3
	paddd	%1, %2
 %else
	MOVDQANIDN %1, %2
	paddd	%1, %3
 %endif
%endif
%endm

;; PCMPEQB dest, src1, src2
;; Three-operand equality compare of src1 and src2, result in dest.
;; Emits vpcmpeqb when ARCH is 02, 03 or 04. For any other ARCH it is
;; built from the two-operand pcmpeqb:
;;   - dest is the same operand as src2: emit "pcmpeqb dest, src1".
;;     Copying src1 into dest first would destroy src2, and an equality
;;     compare gives the same result with its operands swapped.
;;   - otherwise: copy src1 into dest (skipped when identical), then
;;     "pcmpeqb dest, src2".
%macro	PCMPEQB	3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpcmpeqb	%1, %2, %3
%else
 %ifidn %1, %3
	pcmpeqb	%1, %2
 %else
	MOVDQANIDN %1, %2
	pcmpeqb	%1, %3
 %endif
%endif
%endm

;; PMOVMSKB dest, src
;; Emit vpmovmskb when ARCH is 02, 03 or 04, and pmovmskb for any other ARCH.
%macro	PMOVMSKB	2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpmovmskb	%1, %2
%else
	pmovmskb	%1, %2
%endif
%endmacro

%endif 	;; ifndef STDMAC_ASM