xref: /isa-l/igzip/stdmac.asm (revision fce71b0670fd92bacdea22e11fcd4e233d9b5c20)
1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3;
4;  Redistribution and use in source and binary forms, with or without
5;  modification, are permitted provided that the following conditions
6;  are met:
7;    * Redistributions of source code must retain the above copyright
8;      notice, this list of conditions and the following disclaimer.
9;    * Redistributions in binary form must reproduce the above copyright
10;      notice, this list of conditions and the following disclaimer in
11;      the documentation and/or other materials provided with the
12;      distribution.
13;    * Neither the name of Intel Corporation nor the names of its
14;      contributors may be used to endorse or promote products derived
15;      from this software without specific prior written permission.
16;
17;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30%ifndef STDMAC_ASM
31%define STDMAC_ASM
;; push_all_ reg1, reg2, ...
;; Internal helper for PUSH_ALL: pushes every argument, left to right, and
;; records the argument count in _PUSH_ALL_REGS_COUNT_ (read by STACK_OFFSET).
%macro push_all_ 1-*
 %rep %0
	push	%1
  %rotate 1			; next argument becomes %1
 %endrep
 ;; %0 is not affected by %rotate, so the count is still the argument total
 %xdefine _PUSH_ALL_REGS_COUNT_ %0
%endmacro
41
;; pop_all_ reg1, reg2, ...
;; Internal helper for POP_ALL: pops the arguments right to left, i.e. in the
;; reverse of the order in which push_all_ pushes the same list.
%macro pop_all_ 1-*
 %rep %0
  %rotate -1			; bring the last not-yet-popped argument to %1
	pop	%1
 %endrep
%endmacro
50
;; Bookkeeping behind STACK_OFFSET. Both values start at zero;
;; push_all_/POP_ALL update the register count and
;; ALLOC_STACK/RESTORE_STACK update the allocation size.
%xdefine _ALLOC_STACK_VAL_     0
%xdefine _PUSH_ALL_REGS_COUNT_ 0

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; STACK_OFFSET
;; Number of bytes subtracted from the stack pointer by PUSH_ALL and
;; ALLOC_STACK. Every pushed register is counted as 8 bytes.
;; Defined with %define (not %xdefine) so that it is re-expanded, and thus
;; reflects the current bookkeeping values, at each point of use.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%define STACK_OFFSET (_PUSH_ALL_REGS_COUNT_ * 8 + _ALLOC_STACK_VAL_)
58
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; PUSH_ALL reg1, reg2, ...
;; Push the listed registers left to right and remember the list in
;; _PUSH_ALL_REGS_ so that a later POP_ALL can restore them.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro PUSH_ALL 1+
 ;; "1+" makes %1 the whole comma-separated register list
 %xdefine _PUSH_ALL_REGS_ %1
	push_all_ _PUSH_ALL_REGS_
%endmacro
67
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; POP_ALL
;; Pop the registers remembered by the previous PUSH_ALL, right to left,
;; and reset the pushed-register count used by STACK_OFFSET to zero.
;; Note: the reset is a preprocessor-time effect, so it applies to all
;; source text that follows this invocation.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro POP_ALL 0
 %xdefine _PUSH_ALL_REGS_COUNT_ 0
	pop_all_ _PUSH_ALL_REGS_
%endmacro
76
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALLOC_STACK n
;; Subtract n from rsp and remember n in _ALLOC_STACK_VAL_ so that
;; RESTORE_STACK can undo it and STACK_OFFSET can account for it.
;; Clobbers: flags (sub).
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro ALLOC_STACK 1
 %xdefine _ALLOC_STACK_VAL_ %1
	sub	rsp, _ALLOC_STACK_VAL_
%endmacro
85
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; RESTORE_STACK
;; Add back to rsp the amount given to the previous ALLOC_STACK, then
;; reset the remembered amount to zero.
;; Clobbers: flags (add).
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro RESTORE_STACK 0
	add	rsp, _ALLOC_STACK_VAL_	; must expand before the reset below
 %xdefine _ALLOC_STACK_VAL_ 0
%endmacro
94
95
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NOPN n
;; Create n bytes of NOP, using nops of up to 8 bytes each.
;; Emits floor(n/8) eight-byte nops followed by one nop covering the
;; remaining 1..7 bytes (if any). n must be a non-negative constant;
;; n == 0 emits nothing. There is no upper limit on n.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro NOPN 1
 %if ((%1) < 0)
  %error Invalid value to NOPN
 %else
  ;; Derive the repeat count from n instead of using a fixed iteration
  ;; cap, so large requests are not silently truncated.
  %assign %%full ((%1) / 8)
  %assign %%rest ((%1) - (%%full * 8))
  %rep %%full
	nopn 8
  %endrep
  %if (%%rest > 0)
	nopn %%rest
  %endif
 %endif
%endmacro
113
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; nopn n
;; Emit a single NOP instruction that is exactly n bytes long, where n is
;; between 1 and 9. Any other value raises an assembly-time error.
;; The encodings are the 0F 1F /0 multi-byte NOP forms, with an optional
;; 0x66 operand-size prefix for the 2-, 6- and 9-byte variants.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro nopn 1
%if (%1 == 1)
	nop
%elif (%1 == 2)
	db	0x66, 0x90			; 0x66 prefix + one-byte nop
%elif (%1 == 3)
	db	0x0F, 0x1F, 0x00
%elif (%1 == 4)
	db	0x0F, 0x1F, 0x40, 0x00
%elif (%1 == 5)
	db	0x0F, 0x1F, 0x44, 0x00, 0x00
%elif (%1 == 6)
	db	0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00
%elif (%1 == 7)
	db	0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00
%elif (%1 == 8)
	db	0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
%elif (%1 == 9)
	db	0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
%else
%error Invalid value to nopn
%endif
%endmacro
177
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rolx64 dst, src, amount
;; Emulate a rolx instruction using rorx, assuming data 64 bits wide:
;; rotating left by "amount" is rotating right by (64 - amount).
;; "amount" must be an assembly-time constant (rorx takes an immediate).
;; It is parenthesised so that a compound expression such as "a+b" is
;; subtracted as a whole.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro rolx64 3
	rorx	%1, %2, (64-(%3))
%endm
185
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rolx32 dst, src, amount
;; Emulate a rolx instruction using rorx, assuming data 32 bits wide:
;; rotating left by "amount" is rotating right by (32 - amount).
;; dst/src are expected to be 32-bit operands for this to be a 32-bit
;; rotate. "amount" must be an assembly-time constant (rorx takes an
;; immediate). It is parenthesised so that a compound expression such as
;; "a+b" is subtracted as a whole.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro rolx32 3
	rorx	%1, %2, (32-(%3))
%endm
193
194
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; DEF_SSC
;; Define a global function void ssc(uint64_t x).
;; The function places x (read from rcx) in rbx, executes the byte
;; sequence 0x64 0x67 0x90 (a nop carrying two prefixes) and restores rbx.
;; rax is used to preserve rbx, so it is clobbered.
;; NOTE(review): the prefixed nop is presumably a marker recognised by a
;; simulator/tracing tool - confirm.
;; NOTE(review): rcx is the first integer argument register in the
;; Microsoft x64 convention; a System V caller passes it in rdi - confirm
;; the intended ABI.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro DEF_SSC 0
global ssc
ssc:
	mov	rax, rbx		; save rbx in a scratch register
	mov	rbx, rcx		; rbx = x
	db	0x64, 0x67, 0x90	; prefixed nop
	mov	rbx, rax		; restore rbx
	ret
%endmacro
209
;; MOVDQU dest, src
;; Unaligned 128-bit move. Emits the VEX form (vmovdqu) when ARCH is
;; 02, 03 or 04, otherwise the legacy SSE form (movdqu).
%macro	MOVDQU	2
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vmovdqu	%1, %2
%else
	movdqu	%1, %2
%endif
%endmacro
219
;; MOVDQA dest, src
;; Aligned 128-bit move. Emits the VEX form (vmovdqa) when ARCH is
;; 02, 03 or 04, otherwise the legacy SSE form (movdqa).
%macro	MOVDQA	2
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vmovdqa	%1, %2
%else
	movdqa	%1, %2
%endif
%endmacro
229
;; MOVD dest, src
;; 32-bit move to/from a vector register. Emits vmovd when ARCH is
;; 02, 03 or 04, otherwise movd.
%macro	MOVD	2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vmovd	%1, %2
%else
	movd	%1, %2
%endif
%endmacro
239
;; MOVQ dest, src
;; 64-bit move to/from a vector register. Emits vmovq when ARCH is
;; 02, 03 or 04, otherwise movq.
%macro	MOVQ	2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vmovq	%1, %2
%else
	movq	%1, %2
%endif
%endmacro
249
;; MOVNIDN dest, src
;; Move register if the src and dest are not equal: emits "mov dest, src"
;; unless the two operands are textually identical (case-sensitive
;; %ifnidn comparison), in which case nothing is emitted.
;; The operands are referenced directly rather than through global
;; "dest"/"src" %defines, so invoking this macro does not redefine those
;; identifiers for the rest of the including file.
%macro MOVNIDN 2
%ifnidn %1, %2
	mov	%1, %2
%endif
%endm
258
;; MOVDQANIDN dest, src
;; Emit "MOVDQA dest, src" unless the two operands are textually identical
;; (case-sensitive %ifnidn comparison), in which case nothing is emitted.
;; The operands are referenced directly rather than through global
;; "dest"/"src" %defines, so invoking this macro does not redefine those
;; identifiers for the rest of the including file.
%macro MOVDQANIDN 2
%ifnidn %1, %2
	MOVDQA	%1, %2
%endif
%endm
266
;; PSHUFD dest, src1, imm8
;; Shuffle dwords of src1 into dest according to imm8. Emits vpshufd when
;; ARCH is 02, 03 or 04, otherwise pshufd.
%macro PSHUFD	3
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vpshufd	%1, %2, %3
%else
	pshufd	%1, %2, %3
%endif
%endmacro
277
;; PSHUFB dest, src1, shuf
;; Three-operand byte shuffle. Emits vpshufb when ARCH is 02, 03 or 04.
;; Otherwise src1 is first copied into dest (skipped when they are the
;; same operand) and the two-operand pshufb is applied.
;; On the SSE path dest is written before shuf is read, so dest must not
;; be the same register as shuf unless it is also src1.
%macro PSHUFB	3
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vpshufb	%1, %2, %3
%else
	MOVDQANIDN %1, %2
	pshufb	%1, %3
%endif
%endmacro
289
;; PBROADCASTD dest, src
;; Replicate a 32-bit value into every dword of dest. Emits vpbroadcastd
;; when ARCH is 04. Otherwise the value is moved into the low dword with
;; MOVD and replicated with PSHUFD using shuffle control 0.
;; NOTE(review): the two paths may not accept the same kinds of src
;; operand (register vs. memory) - confirm against callers.
%macro PBROADCASTD 2
%if (ARCH != 04)
	MOVD	%1, %2
	PSHUFD	%1, %1, 0
%else
	vpbroadcastd %1, %2
%endif
%endmacro
300
;; BZHI dest, src, index, tmp1
;; Implement BZHI instruction on older architectures: dest = src with all
;; bits from position "index" upwards cleared.
;; With USE_HSWNI defined the native bzhi is emitted and tmp1 is unused.
;; Fallback path:
;;   - Clobbers rcx (unless rcx is index), tmp1 and the flags.
;;   - index is moved into rcx, so it must be rcx or a 64-bit operand.
;;   - rcx is loaded before src is read, so src must not be rcx unless
;;     index is rcx as well; tmp1 must differ from src and from rcx.
;;   - NOTE(review): shl masks the count held in cl, so for index values
;;     at or above the operand width the result differs from the native
;;     instruction - confirm callers never rely on that range.
%macro	BZHI	4
%ifdef USE_HSWNI
	bzhi	%1, %2, %3
%else
	MOVNIDN	rcx, %3			; variable shift count lives in cl
	mov	%4, 1
	shl	%4, cl			; tmp1 = 1 << index
	sub	%4, 1			; tmp1 = mask of the low "index" bits
	MOVNIDN	%1, %2
	and	%1, %4
%endif
%endmacro
322
;; SHRX dest, src, index
;; Implement shrx instruction on older architectures: dest = src >> index.
;; With USE_HSWNI defined the native shrx is emitted.
;; Fallback path:
;;   - Clobbers rcx (unless rcx is index) and the flags.
;;   - index is moved into rcx, so it must be rcx or a 64-bit operand.
;;   - rcx is loaded before src is copied, so src must not be rcx unless
;;     index is rcx as well; dest must not be rcx.
%macro	SHRX	3
%ifdef USE_HSWNI
	shrx	%1, %2, %3
%else
	MOVNIDN	rcx, %3			; variable shift count lives in cl
	MOVNIDN	%1, %2
	shr	%1, cl
%endif
%endmacro
338
;; SHLX dest, src, index
;; Implement shlx instruction on older architectures: dest = src << index.
;; With USE_HSWNI defined the native shlx is emitted.
;; Fallback path:
;;   - Clobbers rcx (unless rcx is index) and the flags.
;;   - index is moved into rcx, so it must be rcx or a 64-bit operand.
;;   - src is copied into dest before index is read, so dest must not be
;;     the same register as index unless it is also src; dest must not
;;     be rcx.
%macro	SHLX	3
%ifdef USE_HSWNI
	shlx	%1, %2, %3
%else
	MOVNIDN	%1, %2
	MOVNIDN	rcx, %3			; variable shift count lives in cl
	shl	%1, cl
%endif
%endmacro
354
;; PINSRD dest, src, offset
;; Insert the dword src into lane "offset" of dest. Emits vpinsrd when
;; ARCH is 02, 03 or 04, otherwise pinsrd.
%macro	PINSRD	3
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vpinsrd	%1, %2, %3
%else
	pinsrd	%1, %2, %3
%endif
%endmacro
365
;; PEXTRD dest, src, offset
;; Extract dword lane "offset" of src into dest. Emits vpextrd when ARCH
;; is 02, 03 or 04, otherwise pextrd.
%macro	PEXTRD	3
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vpextrd	%1, %2, %3
%else
	pextrd	%1, %2, %3
%endif
%endmacro
376
;; PSRLDQ dest, offset
;; Shift dest right by "offset" bytes, in place. Emits the two-operand
;; vpsrldq when ARCH is 02, 03 or 04, otherwise psrldq.
%macro	PSRLDQ	2
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vpsrldq	%1, %2
%else
	psrldq	%1, %2
%endif
%endmacro
386
;; PSLLD dest, src, offset
;; Three-operand dword shift left: dest = src with each dword shifted left
;; by "offset". Emits vpslld when ARCH is 02, 03 or 04. Otherwise src is
;; first copied into dest (skipped when they are the same operand) and the
;; two-operand pslld is applied.
;; On the SSE path dest is written before offset is read, so a register
;; offset must not be the same register as dest unless dest is also src.
%macro	PSLLD	3
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vpslld	%1, %2, %3
%else
	MOVDQANIDN %1, %2
	pslld	%1, %3
%endif
%endmacro
398
;; PAND dest, src1, src2
;; Three-operand bitwise AND. Emits vpand when ARCH is 02, 03 or 04.
;; Otherwise src1 is first copied into dest (skipped when they are the
;; same operand) and the two-operand pand is applied.
;; On the SSE path dest is written before src2 is read, so dest must not
;; be the same register as src2 unless it is also src1.
%macro	PAND	3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpand	%1, %2, %3
%else
	MOVDQANIDN %1, %2
	pand	%1, %3
%endif
%endmacro
410
;; POR dest, src1, src2
;; Three-operand bitwise OR. Emits vpor when ARCH is 02, 03 or 04.
;; Otherwise src1 is first copied into dest (skipped when they are the
;; same operand) and the two-operand por is applied.
;; On the SSE path dest is written before src2 is read, so dest must not
;; be the same register as src2 unless it is also src1.
%macro	POR	3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpor	%1, %2, %3
%else
	MOVDQANIDN %1, %2
	por	%1, %3
%endif
%endmacro
422
;; PXOR dest, src1, src2
;; Three-operand bitwise XOR. Emits vpxor when ARCH is 02, 03 or 04.
;; Otherwise src1 is first copied into dest (skipped when they are the
;; same operand) and the two-operand pxor is applied.
;; On the SSE path dest is written before src2 is read, so dest must not
;; be the same register as src2 unless it is also src1.
%macro PXOR	3
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vpxor	%1, %2, %3
%else
	MOVDQANIDN %1, %2
	pxor	%1, %3
%endif
%endmacro
434
;; PADDD dest, src1, src2
;; Three-operand packed dword add. Emits vpaddd when ARCH is 02, 03 or 04.
;; Otherwise src1 is first copied into dest (skipped when they are the
;; same operand) and the two-operand paddd is applied.
;; On the SSE path dest is written before src2 is read, so dest must not
;; be the same register as src2 unless it is also src1.
%macro PADDD 3
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vpaddd	%1, %2, %3
%else
	MOVDQANIDN %1, %2
	paddd	%1, %3
%endif
%endmacro
446
;; PCMPEQB dest, src1, src2
;; Three-operand packed byte compare-for-equality. Emits vpcmpeqb when
;; ARCH is 02, 03 or 04. Otherwise src1 is first copied into dest (skipped
;; when they are the same operand) and the two-operand pcmpeqb is applied.
;; On the SSE path dest is written before src2 is read, so dest must not
;; be the same register as src2 unless it is also src1.
%macro	PCMPEQB	3
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vpcmpeqb	%1, %2, %3
%else
	MOVDQANIDN %1, %2
	pcmpeqb	%1, %3
%endif
%endmacro
458
;; PMOVMSKB dest, src
;; Collect the top bit of every byte of src into dest. Emits vpmovmskb
;; when ARCH is 02, 03 or 04, otherwise pmovmskb.
%macro	PMOVMSKB	2
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vpmovmskb	%1, %2
%else
	pmovmskb	%1, %2
%endif
%endmacro
468
469%endif 	;; ifndef STDMAC_ASM
470