; xref: /isa-l_crypto/include/multibinary.asm (revision 860585444755e926bd72600b39758458c0a1c9da)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2019 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30%ifndef _MULTIBINARY_ASM_
31%define _MULTIBINARY_ASM_
32
;;;;
; Pointer-width aliases used by every macro in this file.
;   elf32 output format    -> dd / dword and the 32-bit registers
;   any other output format -> dq / qword and the 64-bit registers
;;;;
%ifnidn __OUTPUT_FORMAT__, elf32
 %define mbin_def_ptr	dq
 %define mbin_ptr_sz	qword
 %define mbin_rax	rax
 %define mbin_rbx	rbx
 %define mbin_rcx	rcx
 %define mbin_rdx	rdx
 %define mbin_rsi	rsi
 %define mbin_rdi	rdi
%else
 %define mbin_def_ptr	dd
 %define mbin_ptr_sz	dword
 %define mbin_rax	eax
 %define mbin_rbx	ebx
 %define mbin_rcx	ecx
 %define mbin_rdx	edx
 %define mbin_rsi	esi
 %define mbin_rdi	edi
%endif

;;;;
; AS_FEATURE_LEVEL defaults to 4 unless it was already defined before
; this point. The %if tests further down compare it against 6 and 10.
;;;;
%ifndef AS_FEATURE_LEVEL
 %define AS_FEATURE_LEVEL 4
%endif
56
;;;;
; mbin_interface: multibinary entry-point macro
; 1-> function name
;
; For function <name> this emits:
;   <name>_dispatched  pointer-sized slot in .data, initialised to the
;                      address of <name>_mbinit
;   <name>_mbinit      stub that calls <name>_dispatch_init and then
;                      falls through into <name>
;   <name>             entry point declared through mk_global; performs
;                      an indirect jump through <name>_dispatched
;
; <name>_dispatch_init and mk_global are not defined by this macro.
; The macro leaves the assembler in section .text.
;;;;
%macro mbin_interface 1
	section .data
	%1_dispatched:	mbin_def_ptr	%1_mbinit

	section .text
	mk_global %1, function

	%1_mbinit:
		;; the pointer slot starts out aimed here, so the init
		;; routine runs before the first real dispatch
		call	%1_dispatch_init
		;; no ret on purpose: continue into the entry point
	%1:
		jmp	mbin_ptr_sz [%1_dispatched]
%endmacro
80
;;;;;
; mbin_dispatch_init parameters
; Use this macro when the SSE/00/01 function is the minimum requirement
; 1-> function name
; 2-> SSE/00/01 optimized function used as base
; 3-> AVX/02 opt func
; 4-> AVX2/04 opt func
;
; Emits <name>_dispatch_init, which stores one of the three function
; addresses into <name>_dispatched:
;   - the base, unless CPUID.1:ECX reports both AVX and OSXSAVE
;   - otherwise the AVX func, upgraded to the AVX2 func when
;     CPUID.7:EBX reports AVX2
;   - back to the base when XGETBV(0) lacks the XMM/YMM state bits
; Every general-purpose register it writes is pushed and restored;
; the flags are not preserved.
;;;;;
%macro mbin_dispatch_init 4
	section .text
	%1_dispatch_init:
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx

		;; rsi holds the current choice, starting with the base
		lea	mbin_rsi, [%2 WRT_OPT]

		;; AVX and OSXSAVE must both be set to go any further
		mov	eax, 1
		cpuid
		and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		jnz	_%1_init_done
		lea	mbin_rsi, [%3 WRT_OPT]

		;; Upgrade to the AVX2 function when leaf 7 reports AVX2
		mov	eax, 7
		xor	ecx, ecx
		cpuid
		lea	mbin_rcx, [%4 WRT_OPT]	; lea leaves flags and ebx alone
		test	ebx, FLAG_CPUID7_EBX_AVX2
		cmovnz	mbin_rsi, mbin_rcx

		;; XCR0 must show XMM and YMM state enabled, else use the base
		xor	ecx, ecx
		xgetbv
		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		jz	_%1_init_done
		lea	mbin_rsi, [%2 WRT_OPT]

	_%1_init_done:
		mov	[%1_dispatched], mbin_rsi
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		pop	mbin_rsi
		ret
%endmacro
132
;;;;;
; mbin_dispatch_init2 parameters
;  For functions that only have a base implementation
; 1-> function name
; 2-> base function
;
; Emits <name>_dispatch_init, which unconditionally stores the address
; of the base function into <name>_dispatched. No CPU feature is
; queried. The scratch register is saved and restored.
;;;;;
%macro mbin_dispatch_init2 2
	section .text
	%1_dispatch_init:
		push	mbin_rsi			; keep the caller value
		lea	mbin_rsi, [%2 WRT_OPT]		; only candidate
		mov	[%1_dispatched], mbin_rsi
		pop	mbin_rsi
		ret
%endmacro
148
;;;;;
; mbin_dispatch_init5 parameters
; 1-> function name
; 2-> base function
; 3-> SSE4_1 or 00/01 optimized function
; 4-> AVX/02 opt func
; 5-> AVX2/04 opt func
;
; Emits <name>_dispatch_init, which stores the chosen address into
; <name>_dispatched:
;   - base, or the SSE func when CPUID.1:ECX reports SSE4.1
;   - AVX func when CPUID.1:ECX reports both AVX and OSXSAVE,
;     upgraded to the AVX2 func when CPUID.7:EBX reports AVX2
;   - the SSE func again when XGETBV(0) lacks the XMM/YMM state bits
; Every general-purpose register it writes is pushed and restored;
; the flags are not preserved.
;;;;;
%macro mbin_dispatch_init5 5
	section .text
	%1_dispatch_init:
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx

		;; rsi holds the current choice, starting with the base
		lea	mbin_rsi, [%2 WRT_OPT]

		mov	eax, 1
		cpuid

		;; SSE4.1 present: move up to the SSE function
		lea	mbin_rbx, [%3 WRT_OPT]	; lea leaves flags and ecx alone
		test	ecx, FLAG_CPUID1_ECX_SSE4_1
		cmovnz	mbin_rsi, mbin_rbx

		;; AVX and OSXSAVE must both be set to go any further
		and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		jnz	_%1_init_done
		lea	mbin_rsi, [%4 WRT_OPT]

		;; Upgrade to the AVX2 function when leaf 7 reports AVX2
		mov	eax, 7
		xor	ecx, ecx
		cpuid
		lea	mbin_rcx, [%5 WRT_OPT]
		test	ebx, FLAG_CPUID7_EBX_AVX2
		cmovnz	mbin_rsi, mbin_rcx

		;; XCR0 must show XMM and YMM state enabled, else use SSE
		xor	ecx, ecx
		xgetbv
		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		jz	_%1_init_done
		lea	mbin_rsi, [%3 WRT_OPT]

	_%1_init_done:
		mov	[%1_dispatched], mbin_rsi
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		pop	mbin_rsi
		ret
%endmacro
205
%if AS_FEATURE_LEVEL >= 6
;;;;;
; mbin_dispatch_init6 parameters
; 1-> function name
; 2-> base function
; 3-> SSE4_1 or 00/01 optimized function
; 4-> AVX/02 opt func
; 5-> AVX2/04 opt func
; 6-> AVX512/06 opt func
;
; Emits <name>_dispatch_init, which walks up the tiers in order and
; stops at the first requirement that is not met:
;   base -> SSE (CPUID.1:ECX SSE4.1)
;        -> AVX (OSXSAVE, XCR0 XMM/YMM bits, CPUID.1:ECX AVX)
;        -> AVX2 (CPUID.7:EBX AVX2)
;        -> AVX512 (XCR0 ZMM/opmask bits, CPUID.7:EBX AVX512_G1 set)
; The last tier reached is stored into <name>_dispatched.
; Every general-purpose register it writes is pushed and restored;
; the flags are not preserved.
;;;;;
%macro mbin_dispatch_init6 6
	section .text
	%1_dispatch_init:
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx
		push	mbin_rdi

		;; rsi holds the current choice, starting with the base
		lea	mbin_rsi, [%2 WRT_OPT]

		mov	eax, 1
		cpuid
		mov	ebx, ecx		; ebx = copy of cpuid1.ecx
		test	ebx, FLAG_CPUID1_ECX_SSE4_1
		jz	_%1_init_done		; no SSE4_1: stay on base
		lea	mbin_rsi, [%3 WRT_OPT]

		;; XGETBV may only be executed when OSXSAVE is set
		test	ebx, FLAG_CPUID1_ECX_OSXSAVE
		jz	_%1_init_done
		xor	ecx, ecx
		xgetbv				; xcr0 -> edx:eax
		mov	edi, eax		; edi = copy of xcr0 low half

		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		jnz	_%1_init_done
		test	ebx, FLAG_CPUID1_ECX_AVX
		jz	_%1_init_done
		lea	mbin_rsi, [%4 WRT_OPT]

		;; AVX2
		mov	eax, 7
		xor	ecx, ecx
		cpuid
		test	ebx, FLAG_CPUID7_EBX_AVX2
		jz	_%1_init_done
		lea	mbin_rsi, [%5 WRT_OPT]

		;; AVX512: ZMM/opmask state enabled and all G1 bits set
		and	edi, FLAG_XGETBV_EAX_ZMM_OPM
		cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
		jnz	_%1_init_done
		and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		jnz	_%1_init_done
		lea	mbin_rsi, [%6 WRT_OPT]

	_%1_init_done:
		mov	[%1_dispatched], mbin_rsi
		pop	mbin_rdi
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		pop	mbin_rsi
		ret
%endmacro

%else
;; Feature level below 6: parameter 6 is ignored and the five-way
;; dispatcher is emitted instead.
%macro mbin_dispatch_init6 6
	mbin_dispatch_init5 %1, %2, %3, %4, %5
%endmacro
%endif
281
%if AS_FEATURE_LEVEL >= 10
;;;;;
; mbin_dispatch_init7 parameters
; 1-> function name
; 2-> base function
; 3-> SSE4_2 or 00/01 optimized function
; 4-> AVX/02 opt func
; 5-> AVX2/04 opt func
; 6-> AVX512/06 opt func
; 7-> AVX512 Update/10 opt func
;
; Emits <name>_dispatch_init, which walks up the tiers in order and
; stops at the first requirement that is not met:
;   base -> SSE (CPUID.1:ECX SSE4.2)
;        -> AVX (OSXSAVE, XCR0 XMM/YMM bits, CPUID.1:ECX AVX)
;        -> AVX2 (CPUID.7:EBX AVX2)
;        -> AVX512 (XCR0 ZMM/opmask bits, CPUID.7:EBX AVX512_G1 set)
;        -> AVX512 Update (CPUID.7:ECX AVX512_G2 set)
; The AVX512 Update tier is only considered after the AVX512 tier has
; been reached, so parameter 7 is never selected on a machine that
; failed the AVX512_G1 test.
; The last tier reached is stored into <name>_dispatched.
; Every general-purpose register it writes is pushed and restored;
; the flags are not preserved.
;;;;;
%macro mbin_dispatch_init7 7
	section .text
	%1_dispatch_init:
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx
		push	mbin_rdi
		lea	mbin_rsi, [%2 WRT_OPT] ; Default - use base function

		mov	eax, 1
		cpuid
		mov	ebx, ecx ; save cpuid1.ecx
		test	ecx, FLAG_CPUID1_ECX_SSE4_2
		je	_%1_init_done	  ; Use base function if no SSE4_2
		lea	mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt

		;; Test for XMM_YMM support/AVX
		test	ecx, FLAG_CPUID1_ECX_OSXSAVE
		je	_%1_init_done
		xor	ecx, ecx
		xgetbv	; xcr -> edx:eax
		mov	edi, eax	  ; save xgetbv.eax

		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		jne	_%1_init_done
		test	ebx, FLAG_CPUID1_ECX_AVX
		je	_%1_init_done
		lea	mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt

		;; Test for AVX2
		xor	ecx, ecx
		mov	eax, 7
		cpuid
		test	ebx, FLAG_CPUID7_EBX_AVX2
		je	_%1_init_done		; No AVX2 possible
		lea	mbin_rsi, [%5 WRT_OPT] 	; AVX2/04 opt func

		;; Test for AVX512
		and	edi, FLAG_XGETBV_EAX_ZMM_OPM
		cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
		jne	_%1_init_done	  ; No AVX512 possible
		and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		jne	_%1_init_done	  ; G1 incomplete: stay on AVX2, skip G2
		lea	mbin_rsi, [%6 WRT_OPT] ; AVX512/06 opt

		;; Test for AVX512 Update; ecx still holds cpuid7.ecx
		and	ecx, FLAGS_CPUID7_ECX_AVX512_G2
		cmp	ecx, FLAGS_CPUID7_ECX_AVX512_G2
		jne	_%1_init_done	  ; G2 incomplete: stay on AVX512/06
		lea	mbin_rsi, [%7 WRT_OPT] ; AVX512 Update/10 opt

	_%1_init_done:
		pop	mbin_rdi
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		mov	[%1_dispatched], mbin_rsi
		pop	mbin_rsi
		ret
%endmacro
%else
;; Feature level below 10: parameter 7 is ignored and the six-way
;; dispatcher is emitted instead.
%macro mbin_dispatch_init7 7
	mbin_dispatch_init6 %1, %2, %3, %4, %5, %6
%endmacro
%endif
362
;;;;;
; mbin_dispatch_sse_to_avx2_shani parameters
; derived from mbin_dispatch_init
; Use this macro when the SSE/00/01 function is the minimum requirement
; 1-> function name
; 2-> SSE/00/01 optimized function used as base
; 3-> AVX/02 opt func
; 4-> AVX2/04 opt func
; 5-> SHANI opt for GLM
;
; Emits <name>_dispatch_init, which stores the chosen address into
; <name>_dispatched:
;   - CPUID.1:ECX lacks AVX or OSXSAVE: the SHANI func when
;     CPUID.7:EBX reports SHA, otherwise the base
;   - otherwise the AVX func, upgraded to the AVX2 func when
;     CPUID.7:EBX reports AVX2; back to the base when XGETBV(0)
;     lacks the XMM/YMM state bits
; Every general-purpose register it writes is pushed and restored;
; the flags are not preserved.
;;;;;
%macro mbin_dispatch_sse_to_avx2_shani 5
	section .text
	%1_dispatch_init:
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx

		;; rsi holds the current choice, starting with the base
		lea	mbin_rsi, [%2 WRT_OPT]

		;; Without AVX and OSXSAVE only the SHANI path is left
		mov	eax, 1
		cpuid
		and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		jnz	_%1_shani_check
		lea	mbin_rsi, [%3 WRT_OPT]

		;; Upgrade to the AVX2 function when leaf 7 reports AVX2
		mov	eax, 7
		xor	ecx, ecx
		cpuid
		lea	mbin_rcx, [%4 WRT_OPT]	; lea leaves flags and ebx alone
		test	ebx, FLAG_CPUID7_EBX_AVX2
		cmovnz	mbin_rsi, mbin_rcx

		;; XCR0 must show XMM and YMM state enabled, else use the base
		xor	ecx, ecx
		xgetbv
		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		jz	_%1_init_done
		lea	mbin_rsi, [%2 WRT_OPT]

	_%1_init_done:
		mov	[%1_dispatched], mbin_rsi
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		pop	mbin_rsi
		ret

	_%1_shani_check:
		;; rsi still holds the base here
		mov	eax, 7
		xor	ecx, ecx
		cpuid
		test	ebx, FLAG_CPUID7_EBX_SHA
		jz	_%1_init_done
		lea	mbin_rsi, [%5 WRT_OPT]
		jmp	_%1_init_done
%endmacro
425
;;;;;
; mbin_dispatch_base_to_avx512_shani parameters
; derived from mbin_dispatch_init6
; 1-> function name
; 2-> base function
; 3-> SSE4_2 or 00/01 optimized function
; 4-> AVX/02 opt func
; 5-> AVX2/04 opt func
; 6-> AVX512/06 opt func
; 7-> SHANI opt for GLM
; 8-> SHANI opt for CNL
;
; Emits <name>_dispatch_init, which stores the chosen address into
; <name>_dispatched:
;   - base when CPUID.1:ECX lacks SSE4.2
;   - when OSXSAVE, the XCR0 XMM/YMM bits or AVX is missing: the GLM
;     SHANI func if CPUID.7:EBX reports SHA, otherwise the SSE func
;   - otherwise AVX -> AVX2 (CPUID.7:EBX AVX2) -> AVX512 (XCR0
;     ZMM/opmask bits, CPUID.7:EBX AVX512_G1 set), then the CNL
;     SHANI func if SHA is also reported
; Parameter 8 is only considered once the AVX512 tier has been reached.
; NOTE(review): this assumes the CNL SHANI function relies on the same
; AVX512_G1 features as parameter 6 - confirm against its callers.
; Every general-purpose register it writes is pushed and restored;
; the flags are not preserved.
;;;;;
%macro mbin_dispatch_base_to_avx512_shani 8
	section .text
	%1_dispatch_init:
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx
		push	mbin_rdi
		lea	mbin_rsi, [%2 WRT_OPT] ; Default - use base function

		mov	eax, 1
		cpuid
		mov	ebx, ecx ; save cpuid1.ecx
		test	ecx, FLAG_CPUID1_ECX_SSE4_2
		je	_%1_init_done	  ; Use base function if no SSE4_2
		lea	mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt

		;; Test for XMM_YMM support/AVX
		test	ecx, FLAG_CPUID1_ECX_OSXSAVE
		je	_%1_shani_check
		xor	ecx, ecx
		xgetbv	; xcr -> edx:eax
		mov	edi, eax	  ; save xgetbv.eax

		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		jne	_%1_shani_check
		test	ebx, FLAG_CPUID1_ECX_AVX
		je	_%1_shani_check
		lea	mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt

		;; Test for AVX2
		xor	ecx, ecx
		mov	eax, 7
		cpuid
		test	ebx, FLAG_CPUID7_EBX_AVX2
		je	_%1_init_done		; No AVX2 possible
		lea	mbin_rsi, [%5 WRT_OPT] 	; AVX2/04 opt func

		;; Test for AVX512
		and	edi, FLAG_XGETBV_EAX_ZMM_OPM
		cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
		jne	_%1_init_done	  ; No AVX512 possible
		mov	eax, ebx	  ; mask a copy so ebx keeps cpuid7.ebx
		and	eax, FLAGS_CPUID7_EBX_AVX512_G1
		cmp	eax, FLAGS_CPUID7_EBX_AVX512_G1
		jne	_%1_init_done	  ; G1 incomplete: stay on AVX2
		lea	mbin_rsi, [%6 WRT_OPT] ; AVX512/06 opt

		;; Test for SHANI on the AVX512 tier; ebx is still cpuid7.ebx
		test	ebx, FLAG_CPUID7_EBX_SHA
		je	_%1_init_done
		lea	mbin_rsi, [%8 WRT_OPT] ; SHANI opt for CNL

	_%1_init_done:
		pop	mbin_rdi
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		mov	[%1_dispatched], mbin_rsi
		pop	mbin_rsi
		ret

	_%1_shani_check:
		;; rsi holds the SSE function here
		xor	ecx, ecx
		mov	eax, 7
		cpuid
		test	ebx, FLAG_CPUID7_EBX_SHA
		lea	mbin_rbx, [%7 WRT_OPT] ; SHANI opt for GLM
		cmovne	mbin_rsi, mbin_rbx
		jmp	_%1_init_done ; end
%endmacro
514
515
516
517%endif ; ifndef _MULTIBINARY_ASM_
518