xref: /isa-l_crypto/sha512_mb/sha512_multibinary.asm (revision d28f1034f736e3eb791c3cf6bff3e2fa81fb5331)
1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3;
4;  Redistribution and use in source and binary forms, with or without
5;  modification, are permitted provided that the following conditions
6;  are met:
7;    * Redistributions of source code must retain the above copyright
8;      notice, this list of conditions and the following disclaimer.
9;    * Redistributions in binary form must reproduce the above copyright
10;      notice, this list of conditions and the following disclaimer in
11;      the documentation and/or other materials provided with the
12;      distribution.
13;    * Neither the name of Intel Corporation nor the names of its
14;      contributors may be used to endorse or promote products derived
15;      from this software without specific prior written permission.
16;
17;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30%include "reg_sizes.asm"
31%include "multibinary.asm"
32
;;;;;
; mbin_dispatch_init_avoton parameters
; Emits %1_dispatch_init, a routine that picks one implementation at run
; time and stores its address in %1_dispatched.
; Use this macro when SSE/00/01 is the minimum requirement.
; When the CPUID.1 EAX value matches Avoton, the Avoton function is chosen
; instead of the SSE one and no further feature checks are made.
; 1-> function name
; 2-> SSE/00/01 optimized function used as base
; 3-> AVX or AVX/02 opt func
; 4-> AVX2 or AVX/04 opt func
; 5-> AVOTON opt func
;
; Selection, in order:
;   %2 by default
;   %5 on Avoton (done)
;   %3 when CPUID.1:ECX reports both AVX and OSXSAVE (otherwise done)
;   %4 when CPUID.7:EBX also reports AVX2
;   %2 again when XGETBV shows XMM/YMM state is not enabled
; Every register written here is saved and restored; flags are not preserved.
;;;;;
%macro mbin_dispatch_init_avoton 5
	section .text
	%1_dispatch_init:
		;; cpuid writes eax/ebx/ecx/edx, xgetbv writes eax/edx,
		;; rsi/rdi are used as pointer scratch: save them all
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx
		push	mbin_rdi

		;; rsi holds the current choice; start with SSE 00/01
		lea	mbin_rsi, [%2 WRT_OPT]

		;; Avoton check on the masked CPUID.1 EAX value
		mov	eax, 1
		cpuid
		and	eax, FLAG_CPUID1_EAX_STEP_MASK
		lea	mbin_rdi, [%5 WRT_OPT]	; lea does not touch the flags
		cmp	eax, FLAG_CPUID1_EAX_AVOTON
		cmove	mbin_rsi, mbin_rdi	; Avoton: take the Avoton function ...
		je	_%1_init_done		; ... and skip the remaining checks

		;; AVX requires both the AVX and the OSXSAVE bit in CPUID.1:ECX
		lea	mbin_rbx, [%3 WRT_OPT]	; AVX (gen2) opt func
		and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		jne	_%1_init_done		; no AVX: keep SSE 00/01
		mov	mbin_rsi, mbin_rbx

		;; AVX2 is reported in CPUID.7 (ecx = 0) EBX
		mov	eax, 7
		xor	ecx, ecx
		cpuid
		test	ebx, FLAG_CPUID7_EBX_AVX2	; must read ebx before it is reused
		lea	mbin_rbx, [%4 WRT_OPT]	; AVX2 (gen4) opt func
		cmovnz	mbin_rsi, mbin_rbx

		;; XMM and YMM state must both be enabled (xgetbv, ecx = 0);
		;; if not, drop back to the SSE 00/01 function
		xor	ecx, ecx
		xgetbv
		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		je	_%1_init_done
		lea	mbin_rsi, [%2 WRT_OPT]

	_%1_init_done:
		mov	[%1_dispatched], mbin_rsi	; record the selected function
		pop	mbin_rdi
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		pop	mbin_rsi
		ret
%endmacro
95
;;;;;
; mbin_dispatch_init6_avoton parameters
; Emits %1_dispatch_init, a routine that picks one of six implementations
; at run time and stores its address in %1_dispatched.
; When the CPUID.1 EAX value matches Avoton (and SSE4_1 is present), the
; Avoton function is chosen instead of the SSE one.
; 1-> function name
; 2-> base function
; 3-> SSE4_1 or 00/01 optimized function
; 4-> AVX/02 opt func
; 5-> AVX2/04 opt func
; 6-> AVX512/06 opt func
; 7-> AVOTON opt func
;
; Each step upgrades the choice and the first failed check ends the search:
;   %2 -> %3 (SSE4_1) -> %7 (Avoton, done)
;      -> %4 (OSXSAVE, XMM/YMM state enabled, AVX)
;      -> %5 (AVX2) -> %6 (ZMM/opmask state enabled, AVX512 G1 feature set)
; Every register written here is saved and restored; flags are not preserved.
;;;;;
%macro mbin_dispatch_init6_avoton 7
	section .text
	%1_dispatch_init:
		;; cpuid writes eax/ebx/ecx/edx, xgetbv writes eax/edx,
		;; rsi/rdi are used as scratch: save them all
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx
		push	mbin_rdi

		;; rsi holds the current choice; start with the base function
		lea	mbin_rsi, [%2 WRT_OPT]

		;; SSE4_1
		mov	eax, 1
		cpuid
		mov	ebx, ecx		; keep a copy of cpuid1.ecx for the AVX test
		test	ecx, FLAG_CPUID1_ECX_SSE4_1
		jz	_%1_init_done		; no SSE4_1: stay on the base function
		lea	mbin_rsi, [%3 WRT_OPT]	; SSE possible so use 00/01 opt

		;; Avoton check on the masked CPUID.1 EAX value
		and	eax, FLAG_CPUID1_EAX_STEP_MASK
		lea	mbin_rdi, [%7 WRT_OPT]	; lea does not touch the flags
		cmp	eax, FLAG_CPUID1_EAX_AVOTON
		cmove	mbin_rsi, mbin_rdi	; Avoton: take the Avoton function ...
		je	_%1_init_done		; ... and skip the remaining checks

		;; XMM/YMM state and AVX. xgetbv is only executed once
		;; OSXSAVE has been confirmed.
		test	ecx, FLAG_CPUID1_ECX_OSXSAVE
		jz	_%1_init_done
		xor	ecx, ecx
		xgetbv				; xcr -> edx:eax
		mov	edi, eax		; save xgetbv.eax for the AVX512 test

		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		jne	_%1_init_done
		test	ebx, FLAG_CPUID1_ECX_AVX
		jz	_%1_init_done
		lea	mbin_rsi, [%4 WRT_OPT]	; AVX/02 opt

		;; AVX2 is reported in CPUID.7 (ecx = 0) EBX
		mov	eax, 7
		xor	ecx, ecx
		cpuid
		test	ebx, FLAG_CPUID7_EBX_AVX2
		jz	_%1_init_done		; No AVX2 possible
		lea	mbin_rsi, [%5 WRT_OPT]	; AVX2/04 opt func

		;; AVX512: ZMM/opmask state enabled (saved xgetbv.eax) and
		;; every bit of the G1 feature set present in CPUID.7:EBX
		and	edi, FLAG_XGETBV_EAX_ZMM_OPM
		cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
		jne	_%1_init_done		; No AVX512 possible
		and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		jne	_%1_init_done
		lea	mbin_rsi, [%6 WRT_OPT]	; AVX512/06 opt

	_%1_init_done:
		mov	[%1_dispatched], mbin_rsi	; record the selected function
		pop	mbin_rdi
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		pop	mbin_rsi
		ret
%endmacro
174
default rel
[bits 64]

; Word-size settings. They are not referenced directly in this file;
; presumably the macros pulled in from multibinary.asm use them (TODO confirm).
%define def_wrd		dq
%define wrd_sz		qword
%define arg1		rsi

; declare the L3 ctx level symbols (these will then call the appropriate
; L2 symbols), grouped by operation

; init
extern _sha512_ctx_mgr_init_base
extern _sha512_ctx_mgr_init_sse
extern _sha512_ctx_mgr_init_avx
extern _sha512_ctx_mgr_init_avx2
extern _sha512_ctx_mgr_init_avx512
extern _sha512_ctx_mgr_init_sb_sse4

; submit
extern _sha512_ctx_mgr_submit_base
extern _sha512_ctx_mgr_submit_sse
extern _sha512_ctx_mgr_submit_avx
extern _sha512_ctx_mgr_submit_avx2
extern _sha512_ctx_mgr_submit_avx512
extern _sha512_ctx_mgr_submit_sb_sse4

; flush
extern _sha512_ctx_mgr_flush_base
extern _sha512_ctx_mgr_flush_sse
extern _sha512_ctx_mgr_flush_avx
extern _sha512_ctx_mgr_flush_avx2
extern _sha512_ctx_mgr_flush_avx512
extern _sha512_ctx_mgr_flush_sb_sse4

;;; *_mbinit is the initial value of *_dispatched, which is updated on the
;;; first call. Therefore, *_dispatch_init is only executed on first call.

; Initialise symbols
mbin_interface _sha512_ctx_mgr_init
mbin_interface _sha512_ctx_mgr_submit
mbin_interface _sha512_ctx_mgr_flush

; One dispatcher per entry point. Argument order for
; mbin_dispatch_init6_avoton:
;   name, base, sse, avx, avx2, avx512, avoton (the sb_sse4 variant)
mbin_dispatch_init6_avoton \
		_sha512_ctx_mgr_init, \
		_sha512_ctx_mgr_init_base, \
		_sha512_ctx_mgr_init_sse, \
		_sha512_ctx_mgr_init_avx, \
		_sha512_ctx_mgr_init_avx2, \
		_sha512_ctx_mgr_init_avx512, \
		_sha512_ctx_mgr_init_sb_sse4

mbin_dispatch_init6_avoton \
		_sha512_ctx_mgr_submit, \
		_sha512_ctx_mgr_submit_base, \
		_sha512_ctx_mgr_submit_sse, \
		_sha512_ctx_mgr_submit_avx, \
		_sha512_ctx_mgr_submit_avx2, \
		_sha512_ctx_mgr_submit_avx512, \
		_sha512_ctx_mgr_submit_sb_sse4

mbin_dispatch_init6_avoton \
		_sha512_ctx_mgr_flush, \
		_sha512_ctx_mgr_flush_base, \
		_sha512_ctx_mgr_flush_sse, \
		_sha512_ctx_mgr_flush_avx, \
		_sha512_ctx_mgr_flush_avx2, \
		_sha512_ctx_mgr_flush_avx512, \
		_sha512_ctx_mgr_flush_sb_sse4

;;;       func				core, ver, snum
slversion _sha512_ctx_mgr_init,		00,   04,  0175
slversion _sha512_ctx_mgr_submit,	00,   04,  0176
slversion _sha512_ctx_mgr_flush,		00,   04,  0177
235slversion _sha512_ctx_mgr_flush,		00,   04,  0177
236