xref: /isa-l/crc/crc_multibinary.asm (revision 9f75defd57b77c5292460a01e4a6f88b476b9953)
1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2;  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
3;
4;  Redistribution and use in source and binary forms, with or without
5;  modification, are permitted provided that the following conditions
6;  are met:
7;    * Redistributions of source code must retain the above copyright
8;      notice, this list of conditions and the following disclaimer.
9;    * Redistributions in binary form must reproduce the above copyright
10;      notice, this list of conditions and the following disclaimer in
11;      the documentation and/or other materials provided with the
12;      distribution.
13;    * Neither the name of Intel Corporation nor the names of its
14;      contributors may be used to endorse or promote products derived
15;      from this software without specific prior written permission.
16;
17;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30default rel
31[bits 64]
32
33%include "reg_sizes.asm"
34
35extern crc32_iscsi_00
36extern crc32_iscsi_01
37extern crc32_iscsi_base
38
39extern crc32_ieee_01
40extern crc32_ieee_by4  ;; Optimized for SLM
41extern crc32_ieee_02
42extern crc32_ieee_base
43
44extern crc16_t10dif_01
45extern crc16_t10dif_by4  ;; Optimized for SLM
46extern crc16_t10dif_02
47extern crc16_t10dif_base
48
49extern crc32_gzip_refl_by8
50extern crc32_gzip_refl_by8_02
51extern crc32_gzip_refl_base
52
53extern crc16_t10dif_copy_by4
54extern crc16_t10dif_copy_by4_02
55extern crc16_t10dif_copy_base
56
57%if (AS_FEATURE_LEVEL) >= 10
58extern crc32_gzip_refl_by16_10
59extern crc32_ieee_by16_10
60extern crc32_iscsi_by16_10
61extern crc16_t10dif_by16_10
62%endif
63
64%include "multibinary.asm"
65
section .data
;;; Dispatch slots, one per multibinary function defined in this file.
;;; Each *_dispatched slot starts out holding the address of the matching
;;; *_mbinit stub and is overwritten with the selected implementation by
;;; *_dispatch_init during the first call.  After that store, calls through
;;; the slot no longer reach *_mbinit.

crc32_iscsi_dispatched:		dq	crc32_iscsi_mbinit
crc32_ieee_dispatched:		dq	crc32_ieee_mbinit
crc16_t10dif_dispatched:	dq	crc16_t10dif_mbinit
78
79section .text
;;;;
; crc32_iscsi multibinary function
;
; crc32_iscsi is the exported entry point; it always jumps through the
; crc32_iscsi_dispatched pointer.  That pointer initially holds the address
; of crc32_iscsi_mbinit, so the first call runs crc32_iscsi_dispatch_init,
; which stores the selected implementation into the pointer.  Execution then
; falls through into crc32_iscsi and jumps to that implementation.
;;;;
mk_global crc32_iscsi, function
crc32_iscsi_mbinit:
	endbranch
	call	crc32_iscsi_dispatch_init	; pick an implementation, then fall through
crc32_iscsi:
	endbranch
	jmp	qword [crc32_iscsi_dispatched]

;;;;
; crc32_iscsi_dispatch_init
;   Picks an implementation and stores its address in crc32_iscsi_dispatched.
;   rax, rbx, rcx, rdx, rsi and rdi are pushed on entry and popped on exit,
;   so the caller's values in those registers are unchanged on return.
;
;   Register roles inside the routine:
;     rsi = address of the current candidate
;     ebx = copy of cpuid(1).ecx; overwritten by cpuid(7) in the AVX512 part
;     edi = copy of the eax value returned by xgetbv
;
;   Candidates, each one replacing the previous when its checks pass:
;     crc32_iscsi_base      default
;     crc32_iscsi_00        FLAG_CPUID1_ECX_SSE4_2 set
;     crc32_iscsi_01        FLAG_CPUID1_ECX_CLMUL set as well
;     crc32_iscsi_by16_10   only assembled when AS_FEATURE_LEVEL >= 10; needs
;                           every OSXSAVE/AVX/AVX2/AVX512 check below to pass
;;;;
crc32_iscsi_dispatch_init:
	push	rax
	push	rbx
	push	rcx
	push	rdx
	push	rsi
	push	rdi

	lea	rsi, [crc32_iscsi_base WRT_OPT]	; candidate: base version

	mov	eax, 1
	cpuid
	mov	ebx, ecx			; keep cpuid(1).ecx; ecx is reused below

	test	ebx, FLAG_CPUID1_ECX_SSE4_2
	jz	.iscsi_commit			; keep crc32_iscsi_base
	lea	rsi, [crc32_iscsi_00 WRT_OPT]

	test	ebx, FLAG_CPUID1_ECX_CLMUL
	jz	.iscsi_commit			; keep crc32_iscsi_00
	lea	rsi, [crc32_iscsi_01 WRT_OPT]

	;; XMM/YMM state and AVX checks.  No AVX-specific crc32_iscsi routine is
	;; selected here, so crc32_iscsi_01 stays the candidate; these checks
	;; only gate the AVX512 selection below.
	test	ebx, FLAG_CPUID1_ECX_OSXSAVE
	jz	.iscsi_commit
	xor	ecx, ecx			; xgetbv register index 0
	xgetbv					; result in edx:eax
	mov	edi, eax			; keep xgetbv.eax for the ZMM check

	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	jne	.iscsi_commit			; not all XMM_YMM bits are set
	test	ebx, FLAG_CPUID1_ECX_AVX
	jz	.iscsi_commit

%if AS_FEATURE_LEVEL >= 10
	;; AVX2 check: cpuid leaf 7, subleaf 0 (overwrites eax, ebx, ecx, edx)
	mov	eax, 7
	xor	ecx, ecx
	cpuid
	test	ebx, FLAG_CPUID7_EBX_AVX2
	jz	.iscsi_commit			; no AVX2, so no AVX512 either

	;; AVX512 checks: ZMM/opmask state bits from the saved xgetbv value,
	;; then both AVX512 flag groups from cpuid(7)
	and	edi, FLAG_XGETBV_EAX_ZMM_OPM
	cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
	jne	.iscsi_commit

	and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	jne	.iscsi_commit

	lea	rbx, [crc32_iscsi_by16_10 WRT_OPT]	; lea leaves the flags untouched
	and	ecx, FLAGS_CPUID7_ECX_AVX512_G2
	cmp	ecx, FLAGS_CPUID7_ECX_AVX512_G2
	cmove	rsi, rbx			; take by16_10 only if all G2 bits are set
%endif

.iscsi_commit:
	mov	qword [crc32_iscsi_dispatched], rsi	; later calls jump straight to the choice
	pop	rdi
	pop	rsi
	pop	rdx
	pop	rcx
	pop	rbx
	pop	rax
	ret
155
;;;;
; crc32_ieee multibinary function
;
; crc32_ieee is the exported entry point; it always jumps through the
; crc32_ieee_dispatched pointer.  That pointer initially holds the address
; of crc32_ieee_mbinit, so the first call runs crc32_ieee_dispatch_init,
; which stores the selected implementation into the pointer.  Execution then
; falls through into crc32_ieee and jumps to that implementation.
;;;;
mk_global crc32_ieee, function
crc32_ieee_mbinit:
	endbranch
	call	crc32_ieee_dispatch_init	; pick an implementation, then fall through
crc32_ieee:
	endbranch
	jmp	qword [crc32_ieee_dispatched]

;;;;
; crc32_ieee_dispatch_init
;   Picks an implementation and stores its address in crc32_ieee_dispatched.
;   rax, rbx, rcx, rdx, rsi and rdi are pushed on entry and popped on exit,
;   so the caller's values in those registers are unchanged on return.
;
;   Register roles inside the routine:
;     rsi = address of the current candidate
;     rdx = address of crc32_ieee_by4 for the Avoton conditional move
;     ebx = copy of cpuid(1).ecx; overwritten by cpuid(7) in the AVX512 part
;     edi = copy of the eax value returned by xgetbv
;
;   Candidates, each one replacing the previous when its checks pass:
;     crc32_ieee_base      default
;     crc32_ieee_01        FLAG_CPUID1_ECX_SSE3 and FLAG_CPUID1_ECX_CLMUL set
;     crc32_ieee_by4       cpuid(1).eax masked with FLAG_CPUID1_EAX_STEP_MASK
;                          equals FLAG_CPUID1_EAX_AVOTON
;     crc32_ieee_02        OSXSAVE, XMM/YMM state and AVX checks pass
;     crc32_ieee_by16_10   only assembled when AS_FEATURE_LEVEL >= 10; needs
;                          the AVX2 and AVX512 checks to pass as well
;;;;
crc32_ieee_dispatch_init:
	push	rax
	push	rbx
	push	rcx
	push	rdx
	push	rsi
	push	rdi

	lea	rsi, [crc32_ieee_base WRT_OPT]	; candidate: base version

	mov	eax, 1
	cpuid
	mov	ebx, ecx			; keep cpuid(1).ecx; ecx is reused below

	test	ebx, FLAG_CPUID1_ECX_SSE3
	jz	.ieee_commit			; keep crc32_ieee_base
	test	ebx, FLAG_CPUID1_ECX_CLMUL
	jz	.ieee_commit			; keep crc32_ieee_base
	lea	rsi, [crc32_ieee_01 WRT_OPT]

	;; Avoton: switch to the by4 variant when the masked cpuid(1).eax value
	;; matches.  eax still holds the cpuid(1) result at this point.
	lea	rdx, [crc32_ieee_by4 WRT_OPT]
	and	eax, FLAG_CPUID1_EAX_STEP_MASK
	cmp	eax, FLAG_CPUID1_EAX_AVOTON
	cmove	rsi, rdx

	;; XMM/YMM state and AVX checks
	test	ebx, FLAG_CPUID1_ECX_OSXSAVE
	jz	.ieee_commit
	xor	ecx, ecx			; xgetbv register index 0
	xgetbv					; result in edx:eax
	mov	edi, eax			; keep xgetbv.eax for the ZMM check

	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	jne	.ieee_commit			; not all XMM_YMM bits are set
	test	ebx, FLAG_CPUID1_ECX_AVX
	jz	.ieee_commit
	lea	rsi, [crc32_ieee_02 WRT_OPT]	; candidate: AVX/02 version

%if AS_FEATURE_LEVEL >= 10
	;; AVX2 check: cpuid leaf 7, subleaf 0 (overwrites eax, ebx, ecx, edx)
	mov	eax, 7
	xor	ecx, ecx
	cpuid
	test	ebx, FLAG_CPUID7_EBX_AVX2
	jz	.ieee_commit			; no AVX2, so no AVX512 either

	;; AVX512 checks: ZMM/opmask state bits from the saved xgetbv value,
	;; then both AVX512 flag groups from cpuid(7)
	and	edi, FLAG_XGETBV_EAX_ZMM_OPM
	cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
	jne	.ieee_commit

	and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	jne	.ieee_commit

	lea	rbx, [crc32_ieee_by16_10 WRT_OPT]	; lea leaves the flags untouched
	and	ecx, FLAGS_CPUID7_ECX_AVX512_G2
	cmp	ecx, FLAGS_CPUID7_ECX_AVX512_G2
	cmove	rsi, rbx			; take by16_10 only if all G2 bits are set
%endif

.ieee_commit:
	mov	qword [crc32_ieee_dispatched], rsi	; later calls jump straight to the choice
	pop	rdi
	pop	rsi
	pop	rdx
	pop	rcx
	pop	rbx
	pop	rax
	ret
236
;;;;
; crc16_t10dif multibinary function
;
; crc16_t10dif is the exported entry point; it always jumps through the
; crc16_t10dif_dispatched pointer.  That pointer initially holds the address
; of crc16_t10dif_mbinit, so the first call runs crc16_t10dif_dispatch_init,
; which stores the selected implementation into the pointer.  Execution then
; falls through into crc16_t10dif and jumps to that implementation.
;;;;
mk_global crc16_t10dif, function
crc16_t10dif_mbinit:
	endbranch
	call	crc16_t10dif_dispatch_init	; pick an implementation, then fall through
crc16_t10dif:
	endbranch
	jmp	qword [crc16_t10dif_dispatched]

;;;;
; crc16_t10dif_dispatch_init
;   Picks an implementation and stores its address in crc16_t10dif_dispatched.
;   rax, rbx, rcx, rdx, rsi and rdi are pushed on entry and popped on exit,
;   so the caller's values in those registers are unchanged on return.
;
;   Register roles inside the routine:
;     rsi = address of the current candidate
;     rdx = address of crc16_t10dif_by4 for the Avoton conditional move
;     ebx = copy of cpuid(1).ecx; overwritten by cpuid(7) in the AVX512 part
;     edi = copy of the eax value returned by xgetbv
;
;   Candidates, each one replacing the previous when its checks pass:
;     crc16_t10dif_base      default
;     crc16_t10dif_01        FLAG_CPUID1_ECX_SSE3 and FLAG_CPUID1_ECX_CLMUL set
;     crc16_t10dif_by4       cpuid(1).eax masked with FLAG_CPUID1_EAX_STEP_MASK
;                            equals FLAG_CPUID1_EAX_AVOTON
;     crc16_t10dif_02        OSXSAVE, XMM/YMM state and AVX checks pass
;     crc16_t10dif_by16_10   only assembled when AS_FEATURE_LEVEL >= 10; needs
;                            the AVX2 and AVX512 checks to pass as well
;;;;
crc16_t10dif_dispatch_init:
	push	rax
	push	rbx
	push	rcx
	push	rdx
	push	rsi
	push	rdi

	lea	rsi, [crc16_t10dif_base WRT_OPT]	; candidate: base version

	mov	eax, 1
	cpuid
	mov	ebx, ecx			; keep cpuid(1).ecx; ecx is reused below

	test	ebx, FLAG_CPUID1_ECX_SSE3
	jz	.t10dif_commit			; keep crc16_t10dif_base
	test	ebx, FLAG_CPUID1_ECX_CLMUL
	jz	.t10dif_commit			; keep crc16_t10dif_base
	lea	rsi, [crc16_t10dif_01 WRT_OPT]

	;; Avoton: switch to the by4 variant when the masked cpuid(1).eax value
	;; matches.  eax still holds the cpuid(1) result at this point.
	lea	rdx, [crc16_t10dif_by4 WRT_OPT]
	and	eax, FLAG_CPUID1_EAX_STEP_MASK
	cmp	eax, FLAG_CPUID1_EAX_AVOTON
	cmove	rsi, rdx

	;; XMM/YMM state and AVX checks
	test	ebx, FLAG_CPUID1_ECX_OSXSAVE
	jz	.t10dif_commit
	xor	ecx, ecx			; xgetbv register index 0
	xgetbv					; result in edx:eax
	mov	edi, eax			; keep xgetbv.eax for the ZMM check

	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	jne	.t10dif_commit			; not all XMM_YMM bits are set
	test	ebx, FLAG_CPUID1_ECX_AVX
	jz	.t10dif_commit
	lea	rsi, [crc16_t10dif_02 WRT_OPT]	; candidate: AVX/02 version

%if AS_FEATURE_LEVEL >= 10
	;; AVX2 check: cpuid leaf 7, subleaf 0 (overwrites eax, ebx, ecx, edx)
	mov	eax, 7
	xor	ecx, ecx
	cpuid
	test	ebx, FLAG_CPUID7_EBX_AVX2
	jz	.t10dif_commit			; no AVX2, so no AVX512 either

	;; AVX512 checks: ZMM/opmask state bits from the saved xgetbv value,
	;; then both AVX512 flag groups from cpuid(7)
	and	edi, FLAG_XGETBV_EAX_ZMM_OPM
	cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
	jne	.t10dif_commit

	and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	jne	.t10dif_commit

	lea	rbx, [crc16_t10dif_by16_10 WRT_OPT]	; lea leaves the flags untouched
	and	ecx, FLAGS_CPUID7_ECX_AVX512_G2
	cmp	ecx, FLAGS_CPUID7_ECX_AVX512_G2
	cmove	rsi, rbx			; take by16_10 only if all G2 bits are set
%endif

.t10dif_commit:
	mov	qword [crc16_t10dif_dispatched], rsi	; later calls jump straight to the choice
	pop	rdi
	pop	rsi
	pop	rdx
	pop	rcx
	pop	rbx
	pop	rax
	ret
317
;;; crc32_gzip_refl and crc16_t10dif_copy are set up through the
;;; mbin_interface / mbin_dispatch_init_clmul macros instead of hand-written
;;; code.  The macro bodies are not in this file (presumably they come from
;;; multibinary.asm -- confirm), so the meaning of each argument position
;;; should be checked there.
;;; NOTE(review): crc32_gzip_refl_by16_10 is only declared extern when
;;; AS_FEATURE_LEVEL >= 10; confirm the macro references its last argument
;;; under the same condition.
mbin_interface crc32_gzip_refl
mbin_dispatch_init_clmul crc32_gzip_refl, \
	crc32_gzip_refl_base, \
	crc32_gzip_refl_by8, \
	crc32_gzip_refl_by8_02, \
	crc32_gzip_refl_by16_10

;;; crc16_t10dif_copy passes crc16_t10dif_copy_by4_02 for both of the last
;;; two arguments.
mbin_interface crc16_t10dif_copy
mbin_dispatch_init_clmul crc16_t10dif_copy, \
	crc16_t10dif_copy_base, \
	crc16_t10dif_copy_by4, \
	crc16_t10dif_copy_by4_02, \
	crc16_t10dif_copy_by4_02
323