1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2; Copyright(c) 2011-2015 Intel Corporation All rights reserved. 3; 4; Redistribution and use in source and binary forms, with or without 5; modification, are permitted provided that the following conditions 6; are met: 7; * Redistributions of source code must retain the above copyright 8; notice, this list of conditions and the following disclaimer. 9; * Redistributions in binary form must reproduce the above copyright 10; notice, this list of conditions and the following disclaimer in 11; the documentation and/or other materials provided with the 12; distribution. 13; * Neither the name of Intel Corporation nor the names of its 14; contributors may be used to endorse or promote products derived 15; from this software without specific prior written permission. 16; 17; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 29 30default rel 31[bits 64] 32 33%include "reg_sizes.asm" 34 35extern crc32_iscsi_00 36extern crc32_iscsi_01 37extern crc32_iscsi_base 38 39extern crc32_ieee_01 40extern crc32_ieee_by4 ;; Optimized for SLM 41extern crc32_ieee_02 42extern crc32_ieee_base 43 44extern crc16_t10dif_01 45extern crc16_t10dif_by4 ;; Optimized for SLM 46extern crc16_t10dif_02 47extern crc16_t10dif_base 48 49extern crc32_gzip_refl_by8 50extern crc32_gzip_refl_by8_02 51extern crc32_gzip_refl_base 52 53extern crc16_t10dif_copy_by4 54extern crc16_t10dif_copy_by4_02 55extern crc16_t10dif_copy_base 56 57%if (AS_FEATURE_LEVEL) >= 10 58extern crc32_gzip_refl_by16_10 59extern crc32_ieee_by16_10 60extern crc32_iscsi_by16_10 61extern crc16_t10dif_by16_10 62%endif 63 64%include "multibinary.asm" 65 66section .data 67;;; *_mbinit are initial values for *_dispatched; is updated on first call. 68;;; Therefore, *_dispatch_init is only executed on first call. 
;;; Dispatch slots. Each slot starts out holding the address of its *_mbinit
;;; stub; the matching *_dispatch_init routine below overwrites the slot with
;;; the address of the implementation it selected.
crc32_iscsi_dispatched:
        dq      crc32_iscsi_mbinit

crc32_ieee_dispatched:
        dq      crc32_ieee_mbinit

crc16_t10dif_dispatched:
        dq      crc16_t10dif_mbinit

section .text

;;;;
; crc32_iscsi multibinary function
;
;   crc32_iscsi         exported entry; jumps through crc32_iscsi_dispatched.
;   crc32_iscsi_mbinit  initial slot value; calls the selector and then
;                       falls through into crc32_iscsi, which re-reads the
;                       slot the selector has just written.
;;;;
mk_global crc32_iscsi, function
crc32_iscsi_mbinit:
        endbranch
        call    crc32_iscsi_dispatch_init
crc32_iscsi:
        endbranch
        jmp     qword [crc32_iscsi_dispatched]

;;;;
; crc32_iscsi_dispatch_init
;   Picks an implementation from CPUID / XGETBV results and stores its
;   address in crc32_iscsi_dispatched.  Candidates, weakest first:
;     crc32_iscsi_base      default
;     crc32_iscsi_00        FLAG_CPUID1_ECX_SSE4_2 set
;     crc32_iscsi_01        ... and FLAG_CPUID1_ECX_CLMUL set
;     crc32_iscsi_by16_10   only assembled when AS_FEATURE_LEVEL >= 10, and
;                           only chosen when every OSXSAVE / XMM_YMM / AVX /
;                           AVX2 / ZMM_OPM / AVX512_G1 / AVX512_G2 test passes
;   rax, rbx, rcx, rdx, rsi and rdi are pushed on entry and popped on exit.
;;;;
crc32_iscsi_dispatch_init:
        push    rax
        push    rbx
        push    rcx
        push    rdx
        push    rsi
        push    rdi

        lea     rsi, [crc32_iscsi_base WRT_OPT]         ; rsi = current choice

        mov     eax, 1
        cpuid
        mov     ebx, ecx                                ; ebx = copy of cpuid(1).ecx
        test    ecx, FLAG_CPUID1_ECX_SSE4_2
        jz      .iscsi_select_done                      ; keep crc32_iscsi_base
        lea     rsi, [crc32_iscsi_00 WRT_OPT]
        test    ecx, FLAG_CPUID1_ECX_CLMUL
        jz      .iscsi_select_done                      ; keep crc32_iscsi_00
        lea     rsi, [crc32_iscsi_01 WRT_OPT]

        ;; OS-enabled XMM/YMM state plus AVX.  No AVX-only variant is loaded
        ;; in this routine; these tests only gate the AVX512 section below.
        test    ecx, FLAG_CPUID1_ECX_OSXSAVE
        jz      .iscsi_select_done
        xor     ecx, ecx
        xgetbv                                          ; edx:eax = XCR[ecx = 0]
        mov     edi, eax                                ; edi = copy of xgetbv eax

        and     eax, FLAG_XGETBV_EAX_XMM_YMM
        cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
        jne     .iscsi_select_done                      ; not all XMM_YMM bits set
        test    ebx, FLAG_CPUID1_ECX_AVX
        jz      .iscsi_select_done

%if AS_FEATURE_LEVEL >= 10
        ;; CPUID leaf 7, sub-leaf 0
        mov     eax, 7
        xor     ecx, ecx
        cpuid
        test    ebx, FLAG_CPUID7_EBX_AVX2
        jz      .iscsi_select_done                      ; no AVX2

        ;; Every bit of each mask must be set for the by16_10 variant
        and     edi, FLAG_XGETBV_EAX_ZMM_OPM
        cmp     edi, FLAG_XGETBV_EAX_ZMM_OPM
        jne     .iscsi_select_done
        and     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        cmp     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        jne     .iscsi_select_done

        lea     rbx, [crc32_iscsi_by16_10 WRT_OPT]      ; ebx flags no longer needed
        and     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        cmp     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        cmove   rsi, rbx                                ; take by16_10 only on full G2 match
%endif

.iscsi_select_done:
        mov     [crc32_iscsi_dispatched], rsi           ; publish the choice
        pop     rdi
        pop     rsi
        pop     rdx
        pop     rcx
        pop     rbx
        pop     rax
        ret

;;;;
; crc32_ieee multibinary function
;
;   crc32_ieee          exported entry; jumps through crc32_ieee_dispatched.
;   crc32_ieee_mbinit   initial slot value; calls the selector and then
;                       falls through into crc32_ieee.
;;;;
mk_global crc32_ieee, function
crc32_ieee_mbinit:
        endbranch
        call    crc32_ieee_dispatch_init
crc32_ieee:
        endbranch
        jmp     qword [crc32_ieee_dispatched]

;;;;
; crc32_ieee_dispatch_init
;   Picks an implementation from CPUID / XGETBV results and stores its
;   address in crc32_ieee_dispatched.  Candidates, weakest first:
;     crc32_ieee_base       default
;     crc32_ieee_01         FLAG_CPUID1_ECX_SSE3 and FLAG_CPUID1_ECX_CLMUL set
;     crc32_ieee_by4        replaces _01 when the masked cpuid(1).eax value
;                           equals FLAG_CPUID1_EAX_AVOTON
;     crc32_ieee_02         OSXSAVE, all XMM_YMM bits and AVX present
;     crc32_ieee_by16_10    only assembled when AS_FEATURE_LEVEL >= 10, and
;                           only chosen when every AVX2 / ZMM_OPM /
;                           AVX512_G1 / AVX512_G2 test passes
;   rax, rbx, rcx, rdx, rsi and rdi are pushed on entry and popped on exit.
;;;;
crc32_ieee_dispatch_init:
        push    rax
        push    rbx
        push    rcx
        push    rdx
        push    rsi
        push    rdi

        lea     rsi, [crc32_ieee_base WRT_OPT]          ; rsi = current choice

        mov     eax, 1
        cpuid
        mov     ebx, ecx                                ; ebx = copy of cpuid(1).ecx
        test    ecx, FLAG_CPUID1_ECX_SSE3
        jz      .ieee_select_done                       ; keep crc32_ieee_base
        test    ecx, FLAG_CPUID1_ECX_CLMUL
        jz      .ieee_select_done                       ; keep crc32_ieee_base
        lea     rsi, [crc32_ieee_01 WRT_OPT]

        ;; Avoton special case: compare cpuid(1).eax, masked with
        ;; FLAG_CPUID1_EAX_STEP_MASK, against the Avoton value.
        lea     rdx, [crc32_ieee_by4 WRT_OPT]
        and     eax, FLAG_CPUID1_EAX_STEP_MASK
        cmp     eax, FLAG_CPUID1_EAX_AVOTON
        cmove   rsi, rdx

        ;; OS-enabled XMM/YMM state plus AVX
        test    ecx, FLAG_CPUID1_ECX_OSXSAVE
        jz      .ieee_select_done
        xor     ecx, ecx
        xgetbv                                          ; edx:eax = XCR[ecx = 0]
        mov     edi, eax                                ; edi = copy of xgetbv eax

        and     eax, FLAG_XGETBV_EAX_XMM_YMM
        cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
        jne     .ieee_select_done                       ; not all XMM_YMM bits set
        test    ebx, FLAG_CPUID1_ECX_AVX
        jz      .ieee_select_done
        lea     rsi, [crc32_ieee_02 WRT_OPT]            ; AVX/02 variant

%if AS_FEATURE_LEVEL >= 10
        ;; CPUID leaf 7, sub-leaf 0
        mov     eax, 7
        xor     ecx, ecx
        cpuid
        test    ebx, FLAG_CPUID7_EBX_AVX2
        jz      .ieee_select_done                       ; no AVX2

        ;; Every bit of each mask must be set for the by16_10 variant
        and     edi, FLAG_XGETBV_EAX_ZMM_OPM
        cmp     edi, FLAG_XGETBV_EAX_ZMM_OPM
        jne     .ieee_select_done
        and     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        cmp     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        jne     .ieee_select_done

        lea     rbx, [crc32_ieee_by16_10 WRT_OPT]       ; ebx flags no longer needed
        and     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        cmp     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        cmove   rsi, rbx                                ; take by16_10 only on full G2 match
%endif

.ieee_select_done:
        mov     [crc32_ieee_dispatched], rsi            ; publish the choice
        pop     rdi
        pop     rsi
        pop     rdx
        pop     rcx
        pop     rbx
        pop     rax
        ret

;;;;
; crc16_t10dif multibinary function
;
;   crc16_t10dif         exported entry; jumps through crc16_t10dif_dispatched.
;   crc16_t10dif_mbinit  initial slot value; calls the selector and then
;                        falls through into crc16_t10dif.
;;;;
mk_global crc16_t10dif, function
crc16_t10dif_mbinit:
        endbranch
        call    crc16_t10dif_dispatch_init
crc16_t10dif:
        endbranch
        jmp     qword [crc16_t10dif_dispatched]

;;;;
; crc16_t10dif_dispatch_init
;   Same selection ladder as crc32_ieee_dispatch_init, applied to the
;   crc16_t10dif_* symbols; the result goes to crc16_t10dif_dispatched.
;     crc16_t10dif_base     default
;     crc16_t10dif_01       FLAG_CPUID1_ECX_SSE3 and FLAG_CPUID1_ECX_CLMUL set
;     crc16_t10dif_by4      replaces _01 when the masked cpuid(1).eax value
;                           equals FLAG_CPUID1_EAX_AVOTON
;     crc16_t10dif_02       OSXSAVE, all XMM_YMM bits and AVX present
;     crc16_t10dif_by16_10  only assembled when AS_FEATURE_LEVEL >= 10, and
;                           only chosen when every AVX2 / ZMM_OPM /
;                           AVX512_G1 / AVX512_G2 test passes
;   rax, rbx, rcx, rdx, rsi and rdi are pushed on entry and popped on exit.
;;;;
crc16_t10dif_dispatch_init:
        push    rax
        push    rbx
        push    rcx
        push    rdx
        push    rsi
        push    rdi

        lea     rsi, [crc16_t10dif_base WRT_OPT]        ; rsi = current choice

        mov     eax, 1
        cpuid
        mov     ebx, ecx                                ; ebx = copy of cpuid(1).ecx
        test    ecx, FLAG_CPUID1_ECX_SSE3
        jz      .t10dif_select_done                     ; keep crc16_t10dif_base
        test    ecx, FLAG_CPUID1_ECX_CLMUL
        jz      .t10dif_select_done                     ; keep crc16_t10dif_base
        lea     rsi, [crc16_t10dif_01 WRT_OPT]

        ;; Avoton special case: compare cpuid(1).eax, masked with
        ;; FLAG_CPUID1_EAX_STEP_MASK, against the Avoton value.
        lea     rdx, [crc16_t10dif_by4 WRT_OPT]
        and     eax, FLAG_CPUID1_EAX_STEP_MASK
        cmp     eax, FLAG_CPUID1_EAX_AVOTON
        cmove   rsi, rdx

        ;; OS-enabled XMM/YMM state plus AVX
        test    ecx, FLAG_CPUID1_ECX_OSXSAVE
        jz      .t10dif_select_done
        xor     ecx, ecx
        xgetbv                                          ; edx:eax = XCR[ecx = 0]
        mov     edi, eax                                ; edi = copy of xgetbv eax

        and     eax, FLAG_XGETBV_EAX_XMM_YMM
        cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
        jne     .t10dif_select_done                     ; not all XMM_YMM bits set
        test    ebx, FLAG_CPUID1_ECX_AVX
        jz      .t10dif_select_done
        lea     rsi, [crc16_t10dif_02 WRT_OPT]          ; AVX/02 variant

%if AS_FEATURE_LEVEL >= 10
        ;; CPUID leaf 7, sub-leaf 0
        mov     eax, 7
        xor     ecx, ecx
        cpuid
        test    ebx, FLAG_CPUID7_EBX_AVX2
        jz      .t10dif_select_done                     ; no AVX2

        ;; Every bit of each mask must be set for the by16_10 variant
        and     edi, FLAG_XGETBV_EAX_ZMM_OPM
        cmp     edi, FLAG_XGETBV_EAX_ZMM_OPM
        jne     .t10dif_select_done
        and     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        cmp     ebx, FLAGS_CPUID7_EBX_AVX512_G1
        jne     .t10dif_select_done

        lea     rbx, [crc16_t10dif_by16_10 WRT_OPT]     ; ebx flags no longer needed
        and     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        cmp     ecx, FLAGS_CPUID7_ECX_AVX512_G2
        cmove   rsi, rbx                                ; take by16_10 only on full G2 match
%endif

.t10dif_select_done:
        mov     [crc16_t10dif_dispatched], rsi          ; publish the choice
        pop     rdi
        pop     rsi
        pop     rdx
        pop     rcx
        pop     rbx
        pop     rax
        ret

;; crc32_gzip_refl goes through the shared macros from multibinary.asm
;; instead of a hand-written dispatcher.
;; NOTE(review): the macro bodies are not visible in this file; confirm what
;; mbin_interface emits against multibinary.asm.
mbin_interface                  crc32_gzip_refl
;; Dispatcher for crc32_gzip_refl.  Arguments: function name, then the
;; base, by8, by8_02 and by16_10 implementations.
;; NOTE(review): crc32_gzip_refl_by16_10 is only declared extern when
;; AS_FEATURE_LEVEL >= 10; presumably the macro guards its use of the last
;; argument the same way -- confirm against multibinary.asm.
mbin_dispatch_init_clmul        crc32_gzip_refl, crc32_gzip_refl_base, crc32_gzip_refl_by8, crc32_gzip_refl_by8_02, crc32_gzip_refl_by16_10

;; crc16_t10dif_copy uses the same pair of macros.  The fourth and fifth
;; arguments are both crc16_t10dif_copy_by4_02; this file declares no
;; by16_10 symbol for crc16_t10dif_copy.
mbin_interface                  crc16_t10dif_copy
mbin_dispatch_init_clmul        crc16_t10dif_copy, crc16_t10dif_copy_base, crc16_t10dif_copy_by4, crc16_t10dif_copy_by4_02, crc16_t10dif_copy_by4_02