;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%include "reg_sizes.asm"
%include "multibinary.asm"

;;;;;
; mbin_dispatch_init_avoton parameters
; Use this function when SSE/00/01 is a minimum requirement
; if AVOTON is true, then use avoton_func instead of sse_func
;	1-> function name
;	2-> SSE/00/01 optimized function used as base
;	3-> AVX or AVX/02 opt func
;	4-> AVX2 or AVX/04 opt func
;	5-> AVOTON opt func
;
; Emits the routine %1_dispatch_init.  It selects one of %2..%5 from the
; CPUID / XGETBV results, stores the selected address in [%1_dispatched]
; and returns.  All six registers pushed on entry are popped again before
; the ret; the arithmetic flags are not preserved.
;
; NOTE(review): the mbin_r* register aliases, WRT_OPT and the FLAG_*
; constants are not defined in this file - presumably they come from the
; %include'd headers above; confirm there.
;;;;;
%macro mbin_dispatch_init_avoton 5
	section .text
	%1_dispatch_init:
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx
		push	mbin_rdi
		lea	mbin_rsi, [%2 WRT_OPT] ; Default to SSE 00/01

		mov	eax, 1
		cpuid				; leaf 1: eax = signature, ecx = feature bits
		lea	mbin_rdi, [%5 WRT_OPT]	; candidate: Avoton function
		and	eax, FLAG_CPUID1_EAX_STEP_MASK
		cmp	eax, FLAG_CPUID1_EAX_AVOTON
		; If Avoton, set Avoton symbol and exit
		cmove	mbin_rsi, mbin_rdi
		je	_%1_init_done

		; Both AVX and OSXSAVE must be reported before going further
		and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
		; lea does not modify flags, so the jne below still tests the cmp
		lea	mbin_rbx, [%3 WRT_OPT] ; AVX (gen2) opt func
		jne	_%1_init_done ; AVX is not available so end
		mov	mbin_rsi, mbin_rbx

		;; Try for AVX2
		xor	ecx, ecx		; sub-leaf 0
		mov	eax, 7
		cpuid
		test	ebx, FLAG_CPUID7_EBX_AVX2
		; lea does not modify flags, so cmovne still sees the test result
		lea	mbin_rbx, [%4 WRT_OPT] ; AVX (gen4) opt func
		cmovne	mbin_rsi, mbin_rbx

		;; Does it have xmm and ymm support
		xor	ecx, ecx		; select XCR0
		xgetbv				; xcr -> edx:eax
		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		je	_%1_init_done
		; XMM/YMM state not enabled: drop back to the %2 default
		lea	mbin_rsi, [%2 WRT_OPT]

	_%1_init_done:
		pop	mbin_rdi
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		; publish the choice while mbin_rsi still holds it, then restore it
		mov	[%1_dispatched], mbin_rsi
		pop	mbin_rsi
		ret
%endmacro

;;;;;
; mbin_dispatch_init6_avoton parameters
; if AVOTON is true, then use avoton_func instead of sse_func
;	1-> function name
;	2-> base function
;	3-> SSE4_1 or 00/01 optimized function
;	4-> AVX/02 opt func
;	5-> AVX2/04 opt func
;	6-> AVX512/06 opt
;	    func
;	7-> AVOTON opt func
;
; Emits the routine %1_dispatch_init.  Starting from the base function
; (%2) it upgrades the choice step by step - SSE4_1 (%3), Avoton (%7),
; AVX (%4), AVX2 (%5), AVX512 (%6) - leaving at the first test that
; fails, then stores the selected address in [%1_dispatched] and returns.
; All six registers pushed on entry are popped again before the ret; the
; arithmetic flags are not preserved.
;
; NOTE(review): the mbin_r* register aliases, WRT_OPT and the FLAG_* /
; FLAGS_* constants are not defined in this file - presumably they come
; from the %include'd headers; confirm there.
;;;;;
%macro mbin_dispatch_init6_avoton 7
	section .text
	%1_dispatch_init:
		push	mbin_rsi
		push	mbin_rax
		push	mbin_rbx
		push	mbin_rcx
		push	mbin_rdx
		push	mbin_rdi
		lea	mbin_rsi, [%2 WRT_OPT] ; Default - use base function

		mov	eax, 1
		cpuid
		mov	ebx, ecx ; save cpuid1.ecx
		test	ecx, FLAG_CPUID1_ECX_SSE4_1
		je	_%1_init_done	  ; Use base function if no SSE4_1
		lea	mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt

		lea	mbin_rdi, [%7 WRT_OPT]	; candidate: Avoton function
		and	eax, FLAG_CPUID1_EAX_STEP_MASK
		cmp	eax, FLAG_CPUID1_EAX_AVOTON
		; If Avoton, set Avoton symbol and exit
		cmove	mbin_rsi, mbin_rdi
		je	_%1_init_done


		;; Test for XMM_YMM support/AVX
		test	ecx, FLAG_CPUID1_ECX_OSXSAVE
		je	_%1_init_done
		xor	ecx, ecx		; select XCR0
		xgetbv	; xcr -> edx:eax
		mov	edi, eax	  ; save xgetbv.eax (reused by the AVX512 test)

		and	eax, FLAG_XGETBV_EAX_XMM_YMM
		cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
		jne	_%1_init_done
		test	ebx, FLAG_CPUID1_ECX_AVX	; ebx = saved cpuid1.ecx
		je	_%1_init_done
		lea	mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt

		;; Test for AVX2
		xor	ecx, ecx		; sub-leaf 0
		mov	eax, 7
		cpuid				; ebx now holds cpuid7.ebx
		test	ebx, FLAG_CPUID7_EBX_AVX2
		je	_%1_init_done		; No AVX2 possible
		lea	mbin_rsi, [%5 WRT_OPT] 	; AVX2/04 opt func

		;; Test for AVX512
		and	edi, FLAG_XGETBV_EAX_ZMM_OPM	; edi = saved xgetbv.eax
		cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
		jne	_%1_init_done	  ; No AVX512 possible
		and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
		; lea does not modify flags, so cmove still sees the cmp result
		lea	mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt
		cmove	mbin_rsi, mbin_rbx

	_%1_init_done:
		pop	mbin_rdi
		pop	mbin_rdx
		pop	mbin_rcx
		pop	mbin_rbx
		pop	mbin_rax
		; publish the choice while mbin_rsi still holds it, then restore it
		mov	[%1_dispatched], mbin_rsi
		pop	mbin_rsi
		ret
%endmacro

default rel
[bits 64]

%define def_wrd		dq
%define wrd_sz		qword
%define arg1		rsi

; declare the L3 ctx level symbols (these will then call the appropriate
; L2 symbols)
extern _sha512_ctx_mgr_init_sse
extern _sha512_ctx_mgr_submit_sse
extern _sha512_ctx_mgr_flush_sse

extern _sha512_ctx_mgr_init_avx
extern _sha512_ctx_mgr_submit_avx
extern _sha512_ctx_mgr_flush_avx

extern _sha512_ctx_mgr_init_avx2
extern _sha512_ctx_mgr_submit_avx2
extern _sha512_ctx_mgr_flush_avx2

extern _sha512_ctx_mgr_init_base
extern _sha512_ctx_mgr_submit_base
extern _sha512_ctx_mgr_flush_base

extern _sha512_ctx_mgr_init_avx512
extern _sha512_ctx_mgr_submit_avx512
extern _sha512_ctx_mgr_flush_avx512

extern _sha512_ctx_mgr_init_sb_sse4
extern _sha512_ctx_mgr_submit_sb_sse4
extern _sha512_ctx_mgr_flush_sb_sse4

;;; *_mbinit are the initial values of *_dispatched; *_dispatched is
;;; updated on the first call.  Therefore, *_dispatch_init is only
;;; executed on the first call.

; Initialise symbols
mbin_interface _sha512_ctx_mgr_init
mbin_interface _sha512_ctx_mgr_submit
mbin_interface _sha512_ctx_mgr_flush

; Generate the *_dispatch_init routines with mbin_dispatch_init6_avoton.
; Argument order: function name, base, SSE (00/01), AVX (02), AVX2 (04),
; AVX512 (06), Avoton.  The *_base variant is the fallback, the *_sse
; variant fills the SSE4_1 slot and *_sb_sse4 fills the Avoton slot.
mbin_dispatch_init6_avoton _sha512_ctx_mgr_init, _sha512_ctx_mgr_init_base, \
	_sha512_ctx_mgr_init_sse, _sha512_ctx_mgr_init_avx, \
	_sha512_ctx_mgr_init_avx2, _sha512_ctx_mgr_init_avx512, \
	_sha512_ctx_mgr_init_sb_sse4

mbin_dispatch_init6_avoton _sha512_ctx_mgr_submit, _sha512_ctx_mgr_submit_base, \
	_sha512_ctx_mgr_submit_sse, _sha512_ctx_mgr_submit_avx, \
	_sha512_ctx_mgr_submit_avx2, _sha512_ctx_mgr_submit_avx512, \
	_sha512_ctx_mgr_submit_sb_sse4

mbin_dispatch_init6_avoton _sha512_ctx_mgr_flush, _sha512_ctx_mgr_flush_base, \
	_sha512_ctx_mgr_flush_sse, _sha512_ctx_mgr_flush_avx, \
	_sha512_ctx_mgr_flush_avx2, _sha512_ctx_mgr_flush_avx512, \
	_sha512_ctx_mgr_flush_sb_sse4

;;;       func				core, ver, snum
slversion _sha512_ctx_mgr_init,		00,   04,  0175
slversion _sha512_ctx_mgr_submit,	00,   04,  0176
slversion _sha512_ctx_mgr_flush,	00,   04,  0177