;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2023 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;;
;;; gf_vect_mad_avx512_gfni(len, vec, vec_i, mul_array, src, dest);
;;;
;;; Walks src and dest together, 64 bytes per step.  Each step loads one
;;; block of src and the matching block of dest, runs them through
;;; GF_MUL_XOR with the broadcast coefficient, and stores the result back
;;; over dest.  A trailing partial block (1..63 bytes) is processed with
;;; byte-masked loads/stores so only the first `len` bytes of dest are
;;; written.  Nothing is done when len <= 0 (signed compare).
;;;
;;; Arguments (see the per-ABI register mapping below):
;;;   len        number of bytes to process
;;;   vec        not referenced by this routine
;;;   vec_i      index of the 8-byte coefficient entry inside mul_array
;;;   mul_array  base address of the coefficient table
;;;   src        source buffer
;;;   dest       buffer that is read, updated and written back
;;;
;;; Registers written by the code visible here: rax (pos), r11 (tmp), k1,
;;; zmm0-zmm3, flags, plus the registers holding len and vec_i.
;;; NOTE(review): GF_MUL_XOR is not defined in this file (presumably it
;;; comes from gf_vect_gfni.inc); verify there whether it touches anything
;;; beyond the operands passed to it.
;;;

%include "reg_sizes.asm"
%include "gf_vect_gfni.inc"

%if AS_FEATURE_LEVEL >= 10

;;; ------------------------------------------------------------------
;;; elf64: all six arguments arrive in registers, so no prologue or
;;; epilogue work is required.
;;; ------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__, elf64
 %define arg0	rdi
 %define arg1	rsi
 %define arg2	rdx
 %define arg3	rcx
 %define arg4	r8
 %define arg5	r9
 %define tmp	r11
 %define func(x) x: endbranch
 %define FUNC_SAVE
 %define FUNC_RESTORE
%endif

;;; ------------------------------------------------------------------
;;; win64: the first four arguments arrive in registers; arguments 4 and
;;; 5 are fetched from the caller's stack into r12/r13, which are saved
;;; first and restored before returning.
;;; ------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__, win64
 %define arg0	rcx
 %define arg1	rdx
 %define arg2	r8
 %define arg3	r9
 ;; arg4/arg5 live in r12/r13: saved by FUNC_SAVE, then loaded from stack
 %define arg4	r12
 %define arg5	r13
 %define tmp	r11
 ;; Three 8-byte slots are reserved; only slots 0 and 1 are used by the
 ;; code below (third slot presumably keeps rsp 16-byte aligned - confirm).
 %define stack_size	3*8
 ;; x-th argument as addressed after FUNC_SAVE has lowered rsp: skip the
 ;; local save area and the 8-byte return address.
 %define arg(x)	[rsp + stack_size + 8 + 8*x]
 %define func(x) proc_frame x

 ;; Prologue: make room, stash r12/r13, then pull args 4 and 5 off the stack.
 %macro FUNC_SAVE 0
	sub	rsp, stack_size
	mov	[rsp + 0*8], r12
	mov	[rsp + 1*8], r13
	end_prolog
	mov	arg4, arg(4)
	mov	arg5, arg(5)
 %endmacro

 ;; Epilogue: put r12/r13 back and release the save area.
 %macro FUNC_RESTORE 0
	mov	r12, [rsp + 0*8]
	mov	r13, [rsp + 1*8]
	add	rsp, stack_size
 %endmacro
%endif

;;; gf_vect_mad_avx512_gfni(len, vec, vec_i, mul_array, src, dest)
%define len		arg0
%define vec		arg1
%define vec_i		arg2
%define mul_array	arg3
%define src		arg4
%define dest		arg5
%define pos		rax

;;; Full-block load/store flavour.
;;;   EC_ALIGNED_ADDR defined, NO_NT_LDST defined : aligned moves
;;;   EC_ALIGNED_ADDR defined, NO_NT_LDST not set : non-temporal moves
;;;   EC_ALIGNED_ADDR not defined                 : unaligned moves
%ifdef EC_ALIGNED_ADDR
 %ifdef NO_NT_LDST
  %define XLDR vmovdqa64
  %define XSTR vmovdqa64
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
 %endif
%else
 %define XLDR vmovdqu8
 %define XSTR vmovdqu8
%endif


default rel

[bits 64]
section .text

%define x0	zmm0		; current 64 bytes of src
%define xd	zmm1		; current 64 bytes of dest
%define xgft1	zmm2		; coefficient entry broadcast to every qword
%define xret1	zmm3		; scratch operand handed to GF_MUL_XOR

;;
;; ENCODE_64B [kmask]
;;
;; Processes the 64-byte block at offset `pos` of src/dest.
;;   - no argument : whole block, using XLDR/XSTR
;;   - one argument: opmask register; only the selected bytes are loaded
;;                   and stored (always with vmovdqu8)
;;
%macro ENCODE_64B 0-1
%if %0 == 1
%define %%TAIL_MASK %1
	vmovdqu8 x0{%%TAIL_MASK}, [src + pos]	;Masked load of source bytes
	vmovdqu8 xd{%%TAIL_MASK}, [dest + pos]	;Masked load of dest bytes

	GF_MUL_XOR EVEX, x0, xgft1, xret1, xd

	vmovdqu8 [dest + pos]{%%TAIL_MASK}, xd	;Write back selected bytes only
%else
	XLDR	x0, [src + pos]			;Next full source block
	XLDR	xd, [dest + pos]		;Next full dest block

	GF_MUL_XOR EVEX, x0, xgft1, xret1, xd

	XSTR	[dest + pos], xd		;Write back full block
%endif
%endmacro

align 16
mk_global gf_vect_mad_avx512_gfni, function
func(gf_vect_mad_avx512_gfni)
	FUNC_SAVE
	shl	vec_i, 3			;Entry index -> byte offset (8 bytes each)
	xor	pos, pos			;Start at offset 0

	;; Replicate the selected 8-byte table entry across the whole register
	vbroadcastf32x2 xgft1, [vec_i + mul_array]

	cmp	len, 64
	jl	.tail				;Fewer than 64 bytes: no full block

.next_64B:
	ENCODE_64B

	add	pos, 64				;Advance one full block
	sub	len, 64
	cmp	len, 64
	jge	.next_64B			;Another full block remains

.tail:
	;; Here len < 64; leave if there is nothing left (or len was negative)
	test	len, len
	jle	.done

	;; tmp = (1 << len) - 1 : one mask bit per remaining byte
	xor	tmp, tmp
	bts	tmp, len
	dec	tmp
	kmovq	k1, tmp

	ENCODE_64B k1

.done:
	vzeroupper

	FUNC_RESTORE
	ret

endproc_frame
%endif  ; if AS_FEATURE_LEVEL >= 10