;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Standard helper macros (NASM syntax, x86-64).
;;
;; Every macro below that tests ARCH expects the including file to have
;; defined ARCH before the macro is *invoked*: values 02, 03 and 04 select
;; the VEX-encoded (v-prefixed) instruction, anything else the legacy SSE
;; encoding.  BZHI / SHRX / SHLX are selected by USE_HSWNI being defined.

%ifndef STDMAC_ASM
%define STDMAC_ASM

;; internal macro used by PUSH_ALL
;; push args L to R and record how many registers were pushed
%macro push_all_ 1-*
%xdefine _PUSH_ALL_REGS_COUNT_ %0
%rep %0
	push	%1
	%rotate 1
%endrep
%endmacro

;; internal macro used by POP_ALL
;; pop args R to L
%macro pop_all_ 1-*
%rep %0
	%rotate -1
	pop	%1
%endrep
%endmacro

;; Book-keeping state shared by PUSH_ALL/POP_ALL and ALLOC_STACK/RESTORE_STACK
%xdefine _PUSH_ALL_REGS_COUNT_ 0
%xdefine _ALLOC_STACK_VAL_ 0

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; STACK_OFFSET
;; Number of bytes subtracted from stack due to PUSH_ALL and ALLOC_STACK
;; (evaluated at the point of use, so it tracks the current state)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%define STACK_OFFSET (_PUSH_ALL_REGS_COUNT_ * 8 + _ALLOC_STACK_VAL_)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; PUSH_ALL reg1, reg2, ...
;; push args L to R, remember regs for POP_ALL
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro PUSH_ALL 1+
%xdefine _PUSH_ALL_REGS_ %1
	push_all_ %1
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; POP_ALL
;; pop args from prev "PUSH_ALL" R to L, and reset the pushed-register
;; count used by STACK_OFFSET
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro POP_ALL 0
	pop_all_ _PUSH_ALL_REGS_
%xdefine _PUSH_ALL_REGS_COUNT_ 0
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALLOC_STACK n
;; subtract n from the stack pointer and remember the value for RESTORE_STACK
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro ALLOC_STACK 1
%xdefine _ALLOC_STACK_VAL_ %1
	sub	rsp, %1
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; RESTORE_STACK
;; add n to the stack pointer, where n is the arg to the previous ALLOC_STACK,
;; then reset the remembered value to 0
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro RESTORE_STACK 0
	add	rsp, _ALLOC_STACK_VAL_
%xdefine _ALLOC_STACK_VAL_ 0
%endmacro


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NOPN n
;; Create n bytes of NOP, using nops of up to 8 bytes each
;; NOTE: the loop below runs at most 200 times, so n above 1600 emits
;; only 1600 bytes; n == 0 is rejected by nopn.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro NOPN 1

	%assign %%i %1
	%rep 200
		%if (%%i < 9)
			nopn %%i		; final chunk of 8 bytes or fewer
			%exitrep
		%else
			nopn 8
			%assign %%i (%%i - 8)
		%endif
	%endrep
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; nopn n
;; Create n bytes of NOP, where n is between 1 and 9
;; Each case emits a single (optionally 0x66-prefixed) NOP instruction;
;; any other n stops assembly with an error.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro nopn 1
%if (%1 == 1)
	nop
%elif (%1 == 2)
	db	0x66
	nop
%elif (%1 == 3)
	db	0x0F, 0x1F, 0x00
%elif (%1 == 4)
	db	0x0F, 0x1F, 0x40, 0x00
%elif (%1 == 5)
	db	0x0F, 0x1F, 0x44, 0x00, 0x00
%elif (%1 == 6)
	db	0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00
%elif (%1 == 7)
	db	0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00
%elif (%1 == 8)
	db	0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
%elif (%1 == 9)
	db	0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
%else
%error Invalid value to nopn
%endif
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rolx64 dst, src, amount
;; Emulate a rolx instruction using rorx, assuming data 64 bits wide
;; (rotate left by amount == rotate right by 64 - amount)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro rolx64 3
	rorx	%1, %2, (64-%3)
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rolx32 dst, src, amount
;; Emulate a rolx instruction using rorx, assuming data 32 bits wide
;; (rotate left by amount == rotate right by 32 - amount)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro rolx32 3
	rorx	%1, %2, (32-%3)
%endmacro


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Define a function void ssc(uint64_t x)
;; The emitted function places x (read from rcx) in rbx around a
;; prefixed NOP and restores rbx before returning; rax is used as the
;; save slot and is therefore clobbered.
;; NOTE(review): rcx is the first integer argument register in the
;; Microsoft x64 convention; under System V the argument arrives in
;; rdi - confirm the intended ABI with the callers.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro DEF_SSC 0
global ssc
ssc:
	mov	rax, rbx		; save rbx (restored below)
	mov	rbx, rcx		; marker value goes in rbx
	db	0x64			; prefix bytes + nop form the marker
	db	0x67			; presumably consumed by a simulator/tracer - TODO confirm
	nop
	mov	rbx, rax		; restore rbx
	ret
%endmacro

;; MOVDQU dest, src
;; Unaligned 128-bit move: vmovdqu for ARCH 02/03/04, movdqu otherwise
%macro MOVDQU 2
%define %%dest %1
%define %%src %2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vmovdqu	%%dest, %%src
%else
	movdqu	%%dest, %%src
%endif
%endmacro

;; MOVDQA dest, src
;; Aligned 128-bit move: vmovdqa for ARCH 02/03/04, movdqa otherwise
%macro MOVDQA 2
%define %%dest %1
%define %%src %2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vmovdqa	%%dest, %%src
%else
	movdqa	%%dest, %%src
%endif
%endmacro

;; MOVD dest, src
;; vmovd for ARCH 02/03/04, movd otherwise
%macro MOVD 2
%define %%dest %1
%define %%src %2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vmovd	%%dest, %%src
%else
	movd	%%dest, %%src
%endif
%endmacro

;; MOVQ dest, src
;; vmovq for ARCH 02/03/04, movq otherwise
%macro MOVQ 2
%define %%dest %1
%define %%src %2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vmovq	%%dest, %%src
%else
	movq	%%dest, %%src
%endif
%endmacro

;; MOVNIDN dest, src
;; Move register if the src and dest are not equal
;; (textual, case-sensitive comparison of the two operands).
;; The operand aliases are macro-local (%%) so that invoking this macro
;; does not define or overwrite global `dest` / `src` names in the
;; including file.
%macro MOVNIDN 2
%define %%dest %1
%define %%src %2
%ifnidn %%dest, %%src
	mov	%%dest, %%src
%endif
%endmacro

;; MOVDQANIDN dest, src
;; MOVDQA if the src and dest are not (textually) equal.
;; Uses macro-local aliases for the same reason as MOVNIDN.
%macro MOVDQANIDN 2
%define %%dest %1
%define %%src %2
%ifnidn %%dest, %%src
	MOVDQA	%%dest, %%src
%endif
%endmacro

;; PSHUFD dest, src1, imm8
;; vpshufd for ARCH 02/03/04, pshufd otherwise
%macro PSHUFD 3
%define %%dest %1
%define %%src1 %2
%define %%imm8 %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpshufd	%%dest, %%src1, %%imm8
%else
	pshufd	%%dest, %%src1, %%imm8
%endif
%endmacro

;; PSHUFB dest, src1, shuf
;; Three-operand byte shuffle.  The SSE path first copies src1 into dest,
;; so there shuf must not be the same register as dest unless src1 is too.
%macro PSHUFB 3
%define %%dest %1
%define %%src1 %2
%define %%shuf %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpshufb	%%dest, %%src1, %%shuf
%else
	MOVDQANIDN %%dest, %%src1
	pshufb	%%dest, %%shuf
%endif
%endmacro

;; PBROADCASTD dest, src
;; Broadcast a dword to all four dword lanes of dest: vpbroadcastd for
;; ARCH 04, otherwise MOVD followed by PSHUFD with selector 0.
%macro PBROADCASTD 2
%define %%dest %1
%define %%src %2
%if (ARCH == 04)
	vpbroadcastd %%dest, %%src
%else
	MOVD	%%dest, %%src
	PSHUFD	%%dest, %%dest, 0
%endif
%endmacro

;; BZHI dest, src, index, tmp1
;; Implement BZHI instruction on older architectures
;; Clobbers rcx, unless rcx is %%index
;; Fallback path: builds the mask (1 << cl) - 1 in tmp1 and ANDs it with
;; src.  index is copied to rcx before src is read, so src, dest and tmp1
;; must not be rcx; tmp1 is overwritten and flags are modified.
%macro BZHI 4
%define %%dest %1
%define %%src %2
%define %%index %3
%define %%tmp1 %4

%ifdef USE_HSWNI
	bzhi	%%dest, %%src, %%index
%else
	MOVNIDN	rcx, %%index
	mov	%%tmp1, 1
	shl	%%tmp1, cl
	sub	%%tmp1, 1

	MOVNIDN	%%dest, %%src

	and	%%dest, %%tmp1
%endif
%endmacro

;; SHRX dest, src, index
;; Implement shrx instruction on older architectures
;; Clobbers rcx, unless rcx is %%index
;; Fallback path: index is copied to rcx before src is copied to dest, so
;; src and dest must not be rcx; flags are modified.
%macro SHRX 3
%define %%dest %1
%define %%src %2
%define %%index %3

%ifdef USE_HSWNI
	shrx	%%dest, %%src, %%index
%else
	MOVNIDN	rcx, %%index
	MOVNIDN	%%dest, %%src
	shr	%%dest, cl
%endif
%endmacro

;; SHLX dest, src, index
;; Implement shlx instruction on older architectures
;; Clobbers rcx, unless rcx is %%index
;; Fallback path: src is copied to dest before index is copied to rcx, so
;; dest must not be rcx and must not be the same register as index (unless
;; src is too); flags are modified.
%macro SHLX 3
%define %%dest %1
%define %%src %2
%define %%index %3

%ifdef USE_HSWNI
	shlx	%%dest, %%src, %%index
%else
	MOVNIDN	%%dest, %%src
	MOVNIDN	rcx, %%index
	shl	%%dest, cl
%endif
%endmacro

;; PINSRD dest, src, offset
;; vpinsrd for ARCH 02/03/04, pinsrd otherwise
%macro PINSRD 3
%define %%dest %1
%define %%src %2
%define %%offset %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpinsrd	%%dest, %%src, %%offset
%else
	pinsrd	%%dest, %%src, %%offset
%endif
%endmacro

;; PEXTRD dest, src, offset
;; vpextrd for ARCH 02/03/04, pextrd otherwise
%macro PEXTRD 3
%define %%dest %1
%define %%src %2
%define %%offset %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpextrd	%%dest, %%src, %%offset
%else
	pextrd	%%dest, %%src, %%offset
%endif
%endmacro

;; PSRLDQ dest, offset
;; In-place byte shift right: vpsrldq for ARCH 02/03/04, psrldq otherwise
%macro PSRLDQ 2
%define %%dest %1
%define %%offset %2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpsrldq	%%dest, %%offset
%else
	psrldq	%%dest, %%offset
%endif
%endmacro

;; PSLLD dest, src, offset
;; Three-operand dword shift left.  The SSE path first copies src into
;; dest, so there offset must not be the same register as dest unless src
;; is too.
%macro PSLLD 3
%define %%dest %1
%define %%src %2
%define %%offset %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpslld	%%dest, %%src, %%offset
%else
	MOVDQANIDN %%dest, %%src
	pslld	%%dest, %%offset
%endif
%endmacro

;; PAND / POR / PXOR / PADDD / PCMPEQB dest, src1, src2
;; Three-operand wrappers.  For ARCH 02/03/04 the VEX instruction is used
;; directly; otherwise src1 is first copied into dest (skipped when they are
;; the same operand) and the two-operand SSE instruction is applied.  On the
;; SSE path src2 must therefore not be the same register as dest unless src1
;; is too.

%macro PAND 3
%define %%dest %1
%define %%src1 %2
%define %%src2 %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpand	%%dest, %%src1, %%src2
%else
	MOVDQANIDN %%dest, %%src1
	pand	%%dest, %%src2
%endif
%endmacro

%macro POR 3
%define %%dest %1
%define %%src1 %2
%define %%src2 %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpor	%%dest, %%src1, %%src2
%else
	MOVDQANIDN %%dest, %%src1
	por	%%dest, %%src2
%endif
%endmacro

%macro PXOR 3
%define %%dest %1
%define %%src1 %2
%define %%src2 %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpxor	%%dest, %%src1, %%src2
%else
	MOVDQANIDN %%dest, %%src1
	pxor	%%dest, %%src2
%endif
%endmacro

%macro PADDD 3
%define %%dest %1
%define %%src1 %2
%define %%src2 %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpaddd	%%dest, %%src1, %%src2
%else
	MOVDQANIDN %%dest, %%src1
	paddd	%%dest, %%src2
%endif
%endmacro

%macro PCMPEQB 3
%define %%dest %1
%define %%src1 %2
%define %%src2 %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpcmpeqb %%dest, %%src1, %%src2
%else
	MOVDQANIDN %%dest, %%src1
	pcmpeqb	%%dest, %%src2
%endif
%endmacro

;; PMOVMSKB dest, src
;; vpmovmskb for ARCH 02/03/04, pmovmskb otherwise
%macro PMOVMSKB 2
%define %%dest %1
%define %%src %2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpmovmskb %%dest, %%src
%else
	pmovmskb %%dest, %%src
%endif
%endmacro

%endif ;; ifndef STDMAC_ASM