;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%ifndef STDMAC_ASM
%define STDMAC_ASM
;; internal macro used by push_all
;; push args L to R
%macro push_all_ 1-*
%xdefine _PUSH_ALL_REGS_COUNT_ %0
%rep %0
	push %1
	%rotate 1
%endrep
%endmacro

;; internal macro used by pop_all
;; pop args R to L
%macro pop_all_ 1-*
%rep %0
	%rotate -1
	pop %1
%endrep
%endmacro

%xdefine _PUSH_ALL_REGS_COUNT_ 0
%xdefine _ALLOC_STACK_VAL_ 0
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; STACK_OFFSET
;; Number of bytes subtracted from stack due to PUSH_ALL and ALLOC_STACK
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%define STACK_OFFSET (_PUSH_ALL_REGS_COUNT_ * 8 + _ALLOC_STACK_VAL_)

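;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Illustrative sketch (not assembled): a typical prologue/epilogue built
;; from the macros defined below. Function name, registers and sizes here
;; are hypothetical, not part of this file.
;;
;;	my_func:
;;		PUSH_ALL	rbx, rbp, r12	; callee-saved regs, pushed L to R
;;		ALLOC_STACK	32		; local scratch space
;;		...
;;		; STACK_OFFSET (= 3*8 + 32 here) is how far rsp has moved, so the
;;		; caller's return address sits at [rsp + STACK_OFFSET]
;;		...
;;		RESTORE_STACK			; add 32 back to rsp
;;		POP_ALL				; pop r12, rbp, rbx (R to L)
;;		ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
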
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; PUSH_ALL reg1, reg2, ...
;; push args L to R, remember regs for pop_all
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro PUSH_ALL 1+
%xdefine _PUSH_ALL_REGS_ %1
	push_all_ %1
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; POP_ALL
;; pop args from prev "push_all" R to L
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro POP_ALL 0
	pop_all_ _PUSH_ALL_REGS_
%xdefine _PUSH_ALL_REGS_COUNT_ 0
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALLOC_STACK n
;; subtract n from the stack pointer and remember the value for restore_stack
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro ALLOC_STACK 1
%xdefine _ALLOC_STACK_VAL_ %1
	sub	rsp, %1
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; RESTORE_STACK
;; add n to the stack pointer, where n is the arg to the previous alloc_stack
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro RESTORE_STACK 0
	add	rsp, _ALLOC_STACK_VAL_
%xdefine _ALLOC_STACK_VAL_ 0
%endmacro


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NOPN n
;; Create n bytes of NOP, using nops of up to 8 bytes each
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro NOPN 1

	%assign %%i %1
	%rep 200
	%if (%%i < 9)
		nopn %%i
		%exitrep
	%else
		nopn 8
		%assign %%i (%%i - 8)
	%endif
	%endrep
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; nopn n
;; Create n bytes of NOP, where n is between 1 and 9
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro nopn 1
%if (%1 == 1)
	nop
%elif (%1 == 2)
	db 0x66
	nop
%elif (%1 == 3)
	db 0x0F
	db 0x1F
	db 0x00
%elif (%1 == 4)
	db 0x0F
	db 0x1F
	db 0x40
	db 0x00
%elif (%1 == 5)
	db 0x0F
	db 0x1F
	db 0x44
	db 0x00
	db 0x00
%elif (%1 == 6)
	db 0x66
	db 0x0F
	db 0x1F
	db 0x44
	db 0x00
	db 0x00
%elif (%1 == 7)
	db 0x0F
	db 0x1F
	db 0x80
	db 0x00
	db 0x00
	db 0x00
	db 0x00
%elif (%1 == 8)
	db 0x0F
	db 0x1F
	db 0x84
	db 0x00
	db 0x00
	db 0x00
	db 0x00
	db 0x00
%elif (%1 == 9)
	db 0x66
	db 0x0F
	db 0x1F
	db 0x84
	db 0x00
	db 0x00
	db 0x00
	db 0x00
	db 0x00
%else
%error Invalid value to nopn
%endif
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rolx64 dst, src, amount
;; Emulate a rolx instruction using rorx, assuming data 64 bits wide
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro rolx64 3
	rorx	%1, %2, (64-%3)
%endm

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rolx32 dst, src, amount
;; Emulate a rolx instruction using rorx, assuming data 32 bits wide
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro rolx32 3
	rorx	%1, %2, (32-%3)
%endm


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Define a function void ssc(uint64_t x)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%macro DEF_SSC 0
global ssc
ssc:
	mov	rax, rbx
	mov	rbx, rcx
	db 0x64
	db 0x67
	nop
	mov	rbx, rax
	ret
%endm

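;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The uppercase SIMD macros below dispatch on ARCH, which is expected to
;; be defined before they are used: ARCH values 02, 03 and 04 emit the
;; VEX-encoded (AVX) forms, anything else falls back to the legacy SSE
;; encodings, copying the first source into the destination where SSE has
;; no separate destination operand.
;;
;; Illustrative expansion (register choice hypothetical):
;;	PSHUFB	xmm0, xmm1, xmm2
;; becomes "vpshufb xmm0, xmm1, xmm2" when ARCH is 02/03/04, and otherwise
;;	movdqa	xmm0, xmm1	; via MOVDQANIDN, skipped when dest == src1
;;	pshufb	xmm0, xmm2
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
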
%macro MOVDQU 2
%define %%dest %1
%define %%src %2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vmovdqu	%%dest, %%src
%else
	movdqu	%%dest, %%src
%endif
%endm

%macro MOVDQA 2
%define %%dest %1
%define %%src %2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vmovdqa	%%dest, %%src
%else
	movdqa	%%dest, %%src
%endif
%endm

%macro MOVD 2
%define %%dest %1
%define %%src %2
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vmovd	%%dest, %%src
%else
	movd	%%dest, %%src
%endif
%endm

%macro MOVQ 2
%define %%dest %1
%define %%src %2
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vmovq	%%dest, %%src
%else
	movq	%%dest, %%src
%endif
%endm

;; Move register if the src and dest are not equal
%macro MOVNIDN 2
%define dest %1
%define src %2
%ifnidn dest, src
	mov	dest, src
%endif
%endm

%macro MOVDQANIDN 2
%define dest %1
%define src %2
%ifnidn dest, src
	MOVDQA	dest, src
%endif
%endm

%macro PSHUFD 3
%define %%dest %1
%define %%src1 %2
%define %%imm8 %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpshufd	%%dest, %%src1, %%imm8
%else
	pshufd	%%dest, %%src1, %%imm8
%endif
%endm

%macro PSHUFB 3
%define %%dest %1
%define %%src1 %2
%define %%shuf %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpshufb	%%dest, %%src1, %%shuf
%else
	MOVDQANIDN %%dest, %%src1
	pshufb	%%dest, %%shuf
%endif
%endm

%macro PBROADCASTD 2
%define %%dest %1
%define %%src %2
%if (ARCH == 04)
	vpbroadcastd %%dest, %%src
%else
	MOVD	%%dest, %%src
	PSHUFD	%%dest, %%dest, 0
%endif
%endm

;; Implement BZHI instruction on older architectures
;; Clobbers rcx, unless rcx is %%index
%macro BZHI 4
%define %%dest %1
%define %%src %2
%define %%index %3
%define %%tmp1 %4

%ifdef USE_HSWNI
	bzhi	%%dest, %%src, %%index
%else
	MOVNIDN	rcx, %%index
	mov	%%tmp1, 1
	shl	%%tmp1, cl
	sub	%%tmp1, 1

	MOVNIDN	%%dest, %%src

	and	%%dest, %%tmp1
%endif
%endm

;; Implement shrx instruction on older architectures
;; Clobbers rcx, unless rcx is %%index
%macro SHRX 3
%define %%dest %1
%define %%src %2
%define %%index %3

%ifdef USE_HSWNI
	shrx	%%dest, %%src, %%index
%else
	MOVNIDN	rcx, %%index
	MOVNIDN	%%dest, %%src
	shr	%%dest, cl
%endif
%endm

;; Implement shlx instruction on older architectures
;; Clobbers rcx, unless rcx is %%index
%macro SHLX 3
%define %%dest %1
%define %%src %2
%define %%index %3

%ifdef USE_HSWNI
	shlx	%%dest, %%src, %%index
%else
	MOVNIDN	%%dest, %%src
	MOVNIDN	rcx, %%index
	shl	%%dest, cl
%endif
%endm

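;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Illustrative sketch (register choice hypothetical): without USE_HSWNI,
;;	SHRX	rax, rdx, r8
;; expands to the fallback sequence
;;	mov	rcx, r8		; rcx is clobbered unless the index is rcx
;;	mov	rax, rdx
;;	shr	rax, cl
;; BZHI similarly builds the mask (1 << index) - 1 in its %%tmp1 register
;; and ANDs it into the destination.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
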
%macro PINSRD 3
%define %%dest %1
%define %%src %2
%define %%offset %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpinsrd	%%dest, %%src, %%offset
%else
	pinsrd	%%dest, %%src, %%offset
%endif
%endm

%macro PEXTRD 3
%define %%dest %1
%define %%src %2
%define %%offset %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpextrd	%%dest, %%src, %%offset
%else
	pextrd	%%dest, %%src, %%offset
%endif
%endm

%macro PSRLDQ 2
%define %%dest %1
%define %%offset %2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpsrldq	%%dest, %%offset
%else
	psrldq	%%dest, %%offset
%endif
%endm

%macro PSLLD 3
%define %%dest %1
%define %%src %2
%define %%offset %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpslld	%%dest, %%src, %%offset
%else
	MOVDQANIDN %%dest, %%src
	pslld	%%dest, %%offset
%endif
%endm

%macro PAND 3
%define %%dest %1
%define %%src1 %2
%define %%src2 %3
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vpand	%%dest, %%src1, %%src2
%else
	MOVDQANIDN %%dest, %%src1
	pand	%%dest, %%src2
%endif
%endm

%macro POR 3
%define %%dest %1
%define %%src1 %2
%define %%src2 %3
%if (ARCH == 02 || ARCH == 03 || ARCH == 04)
	vpor	%%dest, %%src1, %%src2
%else
	MOVDQANIDN %%dest, %%src1
	por	%%dest, %%src2
%endif
%endm

%macro PXOR 3
%define %%dest %1
%define %%src1 %2
%define %%src2 %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpxor	%%dest, %%src1, %%src2
%else
	MOVDQANIDN %%dest, %%src1
	pxor	%%dest, %%src2
%endif
%endm

%macro PADDD 3
%define %%dest %1
%define %%src1 %2
%define %%src2 %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpaddd	%%dest, %%src1, %%src2
%else
	MOVDQANIDN %%dest, %%src1
	paddd	%%dest, %%src2
%endif
%endm

%macro PCMPEQB 3
%define %%dest %1
%define %%src1 %2
%define %%src2 %3
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpcmpeqb %%dest, %%src1, %%src2
%else
	MOVDQANIDN %%dest, %%src1
	pcmpeqb	%%dest, %%src2
%endif
%endm

%macro PMOVMSKB 2
%define %%dest %1
%define %%src %2
%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04))
	vpmovmskb %%dest, %%src
%else
	pmovmskb %%dest, %%src
%endif
%endm

%endif ;; ifndef STDMAC_ASM
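
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Illustrative sketch (not assembled; registers hypothetical): scanning 16
;; bytes for a match works the same on the SSE and AVX paths of the
;; wrappers above. Assume xmm1 already holds the target byte in every lane.
;;	MOVDQU		xmm0, [rsi]		; load 16 bytes
;;	PCMPEQB		xmm0, xmm0, xmm1	; 0xFF in each matching byte lane
;;	PMOVMSKB	eax, xmm0		; one mask bit per byte
;;	bsf		eax, eax		; index of first match, if any
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;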