divert(-1)
dnl  The divert(-1) above sends everything that follows to diversion -1, so
dnl  the definitions and stray newlines in this file produce no output.  The
dnl  bare divert at the end of the file switches output back on.

dnl  m4 macros for amd64 assembler.

dnl  Copyright 1999-2005, 2008, 2009, 2011-2013, 2017 Free Software Foundation,
dnl  Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


dnl  Usage: CPUVEC_FUNCS_LIST
dnl
dnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
dnl  order they appear in that structure.
dnl  The list is held as one quoted string of individually quoted names, so
dnl  it expands to a comma-separated sequence of quoted function base names.

define(CPUVEC_FUNCS_LIST,
``add_n',
`addlsh1_n',
`addlsh2_n',
`addmul_1',
`addmul_2',
`bdiv_dbm1c',
`cnd_add_n',
`cnd_sub_n',
`com',
`copyd',
`copyi',
`divexact_1',
`divrem_1',
`gcd_11',
`lshift',
`lshiftc',
`mod_1',
`mod_1_1p',
`mod_1_1p_cps',
`mod_1s_2p',
`mod_1s_2p_cps',
`mod_1s_4p',
`mod_1s_4p_cps',
`mod_34lsub1',
`modexact_1c_odd',
`mul_1',
`mul_basecase',
`mullo_basecase',
`preinv_divrem_1',
`preinv_mod_1',
`redc_1',
`redc_2',
`rshift',
`sqr_basecase',
`sub_n',
`sublsh1_n',
`submul_1'')


dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
dnl
dnl  In the amd64 code we use explicit TEXT and ALIGN() calls in the code,
dnl  since different alignments are wanted in various circumstances.  So for
dnl  instance,
dnl
dnl                  TEXT
dnl                  ALIGN(16)
dnl          PROLOGUE(mpn_add_n)
dnl                  ...
dnl          EPILOGUE()
dnl
dnl  The expansion is a GLOBL line, a TYPE(...,function) line, the COFF_TYPE
dnl  information, and finally the label itself.  No section or alignment
dnl  directive is emitted here.

define(`PROLOGUE_cpu',
m4_assert_numargs(1)
`        GLOBL   $1
        TYPE($1,`function')
        COFF_TYPE($1)
$1:
')


dnl  Usage: COFF_TYPE(GSYM_PREFIX`'foo)
dnl
dnl  Emit COFF style ".def ... .endef" type information for a function, when
dnl  supported.  The argument should include any GSYM_PREFIX.
dnl
dnl  See autoconf macro GMP_ASM_COFF_TYPE for HAVE_COFF_TYPE.
dnl
dnl  HAVE_COFF_TYPE must be defined; the expansion is empty unless its value
dnl  is "yes".

define(COFF_TYPE,
m4_assert_numargs(1)
m4_assert_defined(`HAVE_COFF_TYPE')
`ifelse(HAVE_COFF_TYPE,yes,
        `.def    $1
        .scl     2
        .type    32
        .endef')')


dnl  Usage: ASSERT([cond][,instructions])
dnl
dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
dnl  flags condition to then be satisfied.  For example,
dnl
dnl         ASSERT(ne, `cmpq %rax, %rbx')
dnl
dnl  The instructions can be omitted to just assert a flags condition with
dnl  no extra calculation.
dnl  For example,
dnl
dnl         ASSERT(nc)
dnl
dnl  When `instructions' is not empty, a pushfq/popfq is added for
dnl  convenience to preserve the flags, but the instructions themselves must
dnl  preserve any registers that matter.
dnl
dnl  The condition can be omitted to just output the given instructions when
dnl  assertion checking is wanted.  In this case the pushfq/popfq is omitted.
dnl  For example,
dnl
dnl         ASSERT(, `movq %rax, VAR_KEEPVAL')
dnl
dnl  When a condition is given, the expansion jumps on that condition to a
dnl  label L(ASSERT_ok<n>) and otherwise falls into a ud2 instruction.  <n>
dnl  is the current value of ASSERT_counter, which is incremented after each
dnl  such expansion so that every check gets its own label.  WANT_ASSERT must
dnl  be defined; when it is not 1 the expansion is empty.

define(ASSERT,
m4_assert_numargs_range(1,2)
m4_assert_defined(`WANT_ASSERT')
`ifelse(WANT_ASSERT,1,
`ifelse(`$1',,
`        $2',
`ifelse(`$2',,,
`        pushfq')
        $2
        `j$1'   L(ASSERT_ok`'ASSERT_counter)
        ud2     C assertion failed
L(ASSERT_ok`'ASSERT_counter):
ifelse(`$2',,,`        popfq')
define(`ASSERT_counter',incr(ASSERT_counter))')')')

dnl  Label counter used by ASSERT; starts at 1.

define(ASSERT_counter,1)

dnl  LEA - load effective address
dnl
dnl  FIXME: We should never create a GOT entry and therefore use the simpler 2nd
dnl  variant always. We need to understand what happens for not-yet-hidden
dnl  symbols first.
dnl
dnl  Usage: LEA(symbol,reg)
dnl
dnl  With PIC defined the address is loaded from the symbol's GOT entry
dnl  (mov symbol@GOTPCREL(%rip)); otherwise a RIP-relative lea is used.
dnl
define(`LEA',`dnl
ifdef(`PIC',
        `mov     $1@GOTPCREL(%rip), $2'
,
        `lea     $1(%rip), $2')
')


dnl  Usage: DEF_OBJECT(name,alignment[,section])
dnl
dnl  Start a data object: expand the given section macro (RODATA when the
dnl  third argument is omitted), then ALIGN(alignment), then emit the label.

define(`DEF_OBJECT',
m4_assert_numargs_range(2,3)
`        ifelse($#,3,`$3',`RODATA')
        ALIGN($2)
$1:
')

dnl  Usage: END_OBJECT(name)
dnl
dnl  Emit SIZE(name,.-name) for an object started with DEF_OBJECT.

define(`END_OBJECT',
m4_assert_numargs(1)
`        SIZE(`$1',.-`$1')')


dnl  Usage: R32(reg64)
dnl         R8(reg64)
dnl
dnl  Expand to the 32-bit (R32) or 8-bit (R8) name of the given 64-bit
dnl  register.  %rsp is not in the tables; a name that is not listed expands
dnl  to nothing.

define(`R32',
        `ifelse($1,`%rax',`%eax',
                $1,`%rbx',`%ebx',
                $1,`%rcx',`%ecx',
                $1,`%rdx',`%edx',
                $1,`%rsi',`%esi',
                $1,`%rdi',`%edi',
                $1,`%rbp',`%ebp',
                $1,`%r8',`%r8d',
                $1,`%r9',`%r9d',
                $1,`%r10',`%r10d',
                $1,`%r11',`%r11d',
                $1,`%r12',`%r12d',
                $1,`%r13',`%r13d',
                $1,`%r14',`%r14d',
                $1,`%r15',`%r15d')')
define(`R8',
        `ifelse($1,`%rax',`%al',
                $1,`%rbx',`%bl',
                $1,`%rcx',`%cl',
                $1,`%rdx',`%dl',
                $1,`%rsi',`%sil',
                $1,`%rdi',`%dil',
                $1,`%rbp',`%bpl',
                $1,`%r8',`%r8b',
                $1,`%r9',`%r9b',
                $1,`%r10',`%r10b',
                $1,`%r11',`%r11b',
                $1,`%r12',`%r12b',
                $1,`%r13',`%r13b',
                $1,`%r14',`%r14b',
                $1,`%r15',`%r15b')')


dnl  Usage: CALL(funcname)
dnl         TCALL(funcname)
dnl
dnl  Emit a call (CALL) or a jmp (TCALL) to GSYM_PREFIX`'funcname.  With PIC
dnl  defined the target is referenced through the PLT.

define(`CALL',`dnl
ifdef(`PIC',
        `call    GSYM_PREFIX`'$1@PLT'
,
        `call    GSYM_PREFIX`'$1'
)')

define(`TCALL',`dnl
ifdef(`PIC',
        `jmp     GSYM_PREFIX`'$1@PLT'
,
        `jmp     GSYM_PREFIX`'$1'
)')


dnl  Usage: JUMPTABSECT
dnl
dnl  Section directive for jump tables built with JMPENT.  The section is
dnl  declared allocatable and writable ("aw"), which is the attribute set
dnl  toolchains use for .data.rel.ro* sections: the dynamic loader must be
dnl  able to write relocated addresses into it (the non-PIC JMPENT form
dnl  stores absolute .quad addresses) before it is made read-only.  Declaring
dnl  it read-only ("a") conflicts with those attributes and can produce
dnl  assembler warnings or link errors for position-independent executables.

define(`JUMPTABSECT', `.section .data.rel.ro.local,"aw",@progbits')


dnl  Usage: JMPENT(targlabel,tablabel)
dnl
dnl  Emit one jump table entry.  With PIC defined the entry is a 32-bit
dnl  difference targlabel-tablabel; otherwise it is the 64-bit address of
dnl  targlabel.

define(`JMPENT',`dnl
ifdef(`PIC',
        `.long   $1-$2'dnl
,
        `.quad   $1'dnl
)')


dnl  These macros are defined just for DOS64, where they provide calling
dnl  sequence glue code.  Here they expand to nothing.

define(`FUNC_ENTRY',`')
define(`FUNC_EXIT',`')


dnl  Target ABI macros.  IFDOS discards its argument, IFSTD and IFELF expand
dnl  to their argument.

define(`IFDOS',   `')
define(`IFSTD',   `$1')
define(`IFELF',   `$1')


dnl  Usage: PROTECT(symbol)
dnl
dnl  Used for private GMP symbols that should never be overridden by users.
dnl  This can save reloc entries and improve shlib sharing as well as
dnl  application startup times
dnl
dnl  The expansion is a .hidden directive for the given symbol.

define(`PROTECT',  `.hidden $1')


dnl  Usage: x86_lookup(target, key,value, key,value, ...)
dnl
dnl  Look for `target' among the `key' parameters.
dnl
dnl  x86_lookup expands to the corresponding `value', or generates an error
dnl  if `target' isn't found.
dnl
dnl  The search is recursive: the first key is compared, and on a mismatch
dnl  the macro calls itself with that key,value pair shifted off.  Running
dnl  out of pairs (fewer than 3 arguments left) raises the m4_error.

define(x86_lookup,
m4_assert_numargs_range(1,999)
`ifelse(eval($#<3),1,
`m4_error(`unrecognised part of x86 instruction: $1
')',
`ifelse(`$1',`$2', `$3',
`x86_lookup(`$1',shift(shift(shift($@))))')')')


dnl  Usage: x86_opcode_regxmm(reg)
dnl
dnl  Validate the given xmm register, and return its number, 0 to 15.

define(x86_opcode_regxmm,
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_regxmm_list)')

dnl  Key,value table for x86_opcode_regxmm: %xmm0 through %xmm15.

define(x86_opcode_regxmm_list,
``%xmm0',0,
`%xmm1',1,
`%xmm2',2,
`%xmm3',3,
`%xmm4',4,
`%xmm5',5,
`%xmm6',6,
`%xmm7',7,
`%xmm8',8,
`%xmm9',9,
`%xmm10',10,
`%xmm11',11,
`%xmm12',12,
`%xmm13',13,
`%xmm14',14,
`%xmm15',15')

dnl  Usage: palignr($imm,%srcreg,%dstreg)
dnl
dnl  Emit a palignr instruction, using a .byte sequence, since obsolete but
dnl  still distributed versions of gas don't know SSSE3 instructions.
dnl
dnl  The bytes are: 0x66; then, only when either register number is 8 or
dnl  more, a prefix byte 0x40 + 4*(dst/8) + (src/8); then 0x0f,0x3a,0x0f;
dnl  then 0xc0 + 8*(dst%8) + (src%8); and last the immediate, which is the
dnl  first argument with its leading character (the `$') removed.

define(`palignr',
m4_assert_numargs(3)
`.byte   0x66,dnl
ifelse(eval(x86_opcode_regxmm($3) >= 8 || x86_opcode_regxmm($2) >= 8),1,
       `eval(0x40+x86_opcode_regxmm($3)/8*4+x86_opcode_regxmm($2)/8),')dnl
0x0f,0x3a,0x0f,dnl
eval(0xc0+x86_opcode_regxmm($3)%8*8+x86_opcode_regxmm($2)%8),dnl
substr($1,1)')


dnl  Usage
dnl
dnl     regnum(op)   raw operand index (so slightly misnamed)
dnl     regnumh(op)  high bit of register operand number
dnl     ix(op)       0 for reg operand, 1 for plain pointer operand.
dnl
dnl  regnum looks the operand up in oplist: plain registers map to 0..15 and
dnl  the pointer forms (%reg) map to 16..31 in the same register order.
dnl  regnumh is bit 3 of that index, and ix is the index divided by 16, so
dnl  it is 1 exactly for the pointer forms.  An operand that is not in oplist
dnl  makes x86_lookup raise an error.

define(`regnum',`x86_lookup(`$1',oplist)')
define(`regnumh',`eval(regnum($1)/8 & 1)')
define(`ix',`eval(regnum($1)/16)')
define(`oplist',
``%rax',   0, `%rcx',   1, `%rdx',   2, `%rbx',   3,
 `%rsp',   4, `%rbp',   5, `%rsi',   6, `%rdi',   7,
 `%r8',    8, `%r9',    9, `%r10',  10, `%r11',  11,
 `%r12',  12, `%r13',  13, `%r14',  14, `%r15',  15,
 `(%rax)',16, `(%rcx)',17, `(%rdx)',18, `(%rbx)',19,
 `(%rsp)',20, `(%rbp)',21, `(%rsi)',22, `(%rdi)',23,
 `(%r8)', 24, `(%r9)', 25, `(%r10)',26, `(%r11)',27,
 `(%r12)',28, `(%r13)',29, `(%r14)',30, `(%r15)',31')

dnl  Usage (by mulx, shlx, shrx, sarx)
dnl
dnl     reg1,reg2,reg3,opc1,opc2
dnl
dnl  or
dnl
dnl     (reg1),reg2,reg3,opc1,opc2
dnl
dnl  where reg1 is any register but rsp,rbp,r12,r13, or
dnl
dnl     off,(reg1),reg2,reg3,opc1,opc2
dnl
dnl  where reg1 is any register but rsp,r12.
dnl
dnl  The exceptions are due to special coding needed for some registers; rsp
dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
dnl  offset-less form.
dnl
dnl  Other addressing forms are not handled.  Invalid forms are not properly
dnl  detected.  Offsets that don't fit one byte are not handled correctly.
dnl
dnl  The expansion is a single .byte directive.  For the 5-argument forms
dnl  the bytes are:
dnl
dnl     0xc4
dnl     0xe2 with bit 5 flipped if reg1 has index bit 3 set, and bit 7
dnl          flipped if reg3 has index bit 3 set
dnl     0x<opc1> minus 8 times the index of reg2
dnl     0x<opc2>
dnl     0xc0 + (reg1 index & 7) + 8*(reg3 index & 7), minus 0xc0 when reg1
dnl          is given in the pointer form (reg1)
dnl
dnl  The 6-argument form emits the same first four bytes computed from the
dnl  operands shifted by one position, then 0x40 + (reg1 index & 7) +
dnl  8*(reg3 index & 7), then the offset reduced to one byte with
dnl  (off + 256) % 256.  With any other argument count only 0xc4 is emitted.

define(`c4_helper',`dnl
.byte   0xc4`'dnl
ifelse(`$#',5,`dnl
,eval(0xe2^32*regnumh($1)^128*regnumh($3))`'dnl
,eval(0x$4-8*regnum($2))`'dnl
,0x$5`'dnl
,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($3))-0xc0*ix($1))`'dnl
',`$#',6,`dnl
,eval(0xe2^32*regnumh($2)^128*regnumh($4))`'dnl
,eval(0x$5-8*regnum($3))`'dnl
,0x$6`'dnl
,eval(0x40+(7 & regnum($2))+8*(7 & regnum($4)))`'dnl
,eval(($1 + 256) % 256)`'dnl
')')


dnl  Usage
dnl
dnl     mulx(reg1,reg2,reg3)
dnl
dnl  or
dnl
dnl     mulx((reg1),reg2,reg3)
dnl
dnl  where reg1 is any register but rsp,rbp,r12,r13, or
dnl
dnl     mulx(off,(reg1),reg2,reg3)
dnl
dnl  where reg1 is any register but rsp,r12.
dnl
dnl  mulx hands its operands to c4_helper unchanged, followed by the opcode
dnl  bytes fb,f6.  Three arguments select the register and (reg1) forms, any
dnl  other count selects the off,(reg1) form.

define(`mulx',`dnl
ifelse(`$#',3,`dnl
c4_helper($1,$2,$3,fb,f6)',`dnl         format 1,2
c4_helper($1,$2,$3,$4,fb,f6)'dnl        format 3
)')


dnl  Usage
dnl
dnl     shlx(reg1,reg2,reg3)
dnl     shrx(reg1,reg2,reg3)
dnl     sarx(reg1,reg2,reg3)
dnl
dnl  or
dnl
dnl     shlx(reg1,(reg2),reg3)
dnl     shrx(reg1,(reg2),reg3)
dnl     sarx(reg1,(reg2),reg3)
dnl
dnl  where reg2 is any register but rsp,rbp,r12,r13, or
dnl
dnl     shlx(reg1,off,(reg2),reg3)
dnl     shrx(reg1,off,(reg2),reg3)
dnl     sarx(reg1,off,(reg2),reg3)
dnl
dnl  where reg2 is any register but rsp,r12.
dnl
dnl  c4_helper wants the register-or-memory operand first and the plain
dnl  register operand second, so the operands are reordered on the way in:
dnl
dnl     3 arguments:  c4_helper(reg2-or-(reg2), reg1, reg3, ...)
dnl     4 arguments:  c4_helper(off, (reg2), reg1, reg3, ...)
dnl
dnl  The 4-argument mapping used to be c4_helper($1,$3,$2,$4,...), which took
dnl  reg1 as the offset and the offset as a register, so the documented call
dnl  form could not expand without an m4 error.  A call written in the old
dnl  accidental order (off,reg1,(reg2),reg3) now fails in x86_lookup with an
dnl  m4 error rather than silently producing wrong bytes.
dnl
dnl  The three macros differ only in the first opcode byte given to
dnl  c4_helper: f9 for shlx, fb for shrx, fa for sarx.

define(`shlx',`dnl
ifelse(`$#',3,`dnl
c4_helper($2,$1,$3,f9,f7)',`dnl         format 1,2
c4_helper($2,$3,$1,$4,f9,f7)'dnl        format 3
)')

define(`shrx',`dnl
ifelse(`$#',3,`dnl
c4_helper($2,$1,$3,fb,f7)',`dnl         format 1,2
c4_helper($2,$3,$1,$4,fb,f7)'dnl        format 3
)')

define(`sarx',`dnl
ifelse(`$#',3,`dnl
c4_helper($2,$1,$3,fa,f7)',`dnl         format 1,2
c4_helper($2,$3,$1,$4,fa,f7)'dnl        format 3
)')


dnl  Usage
dnl
dnl     adcx(reg1,reg2)
dnl     adox(reg1,reg2)
dnl
dnl  or
dnl
dnl     adcx((reg1),reg2)
dnl     adox((reg1),reg2)
dnl
dnl  where reg1 is any register but rsp,rbp,r12,r13, or
dnl
dnl     adcx(off,(reg1),reg2)
dnl     adox(off,(reg1),reg2)
dnl
dnl  where reg1 is any register but rsp,r12.
dnl
dnl  The exceptions are due to special coding needed for some registers; rsp
dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
dnl  offset-less form.
dnl
dnl  Other addressing forms are not handled.  Invalid forms are not properly
dnl  detected.  Offsets that don't fit one byte are not handled correctly.
dnl
dnl  adx_helper(rm,reg) emits the four bytes shared by both instructions, as
dnl  a continuation of a .byte list already started by the caller (each item
dnl  begins with a comma):
dnl
dnl     0x48 + (index bit 3 of rm) + 4*(index bit 3 of reg)
dnl     0x0f, 0x38, 0xf6

define(`adx_helper',`dnl
,eval(0x48+regnumh($1)+4*regnumh($2))`'dnl
,0x0f`'dnl
,0x38`'dnl
,0xf6`'dnl
')

dnl  adx appends the operand bytes after adx_helper.  With 2 arguments that
dnl  is one byte, 0xc0 + (reg1 index & 7) + 8*(reg2 index & 7), minus 0xc0
dnl  when reg1 is given in the pointer form.  With 3 arguments it is
dnl  0x40 + (reg1 index & 7) + 8*(reg2 index & 7) followed by the offset
dnl  reduced to one byte with (off + 256) % 256.  Any other argument count
dnl  expands to nothing.

define(`adx',`dnl
ifelse(`$#',2,`dnl
adx_helper($1,$2)dnl
,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($2))-0xc0*ix($1))`'dnl
',`$#',3,`dnl
adx_helper($2,$3)dnl
,eval(0x40+(7 & regnum($2))+8*(7 & regnum($3)))`'dnl
,eval(($1 + 256) % 256)`'dnl
')')

dnl  adcx and adox start the .byte list with their distinguishing first byte
dnl  (0x66 for adcx, 0xf3 for adox) and pass all arguments on to adx.

define(`adcx',`dnl
.byte   0x66`'dnl
adx($@)')

define(`adox',`dnl
.byte   0xf3`'dnl
adx($@)')

dnl  Switch output back on; this pairs with the divert(-1) at the top of the
dnl  file.
divert`'dnl