1divert(-1) 2 3dnl m4 macros for amd64 assembler. 4 5dnl Copyright 1999-2005, 2008, 2009, 2011-2013 Free Software Foundation, Inc. 6 7dnl This file is part of the GNU MP Library. 8dnl 9dnl The GNU MP Library is free software; you can redistribute it and/or modify 10dnl it under the terms of either: 11dnl 12dnl * the GNU Lesser General Public License as published by the Free 13dnl Software Foundation; either version 3 of the License, or (at your 14dnl option) any later version. 15dnl 16dnl or 17dnl 18dnl * the GNU General Public License as published by the Free Software 19dnl Foundation; either version 2 of the License, or (at your option) any 20dnl later version. 21dnl 22dnl or both in parallel, as here. 23dnl 24dnl The GNU MP Library is distributed in the hope that it will be useful, but 25dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 26dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 27dnl for more details. 28dnl 29dnl You should have received copies of the GNU General Public License and the 30dnl GNU Lesser General Public License along with the GNU MP Library. If not, 31dnl see https://www.gnu.org/licenses/. 32 33 34dnl Usage: CPUVEC_FUNCS_LIST 35dnl 36dnl A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the 37dnl order they appear in that structure. 
dnl  CPUVEC_FUNCS_LIST expands to a comma separated list of quoted function
dnl  names.  The macro name is quoted in the define, like the other
dnl  definitions in this file, so that defining it again does not expand an
dnl  existing definition while the arguments of define are being collected.

define(`CPUVEC_FUNCS_LIST',
``add_n',
`addlsh1_n',
`addlsh2_n',
`addmul_1',
`addmul_2',
`bdiv_dbm1c',
`cnd_add_n',
`cnd_sub_n',
`com',
`copyd',
`copyi',
`divexact_1',
`divrem_1',
`gcd_1',
`lshift',
`lshiftc',
`mod_1',
`mod_1_1p',
`mod_1_1p_cps',
`mod_1s_2p',
`mod_1s_2p_cps',
`mod_1s_4p',
`mod_1s_4p_cps',
`mod_34lsub1',
`modexact_1c_odd',
`mul_1',
`mul_basecase',
`mullo_basecase',
`preinv_divrem_1',
`preinv_mod_1',
`redc_1',
`redc_2',
`rshift',
`sqr_basecase',
`sub_n',
`sublsh1_n',
`submul_1'')


dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
dnl
dnl  In the amd64 code we use explicit TEXT and ALIGN() calls in the code,
dnl  since different alignments are wanted in various circumstances.  So for
dnl  instance,
dnl
dnl                  TEXT
dnl                  ALIGN(16)
dnl          PROLOGUE(mpn_add_n)
dnl                  ...
dnl          EPILOGUE()
dnl
dnl  PROLOGUE_cpu takes exactly one argument, the symbol name.  It emits a
dnl  GLOBL line, a TYPE(...,function) line and the label itself; no section
dnl  or alignment directive is emitted here.

define(`PROLOGUE_cpu',
m4_assert_numargs(1)
`	GLOBL	$1
	TYPE($1,`function')
$1:
')


dnl  Usage: ASSERT([cond][,instructions])
dnl
dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
dnl  flags condition to then be satisfied.  For example,
dnl
dnl         ASSERT(ne, `cmpq %rax, %rbx')
dnl
dnl  The instructions can be omitted to just assert a flags condition with
dnl  no extra calculation.  For example,
dnl
dnl         ASSERT(nc)
dnl
dnl  When `instructions' is not empty, a pushfq/popfq is added for
dnl  convenience to preserve the flags, but the instructions themselves must
dnl  preserve any registers that matter.
dnl
dnl  The condition can be omitted to just output the given instructions when
dnl  assertion checking is wanted.  In this case the pushfq/popfq is omitted.
dnl  For example,
dnl
dnl         ASSERT(, `movq %rax, VAR_KEEPVAL')
dnl
dnl  Each checked assertion jumps over a ud2 to a label built from
dnl  ASSERT_counter, and then increments ASSERT_counter so that the next
dnl  assertion gets a fresh label.  WANT_ASSERT must be defined.

define(`ASSERT',
m4_assert_numargs_range(1,2)
m4_assert_defined(`WANT_ASSERT')
`ifelse(WANT_ASSERT,1,
`ifelse(`$1',,
`	$2',
`ifelse(`$2',,,
`	pushfq')
	$2
	`j$1'	L(ASSERT_ok`'ASSERT_counter)
	ud2	C assertion failed
L(ASSERT_ok`'ASSERT_counter):
ifelse(`$2',,,`	popfq')
define(`ASSERT_counter',incr(ASSERT_counter))')')')

define(`ASSERT_counter',1)


dnl  Usage: LEA(symbol,reg)
dnl
dnl  Load the address of symbol into reg.  When PIC is defined the address
dnl  is fetched with a mov from symbol@GOTPCREL(%rip), otherwise a movabs of
dnl  the immediate symbol value is used.

define(`LEA',`dnl
ifdef(`PIC',
	`mov	$1@GOTPCREL(%rip), $2'
,
	`movabs	`$'$1, $2')
')


dnl  Usage: DEF_OBJECT(name[,align])
dnl         END_OBJECT(name)
dnl
dnl  DEF_OBJECT switches to RODATA, emits ALIGN(align) and the label name.
dnl  The alignment argument passed to ALIGN defaults to 2 when only the name
dnl  is given.  END_OBJECT emits a SIZE for name covering everything from
dnl  the label up to the current location.

define(`DEF_OBJECT',
m4_assert_numargs_range(1,2)
`	RODATA
	ALIGN(ifelse($#,1,2,$2))
$1:
')

define(`END_OBJECT',
m4_assert_numargs(1)
`	SIZE(`$1',.-`$1')')


dnl  Usage: R32(reg64)
dnl         R8(reg64)
dnl
dnl  Expand to the 32-bit or 8-bit name of the given 64-bit register.  The
dnl  registers known are rax, rbx, rcx, rdx, rsi, rdi, rbp and r8 to r15.
dnl  Any other argument is reported through m4_error, in the same way
dnl  x86_lookup reports an unknown key, rather than silently expanding to
dnl  nothing.

define(`R32',
	`ifelse($1,`%rax',`%eax',
		$1,`%rbx',`%ebx',
		$1,`%rcx',`%ecx',
		$1,`%rdx',`%edx',
		$1,`%rsi',`%esi',
		$1,`%rdi',`%edi',
		$1,`%rbp',`%ebp',
		$1,`%r8',`%r8d',
		$1,`%r9',`%r9d',
		$1,`%r10',`%r10d',
		$1,`%r11',`%r11d',
		$1,`%r12',`%r12d',
		$1,`%r13',`%r13d',
		$1,`%r14',`%r14d',
		$1,`%r15',`%r15d',
		`m4_error(`unrecognised 64-bit register for 32-bit name: $1
')')')
define(`R8',
	`ifelse($1,`%rax',`%al',
		$1,`%rbx',`%bl',
		$1,`%rcx',`%cl',
		$1,`%rdx',`%dl',
		$1,`%rsi',`%sil',
		$1,`%rdi',`%dil',
		$1,`%rbp',`%bpl',
		$1,`%r8',`%r8b',
		$1,`%r9',`%r9b',
		$1,`%r10',`%r10b',
		$1,`%r11',`%r11b',
		$1,`%r12',`%r12b',
		$1,`%r13',`%r13b',
		$1,`%r14',`%r14b',
		$1,`%r15',`%r15b',
		`m4_error(`unrecognised 64-bit register for 8-bit name: $1
')')')


dnl  Usage: CALL(funcname)
dnl
dnl  Call GSYM_PREFIX`'funcname, going through the PLT when PIC is defined.

define(`CALL',`dnl
ifdef(`PIC',
	`call	GSYM_PREFIX`'$1@PLT'
,
	`call	GSYM_PREFIX`'$1'
)')


dnl  Section directive used for jump tables.

define(`JUMPTABSECT', `.section	.data.rel.ro.local,"aw",@progbits')


dnl  Usage: JMPENT(targlabel,tablabel)
dnl
dnl  One jump table entry.  When PIC is defined the entry is a .long holding
dnl  targlabel-tablabel, otherwise it is a .quad holding targlabel.

define(`JMPENT',`dnl
ifdef(`PIC',
	`.long	$1-$2'dnl
,
	`.quad	$1'dnl
)')


dnl  These macros are defined just for DOS64, where they provide calling
dnl  sequence glue code.
dnl  FUNC_ENTRY and FUNC_EXIT expand to nothing in this file.

define(`FUNC_ENTRY',`')
define(`FUNC_EXIT',`')


dnl  Target ABI macros.
dnl
dnl  IFDOS discards its argument, IFSTD and IFELF expand to their argument.

define(`IFDOS',   `')
define(`IFSTD',   `$1')
define(`IFELF',   `$1')


dnl  Usage: PROTECT(symbol)
dnl
dnl  Used for private GMP symbols that should never be overridden by users.
dnl  This can save reloc entries and improve shlib sharing as well as
dnl  application startup times.

define(`PROTECT',  `.hidden $1')


dnl  Usage: x86_lookup(target, key,value, key,value, ...)
dnl
dnl  Look for `target' among the `key' parameters.
dnl
dnl  x86_lookup expands to the corresponding `value', or generates an error
dnl  if `target' isn't found.
dnl
dnl  The search is a recursion that drops one key,value pair per step; the
dnl  error is raised once fewer than three arguments remain.

define(`x86_lookup',
m4_assert_numargs_range(1,999)
`ifelse(eval($#<3),1,
`m4_error(`unrecognised part of x86 instruction: $1
')',
`ifelse(`$1',`$2', `$3',
`x86_lookup(`$1',shift(shift(shift($@))))')')')


dnl  Usage: x86_opcode_regxmm(reg)
dnl
dnl  Validate the given xmm register, and return its number, 0 to 15.

define(`x86_opcode_regxmm',
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_regxmm_list)')

define(`x86_opcode_regxmm_list',
``%xmm0',0,
`%xmm1',1,
`%xmm2',2,
`%xmm3',3,
`%xmm4',4,
`%xmm5',5,
`%xmm6',6,
`%xmm7',7,
`%xmm8',8,
`%xmm9',9,
`%xmm10',10,
`%xmm11',11,
`%xmm12',12,
`%xmm13',13,
`%xmm14',14,
`%xmm15',15')

dnl  Usage: palignr($imm,%srcreg,%dstreg)
dnl
dnl  Emit a palignr instruction, using a .byte sequence, since obsolete but
dnl  still distributed versions of gas don't know SSSE3 instructions.
dnl  palignr takes ($imm,%srcreg,%dstreg).  The bytes emitted are 0x66, an
dnl  optional byte 0x40+4*(dst/8)+(src/8) when either register number is 8 or
dnl  more, then 0x0f,0x3a,0x0f, a byte 0xc0+8*(dst%8)+(src%8), and finally the
dnl  immediate.
dnl
dnl  The immediate must be written with its leading dollar sign.  The sign is
dnl  stripped before the value is emitted; an immediate without it is
dnl  reported through m4_error, since blindly dropping the first character
dnl  would silently encode a different value.

define(`palignr',
m4_assert_numargs(3)
`.byte	0x66,dnl
ifelse(eval(x86_opcode_regxmm($3) >= 8 || x86_opcode_regxmm($2) >= 8),1,
       `eval(0x40+x86_opcode_regxmm($3)/8*4+x86_opcode_regxmm($2)/8),')dnl
0x0f,0x3a,0x0f,dnl
eval(0xc0+x86_opcode_regxmm($3)%8*8+x86_opcode_regxmm($2)%8),dnl
ifelse(substr($1,0,1),`$',`substr($1,1)',
`m4_error(`PALIGNR immediate must start with a dollar sign: $1
')')')


dnl  Usage
dnl
dnl     regnum(op)   raw operand index (so slightly misnamed)
dnl     regnumh(op)  high bit of register operand number
dnl     ix(op)       0 for reg operand, 1 for plain pointer operand.
dnl
dnl  oplist maps the sixteen registers to 0..15 and the same registers in
dnl  parentheses to 16..31, so regnum/16 tells the two operand kinds apart
dnl  and (regnum/8)&1 is bit 3 of the register number in both cases.

define(`regnum',`x86_lookup(`$1',oplist)')
define(`regnumh',`eval(regnum($1)/8 & 1)')
define(`ix',`eval(regnum($1)/16)')
define(`oplist',
``%rax',   0, `%rcx',   1, `%rdx',   2,  `%rbx',   3,
 `%rsp',   4, `%rbp',   5, `%rsi',   6,  `%rdi',   7,
 `%r8',    8, `%r9',    9, `%r10',  10,  `%r11',  11,
 `%r12',  12, `%r13',  13, `%r14',  14,  `%r15',  15,
 `(%rax)',16, `(%rcx)',17, `(%rdx)',18,  `(%rbx)',19,
 `(%rsp)',20, `(%rbp)',21, `(%rsi)',22,  `(%rdi)',23,
 `(%r8)', 24, `(%r9)', 25, `(%r10)',26,  `(%r11)',27,
 `(%r12)',28, `(%r13)',29, `(%r14)',30,  `(%r15)',31')


dnl  Usage
dnl
dnl     mulx(reg1,reg2,reg3)
dnl
dnl  or
dnl
dnl     mulx((reg1),reg2,reg3)
dnl
dnl  where reg1 is any register but rsp,rbp,r12,r13, or
dnl
dnl     mulx(off,(reg1),reg2,reg3)
dnl
dnl  where reg1 is any register but rsp,r12.
dnl
dnl  The exceptions are due to special coding needed for some registers; rsp
dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
dnl  offset-less form.
dnl
dnl  Other addressing forms are not handled.  Do not rely on invalid forms
dnl  being detected.  Offsets must fit in one signed byte.
dnl  mulx is emitted as a .byte sequence: 0xc4, a byte carrying the inverted
dnl  high bits of the memory/source register and of the last register, a
dnl  byte carrying the inverted number of the middle register, the opcode
dnl  0xf6, and a ModRM byte.  The four argument form uses the one byte
dnl  displacement ModRM mode (0x40) and appends the offset as a final byte.
dnl
dnl  A call with any other number of arguments is reported through m4_error
dnl  instead of silently emitting a truncated byte sequence, and so is an
dnl  offset outside -128..127, which would otherwise be wrapped into a
dnl  different displacement.

define(`mulx',`dnl
.byte	0xc4`'dnl
ifelse(`$#',3,`dnl
,eval(0xe2^32*regnumh($1)^128*regnumh($3))`'dnl
,eval(0xfb-8*regnum($2))`'dnl
,0xf6`'dnl
,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($3))-0xc0*ix($1))`'dnl
',`$#',4,`dnl
,eval(0xe2^32*regnumh($2)^128*regnumh($4))`'dnl
,eval(0xfb-8*regnum($3))`'dnl
,0xf6`'dnl
,eval(0x40+(7 & regnum($2))+8*(7 & regnum($4)))`'dnl
,ifelse(eval(($1) < (-128) || ($1) > 127),1,
`m4_error(`MULX offset does not fit in one signed byte: $1
')',
`eval(($1 + 256) % 256)')`'dnl
',`dnl
m4_error(`MULX needs 3 or 4 operands but was given $#
')`'dnl
')')

dnl  Usage
dnl
dnl     adcx(reg1,reg2)
dnl     adox(reg1,reg2)
dnl
dnl  or
dnl
dnl     adcx((reg1),reg2)
dnl     adox((reg1),reg2)
dnl
dnl  where reg1 is any register but rsp,rbp,r12,r13, or
dnl
dnl     adcx(off,(reg1),reg2)
dnl     adox(off,(reg1),reg2)
dnl
dnl  where reg1 is any register but rsp,r12.
dnl
dnl  The exceptions are due to special coding needed for some registers; rsp
dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
dnl  offset-less form.
dnl
dnl  Other addressing forms are not handled.  Invalid register forms are not
dnl  properly detected.  A wrong number of operands, or an offset that does
dnl  not fit in one signed byte, is reported through m4_error.
dnl
dnl  adcx and adox differ only in the first byte emitted (0x66 or 0xf3).
dnl  adx_helper emits the byte 0x48 plus the high register bits followed by
dnl  0x0f,0x38,0xf6, and adx appends the ModRM byte and, in the three
dnl  argument form, the offset byte.

define(`adx_helper',`dnl
,eval(0x48+regnumh($1)+4*regnumh($2))`'dnl
,0x0f`'dnl
,0x38`'dnl
,0xf6`'dnl
')

define(`adx',`dnl
ifelse(`$#',2,`dnl
adx_helper($1,$2)dnl
,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($2))-0xc0*ix($1))`'dnl
',`$#',3,`dnl
adx_helper($2,$3)dnl
,eval(0x40+(7 & regnum($2))+8*(7 & regnum($3)))`'dnl
,ifelse(eval(($1) < (-128) || ($1) > 127),1,
`m4_error(`ADCX or ADOX offset does not fit in one signed byte: $1
')',
`eval(($1 + 256) % 256)')`'dnl
',`dnl
m4_error(`ADCX or ADOX needs 2 or 3 operands but was given $#
')`'dnl
')')

define(`adcx',`dnl
.byte	0x66`'dnl
adx($@)')

define(`adox',`dnl
.byte	0xf3`'dnl
adx($@)')

divert`'dnl