divert(-1)

dnl  m4 macros for amd64 assembler.

dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2011,
dnl  2012, 2013 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


dnl  Usage: CPUVEC_FUNCS_LIST
dnl
dnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
dnl  order they appear in that structure.
dnl
dnl  Every name is quoted individually and the whole list is quoted once
dnl  more, so CPUVEC_FUNCS_LIST expands to a comma separated sequence of
dnl  quoted names, one per line, suitable for use as a macro argument list.

define(CPUVEC_FUNCS_LIST,
``add_n',
`addlsh1_n',
`addlsh2_n',
`addmul_1',
`addmul_2',
`bdiv_dbm1c',
`com',
`copyd',
`copyi',
`divexact_1',
`divrem_1',
`gcd_1',
`lshift',
`lshiftc',
`mod_1',
`mod_1_1p',
`mod_1_1p_cps',
`mod_1s_2p',
`mod_1s_2p_cps',
`mod_1s_4p',
`mod_1s_4p_cps',
`mod_34lsub1',
`modexact_1c_odd',
`mul_1',
`mul_basecase',
`mullo_basecase',
`preinv_divrem_1',
`preinv_mod_1',
`redc_1',
`redc_2',
`rshift',
`sqr_basecase',
`sub_n',
`sublsh1_n',
`submul_1'')


dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
dnl
dnl  In the amd64 code we use explicit TEXT and ALIGN() calls in the code,
dnl  since different alignments are wanted in various circumstances.  So for
dnl  instance,
dnl
dnl                  TEXT
dnl                  ALIGN(16)
dnl          PROLOGUE(mpn_add_n)
dnl                  ...
dnl          EPILOGUE()
dnl
dnl  PROLOGUE_cpu itself only declares the symbol: it emits GLOBL for the
dnl  name, a TYPE(name,function) marking, and the label.

define(`PROLOGUE_cpu',
m4_assert_numargs(1)
`	GLOBL	$1
	TYPE($1,`function')
$1:
')


dnl  Usage: ASSERT([cond][,instructions])
dnl
dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
dnl  flags condition to then be satisfied.  For example,
dnl
dnl         ASSERT(ne, `cmpq %rax, %rbx')
dnl
dnl  The instructions can be omitted to just assert a flags condition with
dnl  no extra calculation.  For example,
dnl
dnl         ASSERT(nc)
dnl
dnl  When `instructions' is not empty, a pushfq/popfq is added for
dnl  convenience to preserve the flags, but the instructions themselves must
dnl  preserve any registers that matter.
dnl
dnl  The condition can be omitted to just output the given instructions when
dnl  assertion checking is wanted.  In this case the pushfq/popfq is omitted.
dnl  For example,
dnl
dnl         ASSERT(, `movq %rax, VAR_KEEPVAL')
dnl
dnl  A failed assertion falls through to a ud2 instruction; a satisfied one
dnl  jumps over it to a local label.  ASSERT_counter is incremented on every
dnl  use with a condition so that each such label is distinct.  When
dnl  WANT_ASSERT is not 1 the macro expands to nothing.

define(ASSERT,
m4_assert_numargs_range(1,2)
m4_assert_defined(`WANT_ASSERT')
`ifelse(WANT_ASSERT,1,
`ifelse(`$1',,
`	$2',
`ifelse(`$2',,,
`	pushfq')
	$2
	j`$1'	L(ASSERT_ok`'ASSERT_counter)
	ud2	C assertion failed
L(ASSERT_ok`'ASSERT_counter):
ifelse(`$2',,,`	popfq')
define(`ASSERT_counter',incr(ASSERT_counter))')')')

define(ASSERT_counter,1)


dnl  Usage: LEA(symbol,reg)
dnl
dnl  Load the address of `symbol' into `reg'.  When PIC is defined the
dnl  address is fetched with a mov from symbol@GOTPCREL(%rip); otherwise a
dnl  movabs with the symbol as an immediate operand is used.  The expansion
dnl  ends with a newline.

define(`LEA',`dnl
ifdef(`PIC',
	`mov	$1@GOTPCREL(%rip), $2'
,
	`movabs	`$'$1, $2')
')


dnl  Usage: DEF_OBJECT(name[,align])
dnl         ...
dnl         END_OBJECT(name)
dnl
dnl  DEF_OBJECT switches to RODATA, emits ALIGN(align) and then the label
dnl  `name:'.  When only the name is given, ALIGN(2) is used.  END_OBJECT
dnl  emits a SIZE() for the object, computed as the distance from the label
dnl  to the current location.

define(`DEF_OBJECT',
m4_assert_numargs_range(1,2)
`	RODATA
	ALIGN(ifelse($#,1,2,$2))
$1:
')

define(`END_OBJECT',
m4_assert_numargs(1)
`	SIZE(`$1',.-`$1')')


dnl  Usage: R32(reg64)
dnl
dnl  Expand to the 32-bit name of the given 64-bit register, for instance
dnl  R32(%rcx) gives %ecx and R32(%r9) gives %r9d.  Only the registers in
dnl  the table below are recognised (%rsp is not among them); any other
dnl  argument makes R32 expand to nothing.

define(`R32',
	`ifelse($1,`%rax',`%eax',
		$1,`%rbx',`%ebx',
		$1,`%rcx',`%ecx',
		$1,`%rdx',`%edx',
		$1,`%rsi',`%esi',
		$1,`%rdi',`%edi',
		$1,`%rbp',`%ebp',
		$1,`%r8',`%r8d',
		$1,`%r9',`%r9d',
		$1,`%r10',`%r10d',
		$1,`%r11',`%r11d',
		$1,`%r12',`%r12d',
		$1,`%r13',`%r13d',
		$1,`%r14',`%r14d',
		$1,`%r15',`%r15d')')
dnl  Usage: R8(reg64)
dnl
dnl  Expand to the low-byte name of the given 64-bit register, for instance
dnl  R8(%rcx) gives %cl and R8(%r9) gives %r9b.  Only the registers in the
dnl  table below are recognised (%rsp is not among them); any other argument
dnl  makes R8 expand to nothing.

define(`R8',
	`ifelse($1,`%rax',`%al',
		$1,`%rbx',`%bl',
		$1,`%rcx',`%cl',
		$1,`%rdx',`%dl',
		$1,`%rsi',`%sil',
		$1,`%rdi',`%dil',
		$1,`%rbp',`%bpl',
		$1,`%r8',`%r8b',
		$1,`%r9',`%r9b',
		$1,`%r10',`%r10b',
		$1,`%r11',`%r11b',
		$1,`%r12',`%r12b',
		$1,`%r13',`%r13b',
		$1,`%r14',`%r14b',
		$1,`%r15',`%r15b')')


dnl  Usage: CALL(funcname)
dnl
dnl  Emit a call to GSYM_PREFIX`'funcname.  When PIC is defined the call is
dnl  made through the PLT (funcname@PLT).

ifdef(`PIC',
  `define(`CALL',`call	GSYM_PREFIX`'$1@PLT')',
  `define(`CALL',`call	GSYM_PREFIX`'$1')')


dnl  Usage: JUMPTABSECT
dnl
dnl  Section directive selecting .data.rel.ro.local, for jump tables built
dnl  from JMPENT entries.

define(`JUMPTABSECT', `.section	.data.rel.ro.local,"aw",@progbits')


dnl  Usage: JMPENT(targlabel,tablabel)
dnl
dnl  Emit one jump table entry.  When PIC is defined the entry is a .long
dnl  holding targlabel-tablabel; otherwise it is a .quad holding targlabel
dnl  itself and tablabel is not used.

define(`JMPENT',`dnl
ifdef(`PIC',
	`.long	$1-$2'
,
	`.quad	$1'
)')


dnl  These macros are defined just for DOS64, where they provide calling
dnl  sequence glue code.  Here they expand to nothing.

define(`FUNC_ENTRY',`')
define(`FUNC_EXIT',`')


dnl  Target ABI macros.  IFDOS discards its argument, IFSTD and IFELF
dnl  expand to their argument.

define(`IFDOS',   `')
define(`IFSTD',   `$1')
define(`IFELF',   `$1')


dnl  Usage: PROTECT(symbol)
dnl
dnl  Used for private GMP symbols that should never be overridden by users.
dnl  This can save reloc entries and improve shlib sharing as well as
dnl  application startup times.

define(`PROTECT',  `.hidden $1')


dnl  Usage: x86_lookup(target, key,value, key,value, ...)
dnl
dnl  Look for `target' among the `key' parameters.
dnl
dnl  x86_lookup expands to the corresponding `value', or generates an error
dnl  if `target' isn't found.
dnl
dnl  The search is recursive: if the first key does not match, that
dnl  key,value pair is dropped and the remaining pairs are searched.  The
dnl  error is raised once fewer than three arguments remain.

define(x86_lookup,
m4_assert_numargs_range(1,999)
`ifelse(eval($#<3),1,
`m4_error(`unrecognised part of x86 instruction: $1
')',
`ifelse(`$1',`$2', `$3',
`x86_lookup(`$1',shift(shift(shift($@))))')')')


dnl  Usage: x86_opcode_regxmm(reg)
dnl
dnl  Validate the given xmm register, and return its number, 0 to 15.
define(x86_opcode_regxmm,
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_regxmm_list)')

dnl  Key,value pairs for x86_lookup, mapping the register names %xmm0
dnl  through %xmm15 to their numbers 0 through 15.

define(x86_opcode_regxmm_list,
``%xmm0',0,
`%xmm1',1,
`%xmm2',2,
`%xmm3',3,
`%xmm4',4,
`%xmm5',5,
`%xmm6',6,
`%xmm7',7,
`%xmm8',8,
`%xmm9',9,
`%xmm10',10,
`%xmm11',11,
`%xmm12',12,
`%xmm13',13,
`%xmm14',14,
`%xmm15',15')

dnl  Usage: palignr($imm,%srcreg,%dstreg)
dnl
dnl  Emit a palignr instruction, using a .byte sequence, since obsolete but
dnl  still distributed versions of gas don't know SSSE3 instructions.
dnl
dnl  The bytes produced are, in order:
dnl
dnl    0x66              operand size prefix
dnl    0x40+4*D+S        only when either register number is 8 or above;
dnl                      D is dstreg/8 and S is srcreg/8
dnl    0x0f,0x3a,0x0f    opcode
dnl    0xc0+8*d+s        register-to-register operand byte; d is dstreg%8
dnl                      and s is srcreg%8
dnl    imm               the first argument with its first character (the
dnl                      `$' in the usage above) removed
dnl
dnl  Both registers are checked by x86_opcode_regxmm, so an operand that is
dnl  not %xmm0 to %xmm15 gives an m4 error.

define(`palignr',
m4_assert_numargs(3)
`.byte	0x66,dnl
ifelse(eval(x86_opcode_regxmm($3) >= 8 || x86_opcode_regxmm($2) >= 8),1,
       `eval(0x40+x86_opcode_regxmm($3)/8*4+x86_opcode_regxmm($2)/8),')dnl
0x0f,0x3a,0x0f,dnl
eval(0xc0+x86_opcode_regxmm($3)%8*8+x86_opcode_regxmm($2)%8),dnl
substr($1,1)')


divert`'dnl