dnl  x86 fat binary entrypoints.

dnl  Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
dnl  Torbjorn Granlund (port to x86_64)

dnl  Copyright 2003, 2009, 2011-2014, 2016 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


dnl  Forcibly disable profiling.
dnl
dnl  The entrypoints and inits are small enough not to worry about, the real
dnl  routines arrived at will have any profiling.  Also, the way the code
dnl  here ends with a jump means we won't work properly with the
dnl  "instrument" profiling scheme anyway.

define(`WANT_PROFILING',no)


dnl  We define PRETEND_PIC as a helper symbol, then use it for suppressing
dnl  normal, fast call code, since that triggers problems on Darwin, OpenBSD
dnl  and some versions of GNU/Linux.  This will go away when symbol hiding is
dnl  finished.
dnl  PRETEND_PIC is defined for Darwin, OpenBSD and GNU/Linux hosts, and for
dnl  every PIC build.

ifdef(`DARWIN',
`define(`PRETEND_PIC')')
ifdef(`OPENBSD',
`define(`PRETEND_PIC')')
ifdef(`LINUX',
`define(`PRETEND_PIC')')
ifdef(`PIC',
`define(`PRETEND_PIC')')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

	TEXT

dnl  Usage: FAT_ENTRY(name, offset)
dnl
dnl  Emit a fat binary entrypoint function of the given name.  This is the
dnl  normal entry for applications, eg. __gmpn_add_n.
dnl
dnl  The code simply jumps through the function pointer in __gmpn_cpuvec at
dnl  the given "offset" (in bytes).  One of three forms is emitted:
dnl
dnl    HOST_DOS64     a single %rip-relative indirect jmp
dnl    PRETEND_PIC    LEA of __gmpn_cpuvec into %rax followed by an indirect
dnl                   jmp through offset(%rax); %rax is overwritten
dnl    otherwise      a single indirect jmp through the absolute address
dnl                   __gmpn_cpuvec+offset
dnl
dnl  For non-PIC, the jumps are 5 bytes each, aligning them to 8 should be
dnl  fine for all x86s.
dnl
dnl  For ELF/DARWIN PIC, the jumps are 20 bytes each, and are best aligned to
dnl  16 to ensure at least the first two instructions don't cross a cache line
dnl  boundary.
dnl
dnl  For DOS64, the jumps are 6 bytes.  The same form works also for GNU/Linux
dnl  (at least with certain assembler/linkers) but FreeBSD 8.2 crashes.  Not
dnl  tested on Darwin, Solaris, NetBSD, etc.
dnl
dnl  Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE
dnl  grepping in configure, stopping that code trying to eval something with
dnl  $1 in it.

define(FAT_ENTRY,
m4_assert_numargs(2)
`ifdef(`HOST_DOS64',
`	ALIGN(8)
`'PROLOGUE($1)
	jmp	*$2+GSYM_PREFIX`'__gmpn_cpuvec(%rip)
EPILOGUE()
',
`	ALIGN(ifdef(`PIC',16,8))
`'PROLOGUE($1)
ifdef(`PRETEND_PIC',
`	LEA(	GSYM_PREFIX`'__gmpn_cpuvec, %rax)
	jmp	*$2(%rax)
',`dnl non-PIC
	jmp	*GSYM_PREFIX`'__gmpn_cpuvec+$2
')
EPILOGUE()
')')


dnl  FAT_ENTRY for each CPUVEC_FUNCS_LIST
dnl
dnl  CPUVEC_offset starts at 0 and advances by 8 after each function, so
dnl  entry number k jumps through the pointer at byte offset 8*k.

define(`CPUVEC_offset',0)
foreach(i,
`FAT_ENTRY(MPN(i),CPUVEC_offset)
define(`CPUVEC_offset',eval(CPUVEC_offset + 8))',
CPUVEC_FUNCS_LIST)


dnl  Usage: FAT_INIT(name, offset)
dnl
dnl  Emit a fat binary initializer function of the given name.  These
dnl  functions are the initial values for the pointers in __gmpn_cpuvec.
dnl
dnl  The code simply calls __gmpn_cpuvec_init, and then jumps back through
dnl  the __gmpn_cpuvec pointer selected by "offset".  __gmpn_cpuvec_init will
dnl  have stored the address of the selected implementation there.
dnl
dnl  For FAT_INIT the "offset" is the index of the pointer within
dnl  __gmpn_cpuvec, not a byte count: the loop below advances it by 1 per
dnl  function and fat_init scales it by 8 when indexing.  It travels in %al
dnl  and is zero-extended from there, so it has to stay below 256.
dnl
dnl  Only one of these routines will be executed, and only once, since after
dnl  that all the __gmpn_cpuvec pointers go to real routines.  So there's no
dnl  need for anything special here, just something small and simple.  To
dnl  keep code size down, "fat_init" is a shared bit of code, arrived at
dnl  with the index in %al.  %al is used since the movb instruction is 2
dnl  bytes where %eax would be 4.
dnl
dnl  Note having `PROLOGUE in FAT_INIT obscures that PROLOGUE from the
dnl  HAVE_NATIVE grepping in configure, preventing that code trying to eval
dnl  something with $1 in it.
dnl
dnl  We need to preserve parameter registers over the __gmpn_cpuvec_init call

define(FAT_INIT,
m4_assert_numargs(2)
`PROLOGUE($1)
	mov	$`'$2, %al
	jmp	L(fat_init)
EPILOGUE()
')

dnl  FAT_INIT for each CPUVEC_FUNCS_LIST
dnl
dnl  CPUVEC_offset starts at 0 and advances by 1 after each function.

define(`CPUVEC_offset',0)
foreach(i,
`FAT_INIT(MPN(i`'_init),CPUVEC_offset)
define(`CPUVEC_offset',eval(CPUVEC_offset + 1))',
CPUVEC_FUNCS_LIST)

L(fat_init):
	C al	__gmpn_cpuvec pointer index (scaled by 8 at the final jump)

	movzbl	%al, %eax

	C Save the argument registers of the function being dispatched, plus
	C the index in %rax, across the call.  %rdi and %rsi are saved only
	C in the IFSTD case.
IFSTD(`	push	%rdi	')
IFSTD(`	push	%rsi	')
	push	%rdx
	push	%rcx
	push	%r8
	push	%r9
	push	%rax

	C The DOS64 (Microsoft x64) calling convention lets the callee use the
	C 32 bytes just above its return address.  Without reserving them the
	C callee could overwrite the %rax, %r9, %r8 and %rcx values pushed
	C above.  Assuming the usual rsp = 8 mod 16 at function entry, the 7
	C (STD64) or 5 (DOS64) pushes leave the stack 16-byte aligned, and the
	C extra 32 bytes keep it so.
ifdef(`HOST_DOS64',`	sub	$32, %rsp	')
	CALL(	__gmpn_cpuvec_init)
ifdef(`HOST_DOS64',`	add	$32, %rsp	')

	pop	%rax
	pop	%r9
	pop	%r8
	pop	%rcx
	pop	%rdx
IFSTD(`	pop	%rsi	')
IFSTD(`	pop	%rdi	')

	C Tail-jump through the pointer at index %rax in __gmpn_cpuvec.
ifdef(`PRETEND_PIC',`
	LEA(	GSYM_PREFIX`'__gmpn_cpuvec, %r10)
	jmp	*(%r10,%rax,8)
',`dnl non-PIC
	jmp	*GSYM_PREFIX`'__gmpn_cpuvec(,%rax,8)
')


C long __gmpn_cpuid (char dst[12], int id);
C
C This is called only 3 times, so just something simple and compact is fine.
C
C The rcx/ecx zeroing here is needed for the BMI2 check.

dnl  m4 names for the registers holding the two arguments once FUNC_ENTRY(2)
dnl  has run (presumably FUNC_ENTRY remaps the DOS64 argument registers onto
dnl  these; confirm against its definition).

define(`rp',  `%rdi')
define(`idx', `%rsi')

PROLOGUE(__gmpn_cpuid)
	FUNC_ENTRY(2)

	C cpuid overwrites ebx, which is callee-saved, so park rbx in r8
	C for the duration.
	mov	%rbx, %r8

	C Inputs to cpuid: ecx = 0, eax = idx.
	xor	%ecx, %ecx
	mov	R32(idx), R32(%rax)
	cpuid

	C Store the 12 result bytes as ebx, edx, ecx at rp+0, rp+4, rp+8.
	C eax is left as cpuid set it and becomes the return value.
	mov	%ecx, 8(rp)
	mov	%edx, 4(rp)
	mov	%ebx, (rp)

	mov	%r8, %rbx
	FUNC_EXIT()
	ret
EPILOGUE()