dnl  x86 fat binary entrypoints.

dnl  Copyright 2003, 2012, 2014 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


dnl  Forcibly disable profiling.
dnl
dnl  The entrypoints and inits are small enough not to worry about, the real
dnl  routines arrived at will have any profiling.  Also, the way the code
dnl  here ends with a jump means we won't work properly with the
dnl  "instrument" profiling scheme anyway.

define(`WANT_PROFILING',no)


	TEXT


dnl  Usage: FAT_ENTRY(name, offset)
dnl
dnl  Emit a fat binary entrypoint function of the given name.  This is the
dnl  normal entry for applications, eg. __gmpn_add_n.
dnl
dnl  The code simply jumps through the function pointer in __gmpn_cpuvec at
dnl  the given "offset" (in bytes).
dnl
dnl  For non-PIC, the jumps are 5 bytes each, aligning them to 8 should be
dnl  fine for all x86s.
dnl
dnl  For PIC, the jumps are 20 bytes each, and are best aligned to 16 to
dnl  ensure at least the first two instructions don't cross a cache line
dnl  boundary.
dnl
dnl  Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE
dnl  grepping in configure, stopping that code trying to eval something with
dnl  $1 in it.
dnl
dnl  Three code shapes are emitted, selected by ifdef:
dnl    PIC and DARWIN   call L(movl_eip_edx), load the __gmpn_cpuvec address
dnl                     from its non-lazy pointer, jump through offset(%edx)
dnl    PIC otherwise    call L(movl_eip_edx), add _GLOBAL_OFFSET_TABLE_ to
dnl                     %edx, load the __gmpn_cpuvec address from the GOT,
dnl                     jump through offset(%edx)
dnl    non-PIC          one indirect jmp through __gmpn_cpuvec+offset
dnl
dnl  The L(entry_here<offset>) label carries the offset in its name, so each
dnl  expansion with a different offset gets a distinct label.

define(FAT_ENTRY,
m4_assert_numargs(2)
`	ALIGN(ifdef(`PIC',16,8))
`'PROLOGUE($1)dnl
ifdef(`PIC',`dnl
ifdef(`DARWIN',`
	call	L(movl_eip_edx)
	movl	L(___gmpn_cpuvec)$non_lazy_ptr-.(%edx), %edx
	jmp	*m4_empty_if_zero($2)(%edx)
',`dnl
	call	L(movl_eip_edx)
L(entry_here$2):
	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(entry_here$2)], %edx
	movl	GSYM_PREFIX`'__gmpn_cpuvec@GOT(%edx), %edx
	jmp	*m4_empty_if_zero($2)(%edx)
')
',`dnl non-PIC
	jmp	*GSYM_PREFIX`'__gmpn_cpuvec+$2
')
EPILOGUE()
')


dnl  FAT_ENTRY for each CPUVEC_FUNCS_LIST
dnl
dnl  CPUVEC_offset starts at 0 and is advanced by 4 after each entry, so
dnl  successive names in the list get successive 4-byte slots.

define(`CPUVEC_offset',0)
foreach(i,
`FAT_ENTRY(MPN(i),CPUVEC_offset)
define(`CPUVEC_offset',eval(CPUVEC_offset + 4))',
CPUVEC_FUNCS_LIST)

dnl  PIC-only helper shared by all the entrypoints: copies the word at the
dnl  top of the stack (the return address pushed by the call that got here)
dnl  into %edx.  On Darwin the non-lazy pointer slot for ___gmpn_cpuvec is
dnl  emitted here too, after which the section is switched back to TEXT.

ifdef(`PIC',`
	ALIGN(8)
L(movl_eip_edx):
	movl	(%esp), %edx
	ret_internal
ifdef(`DARWIN',`
	.section	__IMPORT,__pointers,non_lazy_symbol_pointers
L(___gmpn_cpuvec)$non_lazy_ptr:
	.indirect_symbol	___gmpn_cpuvec
	.long	0
	TEXT
')
')


dnl  Usage: FAT_INIT(name, offset)
dnl
dnl  Emit a fat binary initializer function of the given name.  These
dnl  functions are the initial values for the pointers in __gmpn_cpuvec.
dnl
dnl  The code simply calls __gmpn_cpuvec_init, and then jumps back through
dnl  the __gmpn_cpuvec pointer, at the given "offset" (in bytes).
dnl  __gmpn_cpuvec_init will have stored the address of the selected
dnl  implementation there.
dnl
dnl  Only one of these routines will be executed, and only once, since after
dnl  that all the __gmpn_cpuvec pointers go to real routines.
dnl  So there's no need for anything special here, just something small and
dnl  simple.  To keep code size down, "fat_init" is a shared bit of code,
dnl  arrived at with the offset in %al.  %al is used since the movb
dnl  instruction is 2 bytes where %eax would be 4.
dnl
dnl  Note having `PROLOGUE in FAT_INIT obscures that PROLOGUE from the
dnl  HAVE_NATIVE grepping in configure, preventing that code trying to eval
dnl  something with $1 in it.

define(FAT_INIT,
m4_assert_numargs(2)
`PROLOGUE($1)dnl
	movb	$`'$2, %al
	jmp	L(fat_init)
EPILOGUE()
')

dnl  Shared tail for every FAT_INIT stub.
dnl
dnl  The byte offset is zero-extended into %eax and pushed so that it
dnl  survives the call to __gmpn_cpuvec_init; it is popped back into %eax
dnl  just before the final indirect jump through __gmpn_cpuvec.
dnl
dnl  In the non-Darwin PIC case %ebx is saved, loaded with the GOT address
dnl  via L(movl_eip_ebx) for the call, and restored before the jump.

L(fat_init):
	C al	__gmpn_cpuvec byte offset

	movzbl	%al, %eax
	pushl	%eax

ifdef(`PIC',`dnl
ifdef(`DARWIN',`
	sub	$8, %esp
	CALL(	__gmpn_cpuvec_init)
	add	$8, %esp
	call	L(movl_eip_edx)
	movl	L(___gmpn_cpuvec)$non_lazy_ptr-.(%edx), %edx
',`dnl
	pushl	%ebx
	call	L(movl_eip_ebx)
L(init_here):
	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(init_here)], %ebx
	CALL(	__gmpn_cpuvec_init)
	movl	GSYM_PREFIX`'__gmpn_cpuvec@GOT(%ebx), %edx
	popl	%ebx
')
	popl	%eax
	jmp	*(%edx,%eax)

L(movl_eip_ebx):
	movl	(%esp), %ebx
	ret_internal
',`dnl non-PIC
	sub	$8, %esp	C needed on Darwin, harmless elsewhere
	CALL(	__gmpn_cpuvec_init)
	add	$8, %esp	C needed on Darwin, harmless elsewhere
	popl	%eax
	jmp	*GSYM_PREFIX`'__gmpn_cpuvec(%eax)
')

dnl  FAT_INIT for each CPUVEC_FUNCS_LIST
dnl
dnl  Each initializer is named MPN(<name>_init).  CPUVEC_offset is reset to 0
dnl  and stepped by 4 per entry, the same progression used for the FAT_ENTRY
dnl  list.

define(`CPUVEC_offset',0)
foreach(i,
`FAT_INIT(MPN(i`'_init),CPUVEC_offset)
define(`CPUVEC_offset',eval(CPUVEC_offset + 4))',
CPUVEC_FUNCS_LIST)



C long __gmpn_cpuid (char dst[12], int id);
C
C This is called only once, so just something simple and compact is fine.
dnl  Parameter slots for __gmpn_cpuid, declared with defframe: id at 8 and
dnl  dst at 4, with FRAME starting from 0.  Each pushl below is paired with
dnl  FRAME_pushl().
dnl
dnl  The routine runs cpuid with id in %eax and stores %ebx, %edx, %ecx, in
dnl  that order, as three consecutive 32-bit words at dst.  %esi and %ebx are
dnl  saved on entry and restored before returning.  %eax is not modified
dnl  after the cpuid instruction.

defframe(PARAM_ID,  8)
defframe(PARAM_DST, 4)
deflit(`FRAME',0)

PROLOGUE(__gmpn_cpuid)
	pushl	%esi		FRAME_pushl()
	pushl	%ebx		FRAME_pushl()
	movl	PARAM_ID, %eax
	cpuid
	movl	PARAM_DST, %esi
	movl	%ebx, (%esi)	C dst bytes 0 to 3
	movl	%edx, 4(%esi)	C dst bytes 4 to 7
	movl	%ecx, 8(%esi)	C dst bytes 8 to 11
	popl	%ebx
	popl	%esi
	ret
EPILOGUE()


C int __gmpn_cpuid_available (void);
C
C Return non-zero if the cpuid instruction is available, which means late
C model 80486 and higher.  80386 and early 80486 don't have cpuid.
C
C The test follows Intel AP-485 application note, namely that if bit 21 is
C modifiable then cpuid is supported.  This test is reentrant and thread
C safe, since of course any interrupt or context switch will preserve the
C flags while we're tinkering with them.
C
C This is called only once, so just something simple and compact is fine.

PROLOGUE(__gmpn_cpuid_available)
	pushf
	popl	%ecx		C old flags

	movl	%ecx, %edx
	xorl	$0x200000, %edx	C toggle bit 21
	pushl	%edx
	popf
	pushf
	popl	%edx		C tweaked flags

	movl	$1, %eax	C assume available
	cmpl	%ecx, %edx
	jne	L(available)
	xorl	%eax, %eax	C not changed, so cpuid not available

L(available):
	ret
EPILOGUE()
ASM_END()