1 /* x86_64 fat binary initializers. 2 3 Contributed to the GNU project by Kevin Ryde (original x86_32 code) and 4 Torbjorn Granlund (port to x86_64) 5 6 THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY. 7 THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR 8 COMPLETELY IN FUTURE GNU MP RELEASES. 9 10 Copyright 2003, 2004, 2009, 2011, 2012 Free Software Foundation, Inc. 11 12 This file is part of the GNU MP Library. 13 14 The GNU MP Library is free software; you can redistribute it and/or modify 15 it under the terms of the GNU Lesser General Public License as published by 16 the Free Software Foundation; either version 3 of the License, or (at your 17 option) any later version. 18 19 The GNU MP Library is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 21 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 22 License for more details. 23 24 You should have received a copy of the GNU Lesser General Public License 25 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 26 27 #include <stdio.h> /* for printf */ 28 #include <stdlib.h> /* for getenv */ 29 #include <string.h> 30 31 #include "gmp.h" 32 #include "gmp-impl.h" 33 34 /* Change this to "#define TRACE(x) x" for some traces. */ 35 #define TRACE(x) 36 37 /* Change this to 1 to take the cpuid from GMP_CPU_TYPE env var. */ 38 #define WANT_FAKE_CPUID 0 39 40 41 /* fat_entry.asm */ 42 long __gmpn_cpuid (char [12], int); 43 44 45 #if WANT_FAKE_CPUID 46 /* The "name"s in the table are values for the GMP_CPU_TYPE environment 47 variable. Anything can be used, but for now it's the canonical cpu types 48 as per config.guess/config.sub. */ 49 50 #define __gmpn_cpuid fake_cpuid 51 #define __gmpn_cpuid_available fake_cpuid_available 52 53 #define MAKE_FMS(family, model) \ 54 ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20) \ 55 + (((model) & 0xf) << 4) + (((model) & 0xf0) << 12)) 56 57 static struct { 58 const char *name; 59 const char vendor[13]; 60 unsigned fms; 61 } fake_cpuid_table[] = { 62 { "core2", "GenuineIntel", MAKE_FMS (6, 0xf) }, 63 { "coreinhm", "GenuineIntel", MAKE_FMS (6, 0x1a) }, 64 { "coreiwsm", "GenuineIntel", MAKE_FMS (6, 0x25) }, 65 { "coreisbr", "GenuineIntel", MAKE_FMS (6, 0x2a) }, 66 { "atom", "GenuineIntel", MAKE_FMS (6, 0x1c) }, 67 { "pentium4", "GenuineIntel", MAKE_FMS (15, 3) }, 68 69 { "k8", "AuthenticAMD", MAKE_FMS (15, 0) }, 70 { "k10", "AuthenticAMD", MAKE_FMS (16, 0) }, 71 { "bobcat", "AuthenticAMD", MAKE_FMS (20, 1) }, 72 { "bulldozer", "AuthenticAMD", MAKE_FMS (21, 1) }, 73 74 { "nano", "CentaurHauls", MAKE_FMS (6, 15) }, 75 }; 76 77 static int 78 fake_cpuid_lookup (void) 79 { 80 char *s; 81 int i; 82 83 s = getenv ("GMP_CPU_TYPE"); 84 if (s == NULL) 85 { 86 printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n"); 87 abort (); 88 } 89 90 for (i = 0; i < numberof (fake_cpuid_table); i++) 91 if (strcmp (s, fake_cpuid_table[i].name) == 0) 92 return i; 93 94 printf ("GMP_CPU_TYPE=%s unknown\n", s); 95 abort (); 96 } 97 98 static int 99 fake_cpuid_available (void) 100 { 101 return fake_cpuid_table[fake_cpuid_lookup()].vendor[0] != '\0'; 102 } 103 104 static long 105 fake_cpuid (char dst[12], int id) 106 { 107 int i = fake_cpuid_lookup(); 108 109 switch (id) { 110 case 0: 111 memcpy (dst, fake_cpuid_table[i].vendor, 12); 112 return 0; 113 case 1: 114 return fake_cpuid_table[i].fms; 115 default: 116 printf ("fake_cpuid(): oops, unknown id %d\n", id); 117 abort (); 118 } 119 } 120 #endif 121 122 123 typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t)); 124 typedef DECL_preinv_mod_1 ((*preinv_mod_1_t)); 125 126 struct cpuvec_t __gmpn_cpuvec = { 127 __MPN(add_n_init), 128 __MPN(addlsh1_n_init), 129 __MPN(addlsh2_n_init), 130 __MPN(addmul_1_init), 131 __MPN(addmul_2_init), 132 __MPN(bdiv_dbm1c_init), 133 __MPN(com_init), 134 __MPN(copyd_init), 135 __MPN(copyi_init), 136 __MPN(divexact_1_init), 137 __MPN(divrem_1_init), 138 __MPN(gcd_1_init), 139 __MPN(lshift_init), 140 __MPN(lshiftc_init), 141 __MPN(mod_1_init), 142 __MPN(mod_1_1p_init), 143 __MPN(mod_1_1p_cps_init), 144 __MPN(mod_1s_2p_init), 145 __MPN(mod_1s_2p_cps_init), 146 __MPN(mod_1s_4p_init), 147 __MPN(mod_1s_4p_cps_init), 148 __MPN(mod_34lsub1_init), 149 __MPN(modexact_1c_odd_init), 150 __MPN(mul_1_init), 151 __MPN(mul_basecase_init), 152 __MPN(mullo_basecase_init), 153 __MPN(preinv_divrem_1_init), 154 __MPN(preinv_mod_1_init), 155 __MPN(redc_1_init), 156 __MPN(redc_2_init), 157 __MPN(rshift_init), 158 __MPN(sqr_basecase_init), 159 __MPN(sub_n_init), 160 __MPN(sublsh1_n_init), 161 __MPN(submul_1_init), 162 0 163 }; 164 165 int __gmpn_cpuvec_initialized = 0; 166 167 /* The following setups start with generic x86, then overwrite with 168 specifics for a chip, and higher versions of that chip. 169 170 The arrangement of the setups here will normally be the same as the $path 171 selections in configure.in for the respective chips. 172 173 This code is reentrant and thread safe. We always calculate the same 174 decided_cpuvec, so if two copies of the code are running it doesn't 175 matter which completes first, both write the same to __gmpn_cpuvec. 176 177 We need to go via decided_cpuvec because if one thread has completed 178 __gmpn_cpuvec then it may be making use of the threshold values in that 179 vector. If another thread is still running __gmpn_cpuvec_init then we 180 don't want it to write different values to those fields since some of the 181 asm routines only operate correctly up to their own defined threshold, 182 not an arbitrary value. */ 183 184 void 185 __gmpn_cpuvec_init (void) 186 { 187 struct cpuvec_t decided_cpuvec; 188 char vendor_string[13]; 189 char dummy_string[12]; 190 long fms; 191 int family, model; 192 193 TRACE (printf ("__gmpn_cpuvec_init:\n")); 194 195 memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec)); 196 197 CPUVEC_SETUP_x86_64; 198 CPUVEC_SETUP_fat; 199 200 __gmpn_cpuid (vendor_string, 0); 201 vendor_string[12] = 0; 202 203 fms = __gmpn_cpuid (dummy_string, 1); 204 family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff); 205 model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0); 206 207 /* Check extended feature flags */ 208 __gmpn_cpuid (dummy_string, 0x80000001); 209 if ((dummy_string[4 + 29 / 8] & (1 << (29 % 8))) == 0) 210 abort (); /* longmode-capable-bit turned off! */ 211 212 /*********************************************************/ 213 /*** WARNING: keep this list in sync with config.guess ***/ 214 /*********************************************************/ 215 if (strcmp (vendor_string, "GenuineIntel") == 0) 216 { 217 switch (family) 218 { 219 case 6: 220 switch (model) 221 { 222 case 0x0f: /* Conroe Merom Kentsfield Allendale */ 223 case 0x10: 224 case 0x11: 225 case 0x12: 226 case 0x13: 227 case 0x14: 228 case 0x15: 229 case 0x16: 230 case 0x17: /* PNR Wolfdale Yorkfield */ 231 case 0x18: 232 case 0x19: 233 case 0x1d: /* PNR Dunnington */ 234 CPUVEC_SETUP_core2; 235 break; 236 237 case 0x1c: /* Atom Silverthorne */ 238 case 0x26: /* Atom Lincroft */ 239 case 0x27: /* Atom Saltwell? */ 240 case 0x36: /* Atom Cedarview/Saltwell */ 241 CPUVEC_SETUP_atom; 242 break; 243 244 case 0x1a: /* NHM Gainestown */ 245 case 0x1b: 246 case 0x1e: /* NHM Lynnfield/Jasper */ 247 case 0x1f: 248 case 0x20: 249 case 0x21: 250 case 0x22: 251 case 0x23: 252 case 0x24: 253 case 0x25: /* WSM Clarkdale/Arrandale */ 254 case 0x28: 255 case 0x29: 256 case 0x2b: 257 case 0x2c: /* WSM Gulftown */ 258 case 0x2e: /* NHM Beckton */ 259 case 0x2f: /* WSM Eagleton */ 260 CPUVEC_SETUP_core2; 261 CPUVEC_SETUP_coreinhm; 262 break; 263 264 case 0x2a: /* SB */ 265 case 0x2d: /* SBC-EP */ 266 case 0x3a: /* IBR */ 267 case 0x3c: /* Haswell */ 268 CPUVEC_SETUP_core2; 269 CPUVEC_SETUP_coreinhm; 270 CPUVEC_SETUP_coreisbr; 271 break; 272 } 273 break; 274 275 case 15: 276 CPUVEC_SETUP_pentium4; 277 break; 278 } 279 } 280 else if (strcmp (vendor_string, "AuthenticAMD") == 0) 281 { 282 switch (family) 283 { 284 case 0x0f: /* k8 */ 285 case 0x11: /* "fam 11h", mix of k8 and k10 */ 286 case 0x13: 287 case 0x16: 288 case 0x17: 289 CPUVEC_SETUP_k8; 290 break; 291 292 case 0x10: /* k10 */ 293 case 0x12: /* k10 (llano) */ 294 CPUVEC_SETUP_k8; 295 CPUVEC_SETUP_k10; 296 break; 297 298 case 0x14: /* bobcat */ 299 CPUVEC_SETUP_k8; 300 CPUVEC_SETUP_k10; 301 CPUVEC_SETUP_bobcat; 302 break; 303 304 case 0x15: /* bulldozer */ 305 CPUVEC_SETUP_k8; 306 CPUVEC_SETUP_k10; 307 CPUVEC_SETUP_bd1; 308 } 309 } 310 else if (strcmp (vendor_string, "CentaurHauls") == 0) 311 { 312 switch (family) 313 { 314 case 6: 315 if (model >= 15) 316 CPUVEC_SETUP_nano; 317 break; 318 } 319 } 320 321 /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1. 322 Instead default to the plain versions from whichever CPU we detected. 323 The function arguments are compatible, no need for any glue code. */ 324 if (decided_cpuvec.preinv_divrem_1 == NULL) 325 decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1; 326 if (decided_cpuvec.preinv_mod_1 == NULL) 327 decided_cpuvec.preinv_mod_1 =(preinv_mod_1_t) decided_cpuvec.mod_1; 328 329 ASSERT_CPUVEC (decided_cpuvec); 330 CPUVEC_INSTALL (decided_cpuvec); 331 332 /* Set this once the threshold fields are ready. 333 Use volatile to prevent it getting moved. */ 334 *((volatile int *) &__gmpn_cpuvec_initialized) = 1; 335 } 336