1 /* x86_64 fat binary initializers. 2 3 Contributed to the GNU project by Kevin Ryde (original x86_32 code) and 4 Torbjorn Granlund (port to x86_64) 5 6 THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY. 7 THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR 8 COMPLETELY IN FUTURE GNU MP RELEASES. 9 10 Copyright 2003, 2004, 2009, 2011-2015, 2017 Free Software Foundation, Inc. 11 12 This file is part of the GNU MP Library. 13 14 The GNU MP Library is free software; you can redistribute it and/or modify 15 it under the terms of either: 16 17 * the GNU Lesser General Public License as published by the Free 18 Software Foundation; either version 3 of the License, or (at your 19 option) any later version. 20 21 or 22 23 * the GNU General Public License as published by the Free Software 24 Foundation; either version 2 of the License, or (at your option) any 25 later version. 26 27 or both in parallel, as here. 28 29 The GNU MP Library is distributed in the hope that it will be useful, but 30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 31 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 32 for more details. 33 34 You should have received copies of the GNU General Public License and the 35 GNU Lesser General Public License along with the GNU MP Library. If not, 36 see https://www.gnu.org/licenses/. */ 37 38 #include <stdio.h> /* for printf */ 39 #include <stdlib.h> /* for getenv */ 40 #include <string.h> 41 42 #include "gmp-impl.h" 43 44 /* Change this to "#define TRACE(x) x" for some traces. */ 45 #define TRACE(x) 46 47 48 /* fat_entry.asm */ 49 long __gmpn_cpuid (char [12], int); 50 51 52 #if WANT_FAKE_CPUID 53 /* The "name"s in the table are values for the GMP_CPU_TYPE environment 54 variable. Anything can be used, but for now it's the canonical cpu types 55 as per config.guess/config.sub. */ 56 57 #define __gmpn_cpuid fake_cpuid 58 59 #define MAKE_FMS(family, model) \ 60 ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20) \ 61 + (((model) & 0xf) << 4) + (((model) & 0xf0) << 12)) 62 63 static struct { 64 const char *name; 65 const char *vendor; 66 unsigned fms; 67 } fake_cpuid_table[] = { 68 { "core2", "GenuineIntel", MAKE_FMS (6, 0xf) }, 69 { "nehalem", "GenuineIntel", MAKE_FMS (6, 0x1a) }, 70 { "nhm", "GenuineIntel", MAKE_FMS (6, 0x1a) }, 71 { "atom", "GenuineIntel", MAKE_FMS (6, 0x1c) }, 72 { "westmere", "GenuineIntel", MAKE_FMS (6, 0x25) }, 73 { "wsm", "GenuineIntel", MAKE_FMS (6, 0x25) }, 74 { "sandybridge","GenuineIntel", MAKE_FMS (6, 0x2a) }, 75 { "sbr", "GenuineIntel", MAKE_FMS (6, 0x2a) }, 76 { "silvermont", "GenuineIntel", MAKE_FMS (6, 0x37) }, 77 { "slm", "GenuineIntel", MAKE_FMS (6, 0x37) }, 78 { "haswell", "GenuineIntel", MAKE_FMS (6, 0x3c) }, 79 { "hwl", "GenuineIntel", MAKE_FMS (6, 0x3c) }, 80 { "broadwell", "GenuineIntel", MAKE_FMS (6, 0x3d) }, 81 { "bwl", "GenuineIntel", MAKE_FMS (6, 0x3d) }, 82 { "skylake", "GenuineIntel", MAKE_FMS (6, 0x5e) }, 83 { "sky", "GenuineIntel", MAKE_FMS (6, 0x5e) }, 84 { "pentium4", "GenuineIntel", MAKE_FMS (15, 3) }, 85 86 { "k8", "AuthenticAMD", MAKE_FMS (15, 0) }, 87 { "k10", "AuthenticAMD", MAKE_FMS (16, 0) }, 88 { "bobcat", "AuthenticAMD", MAKE_FMS (20, 1) }, 89 { "bulldozer", "AuthenticAMD", MAKE_FMS (21, 1) }, 90 { "piledriver", "AuthenticAMD", MAKE_FMS (21, 2) }, 91 { "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) }, 92 { "excavator", "AuthenticAMD", MAKE_FMS (21, 0x60) }, 93 { "jaguar", "AuthenticAMD", MAKE_FMS (22, 1) }, 94 { "zen", "AuthenticAMD", MAKE_FMS (23, 1) }, 95 96 { "nano", "CentaurHauls", MAKE_FMS (6, 15) }, 97 }; 98 99 static int 100 fake_cpuid_lookup (void) 101 { 102 char *s; 103 int i; 104 105 s = getenv ("GMP_CPU_TYPE"); 106 if (s == NULL) 107 { 108 printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n"); 109 abort (); 110 } 111 112 for (i = 0; i < numberof (fake_cpuid_table); i++) 113 if (strcmp (s, fake_cpuid_table[i].name) == 0) 114 return i; 115 116 printf ("GMP_CPU_TYPE=%s unknown\n", s); 117 abort (); 118 } 119 120 static long 121 fake_cpuid (char dst[12], unsigned int id) 122 { 123 int i = fake_cpuid_lookup(); 124 125 switch (id) { 126 case 0: 127 memcpy (dst, fake_cpuid_table[i].vendor, 12); 128 return 0; 129 case 1: 130 return fake_cpuid_table[i].fms; 131 case 7: 132 dst[0] = 0xff; /* BMI1, AVX2, etc */ 133 dst[1] = 0xff; /* BMI2, etc */ 134 return 0; 135 case 0x80000001: 136 dst[4 + 29 / 8] = (1 << (29 % 8)); /* "long" mode */ 137 return 0; 138 default: 139 printf ("fake_cpuid(): oops, unknown id %d\n", id); 140 abort (); 141 } 142 } 143 #endif 144 145 146 typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t)); 147 typedef DECL_preinv_mod_1 ((*preinv_mod_1_t)); 148 149 struct cpuvec_t __gmpn_cpuvec = { 150 __MPN(add_n_init), 151 __MPN(addlsh1_n_init), 152 __MPN(addlsh2_n_init), 153 __MPN(addmul_1_init), 154 __MPN(addmul_2_init), 155 __MPN(bdiv_dbm1c_init), 156 __MPN(cnd_add_n_init), 157 __MPN(cnd_sub_n_init), 158 __MPN(com_init), 159 __MPN(copyd_init), 160 __MPN(copyi_init), 161 __MPN(divexact_1_init), 162 __MPN(divrem_1_init), 163 __MPN(gcd_11_init), 164 __MPN(lshift_init), 165 __MPN(lshiftc_init), 166 __MPN(mod_1_init), 167 __MPN(mod_1_1p_init), 168 __MPN(mod_1_1p_cps_init), 169 __MPN(mod_1s_2p_init), 170 __MPN(mod_1s_2p_cps_init), 171 __MPN(mod_1s_4p_init), 172 __MPN(mod_1s_4p_cps_init), 173 __MPN(mod_34lsub1_init), 174 __MPN(modexact_1c_odd_init), 175 __MPN(mul_1_init), 176 __MPN(mul_basecase_init), 177 __MPN(mullo_basecase_init), 178 __MPN(preinv_divrem_1_init), 179 __MPN(preinv_mod_1_init), 180 __MPN(redc_1_init), 181 __MPN(redc_2_init), 182 __MPN(rshift_init), 183 __MPN(sqr_basecase_init), 184 __MPN(sub_n_init), 185 __MPN(sublsh1_n_init), 186 __MPN(submul_1_init), 187 0 188 }; 189 190 int __gmpn_cpuvec_initialized = 0; 191 192 /* The following setups start with generic x86, then overwrite with 193 specifics for a chip, and higher versions of that chip. 194 195 The arrangement of the setups here will normally be the same as the $path 196 selections in configure.in for the respective chips. 197 198 This code is reentrant and thread safe. We always calculate the same 199 decided_cpuvec, so if two copies of the code are running it doesn't 200 matter which completes first, both write the same to __gmpn_cpuvec. 201 202 We need to go via decided_cpuvec because if one thread has completed 203 __gmpn_cpuvec then it may be making use of the threshold values in that 204 vector. If another thread is still running __gmpn_cpuvec_init then we 205 don't want it to write different values to those fields since some of the 206 asm routines only operate correctly up to their own defined threshold, 207 not an arbitrary value. */ 208 209 static int 210 gmp_workaround_skylake_cpuid_bug () 211 { 212 char feature_string[49]; 213 char processor_name_string[49]; 214 static const char *bad_cpus[] = {" G44", " G45", " G39" /* , "6600" */ }; 215 int i; 216 217 /* Example strings: */ 218 /* "Intel(R) Pentium(R) CPU G4400 @ 3.30GHz" */ 219 /* "Intel(R) Core(TM) i5-6600K CPU @ 3.50GHz" */ 220 /* ^ ^ ^ */ 221 /* 0x80000002 0x80000003 0x80000004 */ 222 /* We match out just the 0x80000003 part here. */ 223 224 /* In their infinitive wisdom, Intel decided to use one register order for 225 the vendor string, and another for the processor name string. We shuffle 226 things about here, rather than write a new variant of our assembly cpuid. 227 */ 228 229 unsigned int eax, ebx, ecx, edx; 230 eax = __gmpn_cpuid (feature_string, 0x80000003); 231 ebx = ((unsigned int *)feature_string)[0]; 232 edx = ((unsigned int *)feature_string)[1]; 233 ecx = ((unsigned int *)feature_string)[2]; 234 235 ((unsigned int *) (processor_name_string))[0] = eax; 236 ((unsigned int *) (processor_name_string))[1] = ebx; 237 ((unsigned int *) (processor_name_string))[2] = ecx; 238 ((unsigned int *) (processor_name_string))[3] = edx; 239 240 processor_name_string[16] = 0; 241 242 for (i = 0; i < sizeof (bad_cpus) / sizeof (char *); i++) 243 { 244 if (strstr (processor_name_string, bad_cpus[i]) != 0) 245 return 1; 246 } 247 return 0; 248 } 249 250 enum {BMI2_BIT = 8}; 251 252 void 253 __gmpn_cpuvec_init (void) 254 { 255 struct cpuvec_t decided_cpuvec; 256 char vendor_string[13]; 257 char dummy_string[12]; 258 long fms; 259 int family, model; 260 261 TRACE (printf ("__gmpn_cpuvec_init:\n")); 262 263 memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec)); 264 265 CPUVEC_SETUP_x86_64; 266 CPUVEC_SETUP_fat; 267 268 __gmpn_cpuid (vendor_string, 0); 269 vendor_string[12] = 0; 270 271 fms = __gmpn_cpuid (dummy_string, 1); 272 family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff); 273 model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0); 274 275 /* Check extended feature flags */ 276 __gmpn_cpuid (dummy_string, 0x80000001); 277 if ((dummy_string[4 + 29 / 8] & (1 << (29 % 8))) == 0) 278 abort (); /* longmode-capable-bit turned off! */ 279 280 /*********************************************************/ 281 /*** WARNING: keep this list in sync with config.guess ***/ 282 /*********************************************************/ 283 if (strcmp (vendor_string, "GenuineIntel") == 0) 284 { 285 switch (family) 286 { 287 case 6: 288 switch (model) 289 { 290 case 0x0f: /* Conroe Merom Kentsfield Allendale */ 291 case 0x10: 292 case 0x11: 293 case 0x12: 294 case 0x13: 295 case 0x14: 296 case 0x15: 297 case 0x16: 298 case 0x17: /* PNR Wolfdale Yorkfield */ 299 case 0x18: 300 case 0x19: 301 case 0x1d: /* PNR Dunnington */ 302 CPUVEC_SETUP_core2; 303 break; 304 305 case 0x1c: /* Atom Silverthorne */ 306 case 0x26: /* Atom Lincroft */ 307 case 0x27: /* Atom Saltwell? */ 308 case 0x36: /* Atom Cedarview/Saltwell */ 309 CPUVEC_SETUP_atom; 310 break; 311 312 case 0x1a: /* NHM Gainestown */ 313 case 0x1b: 314 case 0x1e: /* NHM Lynnfield/Jasper */ 315 case 0x1f: 316 case 0x20: 317 case 0x21: 318 case 0x22: 319 case 0x23: 320 case 0x24: 321 case 0x25: /* WSM Clarkdale/Arrandale */ 322 case 0x28: 323 case 0x29: 324 case 0x2b: 325 case 0x2c: /* WSM Gulftown */ 326 case 0x2e: /* NHM Beckton */ 327 case 0x2f: /* WSM Eagleton */ 328 CPUVEC_SETUP_core2; 329 CPUVEC_SETUP_coreinhm; 330 break; 331 332 case 0x37: /* Silvermont */ 333 case 0x4a: /* Silvermont */ 334 case 0x4c: /* Airmont */ 335 case 0x4d: /* Silvermont/Avoton */ 336 case 0x5a: /* Silvermont */ 337 CPUVEC_SETUP_atom; 338 CPUVEC_SETUP_silvermont; 339 break; 340 341 case 0x5c: /* Goldmont */ 342 case 0x5f: /* Goldmont */ 343 case 0x7a: /* Goldmont Plus */ 344 CPUVEC_SETUP_atom; 345 CPUVEC_SETUP_silvermont; 346 CPUVEC_SETUP_goldmont; 347 break; 348 349 case 0x2a: /* SB */ 350 case 0x2d: /* SBC-EP */ 351 case 0x3a: /* IBR */ 352 case 0x3e: /* IBR Ivytown */ 353 CPUVEC_SETUP_core2; 354 CPUVEC_SETUP_coreinhm; 355 CPUVEC_SETUP_coreisbr; 356 break; 357 case 0x3c: /* Haswell client */ 358 case 0x3f: /* Haswell server */ 359 case 0x45: /* Haswell ULT */ 360 case 0x46: /* Crystal Well */ 361 CPUVEC_SETUP_core2; 362 CPUVEC_SETUP_coreinhm; 363 CPUVEC_SETUP_coreisbr; 364 /* Some Haswells lack BMI2. Let them appear as Sandybridges for 365 now. */ 366 __gmpn_cpuid (dummy_string, 7); 367 if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0) 368 break; 369 CPUVEC_SETUP_coreihwl; 370 break; 371 case 0x3d: /* Broadwell */ 372 case 0x47: /* Broadwell */ 373 case 0x4f: /* Broadwell server */ 374 case 0x56: /* Broadwell microserver */ 375 CPUVEC_SETUP_core2; 376 CPUVEC_SETUP_coreinhm; 377 CPUVEC_SETUP_coreisbr; 378 if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0) 379 break; 380 CPUVEC_SETUP_coreihwl; 381 CPUVEC_SETUP_coreibwl; 382 break; 383 case 0x4e: /* Skylake client */ 384 case 0x55: /* Skylake server */ 385 case 0x5e: /* Skylake */ 386 case 0x8e: /* Kabylake */ 387 case 0x9e: /* Kabylake */ 388 CPUVEC_SETUP_core2; 389 CPUVEC_SETUP_coreinhm; 390 CPUVEC_SETUP_coreisbr; 391 if ((dummy_string[0 + BMI2_BIT / 8] & (1 << (BMI2_BIT % 8))) == 0) 392 break; 393 if (gmp_workaround_skylake_cpuid_bug ()) 394 break; 395 CPUVEC_SETUP_coreihwl; 396 CPUVEC_SETUP_coreibwl; 397 CPUVEC_SETUP_skylake; 398 break; 399 } 400 break; 401 402 case 15: 403 CPUVEC_SETUP_pentium4; 404 break; 405 } 406 } 407 else if (strcmp (vendor_string, "AuthenticAMD") == 0) 408 { 409 switch (family) 410 { 411 case 0x0f: /* k8 */ 412 case 0x11: /* "fam 11h", mix of k8 and k10 */ 413 case 0x13: 414 CPUVEC_SETUP_k8; 415 break; 416 417 case 0x10: /* k10 */ 418 case 0x12: /* k10 (llano) */ 419 CPUVEC_SETUP_k8; 420 CPUVEC_SETUP_k10; 421 break; 422 423 case 0x14: /* bobcat */ 424 CPUVEC_SETUP_k8; 425 CPUVEC_SETUP_k10; 426 CPUVEC_SETUP_bt1; 427 break; 428 429 case 0x16: /* jaguar */ 430 CPUVEC_SETUP_k8; 431 CPUVEC_SETUP_k10; 432 CPUVEC_SETUP_bt1; 433 CPUVEC_SETUP_bt2; 434 break; 435 436 case 0x15: /* bulldozer, piledriver, steamroller, excavator */ 437 CPUVEC_SETUP_k8; 438 CPUVEC_SETUP_k10; 439 CPUVEC_SETUP_bd1; 440 break; 441 442 case 0x17: /* zen */ 443 case 0x19: /* zen3 */ 444 CPUVEC_SETUP_zen; 445 break; 446 } 447 } 448 else if (strcmp (vendor_string, "CentaurHauls") == 0) 449 { 450 switch (family) 451 { 452 case 6: 453 if (model >= 15) 454 CPUVEC_SETUP_nano; 455 break; 456 } 457 } 458 459 /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1. 460 Instead default to the plain versions from whichever CPU we detected. 461 The function arguments are compatible, no need for any glue code. */ 462 if (decided_cpuvec.preinv_divrem_1 == NULL) 463 decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1; 464 if (decided_cpuvec.preinv_mod_1 == NULL) 465 decided_cpuvec.preinv_mod_1 =(preinv_mod_1_t) decided_cpuvec.mod_1; 466 467 ASSERT_CPUVEC (decided_cpuvec); 468 CPUVEC_INSTALL (decided_cpuvec); 469 470 /* Set this once the threshold fields are ready. 471 Use volatile to prevent it getting moved. */ 472 *((volatile int *) &__gmpn_cpuvec_initialized) = 1; 473 } 474