1 /* x86 fat binary initializers. 2 3 THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY. 4 THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR 5 COMPLETELY IN FUTURE GNU MP RELEASES. 6 7 Copyright 2003, 2004, 2011, 2012 Free Software Foundation, Inc. 8 9 This file is part of the GNU MP Library. 10 11 The GNU MP Library is free software; you can redistribute it and/or modify 12 it under the terms of the GNU Lesser General Public License as published by 13 the Free Software Foundation; either version 3 of the License, or (at your 14 option) any later version. 15 16 The GNU MP Library is distributed in the hope that it will be useful, but 17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 19 License for more details. 20 21 You should have received a copy of the GNU Lesser General Public License 22 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 23 24 #include <stdio.h> /* for printf */ 25 #include <stdlib.h> /* for getenv */ 26 #include <string.h> 27 28 #include "gmp.h" 29 #include "gmp-impl.h" 30 31 /* Change this to "#define TRACE(x) x" for some traces. */ 32 #define TRACE(x) 33 34 /* Change this to 1 to take the cpuid from GMP_CPU_TYPE env var. */ 35 #define WANT_FAKE_CPUID 0 36 37 38 /* fat_entry.asm */ 39 long __gmpn_cpuid (char [12], int); 40 int __gmpn_cpuid_available (void); 41 42 43 #if WANT_FAKE_CPUID 44 /* The "name"s in the table are values for the GMP_CPU_TYPE environment 45 variable. Anything can be used, but for now it's the canonical cpu types 46 as per config.guess/config.sub. */ 47 48 #define __gmpn_cpuid fake_cpuid 49 #define __gmpn_cpuid_available fake_cpuid_available 50 51 #define MAKE_FMS(family, model) \ 52 ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20) \ 53 + (((model) & 0xf) << 4) + (((model) & 0xf0) << 12)) 54 55 static struct { 56 const char *name; 57 const char vendor[13]; 58 unsigned fms; 59 } fake_cpuid_table[] = { 60 { "i386", "" }, 61 { "i486", "GenuineIntel", MAKE_FMS (4, 0) }, 62 { "pentium", "GenuineIntel", MAKE_FMS (5, 0) }, 63 { "pentiummmx", "GenuineIntel", MAKE_FMS (5, 4) }, 64 { "pentiumpro", "GenuineIntel", MAKE_FMS (6, 0) }, 65 { "pentium2", "GenuineIntel", MAKE_FMS (6, 2) }, 66 { "pentium3", "GenuineIntel", MAKE_FMS (6, 7) }, 67 { "pentium4", "GenuineIntel", MAKE_FMS (15, 2) }, 68 { "prescott", "GenuineIntel", MAKE_FMS (15, 3) }, 69 { "nocona", "GenuineIntel", MAKE_FMS (15, 4) }, 70 { "core2", "GenuineIntel", MAKE_FMS (6, 0xf) }, 71 { "coreinhm", "GenuineIntel", MAKE_FMS (6, 0x1a) }, 72 { "coreiwsm", "GenuineIntel", MAKE_FMS (6, 0x25) }, 73 { "coreisbr", "GenuineIntel", MAKE_FMS (6, 0x2a) }, 74 { "atom", "GenuineIntel", MAKE_FMS (6, 0x1c) }, 75 76 { "k5", "AuthenticAMD", MAKE_FMS (5, 0) }, 77 { "k6", "AuthenticAMD", MAKE_FMS (5, 3) }, 78 { "k62", "AuthenticAMD", MAKE_FMS (5, 8) }, 79 { "k63", "AuthenticAMD", MAKE_FMS (5, 9) }, 80 { "athlon", "AuthenticAMD", MAKE_FMS (6, 0) }, 81 { "k8", "AuthenticAMD", MAKE_FMS (15, 0) }, 82 { "k10", "AuthenticAMD", MAKE_FMS (16, 0) }, 83 { "bobcat", "AuthenticAMD", MAKE_FMS (20, 1) }, 84 { "bulldozer", "AuthenticAMD", MAKE_FMS (21, 1) }, 85 86 { "viac3", "CentaurHauls", MAKE_FMS (6, 0) }, 87 { "viac32", "CentaurHauls", MAKE_FMS (6, 9) }, 88 { "nano", "CentaurHauls", MAKE_FMS (6, 15) }, 89 }; 90 91 static int 92 fake_cpuid_lookup (void) 93 { 94 char *s; 95 int i; 96 97 s = getenv ("GMP_CPU_TYPE"); 98 if (s == NULL) 99 { 100 printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n"); 101 abort (); 102 } 103 104 for (i = 0; i < numberof (fake_cpuid_table); i++) 105 if (strcmp (s, fake_cpuid_table[i].name) == 0) 106 return i; 107 108 printf ("GMP_CPU_TYPE=%s unknown\n", s); 109 abort (); 110 } 111 112 static int 113 fake_cpuid_available (void) 114 { 115 return fake_cpuid_table[fake_cpuid_lookup()].vendor[0] != '\0'; 116 } 117 118 static long 119 fake_cpuid (char dst[12], int id) 120 { 121 int i = fake_cpuid_lookup(); 122 123 switch (id) { 124 case 0: 125 memcpy (dst, fake_cpuid_table[i].vendor, 12); 126 return 0; 127 case 1: 128 return fake_cpuid_table[i].fms; 129 default: 130 printf ("fake_cpuid(): oops, unknown id %d\n", id); 131 abort (); 132 } 133 } 134 #endif 135 136 137 typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t)); 138 typedef DECL_preinv_mod_1 ((*preinv_mod_1_t)); 139 140 struct cpuvec_t __gmpn_cpuvec = { 141 __MPN(add_n_init), 142 0, 143 0, 144 __MPN(addmul_1_init), 145 0, 146 __MPN(bdiv_dbm1c_init), 147 __MPN(com_init), 148 __MPN(copyd_init), 149 __MPN(copyi_init), 150 __MPN(divexact_1_init), 151 __MPN(divrem_1_init), 152 __MPN(gcd_1_init), 153 __MPN(lshift_init), 154 __MPN(lshiftc_init), 155 __MPN(mod_1_init), 156 __MPN(mod_1_1p_init), 157 __MPN(mod_1_1p_cps_init), 158 __MPN(mod_1s_2p_init), 159 __MPN(mod_1s_2p_cps_init), 160 __MPN(mod_1s_4p_init), 161 __MPN(mod_1s_4p_cps_init), 162 __MPN(mod_34lsub1_init), 163 __MPN(modexact_1c_odd_init), 164 __MPN(mul_1_init), 165 __MPN(mul_basecase_init), 166 __MPN(mullo_basecase_init), 167 __MPN(preinv_divrem_1_init), 168 __MPN(preinv_mod_1_init), 169 __MPN(redc_1_init), 170 __MPN(redc_2_init), 171 __MPN(rshift_init), 172 __MPN(sqr_basecase_init), 173 __MPN(sub_n_init), 174 0, 175 __MPN(submul_1_init), 176 0 177 }; 178 179 int __gmpn_cpuvec_initialized = 0; 180 181 /* The following setups start with generic x86, then overwrite with 182 specifics for a chip, and higher versions of that chip. 183 184 The arrangement of the setups here will normally be the same as the $path 185 selections in configure.in for the respective chips. 186 187 This code is reentrant and thread safe. We always calculate the same 188 decided_cpuvec, so if two copies of the code are running it doesn't 189 matter which completes first, both write the same to __gmpn_cpuvec. 190 191 We need to go via decided_cpuvec because if one thread has completed 192 __gmpn_cpuvec then it may be making use of the threshold values in that 193 vector. If another thread is still running __gmpn_cpuvec_init then we 194 don't want it to write different values to those fields since some of the 195 asm routines only operate correctly up to their own defined threshold, 196 not an arbitrary value. */ 197 198 void 199 __gmpn_cpuvec_init (void) 200 { 201 struct cpuvec_t decided_cpuvec; 202 203 TRACE (printf ("__gmpn_cpuvec_init:\n")); 204 205 memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec)); 206 207 CPUVEC_SETUP_x86; 208 CPUVEC_SETUP_fat; 209 210 if (! __gmpn_cpuid_available ()) 211 { 212 TRACE (printf (" 80386, or early 80486 without cpuid\n")); 213 } 214 else 215 { 216 char vendor_string[13]; 217 char dummy_string[12]; 218 long fms; 219 int family, model; 220 221 __gmpn_cpuid (vendor_string, 0); 222 vendor_string[12] = 0; 223 224 fms = __gmpn_cpuid (dummy_string, 1); 225 family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff); 226 model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0); 227 228 if (strcmp (vendor_string, "GenuineIntel") == 0) 229 { 230 switch (family) 231 { 232 case 4: 233 TRACE (printf (" 80486 with cpuid\n")); 234 break; 235 236 case 5: 237 TRACE (printf (" pentium\n")); 238 CPUVEC_SETUP_pentium; 239 if (model >= 4) 240 { 241 TRACE (printf (" pentiummmx\n")); 242 CPUVEC_SETUP_pentium_mmx; 243 } 244 break; 245 246 case 6: 247 TRACE (printf (" p6\n")); 248 CPUVEC_SETUP_p6; 249 switch (model) 250 { 251 case 0x00: 252 case 0x01: 253 TRACE (printf (" pentiumpro\n")); 254 break; 255 256 case 0x02: 257 case 0x03: 258 case 0x04: 259 case 0x05: 260 case 0x06: 261 TRACE (printf (" pentium2\n")); 262 CPUVEC_SETUP_p6_mmx; 263 break; 264 265 case 0x07: 266 case 0x08: 267 case 0x0a: 268 case 0x0b: 269 case 0x0c: 270 TRACE (printf (" pentium3\n")); 271 CPUVEC_SETUP_p6_mmx; 272 CPUVEC_SETUP_p6_p3mmx; 273 break; 274 275 case 0x09: /* Banias */ 276 case 0x0d: /* Dothan */ 277 case 0x0e: /* Yonah */ 278 TRACE (printf (" Banias/Bothan/Yonah\n")); 279 CPUVEC_SETUP_p6_mmx; 280 CPUVEC_SETUP_p6_p3mmx; 281 CPUVEC_SETUP_p6_sse2; 282 break; 283 284 case 0x0f: /* Conroe Merom Kentsfield Allendale */ 285 case 0x10: 286 case 0x11: 287 case 0x12: 288 case 0x13: 289 case 0x14: 290 case 0x15: 291 case 0x16: 292 case 0x17: /* PNR Wolfdale Yorkfield */ 293 case 0x18: 294 case 0x19: 295 case 0x1d: /* PNR Dunnington */ 296 TRACE (printf (" Conroe\n")); 297 CPUVEC_SETUP_p6_mmx; 298 CPUVEC_SETUP_p6_p3mmx; 299 CPUVEC_SETUP_p6_sse2; 300 CPUVEC_SETUP_core2; 301 break; 302 303 case 0x1c: /* Atom Silverthorne */ 304 case 0x26: /* Atom Lincroft */ 305 case 0x27: /* Atom Saltwell */ 306 case 0x36: /* Atom Cedarview/Saltwell */ 307 TRACE (printf (" atom\n")); 308 CPUVEC_SETUP_atom; 309 CPUVEC_SETUP_atom_mmx; 310 CPUVEC_SETUP_atom_sse2; 311 break; 312 313 case 0x1a: /* NHM Gainestown */ 314 case 0x1b: 315 case 0x1e: /* NHM Lynnfield/Jasper */ 316 case 0x1f: 317 case 0x20: 318 case 0x21: 319 case 0x22: 320 case 0x23: 321 case 0x24: 322 case 0x25: /* WSM Clarkdale/Arrandale */ 323 case 0x28: 324 case 0x29: 325 case 0x2b: 326 case 0x2c: /* WSM Gulftown */ 327 case 0x2e: /* NHM Beckton */ 328 case 0x2f: /* WSM Eagleton */ 329 TRACE (printf (" nehalem/westmere\n")); 330 CPUVEC_SETUP_p6_mmx; 331 CPUVEC_SETUP_p6_p3mmx; 332 CPUVEC_SETUP_p6_sse2; 333 CPUVEC_SETUP_core2; 334 CPUVEC_SETUP_coreinhm; 335 break; 336 337 case 0x2a: /* SBR */ 338 case 0x2d: /* SBR-EP */ 339 case 0x3a: /* IBR */ 340 case 0x3c: /* Haswell */ 341 TRACE (printf (" sandybridge\n")); 342 CPUVEC_SETUP_p6_mmx; 343 CPUVEC_SETUP_p6_p3mmx; 344 CPUVEC_SETUP_p6_sse2; 345 CPUVEC_SETUP_core2; 346 CPUVEC_SETUP_coreinhm; 347 CPUVEC_SETUP_coreisbr; 348 break; 349 } 350 break; 351 352 case 15: 353 TRACE (printf (" pentium4\n")); 354 CPUVEC_SETUP_pentium4; 355 CPUVEC_SETUP_pentium4_mmx; 356 CPUVEC_SETUP_pentium4_sse2; 357 break; 358 } 359 } 360 else if (strcmp (vendor_string, "AuthenticAMD") == 0) 361 { 362 switch (family) 363 { 364 case 5: 365 if (model <= 3) 366 { 367 TRACE (printf (" k5\n")); 368 } 369 else 370 { 371 TRACE (printf (" k6\n")); 372 CPUVEC_SETUP_k6; 373 CPUVEC_SETUP_k6_mmx; 374 if (model >= 8) 375 { 376 TRACE (printf (" k62\n")); 377 CPUVEC_SETUP_k6_k62mmx; 378 } 379 if (model >= 9) 380 { 381 TRACE (printf (" k63\n")); 382 } 383 } 384 break; 385 case 6: 386 TRACE (printf (" athlon\n")); 387 CPUVEC_SETUP_k7; 388 CPUVEC_SETUP_k7_mmx; 389 break; 390 391 case 0x0f: /* k8 */ 392 case 0x11: /* "fam 11h", mix of k8 and k10 */ 393 case 0x13: /* unknown, conservativeky assume k8 */ 394 case 0x16: /* unknown, conservativeky assume k8 */ 395 case 0x17: /* unknown, conservativeky assume k8 */ 396 TRACE (printf (" k8\n")); 397 CPUVEC_SETUP_k7; 398 CPUVEC_SETUP_k7_mmx; 399 CPUVEC_SETUP_k8; 400 break; 401 402 case 0x10: /* k10 */ 403 case 0x12: /* k10 (llano) */ 404 TRACE (printf (" k10\n")); 405 CPUVEC_SETUP_k7; 406 CPUVEC_SETUP_k7_mmx; 407 break; 408 409 case 0x14: /* bobcat */ 410 TRACE (printf (" bobcat\n")); 411 CPUVEC_SETUP_k7; 412 CPUVEC_SETUP_k7_mmx; 413 CPUVEC_SETUP_bobcat; 414 break; 415 416 case 0x15: /* bulldozer */ 417 TRACE (printf (" bulldozer\n")); 418 CPUVEC_SETUP_k7; 419 CPUVEC_SETUP_k7_mmx; 420 break; 421 } 422 } 423 else if (strcmp (vendor_string, "CentaurHauls") == 0) 424 { 425 switch (family) 426 { 427 case 6: 428 TRACE (printf (" viac3\n")); 429 if (model >= 9) 430 { 431 TRACE (printf (" viac32\n")); 432 } 433 if (model >= 15) 434 { 435 TRACE (printf (" nano\n")); 436 CPUVEC_SETUP_nano; 437 } 438 break; 439 } 440 } 441 else if (strcmp (vendor_string, "CyrixInstead") == 0) 442 { 443 /* Should recognize Cyrix' processors too. */ 444 TRACE (printf (" cyrix something\n")); 445 } 446 } 447 448 /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1. 449 Instead default to the plain versions from whichever CPU we detected. 450 The function arguments are compatible, no need for any glue code. */ 451 if (decided_cpuvec.preinv_divrem_1 == NULL) 452 decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1; 453 if (decided_cpuvec.preinv_mod_1 == NULL) 454 decided_cpuvec.preinv_mod_1 =(preinv_mod_1_t) decided_cpuvec.mod_1; 455 456 ASSERT_CPUVEC (decided_cpuvec); 457 CPUVEC_INSTALL (decided_cpuvec); 458 459 /* Set this once the threshold fields are ready. 460 Use volatile to prevent it getting moved. */ 461 *((volatile int *) &__gmpn_cpuvec_initialized) = 1; 462 } 463