1 /* x86 fat binary initializers.
2
3 THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.
4 THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
5 COMPLETELY IN FUTURE GNU MP RELEASES.
6
7 Copyright 2003, 2004, 2011-2013, 2015, 2017, 2018 Free Software Foundation,
8 Inc.
9
10 This file is part of the GNU MP Library.
11
12 The GNU MP Library is free software; you can redistribute it and/or modify
13 it under the terms of either:
14
15 * the GNU Lesser General Public License as published by the Free
16 Software Foundation; either version 3 of the License, or (at your
17 option) any later version.
18
19 or
20
21 * the GNU General Public License as published by the Free Software
22 Foundation; either version 2 of the License, or (at your option) any
23 later version.
24
25 or both in parallel, as here.
26
27 The GNU MP Library is distributed in the hope that it will be useful, but
28 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
29 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
30 for more details.
31
32 You should have received copies of the GNU General Public License and the
33 GNU Lesser General Public License along with the GNU MP Library. If not,
34 see https://www.gnu.org/licenses/. */
35
36 #include <stdio.h> /* for printf */
37 #include <stdlib.h> /* for getenv */
38 #include <string.h>
39
40 #include "gmp-impl.h"
41
42 /* Change this to "#define TRACE(x) x" for some traces. */
43 #define TRACE(x)
44
45
46 /* fat_entry.asm */
/* Low-level cpuid access, implemented outside this file (see the
   fat_entry.asm note above).

   __gmpn_cpuid (dst, id): as used in this file, id 0 stores a 12-byte
   vendor identification into dst with no NUL terminator (the caller adds
   one), and id 1 returns the family/model/stepping word that
   __gmpn_cpuvec_init decodes.  With id 1 the contents of dst are not used
   by the caller.

   __gmpn_cpuid_available (): nonzero when cpuid can be used; it is checked
   before any __gmpn_cpuid call.  */
long __gmpn_cpuid (char [12], int);
int __gmpn_cpuid_available (void);
49
50
51 #if WANT_FAKE_CPUID
52 /* The "name"s in the table are values for the GMP_CPU_TYPE environment
53 variable. Anything can be used, but for now it's the canonical cpu types
54 as per config.guess/config.sub. */
55
/* From here on in this file, route every cpuid query to the table-driven
   fakes defined below instead of the real routines.  */
#define __gmpn_cpuid            fake_cpuid
#define __gmpn_cpuid_available  fake_cpuid_available
58
/* Pack FAMILY and MODEL into a family/model/stepping word (stepping 0) in
   the layout decoded by __gmpn_cpuvec_init:
     family = bits 8-11 + bits 20-27
     model  = bits 4-7  + (bits 16-19 << 4)
   The constants are unsigned so the result matches the `unsigned fms'
   field and no signed shift is involved.  */
#define MAKE_FMS(family, model)						\
  ((((family) & 0xfu) << 8) + (((family) & 0xff0u) << 20)		\
   + (((model) & 0xfu) << 4) + (((model) & 0xf0u) << 12))

/* One entry per accepted GMP_CPU_TYPE value: the vendor string reported for
   cpuid id 0 and the family/model word reported for id 1.  An empty vendor
   marks a CPU without cpuid.  */
static const struct {
  const char  *name;
  const char  *vendor;
  unsigned    fms;
} fake_cpuid_table[] = {
  { "i386",       "",             0 },
  { "i486",       "GenuineIntel", MAKE_FMS (4, 0) },
  { "pentium",    "GenuineIntel", MAKE_FMS (5, 0) },
  { "pentiummmx", "GenuineIntel", MAKE_FMS (5, 4) },
  { "pentiumpro", "GenuineIntel", MAKE_FMS (6, 0) },
  { "pentium2",   "GenuineIntel", MAKE_FMS (6, 2) },
  { "pentium3",   "GenuineIntel", MAKE_FMS (6, 7) },
  { "pentium4",   "GenuineIntel", MAKE_FMS (15, 2) },
  { "prescott",   "GenuineIntel", MAKE_FMS (15, 3) },
  { "nocona",     "GenuineIntel", MAKE_FMS (15, 4) },
  { "core2",      "GenuineIntel", MAKE_FMS (6, 0xf) },
  { "nehalem",    "GenuineIntel", MAKE_FMS (6, 0x1a) },
  { "nhm",        "GenuineIntel", MAKE_FMS (6, 0x1a) },
  { "atom",       "GenuineIntel", MAKE_FMS (6, 0x1c) },
  { "westmere",   "GenuineIntel", MAKE_FMS (6, 0x25) },
  { "wsm",        "GenuineIntel", MAKE_FMS (6, 0x25) },
  { "sandybridge","GenuineIntel", MAKE_FMS (6, 0x2a) },
  { "sbr",        "GenuineIntel", MAKE_FMS (6, 0x2a) },
  { "silvermont", "GenuineIntel", MAKE_FMS (6, 0x37) },
  { "slm",        "GenuineIntel", MAKE_FMS (6, 0x37) },
  { "haswell",    "GenuineIntel", MAKE_FMS (6, 0x3c) },
  { "hwl",        "GenuineIntel", MAKE_FMS (6, 0x3c) },
  { "broadwell",  "GenuineIntel", MAKE_FMS (6, 0x3d) },
  { "bwl",        "GenuineIntel", MAKE_FMS (6, 0x3d) },
  { "skylake",    "GenuineIntel", MAKE_FMS (6, 0x5e) },
  { "sky",        "GenuineIntel", MAKE_FMS (6, 0x5e) },

  { "k5",         "AuthenticAMD", MAKE_FMS (5, 0) },
  /* Must be a model above 3: __gmpn_cpuvec_init treats AMD family 5 with
     model <= 3 as k5, so a lower value would never reach the k6 setup.  */
  { "k6",         "AuthenticAMD", MAKE_FMS (5, 6) },
  { "k62",        "AuthenticAMD", MAKE_FMS (5, 8) },
  { "k63",        "AuthenticAMD", MAKE_FMS (5, 9) },
  { "athlon",     "AuthenticAMD", MAKE_FMS (6, 0) },
  { "k8",         "AuthenticAMD", MAKE_FMS (15, 0) },
  { "k10",        "AuthenticAMD", MAKE_FMS (16, 0) },
  { "bobcat",     "AuthenticAMD", MAKE_FMS (20, 1) },
  { "bulldozer",  "AuthenticAMD", MAKE_FMS (21, 1) },
  { "piledriver", "AuthenticAMD", MAKE_FMS (21, 2) },
  { "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) },
  { "excavator",  "AuthenticAMD", MAKE_FMS (21, 0x60) },
  { "jaguar",     "AuthenticAMD", MAKE_FMS (22, 1) },
  { "zen",        "AuthenticAMD", MAKE_FMS (23, 1) },

  { "viac3",      "CentaurHauls", MAKE_FMS (6, 0) },
  { "viac32",     "CentaurHauls", MAKE_FMS (6, 9) },
  { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
};
114
115 static int
fake_cpuid_lookup(void)116 fake_cpuid_lookup (void)
117 {
118 char *s;
119 int i;
120
121 s = getenv ("GMP_CPU_TYPE");
122 if (s == NULL)
123 {
124 printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n");
125 abort ();
126 }
127
128 for (i = 0; i < numberof (fake_cpuid_table); i++)
129 if (strcmp (s, fake_cpuid_table[i].name) == 0)
130 return i;
131
132 printf ("GMP_CPU_TYPE=%s unknown\n", s);
133 abort ();
134 }
135
136 static int
fake_cpuid_available(void)137 fake_cpuid_available (void)
138 {
139 return fake_cpuid_table[fake_cpuid_lookup()].vendor[0] != '\0';
140 }
141
142 static long
fake_cpuid(char dst[12],int id)143 fake_cpuid (char dst[12], int id)
144 {
145 int i = fake_cpuid_lookup();
146
147 switch (id) {
148 case 0:
149 memcpy (dst, fake_cpuid_table[i].vendor, 12);
150 return 0;
151 case 1:
152 return fake_cpuid_table[i].fms;
153 default:
154 printf ("fake_cpuid(): oops, unknown id %d\n", id);
155 abort ();
156 }
157 }
158 #endif
159
160
/* Function-pointer types built from the DECL_preinv_divrem_1 and
   DECL_preinv_mod_1 declaration macros (defined outside this file).  They
   are used in __gmpn_cpuvec_init to cast the plain divrem_1 / mod_1 entries
   when no preinv variant has been selected.  */
typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t));
typedef DECL_preinv_mod_1    ((*preinv_mod_1_t));
163
/* The global dispatch vector, in its state before __gmpn_cpuvec_init has
   run.  This is a positional initializer, so the entries must stay in the
   member order of struct cpuvec_t (declared outside this file).

   Each named slot starts out pointing at the matching *_init routine.
   NOTE(review): those routines are defined elsewhere; presumably each one
   calls __gmpn_cpuvec_init and then re-dispatches -- confirm.  The 0
   entries are slots with no *_init routine here; which members they
   correspond to cannot be told from this file.  */
struct cpuvec_t __gmpn_cpuvec = {
  __MPN(add_n_init),
  0,
  0,
  __MPN(addmul_1_init),
  0,
  __MPN(bdiv_dbm1c_init),
  __MPN(cnd_add_n_init),
  __MPN(cnd_sub_n_init),
  __MPN(com_init),
  __MPN(copyd_init),
  __MPN(copyi_init),
  __MPN(divexact_1_init),
  __MPN(divrem_1_init),
  __MPN(gcd_11_init),
  __MPN(lshift_init),
  __MPN(lshiftc_init),
  __MPN(mod_1_init),
  __MPN(mod_1_1p_init),
  __MPN(mod_1_1p_cps_init),
  __MPN(mod_1s_2p_init),
  __MPN(mod_1s_2p_cps_init),
  __MPN(mod_1s_4p_init),
  __MPN(mod_1s_4p_cps_init),
  __MPN(mod_34lsub1_init),
  __MPN(modexact_1c_odd_init),
  __MPN(mul_1_init),
  __MPN(mul_basecase_init),
  __MPN(mullo_basecase_init),
  __MPN(preinv_divrem_1_init),
  __MPN(preinv_mod_1_init),
  __MPN(redc_1_init),
  __MPN(redc_2_init),
  __MPN(rshift_init),
  __MPN(sqr_basecase_init),
  __MPN(sub_n_init),
  0,
  __MPN(submul_1_init),
  0
};
204
/* Zero until __gmpn_cpuvec_init has installed the selected vector; that
   function stores 1 here as its last action.  */
int __gmpn_cpuvec_initialized = 0;
206
207 /* The following setups start with generic x86, then overwrite with
208 specifics for a chip, and higher versions of that chip.
209
210 The arrangement of the setups here will normally be the same as the $path
211 selections in configure.in for the respective chips.
212
213 This code is reentrant and thread safe. We always calculate the same
214 decided_cpuvec, so if two copies of the code are running it doesn't
215 matter which completes first, both write the same to __gmpn_cpuvec.
216
217 We need to go via decided_cpuvec because if one thread has completed
218 __gmpn_cpuvec then it may be making use of the threshold values in that
219 vector. If another thread is still running __gmpn_cpuvec_init then we
220 don't want it to write different values to those fields since some of the
221 asm routines only operate correctly up to their own defined threshold,
222 not an arbitrary value. */
223
/* Select the routine set for the running CPU and install it in
   __gmpn_cpuvec.

   A local vector, decided_cpuvec, is zeroed and then filled by a sequence
   of CPUVEC_SETUP_* macros (defined outside this file).  The baseline
   setups are applied unconditionally; if cpuid is available, the vendor
   string and the family/model numbers choose further setups, applied from
   the most generic to the most specific for that chip.  The order of the
   CPUVEC_SETUP_* lines within each case is therefore significant.  A
   vendor, family or model that is not listed simply keeps whatever has
   been set up by that point.  Finally the vector is checked, installed,
   and __gmpn_cpuvec_initialized is set to 1.  */
void
__gmpn_cpuvec_init (void)
{
  struct cpuvec_t  decided_cpuvec;

  TRACE (printf ("__gmpn_cpuvec_init:\n"));

  /* Start from all-zero so the NULL tests near the end of the function can
     tell which preinv entries no setup has filled in.  */
  memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec));

  /* Baseline setups, applied for every CPU.  */
  CPUVEC_SETUP_x86;
  CPUVEC_SETUP_fat;

  if (! __gmpn_cpuid_available ())
    {
      /* No cpuid: nothing beyond the baseline can be selected.  */
      TRACE (printf (" 80386, or early 80486 without cpuid\n"));
    }
  else
    {
      char vendor_string[13];   /* 12 bytes from cpuid plus a terminator */
      char dummy_string[12];    /* receives the unused string of the id 1 call */
      long fms;
      int family, model;

      /* id 0 yields the vendor identification; terminate it so that it can
         be compared with strcmp.  */
      __gmpn_cpuid (vendor_string, 0);
      vendor_string[12] = 0;

      /* id 1 yields the family/model/stepping word.  The family is bits
         8-11 plus bits 20-27; the model is bits 4-7 with bits 16-19 as its
         high nibble.  */
      fms = __gmpn_cpuid (dummy_string, 1);
      family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
      model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);

      if (strcmp (vendor_string, "GenuineIntel") == 0)
        {
          switch (family)
            {
            case 4:
              TRACE (printf (" 80486 with cpuid\n"));
              break;

            case 5:
              TRACE (printf (" pentium\n"));
              CPUVEC_SETUP_pentium;
              if (model == 4 || model == 8)
                {
                  TRACE (printf (" pentiummmx\n"));
                  CPUVEC_SETUP_pentium_mmx;
                }
              break;

            case 6:
              /* The p6 setup is applied to every family 6 part, including
                 models not listed below; the model then adds refinements.  */
              TRACE (printf (" p6\n"));
              CPUVEC_SETUP_p6;
              switch (model)
                {
                case 0x00:
                case 0x01:
                  TRACE (printf (" pentiumpro\n"));
                  break;

                case 0x02:
                case 0x03:
                case 0x04:
                case 0x05:
                case 0x06:
                  TRACE (printf (" pentium2\n"));
                  CPUVEC_SETUP_p6_mmx;
                  break;

                case 0x07:
                case 0x08:
                case 0x0a:
                case 0x0b:
                case 0x0c:
                  TRACE (printf (" pentium3\n"));
                  CPUVEC_SETUP_p6_mmx;
                  CPUVEC_SETUP_p6_p3mmx;
                  break;

                case 0x09:              /* Banias */
                case 0x0d:              /* Dothan */
                case 0x0e:              /* Yonah */
                  TRACE (printf (" Banias/Dothan/Yonah\n"));
                  CPUVEC_SETUP_p6_mmx;
                  CPUVEC_SETUP_p6_p3mmx;
                  CPUVEC_SETUP_p6_sse2;
                  break;

                case 0x0f:              /* Conroe Merom Kentsfield Allendale */
                case 0x10:
                case 0x11:
                case 0x12:
                case 0x13:
                case 0x14:
                case 0x15:
                case 0x16:
                case 0x17:              /* PNR Wolfdale Yorkfield */
                case 0x18:
                case 0x19:
                case 0x1d:              /* PNR Dunnington */
                  TRACE (printf (" Conroe\n"));
                  CPUVEC_SETUP_p6_mmx;
                  CPUVEC_SETUP_p6_p3mmx;
                  CPUVEC_SETUP_p6_sse2;
                  CPUVEC_SETUP_core2;
                  break;

                case 0x1c:              /* Atom Silverthorne */
                case 0x26:              /* Atom Lincroft */
                case 0x27:              /* Atom Saltwell */
                case 0x36:              /* Atom Cedarview/Saltwell */
                  TRACE (printf (" atom\n"));
                  CPUVEC_SETUP_atom;
                  CPUVEC_SETUP_atom_mmx;
                  CPUVEC_SETUP_atom_sse2;
                  break;

                case 0x37:              /* Silvermont */
                case 0x4a:              /* Silvermont */
                case 0x4c:              /* Airmont */
                case 0x4d:              /* Silvermont/Avoton */
                case 0x5a:              /* Silvermont */
                  TRACE (printf (" silvermont\n"));
                  CPUVEC_SETUP_atom;
                  CPUVEC_SETUP_atom_mmx;
                  CPUVEC_SETUP_atom_sse2;
                  CPUVEC_SETUP_silvermont;
                  break;

                case 0x5c:              /* Goldmont */
                case 0x5f:              /* Goldmont */
                case 0x7a:              /* Goldmont Plus */
                  TRACE (printf (" goldmont\n"));
                  CPUVEC_SETUP_atom;
                  CPUVEC_SETUP_atom_mmx;
                  CPUVEC_SETUP_atom_sse2;
                  CPUVEC_SETUP_goldmont;
                  break;

                case 0x1a:              /* NHM Gainestown */
                case 0x1b:
                case 0x1e:              /* NHM Lynnfield/Jasper */
                case 0x1f:
                case 0x20:
                case 0x21:
                case 0x22:
                case 0x23:
                case 0x24:
                case 0x25:              /* WSM Clarkdale/Arrandale */
                case 0x28:
                case 0x29:
                case 0x2b:
                case 0x2c:              /* WSM Gulftown */
                case 0x2e:              /* NHM Beckton */
                case 0x2f:              /* WSM Eagleton */
                  TRACE (printf (" nehalem/westmere\n"));
                  CPUVEC_SETUP_p6_mmx;
                  CPUVEC_SETUP_p6_p3mmx;
                  CPUVEC_SETUP_p6_sse2;
                  CPUVEC_SETUP_core2;
                  CPUVEC_SETUP_coreinhm;
                  break;

                /* Sandy Bridge and every later listed core share the
                   sandybridge setup chain.  */
                case 0x2a:              /* SBR */
                case 0x2d:              /* SBR-EP */
                case 0x3a:              /* IBR */
                case 0x3e:              /* IBR Ivytown */
                case 0x3c:              /* Haswell client */
                case 0x3f:              /* Haswell server */
                case 0x45:              /* Haswell ULT */
                case 0x46:              /* Crystal Well */
                case 0x3d:              /* Broadwell */
                case 0x47:              /* Broadwell */
                case 0x4f:              /* Broadwell server */
                case 0x56:              /* Broadwell microserver */
                case 0x4e:              /* Skylake client */
                case 0x55:              /* Skylake server */
                case 0x5e:              /* Skylake */
                case 0x8e:              /* Kabylake */
                case 0x9e:              /* Kabylake */
                  TRACE (printf (" sandybridge\n"));
                  CPUVEC_SETUP_p6_mmx;
                  CPUVEC_SETUP_p6_p3mmx;
                  CPUVEC_SETUP_p6_sse2;
                  CPUVEC_SETUP_core2;
                  CPUVEC_SETUP_coreinhm;
                  CPUVEC_SETUP_coreisbr;
                  break;
                }
              break;

            case 15:
              TRACE (printf (" pentium4\n"));
              CPUVEC_SETUP_pentium4;
              CPUVEC_SETUP_pentium4_mmx;
              CPUVEC_SETUP_pentium4_sse2;
              break;
            }
        }
      else if (strcmp (vendor_string, "AuthenticAMD") == 0)
        {
          switch (family)
            {
            case 5:
              /* Family 5 covers both k5 (models 0-3, baseline only) and the
                 k6 line (models 4 and up).  */
              if (model <= 3)
                {
                  TRACE (printf (" k5\n"));
                }
              else
                {
                  TRACE (printf (" k6\n"));
                  CPUVEC_SETUP_k6;
                  CPUVEC_SETUP_k6_mmx;
                  if (model >= 8)
                    {
                      TRACE (printf (" k62\n"));
                      CPUVEC_SETUP_k6_k62mmx;
                    }
                  if (model >= 9)
                    {
                      /* Only traced; no setup beyond k62 is applied.  */
                      TRACE (printf (" k63\n"));
                    }
                }
              break;
            case 6:
              TRACE (printf (" athlon\n"));
              CPUVEC_SETUP_k7;
              CPUVEC_SETUP_k7_mmx;
              break;

            case 0x0f:          /* k8 */
            case 0x11:          /* "fam 11h", mix of k8 and k10 */
            case 0x13:          /* unknown, conservatively assume k8 */
              TRACE (printf (" k8\n"));
              CPUVEC_SETUP_k7;
              CPUVEC_SETUP_k7_mmx;
              CPUVEC_SETUP_k8;
              break;

            case 0x10:          /* k10 */
            case 0x12:          /* k10 (llano) */
              /* Same setups as athlon; no k10-specific setup is applied.  */
              TRACE (printf (" k10\n"));
              CPUVEC_SETUP_k7;
              CPUVEC_SETUP_k7_mmx;
              break;

            case 0x14:          /* bobcat */
            case 0x16:          /* jaguar */
              TRACE (printf (" bobcat\n"));
              CPUVEC_SETUP_k7;
              CPUVEC_SETUP_k7_mmx;
              CPUVEC_SETUP_bt1;
              break;

            case 0x15:          /* bulldozer */
              TRACE (printf (" bulldozer\n"));
              CPUVEC_SETUP_k7;
              CPUVEC_SETUP_k7_mmx;
              CPUVEC_SETUP_bd1;
              break;

            case 0x17:          /* zen */
            case 0x19:          /* zen3 */
              /* Same setups as athlon; no zen-specific setup is applied.  */
              TRACE (printf (" zen\n"));
              CPUVEC_SETUP_k7;
              CPUVEC_SETUP_k7_mmx;
              break;
            }
        }
      else if (strcmp (vendor_string, "CentaurHauls") == 0)
        {
          switch (family)
            {
            case 6:
              /* viac3 and viac32 are only traced; nano (model >= 15) is the
                 only one given a setup of its own.  */
              TRACE (printf (" viac3\n"));
              if (model >= 9)
                {
                  TRACE (printf (" viac32\n"));
                }
              if (model >= 15)
                {
                  TRACE (printf (" nano\n"));
                  CPUVEC_SETUP_nano;
                }
              break;
            }
        }
      else if (strcmp (vendor_string, "CyrixInstead") == 0)
        {
          /* Should recognize Cyrix' processors too. */
          TRACE (printf (" cyrix something\n"));
        }
    }

  /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1.
     Instead default to the plain versions from whichever CPU we detected.
     The function arguments are compatible, no need for any glue code.  */
  if (decided_cpuvec.preinv_divrem_1 == NULL)
    decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1;
  if (decided_cpuvec.preinv_mod_1 == NULL)
    decided_cpuvec.preinv_mod_1 =(preinv_mod_1_t) decided_cpuvec.mod_1;

  /* Both macros are defined outside this file.  NOTE(review): presumably
     ASSERT_CPUVEC checks that every entry is filled in and CPUVEC_INSTALL
     copies decided_cpuvec into __gmpn_cpuvec -- confirm.  */
  ASSERT_CPUVEC (decided_cpuvec);
  CPUVEC_INSTALL (decided_cpuvec);

  /* Set this once the threshold fields are ready.
     Use volatile to prevent it getting moved.  */
  *((volatile int *) &__gmpn_cpuvec_initialized) = 1;
}
531