xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/fat/fat.c (revision 3587d6f89c746bbb4f886219ddacd41ace480ecf)
1 /* x86 fat binary initializers.
2 
3    THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.
4    THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
5    COMPLETELY IN FUTURE GNU MP RELEASES.
6 
7 Copyright 2003, 2004, 2011-2013, 2015, 2017, 2018 Free Software Foundation,
8 Inc.
9 
10 This file is part of the GNU MP Library.
11 
12 The GNU MP Library is free software; you can redistribute it and/or modify
13 it under the terms of either:
14 
15   * the GNU Lesser General Public License as published by the Free
16     Software Foundation; either version 3 of the License, or (at your
17     option) any later version.
18 
19 or
20 
21   * the GNU General Public License as published by the Free Software
22     Foundation; either version 2 of the License, or (at your option) any
23     later version.
24 
25 or both in parallel, as here.
26 
27 The GNU MP Library is distributed in the hope that it will be useful, but
28 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
29 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
30 for more details.
31 
32 You should have received copies of the GNU General Public License and the
33 GNU Lesser General Public License along with the GNU MP Library.  If not,
34 see https://www.gnu.org/licenses/.  */
35 
36 #include <stdio.h>    /* for printf */
37 #include <stdlib.h>   /* for getenv */
38 #include <string.h>
39 
40 #include "gmp-impl.h"
41 
42 /* Change this to "#define TRACE(x) x" for some traces. */
43 #define TRACE(x)
44 
45 
46 /* fat_entry.asm */
/* CPUID access routines.  As used in this file, __gmpn_cpuid takes a
   12-byte buffer and a request id: with id 0 the buffer is read back as
   the vendor string, with id 1 the return value is decoded into family
   and model.  __gmpn_cpuid_available is tested for a nonzero result
   before __gmpn_cpuid is called.  */
47 long __gmpn_cpuid (char [12], int);
48 int  __gmpn_cpuid_available (void);
49 
50 
51 #if WANT_FAKE_CPUID
52 /* The "name"s in the table are values for the GMP_CPU_TYPE environment
53    variable.  Anything can be used, but for now it's the canonical cpu types
54    as per config.guess/config.sub.  */
55 
/* Under WANT_FAKE_CPUID, every later use of these two names in this file
   resolves to the static fake_cpuid / fake_cpuid_available functions
   defined below rather than to the routines declared above.  */
56 #define __gmpn_cpuid            fake_cpuid
57 #define __gmpn_cpuid_available  fake_cpuid_available
58 
/* Build the fake cpuid id-1 word from a decoded FAMILY and MODEL.

   Layout: the low four bits of FAMILY go to bits 8-11 and the FAMILY bits
   selected by 0xff0 are shifted up by 20; the low four bits of MODEL go to
   bits 4-7 and the next four MODEL bits to bits 16-19.  This is the inverse
   of the decoding in __gmpn_cpuvec_init, which adds the two family fields
   and the two model fields back together, so table entries are written
   with the family and model numbers that function switches on.

   All arithmetic is done in unsigned so that a FAMILY with high bits set
   cannot overflow a signed int in the "<< 20".  The result has type
   unsigned and is an integer constant expression when the arguments are.  */
#define MAKE_FMS(family, model)						\
  ((((unsigned) (family) & 0xfu) << 8)					\
   + (((unsigned) (family) & 0xff0u) << 20)				\
   + (((unsigned) (model) & 0xfu) << 4)					\
   + (((unsigned) (model) & 0xf0u) << 12))
62 
63 static struct {
64   const char  *name;
65   const char  *vendor;
66   unsigned    fms;
67 } fake_cpuid_table[] = {
68   { "i386",       "" },
69   { "i486",       "GenuineIntel", MAKE_FMS (4, 0) },
70   { "pentium",    "GenuineIntel", MAKE_FMS (5, 0) },
71   { "pentiummmx", "GenuineIntel", MAKE_FMS (5, 4) },
72   { "pentiumpro", "GenuineIntel", MAKE_FMS (6, 0) },
73   { "pentium2",   "GenuineIntel", MAKE_FMS (6, 2) },
74   { "pentium3",   "GenuineIntel", MAKE_FMS (6, 7) },
75   { "pentium4",   "GenuineIntel", MAKE_FMS (15, 2) },
76   { "prescott",   "GenuineIntel", MAKE_FMS (15, 3) },
77   { "nocona",     "GenuineIntel", MAKE_FMS (15, 4) },
78   { "core2",      "GenuineIntel", MAKE_FMS (6, 0xf) },
79   { "nehalem",    "GenuineIntel", MAKE_FMS (6, 0x1a) },
80   { "nhm",        "GenuineIntel", MAKE_FMS (6, 0x1a) },
81   { "atom",       "GenuineIntel", MAKE_FMS (6, 0x1c) },
82   { "westmere",   "GenuineIntel", MAKE_FMS (6, 0x25) },
83   { "wsm",        "GenuineIntel", MAKE_FMS (6, 0x25) },
84   { "sandybridge","GenuineIntel", MAKE_FMS (6, 0x2a) },
85   { "sbr",        "GenuineIntel", MAKE_FMS (6, 0x2a) },
86   { "silvermont", "GenuineIntel", MAKE_FMS (6, 0x37) },
87   { "slm",        "GenuineIntel", MAKE_FMS (6, 0x37) },
88   { "haswell",    "GenuineIntel", MAKE_FMS (6, 0x3c) },
89   { "hwl",        "GenuineIntel", MAKE_FMS (6, 0x3c) },
90   { "broadwell",  "GenuineIntel", MAKE_FMS (6, 0x3d) },
91   { "bwl",        "GenuineIntel", MAKE_FMS (6, 0x3d) },
92   { "skylake",    "GenuineIntel", MAKE_FMS (6, 0x5e) },
93   { "sky",        "GenuineIntel", MAKE_FMS (6, 0x5e) },
94 
95   { "k5",         "AuthenticAMD", MAKE_FMS (5, 0) },
96   { "k6",         "AuthenticAMD", MAKE_FMS (5, 3) },
97   { "k62",        "AuthenticAMD", MAKE_FMS (5, 8) },
98   { "k63",        "AuthenticAMD", MAKE_FMS (5, 9) },
99   { "athlon",     "AuthenticAMD", MAKE_FMS (6, 0) },
100   { "k8",         "AuthenticAMD", MAKE_FMS (15, 0) },
101   { "k10",        "AuthenticAMD", MAKE_FMS (16, 0) },
102   { "bobcat",     "AuthenticAMD", MAKE_FMS (20, 1) },
103   { "bulldozer",  "AuthenticAMD", MAKE_FMS (21, 1) },
104   { "piledriver", "AuthenticAMD", MAKE_FMS (21, 2) },
105   { "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) },
106   { "excavator",  "AuthenticAMD", MAKE_FMS (21, 0x60) },
107   { "jaguar",     "AuthenticAMD", MAKE_FMS (22, 1) },
108   { "zen",        "AuthenticAMD", MAKE_FMS (23, 1) },
109 
110   { "viac3",      "CentaurHauls", MAKE_FMS (6, 0) },
111   { "viac32",     "CentaurHauls", MAKE_FMS (6, 9) },
112   { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
113 };
114 
115 static int
116 fake_cpuid_lookup (void)
117 {
118   char  *s;
119   int   i;
120 
121   s = getenv ("GMP_CPU_TYPE");
122   if (s == NULL)
123     {
124       printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n");
125       abort ();
126     }
127 
128   for (i = 0; i < numberof (fake_cpuid_table); i++)
129     if (strcmp (s, fake_cpuid_table[i].name) == 0)
130       return i;
131 
132   printf ("GMP_CPU_TYPE=%s unknown\n", s);
133   abort ();
134 }
135 
136 static int
137 fake_cpuid_available (void)
138 {
139   return fake_cpuid_table[fake_cpuid_lookup()].vendor[0] != '\0';
140 }
141 
142 static long
143 fake_cpuid (char dst[12], int id)
144 {
145   int  i = fake_cpuid_lookup();
146 
147   switch (id) {
148   case 0:
149     memcpy (dst, fake_cpuid_table[i].vendor, 12);
150     return 0;
151   case 1:
152     return fake_cpuid_table[i].fms;
153   default:
154     printf ("fake_cpuid(): oops, unknown id %d\n", id);
155     abort ();
156   }
157 }
158 #endif
159 
160 
/* Pointer-to-function types built with the DECL_preinv_divrem_1 and
   DECL_preinv_mod_1 macros (defined outside this file).  They are used in
   __gmpn_cpuvec_init to cast the divrem_1 and mod_1 entries when an empty
   preinv_divrem_1 or preinv_mod_1 slot is filled in.  */
161 typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t));
162 typedef DECL_preinv_mod_1    ((*preinv_mod_1_t));
163 
/* The global routine vector.  It starts out holding the __MPN(..._init)
   entries listed here; __gmpn_cpuvec_init passes its decided vector to
   CPUVEC_INSTALL, which presumably overwrites this table -- confirm
   against the macro's definition.

   The initializers are positional, so their order must follow the member
   order of struct cpuvec_t (declared outside this file).  The literal 0
   entries are slots for which no _init routine is supplied here.  */
164 struct cpuvec_t __gmpn_cpuvec = {
165   __MPN(add_n_init),
166   0,
167   0,
168   __MPN(addmul_1_init),
169   0,
170   __MPN(bdiv_dbm1c_init),
171   __MPN(cnd_add_n_init),
172   __MPN(cnd_sub_n_init),
173   __MPN(com_init),
174   __MPN(copyd_init),
175   __MPN(copyi_init),
176   __MPN(divexact_1_init),
177   __MPN(divrem_1_init),
178   __MPN(gcd_11_init),
179   __MPN(lshift_init),
180   __MPN(lshiftc_init),
181   __MPN(mod_1_init),
182   __MPN(mod_1_1p_init),
183   __MPN(mod_1_1p_cps_init),
184   __MPN(mod_1s_2p_init),
185   __MPN(mod_1s_2p_cps_init),
186   __MPN(mod_1s_4p_init),
187   __MPN(mod_1s_4p_cps_init),
188   __MPN(mod_34lsub1_init),
189   __MPN(modexact_1c_odd_init),
190   __MPN(mul_1_init),
191   __MPN(mul_basecase_init),
192   __MPN(mullo_basecase_init),
193   __MPN(preinv_divrem_1_init),
194   __MPN(preinv_mod_1_init),
195   __MPN(redc_1_init),
196   __MPN(redc_2_init),
197   __MPN(rshift_init),
198   __MPN(sqr_basecase_init),
199   __MPN(sub_n_init),
200   0,
201   __MPN(submul_1_init),
202   0
203 };
204 
/* Zero until __gmpn_cpuvec_init has installed the decided vector; that
   function stores 1 here as its final step.  */
205 int __gmpn_cpuvec_initialized = 0;
206 
207 /* The following setups start with generic x86, then overwrite with
208    specifics for a chip, and higher versions of that chip.
209 
210    The arrangement of the setups here will normally be the same as the $path
211    selections in configure.in for the respective chips.
212 
213    This code is reentrant and thread safe.  We always calculate the same
214    decided_cpuvec, so if two copies of the code are running it doesn't
215    matter which completes first, both write the same to __gmpn_cpuvec.
216 
217    We need to go via decided_cpuvec because if one thread has completed
218    __gmpn_cpuvec then it may be making use of the threshold values in that
219    vector.  If another thread is still running __gmpn_cpuvec_init then we
220    don't want it to write different values to those fields since some of the
221    asm routines only operate correctly up to their own defined threshold,
222    not an arbitrary value.  */
223 
/* Choose the routine set for the CPU we are running on and install it.

   A local decided_cpuvec is zeroed, given the x86 and fat baseline setups,
   and then has further CPUVEC_SETUP_* macros applied according to the
   cpuid vendor string, family and model.  A vendor, family or model that
   matches none of the cases below simply keeps whatever had been applied
   up to that point.  Finally the two preinv slots are defaulted, the
   vector is passed to ASSERT_CPUVEC and CPUVEC_INSTALL, and
   __gmpn_cpuvec_initialized is set to 1.

   The CPUVEC_SETUP_*, ASSERT_CPUVEC and CPUVEC_INSTALL macros are not
   defined in this file; presumably they fill in, validate and copy out
   decided_cpuvec respectively -- confirm against their definitions.  */
224 void
225 __gmpn_cpuvec_init (void)
226 {
227   struct cpuvec_t  decided_cpuvec;
228 
229   TRACE (printf ("__gmpn_cpuvec_init:\n"));
230 
  /* Start from all zero bytes; the preinv_divrem_1 and preinv_mod_1 slots
     are compared against NULL near the end of this function.  */
231   memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec));
232 
  /* Baseline setups, applied whatever CPU is found.  */
233   CPUVEC_SETUP_x86;
234   CPUVEC_SETUP_fat;
235 
236   if (! __gmpn_cpuid_available ())
237     {
238       TRACE (printf ("  80386, or early 80486 without cpuid\n"));
239     }
240   else
241     {
242       char vendor_string[13];
243       char dummy_string[12];
244       long fms;
245       int family, model;
246 
      /* Request 0 supplies the 12 vendor bytes; byte 12 is set to 0 so the
         buffer can be used with strcmp.  */
247       __gmpn_cpuid (vendor_string, 0);
248       vendor_string[12] = 0;
249 
      /* Request 1: only the return value is used, dummy_string just
         provides the buffer argument.  */
250       fms = __gmpn_cpuid (dummy_string, 1);
      /* family = bits 8-11 plus bits 20-27 of fms;
         model  = bits 4-7 plus bits 16-19 (the latter as the high nibble).  */
251       family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
252       model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
253 
254       if (strcmp (vendor_string, "GenuineIntel") == 0)
255         {
256           switch (family)
257             {
258             case 4:
259               TRACE (printf ("  80486 with cpuid\n"));
260               break;
261 
262             case 5:
263               TRACE (printf ("  pentium\n"));
264               CPUVEC_SETUP_pentium;
265               if (model == 4 || model == 8)
266                 {
267                   TRACE (printf ("  pentiummmx\n"));
268                   CPUVEC_SETUP_pentium_mmx;
269                 }
270               break;
271 
272             case 6:
273               TRACE (printf ("  p6\n"));
274               CPUVEC_SETUP_p6;
	      /* There is no default case: a model not listed below keeps
		 just the p6 setup applied above.  */
275 	      switch (model)
276 		{
277 		case 0x00:
278 		case 0x01:
279 		  TRACE (printf ("  pentiumpro\n"));
280 		  break;
281 
282 		case 0x02:
283 		case 0x03:
284 		case 0x04:
285 		case 0x05:
286 		case 0x06:
287 		  TRACE (printf ("  pentium2\n"));
288                   CPUVEC_SETUP_p6_mmx;
289 		  break;
290 
291 		case 0x07:
292 		case 0x08:
293 		case 0x0a:
294 		case 0x0b:
295 		case 0x0c:
296 		  TRACE (printf ("  pentium3\n"));
297                   CPUVEC_SETUP_p6_mmx;
298                   CPUVEC_SETUP_p6_p3mmx;
299 		  break;
300 
301 		case 0x09:		/* Banias */
302 		case 0x0d:		/* Dothan */
303 		case 0x0e:		/* Yonah */
304 		  TRACE (printf ("  Banias/Dothan/Yonah\n"));
305                   CPUVEC_SETUP_p6_mmx;
306                   CPUVEC_SETUP_p6_p3mmx;
307                   CPUVEC_SETUP_p6_sse2;
308 		  break;
309 
310 		case 0x0f:		/* Conroe Merom Kentsfield Allendale */
311 		case 0x10:
312 		case 0x11:
313 		case 0x12:
314 		case 0x13:
315 		case 0x14:
316 		case 0x15:
317 		case 0x16:
318 		case 0x17:		/* PNR Wolfdale Yorkfield */
319 		case 0x18:
320 		case 0x19:
321 		case 0x1d:		/* PNR Dunnington */
322 		  TRACE (printf ("  Conroe\n"));
323                   CPUVEC_SETUP_p6_mmx;
324                   CPUVEC_SETUP_p6_p3mmx;
325                   CPUVEC_SETUP_p6_sse2;
326 		  CPUVEC_SETUP_core2;
327 		  break;
328 
329 		case 0x1c:		/* Atom Silverthorne */
330 		case 0x26:		/* Atom Lincroft */
331 		case 0x27:		/* Atom Saltwell */
332 		case 0x36:		/* Atom Cedarview/Saltwell */
333 		  TRACE (printf ("  atom\n"));
334 		  CPUVEC_SETUP_atom;
335 		  CPUVEC_SETUP_atom_mmx;
336 		  CPUVEC_SETUP_atom_sse2;
337 		  break;
338 
339 		case 0x37:		/* Silvermont */
340 		case 0x4a:		/* Silvermont */
341 		case 0x4c:		/* Airmont */
342 		case 0x4d:		/* Silvermont/Avoton */
343 		case 0x5a:		/* Silvermont */
344 		  TRACE (printf ("  silvermont\n"));
345 		  CPUVEC_SETUP_atom;
346 		  CPUVEC_SETUP_atom_mmx;
347 		  CPUVEC_SETUP_atom_sse2;
348 		  CPUVEC_SETUP_silvermont;
349 		  break;
350 
351 		case 0x5c:		/* Goldmont */
352 		case 0x5f:		/* Goldmont */
353 		case 0x7a:		/* Goldmont Plus */
354 		  TRACE (printf ("  goldmont\n"));
355 		  CPUVEC_SETUP_atom;
356 		  CPUVEC_SETUP_atom_mmx;
357 		  CPUVEC_SETUP_atom_sse2;
358 		  CPUVEC_SETUP_goldmont;
359 		  break;
360 
361 		case 0x1a:		/* NHM Gainestown */
362 		case 0x1b:
363 		case 0x1e:		/* NHM Lynnfield/Jasper */
364 		case 0x1f:
365 		case 0x20:
366 		case 0x21:
367 		case 0x22:
368 		case 0x23:
369 		case 0x24:
370 		case 0x25:		/* WSM Clarkdale/Arrandale */
371 		case 0x28:
372 		case 0x29:
373 		case 0x2b:
374 		case 0x2c:		/* WSM Gulftown */
375 		case 0x2e:		/* NHM Beckton */
376 		case 0x2f:		/* WSM Eagleton */
377 		  TRACE (printf ("  nehalem/westmere\n"));
378                   CPUVEC_SETUP_p6_mmx;
379                   CPUVEC_SETUP_p6_p3mmx;
380                   CPUVEC_SETUP_p6_sse2;
381 		  CPUVEC_SETUP_core2;
382 		  CPUVEC_SETUP_coreinhm;
383 		  break;
384 
385 		case 0x2a:		/* SBR */
386 		case 0x2d:		/* SBR-EP */
387 		case 0x3a:		/* IBR */
388 		case 0x3e:		/* IBR Ivytown */
389 		case 0x3c:		/* Haswell client */
390 		case 0x3f:		/* Haswell server */
391 		case 0x45:		/* Haswell ULT */
392 		case 0x46:		/* Crystal Well */
393 		case 0x3d:		/* Broadwell */
394 		case 0x47:		/* Broadwell */
395 		case 0x4f:		/* Broadwell server */
396 		case 0x56:		/* Broadwell microserver */
397 		case 0x4e:		/* Skylake client */
398 		case 0x55:		/* Skylake server */
399 		case 0x5e:		/* Skylake */
400 		case 0x8e:		/* Kabylake */
401 		case 0x9e:		/* Kabylake */
402 		  TRACE (printf ("  sandybridge\n"));
403                   CPUVEC_SETUP_p6_mmx;
404                   CPUVEC_SETUP_p6_p3mmx;
405                   CPUVEC_SETUP_p6_sse2;
406 		  CPUVEC_SETUP_core2;
407 		  CPUVEC_SETUP_coreinhm;
408 		  CPUVEC_SETUP_coreisbr;
409 		  break;
410 		}
411               break;
412 
413             case 15:
414               TRACE (printf ("  pentium4\n"));
415               CPUVEC_SETUP_pentium4;
416               CPUVEC_SETUP_pentium4_mmx;
417               CPUVEC_SETUP_pentium4_sse2;
418               break;
419             }
420         }
421       else if (strcmp (vendor_string, "AuthenticAMD") == 0)
422         {
423           switch (family)
424             {
425             case 5:
426               if (model <= 3)
427                 {
428                   TRACE (printf ("  k5\n"));
429                 }
430               else
431                 {
432                   TRACE (printf ("  k6\n"));
433                   CPUVEC_SETUP_k6;
434                   CPUVEC_SETUP_k6_mmx;
435                   if (model >= 8)
436                     {
437                       TRACE (printf ("  k62\n"));
438                       CPUVEC_SETUP_k6_k62mmx;
439                     }
440                   if (model >= 9)
441                     {
442                       TRACE (printf ("  k63\n"));
443                     }
444                 }
445               break;
446             case 6:
447               TRACE (printf ("  athlon\n"));
448               CPUVEC_SETUP_k7;
449               CPUVEC_SETUP_k7_mmx;
450               break;
451 
452             case 0x0f:		/* k8 */
453             case 0x11:		/* "fam 11h", mix of k8 and k10 */
454             case 0x13:		/* unknown, conservatively assume k8  */
455               TRACE (printf ("  k8\n"));
456               CPUVEC_SETUP_k7;
457               CPUVEC_SETUP_k7_mmx;
458               CPUVEC_SETUP_k8;
459 	      break;
460 
461             case 0x10:		/* k10 */
462             case 0x12:		/* k10 (llano) */
463               TRACE (printf ("  k10\n"));
464               CPUVEC_SETUP_k7;
465               CPUVEC_SETUP_k7_mmx;
466 	      break;
467 
468             case 0x14:		/* bobcat */
469             case 0x16:		/* jaguar */
470               TRACE (printf ("  bobcat\n"));
471               CPUVEC_SETUP_k7;
472               CPUVEC_SETUP_k7_mmx;
473               CPUVEC_SETUP_bt1;
474 	      break;
475 
476             case 0x15:		/* bulldozer */
477               TRACE (printf ("  bulldozer\n"));
478               CPUVEC_SETUP_k7;
479               CPUVEC_SETUP_k7_mmx;
480               CPUVEC_SETUP_bd1;
481 	      break;
482 
483 	    case 0x17:		/* zen */
484 	    case 0x19:		/* zen3 */
485 	      TRACE (printf ("  zen\n"));
486 	      CPUVEC_SETUP_k7;
487 	      CPUVEC_SETUP_k7_mmx;
488 	      break;
489             }
490         }
491       else if (strcmp (vendor_string, "CentaurHauls") == 0)
492         {
493           switch (family)
494             {
495             case 6:
496               TRACE (printf ("  viac3\n"));
497               if (model >= 9)
498                 {
499                   TRACE (printf ("  viac32\n"));
500                 }
501 	      if (model >= 15)
502 		{
503                   TRACE (printf ("  nano\n"));
504 		  CPUVEC_SETUP_nano;
505 		}
506               break;
507             }
508         }
509       else if (strcmp (vendor_string, "CyrixInstead") == 0)
510         {
511           /* Should recognize Cyrix' processors too.  */
512           TRACE (printf ("  cyrix something\n"));
513         }
514     }
515 
516   /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1.
517      Instead default to the plain versions from whichever CPU we detected.
518      The function arguments are compatible, no need for any glue code.  */
519   if (decided_cpuvec.preinv_divrem_1 == NULL)
520     decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1;
521   if (decided_cpuvec.preinv_mod_1 == NULL)
522     decided_cpuvec.preinv_mod_1    =(preinv_mod_1_t)   decided_cpuvec.mod_1;
523 
524   ASSERT_CPUVEC (decided_cpuvec);
525   CPUVEC_INSTALL (decided_cpuvec);
526 
527   /* Set this once the threshold fields are ready.
528      Use volatile to prevent it getting moved.  */
  /* NOTE(review): the volatile store constrains the compiler only; whether
     other threads observe it after the installed vector depends on what
     CPUVEC_INSTALL does -- confirm.  */
529   *((volatile int *) &__gmpn_cpuvec_initialized) = 1;
530 }
531