xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/fat/fat.c (revision c38e7cc395b1472a774ff828e46123de44c628e9)
1 /* x86 fat binary initializers.
2 
3    THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.
4    THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
5    COMPLETELY IN FUTURE GNU MP RELEASES.
6 
7 Copyright 2003, 2004, 2011-2013, 2015 Free Software Foundation, Inc.
8 
9 This file is part of the GNU MP Library.
10 
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of either:
13 
14   * the GNU Lesser General Public License as published by the Free
15     Software Foundation; either version 3 of the License, or (at your
16     option) any later version.
17 
18 or
19 
20   * the GNU General Public License as published by the Free Software
21     Foundation; either version 2 of the License, or (at your option) any
22     later version.
23 
24 or both in parallel, as here.
25 
26 The GNU MP Library is distributed in the hope that it will be useful, but
27 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
28 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
29 for more details.
30 
31 You should have received copies of the GNU General Public License and the
32 GNU Lesser General Public License along with the GNU MP Library.  If not,
33 see https://www.gnu.org/licenses/.  */
34 
35 #include <stdio.h>    /* for printf */
36 #include <stdlib.h>   /* for getenv */
37 #include <string.h>
38 
39 #include "gmp.h"
40 #include "gmp-impl.h"
41 
42 /* Trace hook used throughout this file as TRACE (printf (...)).
   As defined here TRACE(x) expands to nothing, so those calls compile
   away.  Change this to "#define TRACE(x) x" for some traces. */
43 #define TRACE(x)
44 
45 
/* Implemented in fat_entry.asm.

   __gmpn_cpuid_available returns non-zero when __gmpn_cpuid may be used.

   __gmpn_cpuid (dst, id), as used by __gmpn_cpuvec_init in this file:
     id 0  stores the 12 vendor bytes at dst (the caller adds its own
           terminator);
     id 1  returns the word from which family and model are decoded; the
           caller passes a scratch buffer as dst and ignores its contents.  */
long __gmpn_cpuid (char dst[12], int id);
int  __gmpn_cpuid_available (void);
49 
50 
51 #if WANT_FAKE_CPUID
/* Fake cpuid support: the cpu to pretend to be is named by the
   GMP_CPU_TYPE environment variable and looked up in fake_cpuid_table.  */

/* Route the cpuid calls made by __gmpn_cpuvec_init to the fakes.  */
#define __gmpn_cpuid            fake_cpuid
#define __gmpn_cpuid_available  fake_cpuid_available

/* Pack FAMILY and MODEL into one word such that the decode used by
   __gmpn_cpuvec_init,

       family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff)
       model  = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0)

   gives back the same two numbers.  The arithmetic is done in unsigned so
   that no shift can overflow a signed int.  */
#define MAKE_FMS(family, model)						\
  ((((unsigned) (family) & 0xfu) << 8)					\
   + (((unsigned) (family) & 0xff0u) << 20)				\
   + (((unsigned) (model) & 0xfu) << 4)					\
   + (((unsigned) (model) & 0xf0u) << 12))

/* The "name"s in the table are values for the GMP_CPU_TYPE environment
   variable.  Anything can be used, but for now it's the canonical cpu types
   as per config.guess/config.sub.

   An empty vendor string marks a cpu without the cpuid instruction (see
   fake_cpuid_available).  Every other vendor string is exactly 12
   characters long.  */
static const struct {
  const char  *name;
  const char  *vendor;
  unsigned    fms;
} fake_cpuid_table[] = {
  { "i386",       "",             0 },
  { "i486",       "GenuineIntel", MAKE_FMS (4, 0) },
  { "pentium",    "GenuineIntel", MAKE_FMS (5, 0) },
  { "pentiummmx", "GenuineIntel", MAKE_FMS (5, 4) },
  { "pentiumpro", "GenuineIntel", MAKE_FMS (6, 0) },
  { "pentium2",   "GenuineIntel", MAKE_FMS (6, 2) },
  { "pentium3",   "GenuineIntel", MAKE_FMS (6, 7) },
  { "pentium4",   "GenuineIntel", MAKE_FMS (15, 2) },
  { "prescott",   "GenuineIntel", MAKE_FMS (15, 3) },
  { "nocona",     "GenuineIntel", MAKE_FMS (15, 4) },
  { "core2",      "GenuineIntel", MAKE_FMS (6, 0xf) },
  { "nehalem",    "GenuineIntel", MAKE_FMS (6, 0x1a) },
  { "nhm",        "GenuineIntel", MAKE_FMS (6, 0x1a) },
  { "atom",       "GenuineIntel", MAKE_FMS (6, 0x1c) },
  { "westmere",   "GenuineIntel", MAKE_FMS (6, 0x25) },
  { "wsm",        "GenuineIntel", MAKE_FMS (6, 0x25) },
  { "sandybridge","GenuineIntel", MAKE_FMS (6, 0x2a) },
  { "sbr",        "GenuineIntel", MAKE_FMS (6, 0x2a) },
  { "silvermont", "GenuineIntel", MAKE_FMS (6, 0x37) },
  { "slm",        "GenuineIntel", MAKE_FMS (6, 0x37) },
  { "haswell",    "GenuineIntel", MAKE_FMS (6, 0x3c) },
  { "hwl",        "GenuineIntel", MAKE_FMS (6, 0x3c) },
  { "broadwell",  "GenuineIntel", MAKE_FMS (6, 0x3d) },
  { "bwl",        "GenuineIntel", MAKE_FMS (6, 0x3d) },
  { "skylake",    "GenuineIntel", MAKE_FMS (6, 0x5e) },
  { "sky",        "GenuineIntel", MAKE_FMS (6, 0x5e) },

  { "k5",         "AuthenticAMD", MAKE_FMS (5, 0) },
  /* Model 6, not 3: __gmpn_cpuvec_init treats AMD family 5 with
     model <= 3 as k5, so a lower model here would never select the k6
     setups.  */
  { "k6",         "AuthenticAMD", MAKE_FMS (5, 6) },
  { "k62",        "AuthenticAMD", MAKE_FMS (5, 8) },
  { "k63",        "AuthenticAMD", MAKE_FMS (5, 9) },
  { "athlon",     "AuthenticAMD", MAKE_FMS (6, 0) },
  { "k8",         "AuthenticAMD", MAKE_FMS (15, 0) },
  { "k10",        "AuthenticAMD", MAKE_FMS (16, 0) },
  { "bobcat",     "AuthenticAMD", MAKE_FMS (20, 1) },
  { "bulldozer",  "AuthenticAMD", MAKE_FMS (21, 1) },
  { "piledriver", "AuthenticAMD", MAKE_FMS (21, 2) },
  { "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) },
  { "excavator",  "AuthenticAMD", MAKE_FMS (21, 0x60) },
  { "jaguar",     "AuthenticAMD", MAKE_FMS (22, 1) },

  { "viac3",      "CentaurHauls", MAKE_FMS (6, 0) },
  { "viac32",     "CentaurHauls", MAKE_FMS (6, 9) },
  { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
};
113 
114 static int
115 fake_cpuid_lookup (void)
116 {
117   char  *s;
118   int   i;
119 
120   s = getenv ("GMP_CPU_TYPE");
121   if (s == NULL)
122     {
123       printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n");
124       abort ();
125     }
126 
127   for (i = 0; i < numberof (fake_cpuid_table); i++)
128     if (strcmp (s, fake_cpuid_table[i].name) == 0)
129       return i;
130 
131   printf ("GMP_CPU_TYPE=%s unknown\n", s);
132   abort ();
133 }
134 
135 static int
136 fake_cpuid_available (void)
137 {
138   return fake_cpuid_table[fake_cpuid_lookup()].vendor[0] != '\0';
139 }
140 
141 static long
142 fake_cpuid (char dst[12], int id)
143 {
144   int  i = fake_cpuid_lookup();
145 
146   switch (id) {
147   case 0:
148     memcpy (dst, fake_cpuid_table[i].vendor, 12);
149     return 0;
150   case 1:
151     return fake_cpuid_table[i].fms;
152   default:
153     printf ("fake_cpuid(): oops, unknown id %d\n", id);
154     abort ();
155   }
156 }
157 #endif
158 
159 
/* Pointer types used by __gmpn_cpuvec_init when it stores the divrem_1 and
   mod_1 entries into the preinv_divrem_1 and preinv_mod_1 slots.
   DECL_preinv_divrem_1 and DECL_preinv_mod_1 are macros defined outside
   this file; presumably each declares a function of the matching
   signature around the given declarator -- confirm against their
   definitions.  */
160 typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t));
161 typedef DECL_preinv_mod_1    ((*preinv_mod_1_t));
162 
/* Initial contents of the cpu dispatch vector.

   The initializer is positional, so the order of the entries must match
   the member order of struct cpuvec_t, which is declared outside this
   file.  Most entries start out as a __MPN(..._init) routine; a few start
   out as 0.  The _init routines are not defined in this file --
   presumably each one arranges for __gmpn_cpuvec_init to run on first use;
   confirm against their definitions.  __gmpn_cpuvec_init later replaces
   the contents through CPUVEC_INSTALL.  */
163 struct cpuvec_t __gmpn_cpuvec = {
164   __MPN(add_n_init),
165   0,
166   0,
167   __MPN(addmul_1_init),
168   0,
169   __MPN(bdiv_dbm1c_init),
170   __MPN(cnd_add_n_init),
171   __MPN(cnd_sub_n_init),
172   __MPN(com_init),
173   __MPN(copyd_init),
174   __MPN(copyi_init),
175   __MPN(divexact_1_init),
176   __MPN(divrem_1_init),
177   __MPN(gcd_1_init),
178   __MPN(lshift_init),
179   __MPN(lshiftc_init),
180   __MPN(mod_1_init),
181   __MPN(mod_1_1p_init),
182   __MPN(mod_1_1p_cps_init),
183   __MPN(mod_1s_2p_init),
184   __MPN(mod_1s_2p_cps_init),
185   __MPN(mod_1s_4p_init),
186   __MPN(mod_1s_4p_cps_init),
187   __MPN(mod_34lsub1_init),
188   __MPN(modexact_1c_odd_init),
189   __MPN(mul_1_init),
190   __MPN(mul_basecase_init),
191   __MPN(mullo_basecase_init),
192   __MPN(preinv_divrem_1_init),
193   __MPN(preinv_mod_1_init),
194   __MPN(redc_1_init),
195   __MPN(redc_2_init),
196   __MPN(rshift_init),
197   __MPN(sqr_basecase_init),
198   __MPN(sub_n_init),
199   0,
200   __MPN(submul_1_init),
201   0
202 };
203 
/* Zero until __gmpn_cpuvec_init has finished; that function stores 1 here
   as its last step, after CPUVEC_INSTALL.  */
204 int __gmpn_cpuvec_initialized = 0;
205 
206 /* The following setups start with generic x86, then overwrite with
207    specifics for a chip, and higher versions of that chip.
208 
209    The arrangement of the setups here will normally be the same as the $path
210    selections in configure.in for the respective chips.
211 
212    This code is reentrant and thread safe.  We always calculate the same
213    decided_cpuvec, so if two copies of the code are running it doesn't
214    matter which completes first, both write the same to __gmpn_cpuvec.
215 
216    We need to go via decided_cpuvec because if one thread has completed
217    __gmpn_cpuvec then it may be making use of the threshold values in that
218    vector.  If another thread is still running __gmpn_cpuvec_init then we
219    don't want it to write different values to those fields since some of the
220    asm routines only operate correctly up to their own defined threshold,
221    not an arbitrary value.  */
222 
/* Decide the cpu-specific setups and install them.

   The decision is accumulated in the local decided_cpuvec by a sequence of
   CPUVEC_SETUP_* macros, then handed to CPUVEC_INSTALL, and finally
   __gmpn_cpuvec_initialized is set to 1.  The CPUVEC_SETUP_*,
   ASSERT_CPUVEC and CPUVEC_INSTALL macros are defined outside this file;
   the CPUVEC_SETUP_* ones take no argument, so they presumably refer to
   decided_cpuvec by name -- do not rename it without checking them.

   When WANT_FAKE_CPUID is set, __gmpn_cpuid and __gmpn_cpuid_available
   are macros for the fake_cpuid functions defined earlier in this file.  */
223 void
224 __gmpn_cpuvec_init (void)
225 {
226   struct cpuvec_t  decided_cpuvec;
227
228   TRACE (printf ("__gmpn_cpuvec_init:\n"));
229
      /* Start from all zero, so that a slot no setup assigns is still
         zero when the preinv_* fallbacks near the end test it against
         NULL.  */
230   memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec));
231
      /* Baseline applied for every cpu; the vendor, family and model
         specific setups below are applied after it.  */
232   CPUVEC_SETUP_x86;
233   CPUVEC_SETUP_fat;
234
235   if (! __gmpn_cpuid_available ())
236     {
          /* No cpuid: nothing beyond the baseline is applied.  */
237       TRACE (printf ("  80386, or early 80486 without cpuid\n"));
238     }
239   else
240     {
          /* 12 vendor bytes from cpuid id 0, plus room for the terminator
             stored below so the result can be passed to strcmp.  */
241       char vendor_string[13];
          /* Destination for the id 1 call; its contents are never read,
             only the return value is used.  */
242       char dummy_string[12];
243       long fms;
244       int family, model;
245
246       __gmpn_cpuid (vendor_string, 0);
247       vendor_string[12] = 0;
248
249       fms = __gmpn_cpuid (dummy_string, 1);
          /* family is bits 8-11 of fms plus bits 20-27 of fms.
             model is bits 4-7 of fms plus bits 16-19 moved up to form the
             high nibble.  */
250       family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
251       model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
252
          /* A vendor string matching none of the branches below, or a
             family with no case in the matching branch, keeps the
             baseline.  */
253       if (strcmp (vendor_string, "GenuineIntel") == 0)
254         {
255           switch (family)
256             {
257             case 4:
258               TRACE (printf ("  80486 with cpuid\n"));
259               break;
260
261             case 5:
262               TRACE (printf ("  pentium\n"));
263               CPUVEC_SETUP_pentium;
264               if (model >= 4)
265                 {
266                   TRACE (printf ("  pentiummmx\n"));
267                   CPUVEC_SETUP_pentium_mmx;
268                 }
269               break;
270
271             case 6:
272               TRACE (printf ("  p6\n"));
273               CPUVEC_SETUP_p6;
                  /* The inner switch has no default: a family 6 model not
                     listed below keeps only the CPUVEC_SETUP_p6 applied
                     just above.  */
274 	      switch (model)
275 		{
276 		case 0x00:
277 		case 0x01:
278 		  TRACE (printf ("  pentiumpro\n"));
279 		  break;
280
281 		case 0x02:
282 		case 0x03:
283 		case 0x04:
284 		case 0x05:
285 		case 0x06:
286 		  TRACE (printf ("  pentium2\n"));
287                   CPUVEC_SETUP_p6_mmx;
288 		  break;
289
290 		case 0x07:
291 		case 0x08:
292 		case 0x0a:
293 		case 0x0b:
294 		case 0x0c:
295 		  TRACE (printf ("  pentium3\n"));
296                   CPUVEC_SETUP_p6_mmx;
297                   CPUVEC_SETUP_p6_p3mmx;
298 		  break;
299
300 		case 0x09:		/* Banias */
301 		case 0x0d:		/* Dothan */
302 		case 0x0e:		/* Yonah */
303 		  TRACE (printf ("  Banias/Dothan/Yonah\n"));
304                   CPUVEC_SETUP_p6_mmx;
305                   CPUVEC_SETUP_p6_p3mmx;
306                   CPUVEC_SETUP_p6_sse2;
307 		  break;
308
309 		case 0x0f:		/* Conroe Merom Kentsfield Allendale */
310 		case 0x10:
311 		case 0x11:
312 		case 0x12:
313 		case 0x13:
314 		case 0x14:
315 		case 0x15:
316 		case 0x16:
317 		case 0x17:		/* PNR Wolfdale Yorkfield */
318 		case 0x18:
319 		case 0x19:
320 		case 0x1d:		/* PNR Dunnington */
321 		  TRACE (printf ("  Conroe\n"));
322                   CPUVEC_SETUP_p6_mmx;
323                   CPUVEC_SETUP_p6_p3mmx;
324                   CPUVEC_SETUP_p6_sse2;
325 		  CPUVEC_SETUP_core2;
326 		  break;
327
                /* The atom models apply only the atom setups on top of
                   CPUVEC_SETUP_p6; none of the p6_mmx/p3mmx/sse2 setups
                   is applied for them.  */
328 		case 0x1c:		/* Atom Silverthorne */
329 		case 0x26:		/* Atom Lincroft */
330 		case 0x27:		/* Atom Saltwell */
331 		case 0x36:		/* Atom Cedarview/Saltwell */
332 		  TRACE (printf ("  atom\n"));
333 		  CPUVEC_SETUP_atom;
334 		  CPUVEC_SETUP_atom_mmx;
335 		  CPUVEC_SETUP_atom_sse2;
336 		  break;
337
338 		case 0x1a:		/* NHM Gainestown */
339 		case 0x1b:
340 		case 0x1e:		/* NHM Lynnfield/Jasper */
341 		case 0x1f:
342 		case 0x20:
343 		case 0x21:
344 		case 0x22:
345 		case 0x23:
346 		case 0x24:
347 		case 0x25:		/* WSM Clarkdale/Arrandale */
348 		case 0x28:
349 		case 0x29:
350 		case 0x2b:
351 		case 0x2c:		/* WSM Gulftown */
352 		case 0x2e:		/* NHM Beckton */
353 		case 0x2f:		/* WSM Eagleton */
354 		  TRACE (printf ("  nehalem/westmere\n"));
355                   CPUVEC_SETUP_p6_mmx;
356                   CPUVEC_SETUP_p6_p3mmx;
357                   CPUVEC_SETUP_p6_sse2;
358 		  CPUVEC_SETUP_core2;
359 		  CPUVEC_SETUP_coreinhm;
360 		  break;
361
362 		case 0x2a:		/* SBR */
363 		case 0x2d:		/* SBR-EP */
364 		case 0x3a:		/* IBR */
365 		case 0x3c:		/* Haswell */
366 		  TRACE (printf ("  sandybridge\n"));
367                   CPUVEC_SETUP_p6_mmx;
368                   CPUVEC_SETUP_p6_p3mmx;
369                   CPUVEC_SETUP_p6_sse2;
370 		  CPUVEC_SETUP_core2;
371 		  CPUVEC_SETUP_coreinhm;
372 		  CPUVEC_SETUP_coreisbr;
373 		  break;
374 		}
375               break;
376
377             case 15:
378               TRACE (printf ("  pentium4\n"));
379               CPUVEC_SETUP_pentium4;
380               CPUVEC_SETUP_pentium4_mmx;
381               CPUVEC_SETUP_pentium4_sse2;
382               break;
383             }
384         }
385       else if (strcmp (vendor_string, "AuthenticAMD") == 0)
386         {
387           switch (family)
388             {
389             case 5:
                  /* Models 0-3 are taken as k5 and get no setup beyond the
                     baseline; models 4 and up are taken as k6.  */
390               if (model <= 3)
391                 {
392                   TRACE (printf ("  k5\n"));
393                 }
394               else
395                 {
396                   TRACE (printf ("  k6\n"));
397                   CPUVEC_SETUP_k6;
398                   CPUVEC_SETUP_k6_mmx;
399                   if (model >= 8)
400                     {
401                       TRACE (printf ("  k62\n"));
402                       CPUVEC_SETUP_k6_k62mmx;
403                     }
                      /* Trace only: k63 applies nothing beyond what k62
                         already applied.  */
404                   if (model >= 9)
405                     {
406                       TRACE (printf ("  k63\n"));
407                     }
408                 }
409               break;
410             case 6:
411               TRACE (printf ("  athlon\n"));
412               CPUVEC_SETUP_k7;
413               CPUVEC_SETUP_k7_mmx;
414               break;
415
416             case 0x0f:		/* k8 */
417             case 0x11:		/* "fam 11h", mix of k8 and k10 */
418             case 0x13:		/* unknown, conservatively assume k8  */
419             case 0x16:		/* unknown, conservatively assume k8  */
420             case 0x17:		/* unknown, conservatively assume k8  */
421               TRACE (printf ("  k8\n"));
422               CPUVEC_SETUP_k7;
423               CPUVEC_SETUP_k7_mmx;
424               CPUVEC_SETUP_k8;
425 	      break;
426
                /* k10 applies the k7 setups only; CPUVEC_SETUP_k8 is not
                   applied here.  */
427             case 0x10:		/* k10 */
428             case 0x12:		/* k10 (llano) */
429               TRACE (printf ("  k10\n"));
430               CPUVEC_SETUP_k7;
431               CPUVEC_SETUP_k7_mmx;
432 	      break;
433
434             case 0x14:		/* bobcat */
435               TRACE (printf ("  bobcat\n"));
436               CPUVEC_SETUP_k7;
437               CPUVEC_SETUP_k7_mmx;
438               CPUVEC_SETUP_bobcat;
439 	      break;
440
                /* bulldozer likewise applies the k7 setups only.  */
441             case 0x15:		/* bulldozer */
442               TRACE (printf ("  bulldozer\n"));
443               CPUVEC_SETUP_k7;
444               CPUVEC_SETUP_k7_mmx;
445 	      break;
446             }
447         }
448       else if (strcmp (vendor_string, "CentaurHauls") == 0)
449         {
450           switch (family)
451             {
452             case 6:
                  /* Only model >= 15 (nano) applies a setup; the viac3 and
                     viac32 branches just trace.  */
453               TRACE (printf ("  viac3\n"));
454               if (model >= 9)
455                 {
456                   TRACE (printf ("  viac32\n"));
457                 }
458 	      if (model >= 15)
459 		{
460                   TRACE (printf ("  nano\n"));
461 		  CPUVEC_SETUP_nano;
462 		}
463               break;
464             }
465         }
466       else if (strcmp (vendor_string, "CyrixInstead") == 0)
467         {
468           /* Should recognize Cyrix' processors too.  */
469           TRACE (printf ("  cyrix something\n"));
470         }
471     }
472
473   /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1.
474      Instead default to the plain versions from whichever CPU we detected.
475      The function arguments are compatible, no need for any glue code.  */
476   if (decided_cpuvec.preinv_divrem_1 == NULL)
477     decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1;
478   if (decided_cpuvec.preinv_mod_1 == NULL)
479     decided_cpuvec.preinv_mod_1    =(preinv_mod_1_t)   decided_cpuvec.mod_1;
480
481   ASSERT_CPUVEC (decided_cpuvec);
482   CPUVEC_INSTALL (decided_cpuvec);
483
484   /* Set this once the threshold fields are ready.
485      Use volatile to prevent it getting moved.  */
486   *((volatile int *) &__gmpn_cpuvec_initialized) = 1;
487 }
488