xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/fat/fat.c (revision 75f6d617e282811cb173c2ccfbf5df0dd71f7045)
1 /* x86 fat binary initializers.
2 
3    THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.
4    THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
5    COMPLETELY IN FUTURE GNU MP RELEASES.
6 
7 Copyright 2003, 2004, 2011, 2012 Free Software Foundation, Inc.
8 
9 This file is part of the GNU MP Library.
10 
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
15 
16 The GNU MP Library is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
19 License for more details.
20 
21 You should have received a copy of the GNU Lesser General Public License
22 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
23 
24 #include <stdio.h>    /* for printf */
25 #include <stdlib.h>   /* for getenv */
26 #include <string.h>
27 
28 #include "gmp.h"
29 #include "gmp-impl.h"
30 
/* Change this to "#define TRACE(x) x" for some traces.  With the default
   empty expansion every TRACE (...) statement in this file compiles to
   nothing.  */
#define TRACE(x)

/* Change this to 1 to take the cpuid from GMP_CPU_TYPE env var.  That
   enables the fake_cpuid code below, which is otherwise not compiled.  */
#define WANT_FAKE_CPUID  0
36 
37 
/* fat_entry.asm

   As used in this file: __gmpn_cpuid_available is tested as a boolean
   before any cpuid call is made.  __gmpn_cpuid is called with a buffer of
   at least 12 bytes and a request id; id 0 is used to fetch the 12-byte
   vendor string into the buffer (not NUL terminated by the callee as far
   as this file relies on), and the return value for id 1 is decoded as the
   family/model word.  */
long __gmpn_cpuid (char [12], int);
int  __gmpn_cpuid_available (void);
41 
42 
43 #if WANT_FAKE_CPUID
44 /* The "name"s in the table are values for the GMP_CPU_TYPE environment
45    variable.  Anything can be used, but for now it's the canonical cpu types
46    as per config.guess/config.sub.  */
47 
/* Redirect the cpuid calls made by __gmpn_cpuvec_init to the fake versions
   defined below.  */
#define __gmpn_cpuid            fake_cpuid
#define __gmpn_cpuid_available  fake_cpuid_available
50 
/* Pack FAMILY and MODEL into a fake cpuid "fms" word, laid out so that the
   decoding in __gmpn_cpuvec_init recovers both values:
     family bits 0-3  -> fms bits 8-11,   family bits 4-11 -> fms bits 24-31
     model  bits 0-3  -> fms bits 4-7,    model  bits 4-7  -> fms bits 16-19
   The four fields never overlap, so they are simply OR-ed together.  */
#define MAKE_FMS(family, model)						\
  (  (((family) & 0x00f) <<  8)						\
   | (((family) & 0xff0) << 20)						\
   | (((model)  & 0x0f)  <<  4)						\
   | (((model)  & 0xf0)  << 12))
54 
55 static struct {
56   const char  *name;
57   const char  vendor[13];
58   unsigned    fms;
59 } fake_cpuid_table[] = {
60   { "i386",       "" },
61   { "i486",       "GenuineIntel", MAKE_FMS (4, 0) },
62   { "pentium",    "GenuineIntel", MAKE_FMS (5, 0) },
63   { "pentiummmx", "GenuineIntel", MAKE_FMS (5, 4) },
64   { "pentiumpro", "GenuineIntel", MAKE_FMS (6, 0) },
65   { "pentium2",   "GenuineIntel", MAKE_FMS (6, 2) },
66   { "pentium3",   "GenuineIntel", MAKE_FMS (6, 7) },
67   { "pentium4",   "GenuineIntel", MAKE_FMS (15, 2) },
68   { "prescott",   "GenuineIntel", MAKE_FMS (15, 3) },
69   { "nocona",     "GenuineIntel", MAKE_FMS (15, 4) },
70   { "core2",      "GenuineIntel", MAKE_FMS (6, 0xf) },
71   { "coreinhm",   "GenuineIntel", MAKE_FMS (6, 0x1a) },
72   { "coreiwsm",   "GenuineIntel", MAKE_FMS (6, 0x25) },
73   { "coreisbr",   "GenuineIntel", MAKE_FMS (6, 0x2a) },
74   { "atom",       "GenuineIntel", MAKE_FMS (6, 0x1c) },
75 
76   { "k5",         "AuthenticAMD", MAKE_FMS (5, 0) },
77   { "k6",         "AuthenticAMD", MAKE_FMS (5, 3) },
78   { "k62",        "AuthenticAMD", MAKE_FMS (5, 8) },
79   { "k63",        "AuthenticAMD", MAKE_FMS (5, 9) },
80   { "athlon",     "AuthenticAMD", MAKE_FMS (6, 0) },
81   { "k8",         "AuthenticAMD", MAKE_FMS (15, 0) },
82   { "k10",        "AuthenticAMD", MAKE_FMS (16, 0) },
83   { "bobcat",     "AuthenticAMD", MAKE_FMS (20, 1) },
84   { "bulldozer",  "AuthenticAMD", MAKE_FMS (21, 1) },
85 
86   { "viac3",      "CentaurHauls", MAKE_FMS (6, 0) },
87   { "viac32",     "CentaurHauls", MAKE_FMS (6, 9) },
88   { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
89 };
90 
91 static int
92 fake_cpuid_lookup (void)
93 {
94   char  *s;
95   int   i;
96 
97   s = getenv ("GMP_CPU_TYPE");
98   if (s == NULL)
99     {
100       printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n");
101       abort ();
102     }
103 
104   for (i = 0; i < numberof (fake_cpuid_table); i++)
105     if (strcmp (s, fake_cpuid_table[i].name) == 0)
106       return i;
107 
108   printf ("GMP_CPU_TYPE=%s unknown\n", s);
109   abort ();
110 }
111 
112 static int
113 fake_cpuid_available (void)
114 {
115   return fake_cpuid_table[fake_cpuid_lookup()].vendor[0] != '\0';
116 }
117 
118 static long
119 fake_cpuid (char dst[12], int id)
120 {
121   int  i = fake_cpuid_lookup();
122 
123   switch (id) {
124   case 0:
125     memcpy (dst, fake_cpuid_table[i].vendor, 12);
126     return 0;
127   case 1:
128     return fake_cpuid_table[i].fms;
129   default:
130     printf ("fake_cpuid(): oops, unknown id %d\n", id);
131     abort ();
132   }
133 }
134 #endif
135 
136 
/* Types for the preinv_divrem_1 and preinv_mod_1 slots of the cpuvec.
   They are used in __gmpn_cpuvec_init to cast the plain divrem_1 and mod_1
   entries when no preinv routine has been set.  The DECL_* macros are
   defined outside this file; presumably they expand to a function
   declaration for the name given, making these pointer-to-function
   types.  */
typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t));
typedef DECL_preinv_mod_1    ((*preinv_mod_1_t));
139 
/* The dispatch vector in its initial state: every listed slot holds the
   corresponding __MPN(..._init) entry.  The initializers are positional,
   so their order has to match the field order of struct cpuvec_t (declared
   outside this file); slots written as 0, and any fields after the last
   initializer, start out zero.  __gmpn_cpuvec_init overwrites the vector
   with the routines chosen for the running CPU.
   NOTE(review): presumably each *_init entry runs __gmpn_cpuvec_init and
   then continues in the routine that was selected -- confirm against the
   definitions of the *_init entries.  */
struct cpuvec_t __gmpn_cpuvec = {
  __MPN(add_n_init),
  0,
  0,
  __MPN(addmul_1_init),
  0,
  __MPN(bdiv_dbm1c_init),
  __MPN(com_init),
  __MPN(copyd_init),
  __MPN(copyi_init),
  __MPN(divexact_1_init),
  __MPN(divrem_1_init),
  __MPN(gcd_1_init),
  __MPN(lshift_init),
  __MPN(lshiftc_init),
  __MPN(mod_1_init),
  __MPN(mod_1_1p_init),
  __MPN(mod_1_1p_cps_init),
  __MPN(mod_1s_2p_init),
  __MPN(mod_1s_2p_cps_init),
  __MPN(mod_1s_4p_init),
  __MPN(mod_1s_4p_cps_init),
  __MPN(mod_34lsub1_init),
  __MPN(modexact_1c_odd_init),
  __MPN(mul_1_init),
  __MPN(mul_basecase_init),
  __MPN(mullo_basecase_init),
  __MPN(preinv_divrem_1_init),
  __MPN(preinv_mod_1_init),
  __MPN(redc_1_init),
  __MPN(redc_2_init),
  __MPN(rshift_init),
  __MPN(sqr_basecase_init),
  __MPN(sub_n_init),
  0,
  __MPN(submul_1_init),
  0
};
178 
/* Zero until __gmpn_cpuvec_init has installed the final vector; that
   function stores 1 here as its last action.  */
int __gmpn_cpuvec_initialized = 0;
180 
181 /* The following setups start with generic x86, then overwrite with
182    specifics for a chip, and higher versions of that chip.
183 
184    The arrangement of the setups here will normally be the same as the $path
185    selections in configure.in for the respective chips.
186 
187    This code is reentrant and thread safe.  We always calculate the same
188    decided_cpuvec, so if two copies of the code are running it doesn't
189    matter which completes first, both write the same to __gmpn_cpuvec.
190 
191    We need to go via decided_cpuvec because if one thread has completed
192    __gmpn_cpuvec then it may be making use of the threshold values in that
193    vector.  If another thread is still running __gmpn_cpuvec_init then we
194    don't want it to write different values to those fields since some of the
195    asm routines only operate correctly up to their own defined threshold,
196    not an arbitrary value.  */
197 
198 void
199 __gmpn_cpuvec_init (void)
200 {
201   struct cpuvec_t  decided_cpuvec;
202 
203   TRACE (printf ("__gmpn_cpuvec_init:\n"));
204 
205   memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec));
206 
207   CPUVEC_SETUP_x86;
208   CPUVEC_SETUP_fat;
209 
210   if (! __gmpn_cpuid_available ())
211     {
212       TRACE (printf ("  80386, or early 80486 without cpuid\n"));
213     }
214   else
215     {
216       char vendor_string[13];
217       char dummy_string[12];
218       long fms;
219       int family, model;
220 
221       __gmpn_cpuid (vendor_string, 0);
222       vendor_string[12] = 0;
223 
224       fms = __gmpn_cpuid (dummy_string, 1);
225       family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
226       model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
227 
228       if (strcmp (vendor_string, "GenuineIntel") == 0)
229         {
230           switch (family)
231             {
232             case 4:
233               TRACE (printf ("  80486 with cpuid\n"));
234               break;
235 
236             case 5:
237               TRACE (printf ("  pentium\n"));
238               CPUVEC_SETUP_pentium;
239               if (model >= 4)
240                 {
241                   TRACE (printf ("  pentiummmx\n"));
242                   CPUVEC_SETUP_pentium_mmx;
243                 }
244               break;
245 
246             case 6:
247               TRACE (printf ("  p6\n"));
248               CPUVEC_SETUP_p6;
249 	      switch (model)
250 		{
251 		case 0x00:
252 		case 0x01:
253 		  TRACE (printf ("  pentiumpro\n"));
254 		  break;
255 
256 		case 0x02:
257 		case 0x03:
258 		case 0x04:
259 		case 0x05:
260 		case 0x06:
261 		  TRACE (printf ("  pentium2\n"));
262                   CPUVEC_SETUP_p6_mmx;
263 		  break;
264 
265 		case 0x07:
266 		case 0x08:
267 		case 0x0a:
268 		case 0x0b:
269 		case 0x0c:
270 		  TRACE (printf ("  pentium3\n"));
271                   CPUVEC_SETUP_p6_mmx;
272                   CPUVEC_SETUP_p6_p3mmx;
273 		  break;
274 
275 		case 0x09:		/* Banias */
276 		case 0x0d:		/* Dothan */
277 		case 0x0e:		/* Yonah */
278 		  TRACE (printf ("  Banias/Bothan/Yonah\n"));
279                   CPUVEC_SETUP_p6_mmx;
280                   CPUVEC_SETUP_p6_p3mmx;
281                   CPUVEC_SETUP_p6_sse2;
282 		  break;
283 
284 		case 0x0f:		/* Conroe Merom Kentsfield Allendale */
285 		case 0x10:
286 		case 0x11:
287 		case 0x12:
288 		case 0x13:
289 		case 0x14:
290 		case 0x15:
291 		case 0x16:
292 		case 0x17:		/* PNR Wolfdale Yorkfield */
293 		case 0x18:
294 		case 0x19:
295 		case 0x1d:		/* PNR Dunnington */
296 		  TRACE (printf ("  Conroe\n"));
297                   CPUVEC_SETUP_p6_mmx;
298                   CPUVEC_SETUP_p6_p3mmx;
299                   CPUVEC_SETUP_p6_sse2;
300 		  CPUVEC_SETUP_core2;
301 		  break;
302 
303 		case 0x1c:		/* Atom Silverthorne */
304 		case 0x26:		/* Atom Lincroft */
305 		case 0x27:		/* Atom Saltwell */
306 		case 0x36:		/* Atom Cedarview/Saltwell */
307 		  TRACE (printf ("  atom\n"));
308 		  CPUVEC_SETUP_atom;
309 		  CPUVEC_SETUP_atom_mmx;
310 		  CPUVEC_SETUP_atom_sse2;
311 		  break;
312 
313 		case 0x1a:		/* NHM Gainestown */
314 		case 0x1b:
315 		case 0x1e:		/* NHM Lynnfield/Jasper */
316 		case 0x1f:
317 		case 0x20:
318 		case 0x21:
319 		case 0x22:
320 		case 0x23:
321 		case 0x24:
322 		case 0x25:		/* WSM Clarkdale/Arrandale */
323 		case 0x28:
324 		case 0x29:
325 		case 0x2b:
326 		case 0x2c:		/* WSM Gulftown */
327 		case 0x2e:		/* NHM Beckton */
328 		case 0x2f:		/* WSM Eagleton */
329 		  TRACE (printf ("  nehalem/westmere\n"));
330                   CPUVEC_SETUP_p6_mmx;
331                   CPUVEC_SETUP_p6_p3mmx;
332                   CPUVEC_SETUP_p6_sse2;
333 		  CPUVEC_SETUP_core2;
334 		  CPUVEC_SETUP_coreinhm;
335 		  break;
336 
337 		case 0x2a:		/* SBR */
338 		case 0x2d:		/* SBR-EP */
339 		case 0x3a:		/* IBR */
340 		case 0x3c:		/* Haswell */
341 		  TRACE (printf ("  sandybridge\n"));
342                   CPUVEC_SETUP_p6_mmx;
343                   CPUVEC_SETUP_p6_p3mmx;
344                   CPUVEC_SETUP_p6_sse2;
345 		  CPUVEC_SETUP_core2;
346 		  CPUVEC_SETUP_coreinhm;
347 		  CPUVEC_SETUP_coreisbr;
348 		  break;
349 		}
350               break;
351 
352             case 15:
353               TRACE (printf ("  pentium4\n"));
354               CPUVEC_SETUP_pentium4;
355               CPUVEC_SETUP_pentium4_mmx;
356               CPUVEC_SETUP_pentium4_sse2;
357               break;
358             }
359         }
360       else if (strcmp (vendor_string, "AuthenticAMD") == 0)
361         {
362           switch (family)
363             {
364             case 5:
365               if (model <= 3)
366                 {
367                   TRACE (printf ("  k5\n"));
368                 }
369               else
370                 {
371                   TRACE (printf ("  k6\n"));
372                   CPUVEC_SETUP_k6;
373                   CPUVEC_SETUP_k6_mmx;
374                   if (model >= 8)
375                     {
376                       TRACE (printf ("  k62\n"));
377                       CPUVEC_SETUP_k6_k62mmx;
378                     }
379                   if (model >= 9)
380                     {
381                       TRACE (printf ("  k63\n"));
382                     }
383                 }
384               break;
385             case 6:
386               TRACE (printf ("  athlon\n"));
387               CPUVEC_SETUP_k7;
388               CPUVEC_SETUP_k7_mmx;
389               break;
390 
391             case 0x0f:		/* k8 */
392             case 0x11:		/* "fam 11h", mix of k8 and k10 */
393             case 0x13:		/* unknown, conservativeky assume k8  */
394             case 0x16:		/* unknown, conservativeky assume k8  */
395             case 0x17:		/* unknown, conservativeky assume k8  */
396               TRACE (printf ("  k8\n"));
397               CPUVEC_SETUP_k7;
398               CPUVEC_SETUP_k7_mmx;
399               CPUVEC_SETUP_k8;
400 	      break;
401 
402             case 0x10:		/* k10 */
403             case 0x12:		/* k10 (llano) */
404               TRACE (printf ("  k10\n"));
405               CPUVEC_SETUP_k7;
406               CPUVEC_SETUP_k7_mmx;
407 	      break;
408 
409             case 0x14:		/* bobcat */
410               TRACE (printf ("  bobcat\n"));
411               CPUVEC_SETUP_k7;
412               CPUVEC_SETUP_k7_mmx;
413               CPUVEC_SETUP_bobcat;
414 	      break;
415 
416             case 0x15:		/* bulldozer */
417               TRACE (printf ("  bulldozer\n"));
418               CPUVEC_SETUP_k7;
419               CPUVEC_SETUP_k7_mmx;
420 	      break;
421             }
422         }
423       else if (strcmp (vendor_string, "CentaurHauls") == 0)
424         {
425           switch (family)
426             {
427             case 6:
428               TRACE (printf ("  viac3\n"));
429               if (model >= 9)
430                 {
431                   TRACE (printf ("  viac32\n"));
432                 }
433 	      if (model >= 15)
434 		{
435                   TRACE (printf ("  nano\n"));
436 		  CPUVEC_SETUP_nano;
437 		}
438               break;
439             }
440         }
441       else if (strcmp (vendor_string, "CyrixInstead") == 0)
442         {
443           /* Should recognize Cyrix' processors too.  */
444           TRACE (printf ("  cyrix something\n"));
445         }
446     }
447 
448   /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1.
449      Instead default to the plain versions from whichever CPU we detected.
450      The function arguments are compatible, no need for any glue code.  */
451   if (decided_cpuvec.preinv_divrem_1 == NULL)
452     decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1;
453   if (decided_cpuvec.preinv_mod_1 == NULL)
454     decided_cpuvec.preinv_mod_1    =(preinv_mod_1_t)   decided_cpuvec.mod_1;
455 
456   ASSERT_CPUVEC (decided_cpuvec);
457   CPUVEC_INSTALL (decided_cpuvec);
458 
459   /* Set this once the threshold fields are ready.
460      Use volatile to prevent it getting moved.  */
461   *((volatile int *) &__gmpn_cpuvec_initialized) = 1;
462 }
463