xref: /openbsd-src/gnu/llvm/compiler-rt/lib/builtins/cpu_model.c (revision 1ad61ae0a79a724d2d3ec69e69c8e1d1ff6b53a0)
1 //===-- cpu_model.c - Support for __cpu_model builtin  ------------*- C -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file is based on LLVM's lib/Support/Host.cpp.
10 //  It implements the operating system Host concept and builtin
11 //  __cpu_model for the compiler_rt library for x86 and
12 //  __aarch64_have_lse_atomics for AArch64.
13 //
14 //===----------------------------------------------------------------------===//
15 
// __has_attribute is probed in the #elif below, so provide a fallback first
// for preprocessors that do not implement it; otherwise the probe itself is
// ill-formed on those compilers.
#ifndef __has_attribute
#define __has_attribute(attr) 0
#endif

// CONSTRUCTOR_ATTRIBUTE marks a function to be run automatically during
// program start-up. It is always defined, possibly to nothing.
#if defined(HAVE_INIT_PRIORITY)
// Explicit priority 101, so the function runs before constructors that have
// no priority set. The priority is an attribute argument and therefore has to
// be parenthesised.
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif
25 
26 #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||           \
27      defined(_M_X64)) &&                                                       \
28     (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
29 
30 #include <assert.h>
31 
32 #define bool int
33 #define true 1
34 #define false 0
35 
36 #ifdef _MSC_VER
37 #include <intrin.h>
38 #endif
39 
40 #ifndef __has_attribute
41 #define __has_attribute(attr) 0
42 #endif
43 
// Values that CPUID leaf 0 returns in EBX for the vendors recognised here.
// Each constant is the first four characters of the vendor string, packed
// with the first character in the least significant byte.
enum VendorSignatures {
  SIG_INTEL = 0x756E6547, // "Genu"
  SIG_AMD = 0x68747541    // "Auth"
};
48 
// Vendor identifiers stored in __cpu_model.__cpu_vendor. Zero is left unused
// so that a zero vendor can mean "not initialised yet".
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD = 2,
  VENDOR_OTHER = 3,
  VENDOR_MAX = 4 // One past the last valid vendor.
};
55 
// CPU type identifiers stored in __cpu_model.__cpu_type. The numbering is
// spelled out explicitly: new entries are appended, never inserted, so the
// existing values stay stable.
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2 = 2,
  INTEL_COREI7 = 3,
  AMDFAM10H = 4,
  AMDFAM15H = 5,
  INTEL_SILVERMONT = 6,
  INTEL_KNL = 7,
  AMD_BTVER1 = 8,
  AMD_BTVER2 = 9,
  AMDFAM17H = 10,
  INTEL_KNM = 11,
  INTEL_GOLDMONT = 12,
  INTEL_GOLDMONT_PLUS = 13,
  INTEL_TREMONT = 14,
  AMDFAM19H = 15,
  CPU_TYPE_MAX = 16 // One past the last valid type.
};
74 
// CPU subtype identifiers stored in __cpu_model.__cpu_subtype. Intel and AMD
// entries are interleaved in the order they were added; the explicit values
// make that ordering visible.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE = 2,
  INTEL_COREI7_SANDYBRIDGE = 3,
  AMDFAM10H_BARCELONA = 4,
  AMDFAM10H_SHANGHAI = 5,
  AMDFAM10H_ISTANBUL = 6,
  AMDFAM15H_BDVER1 = 7,
  AMDFAM15H_BDVER2 = 8,
  AMDFAM15H_BDVER3 = 9,
  AMDFAM15H_BDVER4 = 10,
  AMDFAM17H_ZNVER1 = 11,
  INTEL_COREI7_IVYBRIDGE = 12,
  INTEL_COREI7_HASWELL = 13,
  INTEL_COREI7_BROADWELL = 14,
  INTEL_COREI7_SKYLAKE = 15,
  INTEL_COREI7_SKYLAKE_AVX512 = 16,
  INTEL_COREI7_CANNONLAKE = 17,
  INTEL_COREI7_ICELAKE_CLIENT = 18,
  INTEL_COREI7_ICELAKE_SERVER = 19,
  AMDFAM17H_ZNVER2 = 20,
  INTEL_COREI7_CASCADELAKE = 21,
  INTEL_COREI7_TIGERLAKE = 22,
  INTEL_COREI7_COOPERLAKE = 23,
  INTEL_COREI7_SAPPHIRERAPIDS = 24,
  INTEL_COREI7_ALDERLAKE = 25,
  AMDFAM19H_ZNVER3 = 26,
  INTEL_COREI7_ROCKETLAKE = 27,
  CPU_SUBTYPE_MAX = 28 // One past the last valid subtype.
};
105 
// Feature bit indices. Feature F lives in bit (F % 32) of 32-bit word
// (F / 32) of the feature array, so values 0-31 land in the first word and
// values 32 and up in the second.
enum ProcessorFeatures {
  // First feature word.
  FEATURE_CMOV = 0,
  FEATURE_MMX = 1,
  FEATURE_POPCNT = 2,
  FEATURE_SSE = 3,
  FEATURE_SSE2 = 4,
  FEATURE_SSE3 = 5,
  FEATURE_SSSE3 = 6,
  FEATURE_SSE4_1 = 7,
  FEATURE_SSE4_2 = 8,
  FEATURE_AVX = 9,
  FEATURE_AVX2 = 10,
  FEATURE_SSE4_A = 11,
  FEATURE_FMA4 = 12,
  FEATURE_XOP = 13,
  FEATURE_FMA = 14,
  FEATURE_AVX512F = 15,
  FEATURE_BMI = 16,
  FEATURE_BMI2 = 17,
  FEATURE_AES = 18,
  FEATURE_PCLMUL = 19,
  FEATURE_AVX512VL = 20,
  FEATURE_AVX512BW = 21,
  FEATURE_AVX512DQ = 22,
  FEATURE_AVX512CD = 23,
  FEATURE_AVX512ER = 24,
  FEATURE_AVX512PF = 25,
  FEATURE_AVX512VBMI = 26,
  FEATURE_AVX512IFMA = 27,
  FEATURE_AVX5124VNNIW = 28,
  FEATURE_AVX5124FMAPS = 29,
  FEATURE_AVX512VPOPCNTDQ = 30,
  FEATURE_AVX512VBMI2 = 31,
  // Second feature word.
  FEATURE_GFNI = 32,
  FEATURE_VPCLMULQDQ = 33,
  FEATURE_AVX512VNNI = 34,
  FEATURE_AVX512BITALG = 35,
  FEATURE_AVX512BF16 = 36,
  FEATURE_AVX512VP2INTERSECT = 37,
  CPU_FEATURE_MAX = 38 // Number of features; sizes the feature array.
};
147 
// Reports whether the CPUID instruction may be executed.
//
// Only 32-bit x86 built with a GNU-compatible compiler performs a real probe:
// it attempts to flip EFLAGS bit 21 (mask 0x00200000) and reports support
// only if the flip is visible when EFLAGS is read back. The probe was copied
// from clang's cpuid.h (__get_cpuid_max) after bug reports of OpenSSL
// crashing on CPUs without CPUID. Every other configuration reports true.
static bool isCpuIdSupported() {
  bool Supported = true;
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int FlagToggled;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(FlagToggled)
          :
          : "eax", "ecx");
  // The asm leaves 0 when the re-read flags equal the original value.
  Supported = FlagToggled != 0;
#endif
  return Supported;
}
179 
180 // This code is copied from lib/Support/Host.cpp.
181 // Changes to either file should be mirrored in the other.
182 
/// getX86CpuIDAndInfo - Execute CPUID for leaf \p value and store the
/// resulting EAX, EBX, ECX and EDX through the four out-pointers. Returns
/// false on success. Returns true, leaving the out-parameters untouched, when
/// this compiler/target combination offers no way to run CPUID.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx, so it is preserved by hand:
  // rbx is parked in rsi, and the final xchg restores rbx while leaving the
  // EBX result in rsi.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving sequence with 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(_MSC_VER) && !defined(__GNUC__) && !defined(__clang__)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
220 
/// getX86CpuIDAndInfoEx - Execute CPUID for leaf \p value with \p subleaf in
/// ECX and store the resulting EAX, EBX, ECX and EDX through the four
/// out-pointers. Returns false on success. Returns true, leaving the
/// out-parameters untouched, when this compiler/target combination offers no
/// way to run CPUID.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx, so it is preserved by hand:
  // rbx is parked in rsi, and the final xchg restores rbx while leaving the
  // EBX result in rsi.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving sequence with 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(_MSC_VER) && !defined(__GNUC__) && !defined(__clang__)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
259 
// Read extended control register 0 (XCR0), which is used to detect features
// such as AVX. The low 32 bits are stored through rEAX and the high 32 bits
// through rEDX. Returns false on success, or true (out-parameters untouched)
// when no way of reading the register is available.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
  bool Unavailable = true;
#if defined(__GNUC__) || defined(__clang__)
  // xgetbv is emitted as raw bytes rather than by mnemonic because older
  // assemblers do not include support for it and there is no easy way to
  // conditionally compile based on the assembler used. ECX = 0 is the
  // register index passed to the instruction.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  Unavailable = false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Xcr0;
  *rEDX = Xcr0 >> 32;
  Unavailable = false;
#endif
  return Unavailable;
}
277 
// Decode the family and model numbers from the EAX value of CPUID leaf 1.
//
// The base family is bits 8-11 and the base model is bits 4-7. When the base
// family is 0xF, the extended family (bits 20-27) is added to it. When the
// base family is 6 or 0xF, the extended model (bits 16-19) supplies the upper
// four bits of the model.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;
  const unsigned BaseModel = (EAX >> 4) & 0xf;
  const unsigned ExtFamily = (EAX >> 20) & 0xff;
  const unsigned ExtModel = (EAX >> 16) & 0xf;

  unsigned FullFamily = BaseFamily;
  unsigned FullModel = BaseModel;

  if (BaseFamily == 0xf)
    FullFamily += ExtFamily;
  if (BaseFamily == 6 || BaseFamily == 0xf)
    FullModel += ExtModel << 4;

  *Family = FullFamily;
  *Model = FullModel;
}
290 
291 static const char *
292 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
293                                 const unsigned *Features,
294                                 unsigned *Type, unsigned *Subtype) {
295 #define testFeature(F)                                                         \
296   (Features[F / 32] & (1 << (F % 32))) != 0
297 
298   // We select CPU strings to match the code in Host.cpp, but we don't use them
299   // in compiler-rt.
300   const char *CPU = 0;
301 
302   switch (Family) {
303   case 6:
304     switch (Model) {
305     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
306                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
307                // mobile processor, Intel Core 2 Extreme processor, Intel
308                // Pentium Dual-Core processor, Intel Xeon processor, model
309                // 0Fh. All processors are manufactured using the 65 nm process.
310     case 0x16: // Intel Celeron processor model 16h. All processors are
311                // manufactured using the 65 nm process
312       CPU = "core2";
313       *Type = INTEL_CORE2;
314       break;
315     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
316                // 17h. All processors are manufactured using the 45 nm process.
317                //
318                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
319     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
320                // the 45 nm process.
321       CPU = "penryn";
322       *Type = INTEL_CORE2;
323       break;
324     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
325                // processors are manufactured using the 45 nm process.
326     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
327                // As found in a Summer 2010 model iMac.
328     case 0x1f:
329     case 0x2e:              // Nehalem EX
330       CPU = "nehalem";
331       *Type = INTEL_COREI7;
332       *Subtype = INTEL_COREI7_NEHALEM;
333       break;
334     case 0x25: // Intel Core i7, laptop version.
335     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
336                // processors are manufactured using the 32 nm process.
337     case 0x2f: // Westmere EX
338       CPU = "westmere";
339       *Type = INTEL_COREI7;
340       *Subtype = INTEL_COREI7_WESTMERE;
341       break;
342     case 0x2a: // Intel Core i7 processor. All processors are manufactured
343                // using the 32 nm process.
344     case 0x2d:
345       CPU = "sandybridge";
346       *Type = INTEL_COREI7;
347       *Subtype = INTEL_COREI7_SANDYBRIDGE;
348       break;
349     case 0x3a:
350     case 0x3e:              // Ivy Bridge EP
351       CPU = "ivybridge";
352       *Type = INTEL_COREI7;
353       *Subtype = INTEL_COREI7_IVYBRIDGE;
354       break;
355 
356     // Haswell:
357     case 0x3c:
358     case 0x3f:
359     case 0x45:
360     case 0x46:
361       CPU = "haswell";
362       *Type = INTEL_COREI7;
363       *Subtype = INTEL_COREI7_HASWELL;
364       break;
365 
366     // Broadwell:
367     case 0x3d:
368     case 0x47:
369     case 0x4f:
370     case 0x56:
371       CPU = "broadwell";
372       *Type = INTEL_COREI7;
373       *Subtype = INTEL_COREI7_BROADWELL;
374       break;
375 
376     // Skylake:
377     case 0x4e:              // Skylake mobile
378     case 0x5e:              // Skylake desktop
379     case 0x8e:              // Kaby Lake mobile
380     case 0x9e:              // Kaby Lake desktop
381     case 0xa5:              // Comet Lake-H/S
382     case 0xa6:              // Comet Lake-U
383       CPU = "skylake";
384       *Type = INTEL_COREI7;
385       *Subtype = INTEL_COREI7_SKYLAKE;
386       break;
387 
388     // Rocketlake:
389     case 0xa7:
390       CPU = "rocketlake";
391       *Type = INTEL_COREI7;
392       *Subtype = INTEL_COREI7_ROCKETLAKE;
393       break;
394 
395     // Skylake Xeon:
396     case 0x55:
397       *Type = INTEL_COREI7;
398       if (testFeature(FEATURE_AVX512BF16)) {
399         CPU = "cooperlake";
400         *Subtype = INTEL_COREI7_COOPERLAKE;
401       } else if (testFeature(FEATURE_AVX512VNNI)) {
402         CPU = "cascadelake";
403         *Subtype = INTEL_COREI7_CASCADELAKE;
404       } else {
405         CPU = "skylake-avx512";
406         *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
407       }
408       break;
409 
410     // Cannonlake:
411     case 0x66:
412       CPU = "cannonlake";
413       *Type = INTEL_COREI7;
414       *Subtype = INTEL_COREI7_CANNONLAKE;
415       break;
416 
417     // Icelake:
418     case 0x7d:
419     case 0x7e:
420       CPU = "icelake-client";
421       *Type = INTEL_COREI7;
422       *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
423       break;
424 
425     // Icelake Xeon:
426     case 0x6a:
427     case 0x6c:
428       CPU = "icelake-server";
429       *Type = INTEL_COREI7;
430       *Subtype = INTEL_COREI7_ICELAKE_SERVER;
431       break;
432 
433     // Sapphire Rapids:
434     case 0x8f:
435       CPU = "sapphirerapids";
436       *Type = INTEL_COREI7;
437       *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
438       break;
439 
440     case 0x1c: // Most 45 nm Intel Atom processors
441     case 0x26: // 45 nm Atom Lincroft
442     case 0x27: // 32 nm Atom Medfield
443     case 0x35: // 32 nm Atom Midview
444     case 0x36: // 32 nm Atom Midview
445       CPU = "bonnell";
446       *Type = INTEL_BONNELL;
447       break;
448 
449     // Atom Silvermont codes from the Intel software optimization guide.
450     case 0x37:
451     case 0x4a:
452     case 0x4d:
453     case 0x5a:
454     case 0x5d:
455     case 0x4c: // really airmont
456       CPU = "silvermont";
457       *Type = INTEL_SILVERMONT;
458       break;
459     // Goldmont:
460     case 0x5c: // Apollo Lake
461     case 0x5f: // Denverton
462       CPU = "goldmont";
463       *Type = INTEL_GOLDMONT;
464       break; // "goldmont"
465     case 0x7a:
466       CPU = "goldmont-plus";
467       *Type = INTEL_GOLDMONT_PLUS;
468       break;
469     case 0x86:
470       CPU = "tremont";
471       *Type = INTEL_TREMONT;
472       break;
473 
474     case 0x57:
475       CPU = "knl";
476       *Type = INTEL_KNL;
477       break;
478 
479     case 0x85:
480       CPU = "knm";
481       *Type = INTEL_KNM;
482       break;
483 
484     default: // Unknown family 6 CPU.
485       break;
486     }
487     break;
488   default:
489     break; // Unknown.
490   }
491 
492   return CPU;
493 }
494 
495 static const char *
496 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
497                               const unsigned *Features,
498                               unsigned *Type, unsigned *Subtype) {
499   // We select CPU strings to match the code in Host.cpp, but we don't use them
500   // in compiler-rt.
501   const char *CPU = 0;
502 
503   switch (Family) {
504   case 16:
505     CPU = "amdfam10";
506     *Type = AMDFAM10H;
507     switch (Model) {
508     case 2:
509       *Subtype = AMDFAM10H_BARCELONA;
510       break;
511     case 4:
512       *Subtype = AMDFAM10H_SHANGHAI;
513       break;
514     case 8:
515       *Subtype = AMDFAM10H_ISTANBUL;
516       break;
517     }
518     break;
519   case 20:
520     CPU = "btver1";
521     *Type = AMD_BTVER1;
522     break;
523   case 21:
524     CPU = "bdver1";
525     *Type = AMDFAM15H;
526     if (Model >= 0x60 && Model <= 0x7f) {
527       CPU = "bdver4";
528       *Subtype = AMDFAM15H_BDVER4;
529       break; // 60h-7Fh: Excavator
530     }
531     if (Model >= 0x30 && Model <= 0x3f) {
532       CPU = "bdver3";
533       *Subtype = AMDFAM15H_BDVER3;
534       break; // 30h-3Fh: Steamroller
535     }
536     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
537       CPU = "bdver2";
538       *Subtype = AMDFAM15H_BDVER2;
539       break; // 02h, 10h-1Fh: Piledriver
540     }
541     if (Model <= 0x0f) {
542       *Subtype = AMDFAM15H_BDVER1;
543       break; // 00h-0Fh: Bulldozer
544     }
545     break;
546   case 22:
547     CPU = "btver2";
548     *Type = AMD_BTVER2;
549     break;
550   case 23:
551     CPU = "znver1";
552     *Type = AMDFAM17H;
553     if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
554       CPU = "znver2";
555       *Subtype = AMDFAM17H_ZNVER2;
556       break; // 30h-3fh, 71h: Zen2
557     }
558     if (Model <= 0x0f) {
559       *Subtype = AMDFAM17H_ZNVER1;
560       break; // 00h-0Fh: Zen1
561     }
562     break;
563   case 25:
564     CPU = "znver3";
565     *Type = AMDFAM19H;
566     if (Model <= 0x0f) {
567       *Subtype = AMDFAM19H_ZNVER3;
568       break; // 00h-0Fh: Zen3
569     }
570     break;
571   default:
572     break; // Unknown AMD CPU.
573   }
574 
575   return CPU;
576 }
577 
578 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
579                                  unsigned *Features) {
580   unsigned EAX, EBX;
581 
582 #define setFeature(F)                                                          \
583   Features[F / 32] |= 1U << (F % 32)
584 
585   if ((EDX >> 15) & 1)
586     setFeature(FEATURE_CMOV);
587   if ((EDX >> 23) & 1)
588     setFeature(FEATURE_MMX);
589   if ((EDX >> 25) & 1)
590     setFeature(FEATURE_SSE);
591   if ((EDX >> 26) & 1)
592     setFeature(FEATURE_SSE2);
593 
594   if ((ECX >> 0) & 1)
595     setFeature(FEATURE_SSE3);
596   if ((ECX >> 1) & 1)
597     setFeature(FEATURE_PCLMUL);
598   if ((ECX >> 9) & 1)
599     setFeature(FEATURE_SSSE3);
600   if ((ECX >> 12) & 1)
601     setFeature(FEATURE_FMA);
602   if ((ECX >> 19) & 1)
603     setFeature(FEATURE_SSE4_1);
604   if ((ECX >> 20) & 1)
605     setFeature(FEATURE_SSE4_2);
606   if ((ECX >> 23) & 1)
607     setFeature(FEATURE_POPCNT);
608   if ((ECX >> 25) & 1)
609     setFeature(FEATURE_AES);
610 
611   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
612   // indicates that the AVX registers will be saved and restored on context
613   // switch, then we have full AVX support.
614   const unsigned AVXBits = (1 << 27) | (1 << 28);
615   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
616                 ((EAX & 0x6) == 0x6);
617 #if defined(__APPLE__)
618   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
619   // save the AVX512 context if we use AVX512 instructions, even the bit is not
620   // set right now.
621   bool HasAVX512Save = true;
622 #else
623   // AVX512 requires additional context to be saved by the OS.
624   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
625 #endif
626 
627   if (HasAVX)
628     setFeature(FEATURE_AVX);
629 
630   bool HasLeaf7 =
631       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
632 
633   if (HasLeaf7 && ((EBX >> 3) & 1))
634     setFeature(FEATURE_BMI);
635   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
636     setFeature(FEATURE_AVX2);
637   if (HasLeaf7 && ((EBX >> 8) & 1))
638     setFeature(FEATURE_BMI2);
639   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
640     setFeature(FEATURE_AVX512F);
641   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
642     setFeature(FEATURE_AVX512DQ);
643   if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
644     setFeature(FEATURE_AVX512IFMA);
645   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
646     setFeature(FEATURE_AVX512PF);
647   if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
648     setFeature(FEATURE_AVX512ER);
649   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
650     setFeature(FEATURE_AVX512CD);
651   if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
652     setFeature(FEATURE_AVX512BW);
653   if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
654     setFeature(FEATURE_AVX512VL);
655 
656   if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
657     setFeature(FEATURE_AVX512VBMI);
658   if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
659     setFeature(FEATURE_AVX512VBMI2);
660   if (HasLeaf7 && ((ECX >> 8) & 1))
661     setFeature(FEATURE_GFNI);
662   if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
663     setFeature(FEATURE_VPCLMULQDQ);
664   if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
665     setFeature(FEATURE_AVX512VNNI);
666   if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
667     setFeature(FEATURE_AVX512BITALG);
668   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
669     setFeature(FEATURE_AVX512VPOPCNTDQ);
670 
671   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
672     setFeature(FEATURE_AVX5124VNNIW);
673   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
674     setFeature(FEATURE_AVX5124FMAPS);
675   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
676     setFeature(FEATURE_AVX512VP2INTERSECT);
677 
678   bool HasLeaf7Subleaf1 =
679       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
680   if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
681     setFeature(FEATURE_AVX512BF16);
682 
683   unsigned MaxExtLevel;
684   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
685 
686   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
687                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
688   if (HasExtLeaf1 && ((ECX >> 6) & 1))
689     setFeature(FEATURE_SSE4_A);
690   if (HasExtLeaf1 && ((ECX >> 11) & 1))
691     setFeature(FEATURE_XOP);
692   if (HasExtLeaf1 && ((ECX >> 16) & 1))
693     setFeature(FEATURE_FMA4);
694 #undef setFeature
695 }
696 
// Prototype for the initializer defined below, carrying the constructor
// attribute. Given hidden visibility everywhere except on Windows.
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;

// Detection results filled in by __cpu_indicator_init. All fields start at
// zero; a non-zero __cpu_vendor is what the initializer uses to tell that it
// has already run.
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
struct __processor_model {
  unsigned int __cpu_vendor;      // A ProcessorVendors value.
  unsigned int __cpu_type;        // A ProcessorTypes value, or 0 if unknown.
  unsigned int __cpu_subtype;     // A ProcessorSubtypes value, or 0 if unknown.
  unsigned int __cpu_features[1]; // First 32-bit word of the feature bitset.
} __cpu_model = {0, 0, 0, {0}};

// Second 32-bit word of the feature bitset (features numbered 32 and up).
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
unsigned int __cpu_features2 = 0;
716 
717 // A constructor function that is sets __cpu_model and __cpu_features2 with
718 // the right values.  This needs to run only once.  This constructor is
719 // given the highest priority and it should run before constructors without
720 // the priority set.  However, it still runs after ifunc initializers and
721 // needs to be called explicitly there.
722 
723 int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
724   unsigned EAX, EBX, ECX, EDX;
725   unsigned MaxLeaf = 5;
726   unsigned Vendor;
727   unsigned Model, Family;
728   unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
729 
730   // This function needs to run just once.
731   if (__cpu_model.__cpu_vendor)
732     return 0;
733 
734   if (!isCpuIdSupported() ||
735       getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
736     __cpu_model.__cpu_vendor = VENDOR_OTHER;
737     return -1;
738   }
739 
740   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
741   detectX86FamilyModel(EAX, &Family, &Model);
742 
743   // Find available features.
744   getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
745 
746   assert((sizeof(Features)/sizeof(Features[0])) == 2);
747   __cpu_model.__cpu_features[0] = Features[0];
748   __cpu_features2 = Features[1];
749 
750   if (Vendor == SIG_INTEL) {
751     // Get CPU type.
752     getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
753                                     &(__cpu_model.__cpu_type),
754                                     &(__cpu_model.__cpu_subtype));
755     __cpu_model.__cpu_vendor = VENDOR_INTEL;
756   } else if (Vendor == SIG_AMD) {
757     // Get CPU type.
758     getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
759                                   &(__cpu_model.__cpu_type),
760                                   &(__cpu_model.__cpu_subtype));
761     __cpu_model.__cpu_vendor = VENDOR_AMD;
762   } else
763     __cpu_model.__cpu_vendor = VENDOR_OTHER;
764 
765   assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
766   assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
767   assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
768 
769   return 0;
770 }
771 #elif defined(__aarch64__)
// LSE support detection for out-of-line atomics
// using HWCAP and Auxiliary vector
//
// The flag is defined unconditionally. It is only assigned by the constructor
// below, which is compiled in when <sys/auxv.h> can be found.
_Bool __aarch64_have_lse_atomics
    __attribute__((visibility("hidden"), nocommon));
// __has_include must be known to exist before it can appear in an #if
// expression, hence the two nested conditionals.
#if defined(__has_include)
#if __has_include(<sys/auxv.h>)
#include <sys/auxv.h>
// Fallback values for when the header does not define these constants.
#ifndef AT_HWCAP
#define AT_HWCAP 16
#endif
#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1 << 8)
#endif
// Constructor: reads the AT_HWCAP auxiliary-vector entry and records whether
// its HWCAP_ATOMICS bit is set.
static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
  unsigned long hwcap = getauxval(AT_HWCAP);
  __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
}
#endif // __has_include(<sys/auxv.h>)
#endif // defined(__has_include)
791 #endif // defined(__aarch64__)
792