// xref: /openbsd-src/gnu/llvm/compiler-rt/lib/builtins/cpu_model.c (revision 810390e339a5425391477d5d41c78d7cab2424ac)
//===-- cpu_model.c - Support for __cpu_model builtin  ------------*- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file is based on LLVM's lib/Support/Host.cpp.
//  It implements the operating system Host concept and builtin
//  __cpu_model for the compiler_rt library for x86 and
//  __aarch64_have_lse_atomics, __aarch64_cpu_features for AArch64.
//
//===----------------------------------------------------------------------===//
153cab2bb3Spatrick 
// Compilers without __has_attribute are treated as supporting no attributes.
#ifndef __has_attribute
#define __has_attribute(attr) 0
#endif

// CONSTRUCTOR_ATTRIBUTE marks a function that must run during program
// initialization. It expands to nothing when the compiler has no `constructor`
// attribute.
#if __has_attribute(constructor)
#if defined(__GNUC__) && __GNUC__ >= 9
// Ordinarily init priorities below 101 are disallowed as they are reserved for
// the implementation. However, we are the implementation, so silence the
// diagnostic, since it doesn't apply to us.
#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
#endif
// We're choosing init priority 90 to force our constructors to run before any
// constructors in the end user application (starting at priority 101). This
// value matches the libgcc choice for the same functions.
#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90)))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif
36d89ec533Spatrick 
373cab2bb3Spatrick #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||           \
383cab2bb3Spatrick      defined(_M_X64)) &&                                                       \
393cab2bb3Spatrick     (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
403cab2bb3Spatrick 
413cab2bb3Spatrick #include <assert.h>
423cab2bb3Spatrick 
433cab2bb3Spatrick #define bool int
443cab2bb3Spatrick #define true 1
453cab2bb3Spatrick #define false 0
463cab2bb3Spatrick 
473cab2bb3Spatrick #ifdef _MSC_VER
483cab2bb3Spatrick #include <intrin.h>
493cab2bb3Spatrick #endif
503cab2bb3Spatrick 
// First four bytes of the CPUID vendor string, read as a little-endian
// 32-bit value.
enum VendorSignatures {
  SIG_INTEL = 0x756e6547, // Genu
  SIG_AMD = 0x68747541,   // Auth
};
553cab2bb3Spatrick 
// Processor vendor identifiers. Numbering starts at 1, so 0 is never a valid
// vendor.
// NOTE(review): these values are presumably shared with the compiler's
// __cpu_model consumers -- append only, never renumber; confirm before editing.
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD,
  VENDOR_OTHER,
  VENDOR_MAX
};
623cab2bb3Spatrick 
// Processor type (micro-architecture family) identifiers written through the
// `Type` out-parameter of the get*ProcessorTypeAndSubtype functions.
// Numbering starts at 1 and follows declaration order, which is why Intel and
// AMD entries are interleaved.
// NOTE(review): the values are presumably part of the __cpu_model ABI --
// append new entries directly before CPU_TYPE_MAX; confirm before reordering.
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2,
  INTEL_COREI7,
  AMDFAM10H,
  AMDFAM15H,
  INTEL_SILVERMONT,
  INTEL_KNL,
  AMD_BTVER1,
  AMD_BTVER2,
  AMDFAM17H,
  INTEL_KNM,
  INTEL_GOLDMONT,
  INTEL_GOLDMONT_PLUS,
  INTEL_TREMONT,
  AMDFAM19H,
  ZHAOXIN_FAM7H,
  INTEL_SIERRAFOREST,
  INTEL_GRANDRIDGE,
  CPU_TYPE_MAX
};
843cab2bb3Spatrick 
// Processor subtype (specific micro-architecture) identifiers written through
// the `Subtype` out-parameter of the get*ProcessorTypeAndSubtype functions.
// Numbering starts at 1 and follows declaration order.
// NOTE(review): the values are presumably part of the __cpu_model ABI --
// append new entries directly before CPU_SUBTYPE_MAX; confirm before
// reordering.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE,
  INTEL_COREI7_SANDYBRIDGE,
  AMDFAM10H_BARCELONA,
  AMDFAM10H_SHANGHAI,
  AMDFAM10H_ISTANBUL,
  AMDFAM15H_BDVER1,
  AMDFAM15H_BDVER2,
  AMDFAM15H_BDVER3,
  AMDFAM15H_BDVER4,
  AMDFAM17H_ZNVER1,
  INTEL_COREI7_IVYBRIDGE,
  INTEL_COREI7_HASWELL,
  INTEL_COREI7_BROADWELL,
  INTEL_COREI7_SKYLAKE,
  INTEL_COREI7_SKYLAKE_AVX512,
  INTEL_COREI7_CANNONLAKE,
  INTEL_COREI7_ICELAKE_CLIENT,
  INTEL_COREI7_ICELAKE_SERVER,
  AMDFAM17H_ZNVER2,
  INTEL_COREI7_CASCADELAKE,
  INTEL_COREI7_TIGERLAKE,
  INTEL_COREI7_COOPERLAKE,
  INTEL_COREI7_SAPPHIRERAPIDS,
  INTEL_COREI7_ALDERLAKE,
  AMDFAM19H_ZNVER3,
  INTEL_COREI7_ROCKETLAKE,
  ZHAOXIN_FAM7H_LUJIAZUI,
  AMDFAM19H_ZNVER4,
  INTEL_COREI7_GRANITERAPIDS,
  CPU_SUBTYPE_MAX
};
1183cab2bb3Spatrick 
// Feature bit indices. Feature F lives in word F / 32, bit F % 32 of the
// `Features` array (see the setFeature / testFeature macros), so the numbering
// starts at 0 and CPU_FEATURE_MAX is the number of defined features.
// NOTE(review): the bit positions are presumably part of the __cpu_model ABI
// -- append new entries directly before CPU_FEATURE_MAX; confirm before
// reordering.
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX,
  FEATURE_POPCNT,
  FEATURE_SSE,
  FEATURE_SSE2,
  FEATURE_SSE3,
  FEATURE_SSSE3,
  FEATURE_SSE4_1,
  FEATURE_SSE4_2,
  FEATURE_AVX,
  FEATURE_AVX2,
  FEATURE_SSE4_A,
  FEATURE_FMA4,
  FEATURE_XOP,
  FEATURE_FMA,
  FEATURE_AVX512F,
  FEATURE_BMI,
  FEATURE_BMI2,
  FEATURE_AES,
  FEATURE_PCLMUL,
  FEATURE_AVX512VL,
  FEATURE_AVX512BW,
  FEATURE_AVX512DQ,
  FEATURE_AVX512CD,
  FEATURE_AVX512ER,
  FEATURE_AVX512PF,
  FEATURE_AVX512VBMI,
  FEATURE_AVX512IFMA,
  FEATURE_AVX5124VNNIW,
  FEATURE_AVX5124FMAPS,
  FEATURE_AVX512VPOPCNTDQ,
  FEATURE_AVX512VBMI2,
  FEATURE_GFNI,
  FEATURE_VPCLMULQDQ,
  FEATURE_AVX512VNNI,
  FEATURE_AVX512BITALG,
  FEATURE_AVX512BF16,
  FEATURE_AVX512VP2INTERSECT,
  CPU_FEATURE_MAX
};
1603cab2bb3Spatrick 
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
//
// Returns true when the cpuid instruction may be executed. Only 32-bit x86
// built with a GNU-compatible compiler performs a real probe; every other
// configuration reports true unconditionally.
static bool isCpuIdSupported(void) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int CpuIdSupported;
  // Try to flip bit 21 (0x00200000) of EFLAGS. If the value read back is
  // unchanged, the bit is not writable and cpuid is unavailable.
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(CpuIdSupported)
          :
          : "eax", "ecx");
  if (!CpuIdSupported)
    return false;
#endif
  return true;
}
1923cab2bb3Spatrick 
// This code is copied from lib/Support/Host.cpp.
// Changes to either file should be mirrored in the other.
1953cab2bb3Spatrick 
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments.  If we can't run cpuid on the host, return true.
///
/// \param value  CPUID leaf, loaded into EAX before the instruction runs. ECX
///               is not initialised here, so use getX86CpuIDAndInfoEx for
///               leaves that take a subleaf.
/// \param rEAX,rEBX,rECX,rEDX  Receive the four result registers. They are
///               not written when the function returns true.
/// \returns false on success, true when no cpuid implementation exists for
///          this compiler/target combination.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  // Same ebx-preserving sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = (unsigned)registers[0];
  *rEBX = (unsigned)registers[1];
  *rECX = (unsigned)registers[2];
  *rEDX = (unsigned)registers[3];
  return false;
#else
  return true;
#endif
}
2333cab2bb3Spatrick 
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments.  If we can't run cpuid on the host,
/// return true.
///
/// \param value    CPUID leaf, loaded into EAX before the instruction runs.
/// \param subleaf  CPUID subleaf, loaded into ECX before the instruction runs.
/// \param rEAX,rEBX,rECX,rEDX  Receive the four result registers. They are
///                 not written when the function returns true.
/// \returns false on success, true when no cpuid implementation exists for
///          this compiler/target combination.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  // Same ebx-preserving sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = (unsigned)registers[0];
  *rEBX = (unsigned)registers[1];
  *rECX = (unsigned)registers[2];
  *rEDX = (unsigned)registers[3];
  return false;
#else
  return true;
#endif
}
2723cab2bb3Spatrick 
// Read extended control register 0 (XCR0). Used to detect features such as
// AVX.
//
// On success stores the low 32 bits in *rEAX and the high 32 bits in *rEDX and
// returns false; returns true (outputs untouched) when no implementation is
// available for this compiler.
//
// The GNU path executes xgetbv unconditionally, so callers must first confirm
// that the instruction is usable (getAvailableFeatures only calls this after
// checking CPUID leaf 1 ECX bits 27 and 28).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = (unsigned)Result;         // low half
  *rEDX = (unsigned)(Result >> 32); // high half
  return false;
#else
  return true;
#endif
}
2903cab2bb3Spatrick 
/// detectX86FamilyModel - Decode the family and model numbers from the EAX
/// value passed in (the caller is expected to supply CPUID leaf 1 EAX).
///
/// \param EAX     Raw processor signature.
/// \param Family  Receives bits 8-11; when that field is 0xF the extended
///                family (bits 20-27) is added to it.
/// \param Model   Receives bits 4-7; when the base family is 6 or 0xF the
///                extended model (bits 16-19) supplies the upper nibble.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7
  if (*Family == 6 || *Family == 0xf) {
    if (*Family == 0xf)
      // Examine extended family ID if family ID is F.
      *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.
    *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
  }
}
3033cab2bb3Spatrick 
3041f9cb04fSpatrick static const char *
getIntelProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)3051f9cb04fSpatrick getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
3061f9cb04fSpatrick                                 const unsigned *Features,
3071f9cb04fSpatrick                                 unsigned *Type, unsigned *Subtype) {
3081f9cb04fSpatrick #define testFeature(F)                                                         \
3091f9cb04fSpatrick   (Features[F / 32] & (1 << (F % 32))) != 0
3101f9cb04fSpatrick 
3111f9cb04fSpatrick   // We select CPU strings to match the code in Host.cpp, but we don't use them
3121f9cb04fSpatrick   // in compiler-rt.
3131f9cb04fSpatrick   const char *CPU = 0;
3141f9cb04fSpatrick 
3153cab2bb3Spatrick   switch (Family) {
3163cab2bb3Spatrick   case 6:
3173cab2bb3Spatrick     switch (Model) {
3183cab2bb3Spatrick     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
3193cab2bb3Spatrick                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
3203cab2bb3Spatrick                // mobile processor, Intel Core 2 Extreme processor, Intel
3213cab2bb3Spatrick                // Pentium Dual-Core processor, Intel Xeon processor, model
3223cab2bb3Spatrick                // 0Fh. All processors are manufactured using the 65 nm process.
3233cab2bb3Spatrick     case 0x16: // Intel Celeron processor model 16h. All processors are
3243cab2bb3Spatrick                // manufactured using the 65 nm process
3251f9cb04fSpatrick       CPU = "core2";
3261f9cb04fSpatrick       *Type = INTEL_CORE2;
3271f9cb04fSpatrick       break;
3283cab2bb3Spatrick     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
3293cab2bb3Spatrick                // 17h. All processors are manufactured using the 45 nm process.
3303cab2bb3Spatrick                //
3313cab2bb3Spatrick                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
3323cab2bb3Spatrick     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
3333cab2bb3Spatrick                // the 45 nm process.
3341f9cb04fSpatrick       CPU = "penryn";
3351f9cb04fSpatrick       *Type = INTEL_CORE2;
3363cab2bb3Spatrick       break;
3373cab2bb3Spatrick     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
3383cab2bb3Spatrick                // processors are manufactured using the 45 nm process.
3393cab2bb3Spatrick     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
3403cab2bb3Spatrick                // As found in a Summer 2010 model iMac.
3413cab2bb3Spatrick     case 0x1f:
3423cab2bb3Spatrick     case 0x2e:              // Nehalem EX
3431f9cb04fSpatrick       CPU = "nehalem";
3441f9cb04fSpatrick       *Type = INTEL_COREI7;
3453cab2bb3Spatrick       *Subtype = INTEL_COREI7_NEHALEM;
3463cab2bb3Spatrick       break;
3473cab2bb3Spatrick     case 0x25: // Intel Core i7, laptop version.
3483cab2bb3Spatrick     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
3493cab2bb3Spatrick                // processors are manufactured using the 32 nm process.
3503cab2bb3Spatrick     case 0x2f: // Westmere EX
3511f9cb04fSpatrick       CPU = "westmere";
3521f9cb04fSpatrick       *Type = INTEL_COREI7;
3533cab2bb3Spatrick       *Subtype = INTEL_COREI7_WESTMERE;
3543cab2bb3Spatrick       break;
3553cab2bb3Spatrick     case 0x2a: // Intel Core i7 processor. All processors are manufactured
3563cab2bb3Spatrick                // using the 32 nm process.
3573cab2bb3Spatrick     case 0x2d:
3581f9cb04fSpatrick       CPU = "sandybridge";
3591f9cb04fSpatrick       *Type = INTEL_COREI7;
3603cab2bb3Spatrick       *Subtype = INTEL_COREI7_SANDYBRIDGE;
3613cab2bb3Spatrick       break;
3623cab2bb3Spatrick     case 0x3a:
3633cab2bb3Spatrick     case 0x3e:              // Ivy Bridge EP
3641f9cb04fSpatrick       CPU = "ivybridge";
3651f9cb04fSpatrick       *Type = INTEL_COREI7;
3663cab2bb3Spatrick       *Subtype = INTEL_COREI7_IVYBRIDGE;
3673cab2bb3Spatrick       break;
3683cab2bb3Spatrick 
3693cab2bb3Spatrick     // Haswell:
3703cab2bb3Spatrick     case 0x3c:
3713cab2bb3Spatrick     case 0x3f:
3723cab2bb3Spatrick     case 0x45:
3733cab2bb3Spatrick     case 0x46:
3741f9cb04fSpatrick       CPU = "haswell";
3751f9cb04fSpatrick       *Type = INTEL_COREI7;
3763cab2bb3Spatrick       *Subtype = INTEL_COREI7_HASWELL;
3773cab2bb3Spatrick       break;
3783cab2bb3Spatrick 
3793cab2bb3Spatrick     // Broadwell:
3803cab2bb3Spatrick     case 0x3d:
3813cab2bb3Spatrick     case 0x47:
3823cab2bb3Spatrick     case 0x4f:
3833cab2bb3Spatrick     case 0x56:
3841f9cb04fSpatrick       CPU = "broadwell";
3851f9cb04fSpatrick       *Type = INTEL_COREI7;
3863cab2bb3Spatrick       *Subtype = INTEL_COREI7_BROADWELL;
3873cab2bb3Spatrick       break;
3883cab2bb3Spatrick 
3893cab2bb3Spatrick     // Skylake:
3903cab2bb3Spatrick     case 0x4e:              // Skylake mobile
3913cab2bb3Spatrick     case 0x5e:              // Skylake desktop
3923cab2bb3Spatrick     case 0x8e:              // Kaby Lake mobile
3933cab2bb3Spatrick     case 0x9e:              // Kaby Lake desktop
3941f9cb04fSpatrick     case 0xa5:              // Comet Lake-H/S
3951f9cb04fSpatrick     case 0xa6:              // Comet Lake-U
3961f9cb04fSpatrick       CPU = "skylake";
3971f9cb04fSpatrick       *Type = INTEL_COREI7;
3983cab2bb3Spatrick       *Subtype = INTEL_COREI7_SKYLAKE;
3993cab2bb3Spatrick       break;
4003cab2bb3Spatrick 
401d89ec533Spatrick     // Rocketlake:
402d89ec533Spatrick     case 0xa7:
403d89ec533Spatrick       CPU = "rocketlake";
404d89ec533Spatrick       *Type = INTEL_COREI7;
405d89ec533Spatrick       *Subtype = INTEL_COREI7_ROCKETLAKE;
406d89ec533Spatrick       break;
407d89ec533Spatrick 
4083cab2bb3Spatrick     // Skylake Xeon:
4093cab2bb3Spatrick     case 0x55:
4103cab2bb3Spatrick       *Type = INTEL_COREI7;
4111f9cb04fSpatrick       if (testFeature(FEATURE_AVX512BF16)) {
4121f9cb04fSpatrick         CPU = "cooperlake";
4131f9cb04fSpatrick         *Subtype = INTEL_COREI7_COOPERLAKE;
4141f9cb04fSpatrick       } else if (testFeature(FEATURE_AVX512VNNI)) {
4151f9cb04fSpatrick         CPU = "cascadelake";
4161f9cb04fSpatrick         *Subtype = INTEL_COREI7_CASCADELAKE;
4171f9cb04fSpatrick       } else {
4181f9cb04fSpatrick         CPU = "skylake-avx512";
4191f9cb04fSpatrick         *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
4201f9cb04fSpatrick       }
4213cab2bb3Spatrick       break;
4223cab2bb3Spatrick 
4233cab2bb3Spatrick     // Cannonlake:
4243cab2bb3Spatrick     case 0x66:
4251f9cb04fSpatrick       CPU = "cannonlake";
4263cab2bb3Spatrick       *Type = INTEL_COREI7;
4271f9cb04fSpatrick       *Subtype = INTEL_COREI7_CANNONLAKE;
4283cab2bb3Spatrick       break;
4293cab2bb3Spatrick 
4303cab2bb3Spatrick     // Icelake:
4313cab2bb3Spatrick     case 0x7d:
4323cab2bb3Spatrick     case 0x7e:
4331f9cb04fSpatrick       CPU = "icelake-client";
4343cab2bb3Spatrick       *Type = INTEL_COREI7;
4351f9cb04fSpatrick       *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
4363cab2bb3Spatrick       break;
4373cab2bb3Spatrick 
438*810390e3Srobert     // Tigerlake:
439*810390e3Srobert     case 0x8c:
440*810390e3Srobert     case 0x8d:
441*810390e3Srobert       CPU = "tigerlake";
442*810390e3Srobert       *Type = INTEL_COREI7;
443*810390e3Srobert       *Subtype = INTEL_COREI7_TIGERLAKE;
444*810390e3Srobert       break;
445*810390e3Srobert 
446*810390e3Srobert     // Alderlake:
447*810390e3Srobert     case 0x97:
448*810390e3Srobert     case 0x9a:
449*810390e3Srobert     // Raptorlake:
450*810390e3Srobert     case 0xb7:
451*810390e3Srobert     // Meteorlake:
452*810390e3Srobert     case 0xaa:
453*810390e3Srobert     case 0xac:
454*810390e3Srobert       CPU = "alderlake";
455*810390e3Srobert       *Type = INTEL_COREI7;
456*810390e3Srobert       *Subtype = INTEL_COREI7_ALDERLAKE;
457*810390e3Srobert       break;
458*810390e3Srobert 
4593cab2bb3Spatrick     // Icelake Xeon:
4603cab2bb3Spatrick     case 0x6a:
4613cab2bb3Spatrick     case 0x6c:
4621f9cb04fSpatrick       CPU = "icelake-server";
4633cab2bb3Spatrick       *Type = INTEL_COREI7;
4641f9cb04fSpatrick       *Subtype = INTEL_COREI7_ICELAKE_SERVER;
4653cab2bb3Spatrick       break;
4663cab2bb3Spatrick 
467*810390e3Srobert     // Emerald Rapids:
468*810390e3Srobert     case 0xcf:
469d89ec533Spatrick     // Sapphire Rapids:
470d89ec533Spatrick     case 0x8f:
471d89ec533Spatrick       CPU = "sapphirerapids";
472d89ec533Spatrick       *Type = INTEL_COREI7;
473d89ec533Spatrick       *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
474d89ec533Spatrick       break;
475d89ec533Spatrick 
476*810390e3Srobert     // Granite Rapids:
477*810390e3Srobert     case 0xae:
478*810390e3Srobert     case 0xad:
479*810390e3Srobert       CPU = "graniterapids";
480*810390e3Srobert       *Type = INTEL_COREI7;
481*810390e3Srobert       *Subtype = INTEL_COREI7_GRANITERAPIDS;
482*810390e3Srobert       break;
483*810390e3Srobert 
4843cab2bb3Spatrick     case 0x1c: // Most 45 nm Intel Atom processors
4853cab2bb3Spatrick     case 0x26: // 45 nm Atom Lincroft
4863cab2bb3Spatrick     case 0x27: // 32 nm Atom Medfield
4873cab2bb3Spatrick     case 0x35: // 32 nm Atom Midview
4883cab2bb3Spatrick     case 0x36: // 32 nm Atom Midview
4891f9cb04fSpatrick       CPU = "bonnell";
4903cab2bb3Spatrick       *Type = INTEL_BONNELL;
4911f9cb04fSpatrick       break;
4923cab2bb3Spatrick 
4933cab2bb3Spatrick     // Atom Silvermont codes from the Intel software optimization guide.
4943cab2bb3Spatrick     case 0x37:
4953cab2bb3Spatrick     case 0x4a:
4963cab2bb3Spatrick     case 0x4d:
4973cab2bb3Spatrick     case 0x5a:
4983cab2bb3Spatrick     case 0x5d:
4993cab2bb3Spatrick     case 0x4c: // really airmont
5001f9cb04fSpatrick       CPU = "silvermont";
5013cab2bb3Spatrick       *Type = INTEL_SILVERMONT;
5021f9cb04fSpatrick       break;
5033cab2bb3Spatrick     // Goldmont:
5043cab2bb3Spatrick     case 0x5c: // Apollo Lake
5053cab2bb3Spatrick     case 0x5f: // Denverton
5061f9cb04fSpatrick       CPU = "goldmont";
5073cab2bb3Spatrick       *Type = INTEL_GOLDMONT;
5083cab2bb3Spatrick       break; // "goldmont"
5093cab2bb3Spatrick     case 0x7a:
5101f9cb04fSpatrick       CPU = "goldmont-plus";
5113cab2bb3Spatrick       *Type = INTEL_GOLDMONT_PLUS;
5123cab2bb3Spatrick       break;
5133cab2bb3Spatrick     case 0x86:
5141f9cb04fSpatrick       CPU = "tremont";
5153cab2bb3Spatrick       *Type = INTEL_TREMONT;
5163cab2bb3Spatrick       break;
5173cab2bb3Spatrick 
518*810390e3Srobert     // Sierraforest:
519*810390e3Srobert     case 0xaf:
520*810390e3Srobert       CPU = "sierraforest";
521*810390e3Srobert       *Type = INTEL_SIERRAFOREST;
522*810390e3Srobert       break;
523*810390e3Srobert 
524*810390e3Srobert     // Grandridge:
525*810390e3Srobert     case 0xb6:
526*810390e3Srobert       CPU = "grandridge";
527*810390e3Srobert       *Type = INTEL_GRANDRIDGE;
528*810390e3Srobert       break;
529*810390e3Srobert 
5303cab2bb3Spatrick     case 0x57:
5311f9cb04fSpatrick       CPU = "knl";
5321f9cb04fSpatrick       *Type = INTEL_KNL;
5333cab2bb3Spatrick       break;
5343cab2bb3Spatrick 
5353cab2bb3Spatrick     case 0x85:
5361f9cb04fSpatrick       CPU = "knm";
5371f9cb04fSpatrick       *Type = INTEL_KNM;
5383cab2bb3Spatrick       break;
5393cab2bb3Spatrick 
5403cab2bb3Spatrick     default: // Unknown family 6 CPU.
5413cab2bb3Spatrick       break;
5423cab2bb3Spatrick     }
5433cab2bb3Spatrick     break;
5443cab2bb3Spatrick   default:
5453cab2bb3Spatrick     break; // Unknown.
5463cab2bb3Spatrick   }
5471f9cb04fSpatrick 
5481f9cb04fSpatrick   return CPU;
5493cab2bb3Spatrick }
5503cab2bb3Spatrick 
5511f9cb04fSpatrick static const char *
getAMDProcessorTypeAndSubtype(unsigned Family,unsigned Model,const unsigned * Features,unsigned * Type,unsigned * Subtype)5521f9cb04fSpatrick getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
5531f9cb04fSpatrick                               const unsigned *Features,
5543cab2bb3Spatrick                               unsigned *Type, unsigned *Subtype) {
5551f9cb04fSpatrick   // We select CPU strings to match the code in Host.cpp, but we don't use them
5561f9cb04fSpatrick   // in compiler-rt.
5571f9cb04fSpatrick   const char *CPU = 0;
5581f9cb04fSpatrick 
5593cab2bb3Spatrick   switch (Family) {
5603cab2bb3Spatrick   case 16:
5611f9cb04fSpatrick     CPU = "amdfam10";
5621f9cb04fSpatrick     *Type = AMDFAM10H;
5633cab2bb3Spatrick     switch (Model) {
5643cab2bb3Spatrick     case 2:
5653cab2bb3Spatrick       *Subtype = AMDFAM10H_BARCELONA;
5663cab2bb3Spatrick       break;
5673cab2bb3Spatrick     case 4:
5683cab2bb3Spatrick       *Subtype = AMDFAM10H_SHANGHAI;
5693cab2bb3Spatrick       break;
5703cab2bb3Spatrick     case 8:
5713cab2bb3Spatrick       *Subtype = AMDFAM10H_ISTANBUL;
5723cab2bb3Spatrick       break;
5733cab2bb3Spatrick     }
5743cab2bb3Spatrick     break;
5753cab2bb3Spatrick   case 20:
5761f9cb04fSpatrick     CPU = "btver1";
5773cab2bb3Spatrick     *Type = AMD_BTVER1;
5781f9cb04fSpatrick     break;
5793cab2bb3Spatrick   case 21:
5801f9cb04fSpatrick     CPU = "bdver1";
5813cab2bb3Spatrick     *Type = AMDFAM15H;
5823cab2bb3Spatrick     if (Model >= 0x60 && Model <= 0x7f) {
5831f9cb04fSpatrick       CPU = "bdver4";
5843cab2bb3Spatrick       *Subtype = AMDFAM15H_BDVER4;
5851f9cb04fSpatrick       break; // 60h-7Fh: Excavator
5863cab2bb3Spatrick     }
5873cab2bb3Spatrick     if (Model >= 0x30 && Model <= 0x3f) {
5881f9cb04fSpatrick       CPU = "bdver3";
5893cab2bb3Spatrick       *Subtype = AMDFAM15H_BDVER3;
5901f9cb04fSpatrick       break; // 30h-3Fh: Steamroller
5913cab2bb3Spatrick     }
5923cab2bb3Spatrick     if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
5931f9cb04fSpatrick       CPU = "bdver2";
5943cab2bb3Spatrick       *Subtype = AMDFAM15H_BDVER2;
5951f9cb04fSpatrick       break; // 02h, 10h-1Fh: Piledriver
5963cab2bb3Spatrick     }
5973cab2bb3Spatrick     if (Model <= 0x0f) {
5983cab2bb3Spatrick       *Subtype = AMDFAM15H_BDVER1;
5991f9cb04fSpatrick       break; // 00h-0Fh: Bulldozer
6003cab2bb3Spatrick     }
6013cab2bb3Spatrick     break;
6023cab2bb3Spatrick   case 22:
6031f9cb04fSpatrick     CPU = "btver2";
6043cab2bb3Spatrick     *Type = AMD_BTVER2;
6051f9cb04fSpatrick     break;
6063cab2bb3Spatrick   case 23:
6071f9cb04fSpatrick     CPU = "znver1";
6083cab2bb3Spatrick     *Type = AMDFAM17H;
6093cab2bb3Spatrick     if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
6101f9cb04fSpatrick       CPU = "znver2";
6113cab2bb3Spatrick       *Subtype = AMDFAM17H_ZNVER2;
6121f9cb04fSpatrick       break; // 30h-3fh, 71h: Zen2
6133cab2bb3Spatrick     }
6143cab2bb3Spatrick     if (Model <= 0x0f) {
6153cab2bb3Spatrick       *Subtype = AMDFAM17H_ZNVER1;
6161f9cb04fSpatrick       break; // 00h-0Fh: Zen1
6173cab2bb3Spatrick     }
6183cab2bb3Spatrick     break;
619d89ec533Spatrick   case 25:
620d89ec533Spatrick     CPU = "znver3";
621d89ec533Spatrick     *Type = AMDFAM19H;
622*810390e3Srobert     if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) {
623*810390e3Srobert       // Family 19h Models 00h-0Fh - Zen3
624*810390e3Srobert       // Family 19h Models 20h-2Fh - Zen3
625*810390e3Srobert       // Family 19h Models 30h-3Fh - Zen3
626*810390e3Srobert       // Family 19h Models 40h-4Fh - Zen3+
627*810390e3Srobert       // Family 19h Models 50h-5Fh - Zen3+
628d89ec533Spatrick       *Subtype = AMDFAM19H_ZNVER3;
629*810390e3Srobert       break;
630*810390e3Srobert     }
631*810390e3Srobert     if ((Model >= 0x10 && Model <= 0x1f) ||
632*810390e3Srobert         (Model >= 0x60 && Model <= 0x74) ||
633*810390e3Srobert         (Model >= 0x78 && Model <= 0x7b) ||
634*810390e3Srobert         (Model >= 0xA0 && Model <= 0xAf)) {
635*810390e3Srobert       CPU = "znver4";
636*810390e3Srobert       *Subtype = AMDFAM19H_ZNVER4;
637*810390e3Srobert       break; //  "znver4"
638d89ec533Spatrick     }
639d89ec533Spatrick     break;
6403cab2bb3Spatrick   default:
6411f9cb04fSpatrick     break; // Unknown AMD CPU.
6423cab2bb3Spatrick   }
6431f9cb04fSpatrick 
6441f9cb04fSpatrick   return CPU;
6453cab2bb3Spatrick }
6463cab2bb3Spatrick 
getAvailableFeatures(unsigned ECX,unsigned EDX,unsigned MaxLeaf,unsigned * Features)6473cab2bb3Spatrick static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
6481f9cb04fSpatrick                                  unsigned *Features) {
6493cab2bb3Spatrick   unsigned EAX, EBX;
6503cab2bb3Spatrick 
6513cab2bb3Spatrick #define setFeature(F)                                                          \
6521f9cb04fSpatrick   Features[F / 32] |= 1U << (F % 32)
6533cab2bb3Spatrick 
6543cab2bb3Spatrick   if ((EDX >> 15) & 1)
6553cab2bb3Spatrick     setFeature(FEATURE_CMOV);
6563cab2bb3Spatrick   if ((EDX >> 23) & 1)
6573cab2bb3Spatrick     setFeature(FEATURE_MMX);
6583cab2bb3Spatrick   if ((EDX >> 25) & 1)
6593cab2bb3Spatrick     setFeature(FEATURE_SSE);
6603cab2bb3Spatrick   if ((EDX >> 26) & 1)
6613cab2bb3Spatrick     setFeature(FEATURE_SSE2);
6623cab2bb3Spatrick 
6633cab2bb3Spatrick   if ((ECX >> 0) & 1)
6643cab2bb3Spatrick     setFeature(FEATURE_SSE3);
6653cab2bb3Spatrick   if ((ECX >> 1) & 1)
6663cab2bb3Spatrick     setFeature(FEATURE_PCLMUL);
6673cab2bb3Spatrick   if ((ECX >> 9) & 1)
6683cab2bb3Spatrick     setFeature(FEATURE_SSSE3);
6693cab2bb3Spatrick   if ((ECX >> 12) & 1)
6703cab2bb3Spatrick     setFeature(FEATURE_FMA);
6713cab2bb3Spatrick   if ((ECX >> 19) & 1)
6723cab2bb3Spatrick     setFeature(FEATURE_SSE4_1);
6733cab2bb3Spatrick   if ((ECX >> 20) & 1)
6743cab2bb3Spatrick     setFeature(FEATURE_SSE4_2);
6753cab2bb3Spatrick   if ((ECX >> 23) & 1)
6763cab2bb3Spatrick     setFeature(FEATURE_POPCNT);
6773cab2bb3Spatrick   if ((ECX >> 25) & 1)
6783cab2bb3Spatrick     setFeature(FEATURE_AES);
6793cab2bb3Spatrick 
6803cab2bb3Spatrick   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
6813cab2bb3Spatrick   // indicates that the AVX registers will be saved and restored on context
6823cab2bb3Spatrick   // switch, then we have full AVX support.
6833cab2bb3Spatrick   const unsigned AVXBits = (1 << 27) | (1 << 28);
6843cab2bb3Spatrick   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
6853cab2bb3Spatrick                 ((EAX & 0x6) == 0x6);
6863cab2bb3Spatrick #if defined(__APPLE__)
6873cab2bb3Spatrick   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
6883cab2bb3Spatrick   // save the AVX512 context if we use AVX512 instructions, even the bit is not
6893cab2bb3Spatrick   // set right now.
6903cab2bb3Spatrick   bool HasAVX512Save = true;
6913cab2bb3Spatrick #else
6923cab2bb3Spatrick   // AVX512 requires additional context to be saved by the OS.
6933cab2bb3Spatrick   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
6943cab2bb3Spatrick #endif
6953cab2bb3Spatrick 
6963cab2bb3Spatrick   if (HasAVX)
6973cab2bb3Spatrick     setFeature(FEATURE_AVX);
6983cab2bb3Spatrick 
6993cab2bb3Spatrick   bool HasLeaf7 =
7003cab2bb3Spatrick       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
7013cab2bb3Spatrick 
7023cab2bb3Spatrick   if (HasLeaf7 && ((EBX >> 3) & 1))
7033cab2bb3Spatrick     setFeature(FEATURE_BMI);
7043cab2bb3Spatrick   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
7053cab2bb3Spatrick     setFeature(FEATURE_AVX2);
7063cab2bb3Spatrick   if (HasLeaf7 && ((EBX >> 8) & 1))
7073cab2bb3Spatrick     setFeature(FEATURE_BMI2);
7083cab2bb3Spatrick   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
7093cab2bb3Spatrick     setFeature(FEATURE_AVX512F);
7103cab2bb3Spatrick   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
7113cab2bb3Spatrick     setFeature(FEATURE_AVX512DQ);
7123cab2bb3Spatrick   if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
7133cab2bb3Spatrick     setFeature(FEATURE_AVX512IFMA);
7143cab2bb3Spatrick   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
7153cab2bb3Spatrick     setFeature(FEATURE_AVX512PF);
7163cab2bb3Spatrick   if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
7173cab2bb3Spatrick     setFeature(FEATURE_AVX512ER);
7183cab2bb3Spatrick   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
7193cab2bb3Spatrick     setFeature(FEATURE_AVX512CD);
7203cab2bb3Spatrick   if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
7213cab2bb3Spatrick     setFeature(FEATURE_AVX512BW);
7223cab2bb3Spatrick   if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
7233cab2bb3Spatrick     setFeature(FEATURE_AVX512VL);
7243cab2bb3Spatrick 
7253cab2bb3Spatrick   if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
7263cab2bb3Spatrick     setFeature(FEATURE_AVX512VBMI);
7273cab2bb3Spatrick   if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
7283cab2bb3Spatrick     setFeature(FEATURE_AVX512VBMI2);
7293cab2bb3Spatrick   if (HasLeaf7 && ((ECX >> 8) & 1))
7303cab2bb3Spatrick     setFeature(FEATURE_GFNI);
7313cab2bb3Spatrick   if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
7323cab2bb3Spatrick     setFeature(FEATURE_VPCLMULQDQ);
7333cab2bb3Spatrick   if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
7343cab2bb3Spatrick     setFeature(FEATURE_AVX512VNNI);
7353cab2bb3Spatrick   if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
7363cab2bb3Spatrick     setFeature(FEATURE_AVX512BITALG);
7373cab2bb3Spatrick   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
7383cab2bb3Spatrick     setFeature(FEATURE_AVX512VPOPCNTDQ);
7393cab2bb3Spatrick 
7403cab2bb3Spatrick   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
7413cab2bb3Spatrick     setFeature(FEATURE_AVX5124VNNIW);
7423cab2bb3Spatrick   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
7433cab2bb3Spatrick     setFeature(FEATURE_AVX5124FMAPS);
7441f9cb04fSpatrick   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
7451f9cb04fSpatrick     setFeature(FEATURE_AVX512VP2INTERSECT);
7463cab2bb3Spatrick 
7473cab2bb3Spatrick   bool HasLeaf7Subleaf1 =
7483cab2bb3Spatrick       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
7493cab2bb3Spatrick   if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
7503cab2bb3Spatrick     setFeature(FEATURE_AVX512BF16);
7513cab2bb3Spatrick 
7523cab2bb3Spatrick   unsigned MaxExtLevel;
7533cab2bb3Spatrick   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
7543cab2bb3Spatrick 
7553cab2bb3Spatrick   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
7563cab2bb3Spatrick                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
7573cab2bb3Spatrick   if (HasExtLeaf1 && ((ECX >> 6) & 1))
7583cab2bb3Spatrick     setFeature(FEATURE_SSE4_A);
7593cab2bb3Spatrick   if (HasExtLeaf1 && ((ECX >> 11) & 1))
7603cab2bb3Spatrick     setFeature(FEATURE_XOP);
7613cab2bb3Spatrick   if (HasExtLeaf1 && ((ECX >> 16) & 1))
7623cab2bb3Spatrick     setFeature(FEATURE_FMA4);
7633cab2bb3Spatrick #undef setFeature
7643cab2bb3Spatrick }
7653cab2bb3Spatrick 
7663cab2bb3Spatrick #ifndef _WIN32
7673cab2bb3Spatrick __attribute__((visibility("hidden")))
7683cab2bb3Spatrick #endif
7693cab2bb3Spatrick int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
7703cab2bb3Spatrick 
7713cab2bb3Spatrick #ifndef _WIN32
7723cab2bb3Spatrick __attribute__((visibility("hidden")))
7733cab2bb3Spatrick #endif
7743cab2bb3Spatrick struct __processor_model {
7753cab2bb3Spatrick   unsigned int __cpu_vendor;
7763cab2bb3Spatrick   unsigned int __cpu_type;
7773cab2bb3Spatrick   unsigned int __cpu_subtype;
7783cab2bb3Spatrick   unsigned int __cpu_features[1];
7793cab2bb3Spatrick } __cpu_model = {0, 0, 0, {0}};
7803cab2bb3Spatrick 
7813cab2bb3Spatrick #ifndef _WIN32
7823cab2bb3Spatrick __attribute__((visibility("hidden")))
7833cab2bb3Spatrick #endif
7841f9cb04fSpatrick unsigned int __cpu_features2 = 0;
7853cab2bb3Spatrick 
// A constructor function that sets __cpu_model and __cpu_features2 to
// the right values.  This needs to run only once.  This constructor is
// given the highest priority and it should run before constructors without
// the priority set.  However, it still runs after ifunc initializers and
// needs to be called explicitly there.
7913cab2bb3Spatrick 
__cpu_indicator_init(void)7923cab2bb3Spatrick int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
7933cab2bb3Spatrick   unsigned EAX, EBX, ECX, EDX;
7943cab2bb3Spatrick   unsigned MaxLeaf = 5;
7953cab2bb3Spatrick   unsigned Vendor;
7961f9cb04fSpatrick   unsigned Model, Family;
7971f9cb04fSpatrick   unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
7983cab2bb3Spatrick 
7993cab2bb3Spatrick   // This function needs to run just once.
8003cab2bb3Spatrick   if (__cpu_model.__cpu_vendor)
8013cab2bb3Spatrick     return 0;
8023cab2bb3Spatrick 
8031f9cb04fSpatrick   if (!isCpuIdSupported() ||
8041f9cb04fSpatrick       getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
8053cab2bb3Spatrick     __cpu_model.__cpu_vendor = VENDOR_OTHER;
8063cab2bb3Spatrick     return -1;
8073cab2bb3Spatrick   }
8081f9cb04fSpatrick 
8093cab2bb3Spatrick   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
8103cab2bb3Spatrick   detectX86FamilyModel(EAX, &Family, &Model);
8113cab2bb3Spatrick 
8123cab2bb3Spatrick   // Find available features.
8131f9cb04fSpatrick   getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
8141f9cb04fSpatrick 
8151f9cb04fSpatrick   assert((sizeof(Features)/sizeof(Features[0])) == 2);
8161f9cb04fSpatrick   __cpu_model.__cpu_features[0] = Features[0];
8171f9cb04fSpatrick   __cpu_features2 = Features[1];
8183cab2bb3Spatrick 
8193cab2bb3Spatrick   if (Vendor == SIG_INTEL) {
8203cab2bb3Spatrick     // Get CPU type.
8211f9cb04fSpatrick     getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
8221f9cb04fSpatrick                                     &(__cpu_model.__cpu_type),
8233cab2bb3Spatrick                                     &(__cpu_model.__cpu_subtype));
8243cab2bb3Spatrick     __cpu_model.__cpu_vendor = VENDOR_INTEL;
8253cab2bb3Spatrick   } else if (Vendor == SIG_AMD) {
8263cab2bb3Spatrick     // Get CPU type.
8271f9cb04fSpatrick     getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
8283cab2bb3Spatrick                                   &(__cpu_model.__cpu_type),
8293cab2bb3Spatrick                                   &(__cpu_model.__cpu_subtype));
8303cab2bb3Spatrick     __cpu_model.__cpu_vendor = VENDOR_AMD;
8313cab2bb3Spatrick   } else
8323cab2bb3Spatrick     __cpu_model.__cpu_vendor = VENDOR_OTHER;
8333cab2bb3Spatrick 
8343cab2bb3Spatrick   assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
8353cab2bb3Spatrick   assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
8363cab2bb3Spatrick   assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
8373cab2bb3Spatrick 
8383cab2bb3Spatrick   return 0;
8393cab2bb3Spatrick }
840d89ec533Spatrick #elif defined(__aarch64__)
841*810390e3Srobert 
// Fallback definitions of the auxiliary-vector keys and HWCAP/HWCAP2 bits
// used below, for build environments whose headers do not provide them.
// NOTE(review): <asm/hwcap.h> is included further down when it is available,
// i.e. after these definitions; the spellings here presumably mirror that
// header so a later redefinition is benign - keep them in sync.
#ifndef AT_HWCAP
#define AT_HWCAP 16
#endif
#ifndef HWCAP_CPUID
#define HWCAP_CPUID (1 << 11)
#endif
#ifndef HWCAP_FP
#define HWCAP_FP (1 << 0)
#endif
#ifndef HWCAP_ASIMD
#define HWCAP_ASIMD (1 << 1)
#endif
#ifndef HWCAP_AES
#define HWCAP_AES (1 << 3)
#endif
#ifndef HWCAP_PMULL
#define HWCAP_PMULL (1 << 4)
#endif
#ifndef HWCAP_SHA1
#define HWCAP_SHA1 (1 << 5)
#endif
#ifndef HWCAP_SHA2
#define HWCAP_SHA2 (1 << 6)
#endif
// HWCAP_CRC32 is tested by init_cpu_features_resolver(), so it needs a
// fallback like every other bit used there.
#ifndef HWCAP_CRC32
#define HWCAP_CRC32 (1 << 7)
#endif
#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1 << 8)
#endif
#ifndef HWCAP_FPHP
#define HWCAP_FPHP (1 << 9)
#endif
#ifndef HWCAP_ASIMDHP
#define HWCAP_ASIMDHP (1 << 10)
#endif
#ifndef HWCAP_ASIMDRDM
#define HWCAP_ASIMDRDM (1 << 12)
#endif
#ifndef HWCAP_JSCVT
#define HWCAP_JSCVT (1 << 13)
#endif
#ifndef HWCAP_FCMA
#define HWCAP_FCMA (1 << 14)
#endif
#ifndef HWCAP_LRCPC
#define HWCAP_LRCPC (1 << 15)
#endif
#ifndef HWCAP_DCPOP
#define HWCAP_DCPOP (1 << 16)
#endif
#ifndef HWCAP_SHA3
#define HWCAP_SHA3 (1 << 17)
#endif
#ifndef HWCAP_SM3
#define HWCAP_SM3 (1 << 18)
#endif
#ifndef HWCAP_SM4
#define HWCAP_SM4 (1 << 19)
#endif
#ifndef HWCAP_ASIMDDP
#define HWCAP_ASIMDDP (1 << 20)
#endif
#ifndef HWCAP_SHA512
#define HWCAP_SHA512 (1 << 21)
#endif
#ifndef HWCAP_SVE
#define HWCAP_SVE (1 << 22)
#endif
#ifndef HWCAP_ASIMDFHM
#define HWCAP_ASIMDFHM (1 << 23)
#endif
#ifndef HWCAP_DIT
#define HWCAP_DIT (1 << 24)
#endif
#ifndef HWCAP_ILRCPC
#define HWCAP_ILRCPC (1 << 26)
#endif
#ifndef HWCAP_FLAGM
#define HWCAP_FLAGM (1 << 27)
#endif
#ifndef HWCAP_SSBS
#define HWCAP_SSBS (1 << 28)
#endif
#ifndef HWCAP_SB
#define HWCAP_SB (1 << 29)
#endif

// Bits of the second capability word (AT_HWCAP2).
#ifndef AT_HWCAP2
#define AT_HWCAP2 26
#endif
#ifndef HWCAP2_DCPODP
#define HWCAP2_DCPODP (1 << 0)
#endif
#ifndef HWCAP2_SVE2
#define HWCAP2_SVE2 (1 << 1)
#endif
#ifndef HWCAP2_SVEAES
#define HWCAP2_SVEAES (1 << 2)
#endif
#ifndef HWCAP2_SVEPMULL
#define HWCAP2_SVEPMULL (1 << 3)
#endif
#ifndef HWCAP2_SVEBITPERM
#define HWCAP2_SVEBITPERM (1 << 4)
#endif
#ifndef HWCAP2_SVESHA3
#define HWCAP2_SVESHA3 (1 << 5)
#endif
#ifndef HWCAP2_SVESM4
#define HWCAP2_SVESM4 (1 << 6)
#endif
#ifndef HWCAP2_FLAGM2
#define HWCAP2_FLAGM2 (1 << 7)
#endif
#ifndef HWCAP2_FRINT
#define HWCAP2_FRINT (1 << 8)
#endif
#ifndef HWCAP2_SVEI8MM
#define HWCAP2_SVEI8MM (1 << 9)
#endif
#ifndef HWCAP2_SVEF32MM
#define HWCAP2_SVEF32MM (1 << 10)
#endif
#ifndef HWCAP2_SVEF64MM
#define HWCAP2_SVEF64MM (1 << 11)
#endif
#ifndef HWCAP2_SVEBF16
#define HWCAP2_SVEBF16 (1 << 12)
#endif
#ifndef HWCAP2_I8MM
#define HWCAP2_I8MM (1 << 13)
#endif
#ifndef HWCAP2_BF16
#define HWCAP2_BF16 (1 << 14)
#endif
#ifndef HWCAP2_DGH
#define HWCAP2_DGH (1 << 15)
#endif
#ifndef HWCAP2_RNG
#define HWCAP2_RNG (1 << 16)
#endif
#ifndef HWCAP2_BTI
#define HWCAP2_BTI (1 << 17)
#endif
#ifndef HWCAP2_MTE
#define HWCAP2_MTE (1 << 18)
#endif
#ifndef HWCAP2_RPRES
#define HWCAP2_RPRES (1 << 21)
#endif
#ifndef HWCAP2_MTE3
#define HWCAP2_MTE3 (1 << 22)
#endif
#ifndef HWCAP2_SME
#define HWCAP2_SME (1 << 23)
#endif
#ifndef HWCAP2_SME_I16I64
#define HWCAP2_SME_I16I64 (1 << 24)
#endif
#ifndef HWCAP2_SME_F64F64
#define HWCAP2_SME_F64F64 (1 << 25)
#endif
// Bits 31 and above do not fit in a positive int, hence the UL suffix.
#ifndef HWCAP2_WFXT
#define HWCAP2_WFXT (1UL << 31)
#endif
#ifndef HWCAP2_EBF16
#define HWCAP2_EBF16 (1UL << 32)
#endif
#ifndef HWCAP2_SVE_EBF16
#define HWCAP2_SVE_EBF16 (1UL << 33)
#endif
1011*810390e3Srobert 
// Flag consulted by the out-of-line atomics support: true when LSE atomics
// were detected. It is filled in at startup from the HWCAP word of the
// auxiliary vector (or the platform's equivalent query).
__attribute__((visibility("hidden"), nocommon)) _Bool
    __aarch64_have_lse_atomics;
1016*810390e3Srobert 
1017*810390e3Srobert #if defined(__has_include)
1018*810390e3Srobert #if __has_include(<sys/auxv.h>)
1019*810390e3Srobert #include <sys/auxv.h>
1020*810390e3Srobert #if __has_include(<asm/hwcap.h>)
1021*810390e3Srobert #include <asm/hwcap.h>
1022*810390e3Srobert 
1023*810390e3Srobert #if defined(__ANDROID__)
1024*810390e3Srobert #include <string.h>
1025*810390e3Srobert #include <sys/system_properties.h>
1026*810390e3Srobert #elif defined(__Fuchsia__)
1027*810390e3Srobert #include <zircon/features.h>
1028*810390e3Srobert #include <zircon/syscalls.h>
1029*810390e3Srobert #endif
1030*810390e3Srobert 
// Detect the Exynos 9810 CPU (used on Android only).
// This is a statement prefix, not an expression: it declares a local buffer
// named `arch`, reads the "ro.arch" system property into it and opens an `if`
// that is taken when the value starts with "exynos9810". The statement written
// right after the macro becomes the body of that `if`.
#define IF_EXYNOS9810                                                          \
  char arch[PROP_VALUE_MAX];                                                   \
  if (__system_property_get("ro.arch", arch) > 0 &&                            \
      !strncmp(arch, "exynos9810", sizeof("exynos9810") - 1))
1036*810390e3Srobert 
init_have_lse_atomics(void)1037d89ec533Spatrick static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
1038*810390e3Srobert #if defined(__FreeBSD__)
1039*810390e3Srobert   unsigned long hwcap;
1040*810390e3Srobert   int result = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
1041*810390e3Srobert   __aarch64_have_lse_atomics = result == 0 && (hwcap & HWCAP_ATOMICS) != 0;
1042*810390e3Srobert #elif defined(__Fuchsia__)
1043*810390e3Srobert   // This ensures the vDSO is a direct link-time dependency of anything that
1044*810390e3Srobert   // needs this initializer code.
1045*810390e3Srobert #pragma comment(lib, "zircon")
1046*810390e3Srobert   uint32_t features;
1047*810390e3Srobert   zx_status_t status = _zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
1048*810390e3Srobert   __aarch64_have_lse_atomics =
1049*810390e3Srobert       status == ZX_OK && (features & ZX_ARM64_FEATURE_ISA_ATOMICS) != 0;
1050*810390e3Srobert #else
1051d89ec533Spatrick   unsigned long hwcap = getauxval(AT_HWCAP);
1052*810390e3Srobert   _Bool result = (hwcap & HWCAP_ATOMICS) != 0;
1053*810390e3Srobert #if defined(__ANDROID__)
1054*810390e3Srobert   if (result) {
1055*810390e3Srobert     // Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0;
1056*810390e3Srobert     // only the former support LSE atomics.  However, the kernel in the
1057*810390e3Srobert     // initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly
1058*810390e3Srobert     // reported the feature as being supported.
1059*810390e3Srobert     //
1060*810390e3Srobert     // The kernel appears to have been corrected to mark it unsupported as of
1061*810390e3Srobert     // the Android 9.0 release on those devices, and this issue has not been
1062*810390e3Srobert     // observed anywhere else. Thus, this workaround may be removed if
1063*810390e3Srobert     // compiler-rt ever drops support for Android 8.0.
1064*810390e3Srobert     IF_EXYNOS9810 result = false;
1065d89ec533Spatrick   }
1066*810390e3Srobert #endif // defined(__ANDROID__)
1067*810390e3Srobert   __aarch64_have_lse_atomics = result;
1068*810390e3Srobert #endif // defined(__FreeBSD__)
1069*810390e3Srobert }
1070*810390e3Srobert 
1071*810390e3Srobert #if !defined(DISABLE_AARCH64_FMV)
// CPUFeatures must correspond to the same AArch64 features in
// AArch64TargetParser.h. Each enumerator is the bit position of that feature
// in __aarch64_cpu_features.features; the values are spelled out so that an
// accidental reordering is immediately visible.
enum CPUFeatures {
  FEAT_RNG = 0,
  FEAT_FLAGM = 1,
  FEAT_FLAGM2 = 2,
  FEAT_FP16FML = 3,
  FEAT_DOTPROD = 4,
  FEAT_SM4 = 5,
  FEAT_RDM = 6,
  FEAT_LSE = 7,
  FEAT_FP = 8,
  FEAT_SIMD = 9,
  FEAT_CRC = 10,
  FEAT_SHA1 = 11,
  FEAT_SHA2 = 12,
  FEAT_SHA3 = 13,
  FEAT_AES = 14,
  FEAT_PMULL = 15,
  FEAT_FP16 = 16,
  FEAT_DIT = 17,
  FEAT_DPB = 18,
  FEAT_DPB2 = 19,
  FEAT_JSCVT = 20,
  FEAT_FCMA = 21,
  FEAT_RCPC = 22,
  FEAT_RCPC2 = 23,
  FEAT_FRINTTS = 24,
  FEAT_DGH = 25,
  FEAT_I8MM = 26,
  FEAT_BF16 = 27,
  FEAT_EBF16 = 28,
  FEAT_RPRES = 29,
  FEAT_SVE = 30,
  FEAT_SVE_BF16 = 31,
  FEAT_SVE_EBF16 = 32,
  FEAT_SVE_I8MM = 33,
  FEAT_SVE_F32MM = 34,
  FEAT_SVE_F64MM = 35,
  FEAT_SVE2 = 36,
  FEAT_SVE_AES = 37,
  FEAT_SVE_PMULL128 = 38,
  FEAT_SVE_BITPERM = 39,
  FEAT_SVE_SHA3 = 40,
  FEAT_SVE_SM4 = 41,
  FEAT_SME = 42,
  FEAT_MEMTAG = 43,
  FEAT_MEMTAG2 = 44,
  FEAT_MEMTAG3 = 45,
  FEAT_SB = 46,
  FEAT_PREDRES = 47,
  FEAT_SSBS = 48,
  FEAT_SSBS2 = 49,
  FEAT_BTI = 50,
  FEAT_LS64 = 51,
  FEAT_LS64_V = 52,
  FEAT_LS64_ACCDATA = 53,
  FEAT_WFXT = 54,
  FEAT_SME_F64 = 55,
  FEAT_SME_I64 = 56,
  FEAT_SME2 = 57,
  // Number of features; its bit doubles as the "already initialized" marker.
  FEAT_MAX = 58
};
1135*810390e3Srobert 
// Architecture features used in Function Multi Versioning.
// Bit N of `features` is set when the CPUFeatures enumerator with value N was
// detected.
__attribute__((visibility("hidden"), nocommon)) struct {
  unsigned long long features;
  // As the feature list grows, new fields could be added here.
} __aarch64_cpu_features;
1142*810390e3Srobert 
init_cpu_features_resolver(unsigned long hwcap,unsigned long hwcap2)1143*810390e3Srobert void init_cpu_features_resolver(unsigned long hwcap, unsigned long hwcap2) {
1144*810390e3Srobert #define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
1145*810390e3Srobert #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
1146*810390e3Srobert #define extractBits(val, start, number)                                        \
1147*810390e3Srobert   (val & ((1ULL << number) - 1ULL) << start) >> start
1148*810390e3Srobert   if (hwcap & HWCAP_CRC32)
1149*810390e3Srobert     setCPUFeature(FEAT_CRC);
1150*810390e3Srobert   if (hwcap & HWCAP_PMULL)
1151*810390e3Srobert     setCPUFeature(FEAT_PMULL);
1152*810390e3Srobert   if (hwcap & HWCAP_FLAGM)
1153*810390e3Srobert     setCPUFeature(FEAT_FLAGM);
1154*810390e3Srobert   if (hwcap2 & HWCAP2_FLAGM2) {
1155*810390e3Srobert     setCPUFeature(FEAT_FLAGM);
1156*810390e3Srobert     setCPUFeature(FEAT_FLAGM2);
1157*810390e3Srobert   }
1158*810390e3Srobert   if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
1159*810390e3Srobert     setCPUFeature(FEAT_SM4);
1160*810390e3Srobert   if (hwcap & HWCAP_ASIMDDP)
1161*810390e3Srobert     setCPUFeature(FEAT_DOTPROD);
1162*810390e3Srobert   if (hwcap & HWCAP_ASIMDFHM)
1163*810390e3Srobert     setCPUFeature(FEAT_FP16FML);
1164*810390e3Srobert   if (hwcap & HWCAP_FPHP) {
1165*810390e3Srobert     setCPUFeature(FEAT_FP16);
1166*810390e3Srobert     setCPUFeature(FEAT_FP);
1167*810390e3Srobert   }
1168*810390e3Srobert   if (hwcap & HWCAP_DIT)
1169*810390e3Srobert     setCPUFeature(FEAT_DIT);
1170*810390e3Srobert   if (hwcap & HWCAP_ASIMDRDM)
1171*810390e3Srobert     setCPUFeature(FEAT_RDM);
1172*810390e3Srobert   if (hwcap & HWCAP_ILRCPC)
1173*810390e3Srobert     setCPUFeature(FEAT_RCPC2);
1174*810390e3Srobert   if (hwcap & HWCAP_AES)
1175*810390e3Srobert     setCPUFeature(FEAT_AES);
1176*810390e3Srobert   if (hwcap & HWCAP_SHA1)
1177*810390e3Srobert     setCPUFeature(FEAT_SHA1);
1178*810390e3Srobert   if (hwcap & HWCAP_SHA2)
1179*810390e3Srobert     setCPUFeature(FEAT_SHA2);
1180*810390e3Srobert   if (hwcap & HWCAP_JSCVT)
1181*810390e3Srobert     setCPUFeature(FEAT_JSCVT);
1182*810390e3Srobert   if (hwcap & HWCAP_FCMA)
1183*810390e3Srobert     setCPUFeature(FEAT_FCMA);
1184*810390e3Srobert   if (hwcap & HWCAP_SB)
1185*810390e3Srobert     setCPUFeature(FEAT_SB);
1186*810390e3Srobert   if (hwcap & HWCAP_SSBS)
1187*810390e3Srobert     setCPUFeature(FEAT_SSBS2);
1188*810390e3Srobert   if (hwcap2 & HWCAP2_MTE) {
1189*810390e3Srobert     setCPUFeature(FEAT_MEMTAG);
1190*810390e3Srobert     setCPUFeature(FEAT_MEMTAG2);
1191*810390e3Srobert   }
1192*810390e3Srobert   if (hwcap2 & HWCAP2_MTE3) {
1193*810390e3Srobert     setCPUFeature(FEAT_MEMTAG);
1194*810390e3Srobert     setCPUFeature(FEAT_MEMTAG2);
1195*810390e3Srobert     setCPUFeature(FEAT_MEMTAG3);
1196*810390e3Srobert   }
1197*810390e3Srobert   if (hwcap2 & HWCAP2_SVEAES)
1198*810390e3Srobert     setCPUFeature(FEAT_SVE_AES);
1199*810390e3Srobert   if (hwcap2 & HWCAP2_SVEPMULL) {
1200*810390e3Srobert     setCPUFeature(FEAT_SVE_AES);
1201*810390e3Srobert     setCPUFeature(FEAT_SVE_PMULL128);
1202*810390e3Srobert   }
1203*810390e3Srobert   if (hwcap2 & HWCAP2_SVEBITPERM)
1204*810390e3Srobert     setCPUFeature(FEAT_SVE_BITPERM);
1205*810390e3Srobert   if (hwcap2 & HWCAP2_SVESHA3)
1206*810390e3Srobert     setCPUFeature(FEAT_SVE_SHA3);
1207*810390e3Srobert   if (hwcap2 & HWCAP2_SVESM4)
1208*810390e3Srobert     setCPUFeature(FEAT_SVE_SM4);
1209*810390e3Srobert   if (hwcap2 & HWCAP2_DCPODP)
1210*810390e3Srobert     setCPUFeature(FEAT_DPB2);
1211*810390e3Srobert   if (hwcap & HWCAP_ATOMICS)
1212*810390e3Srobert     setCPUFeature(FEAT_LSE);
1213*810390e3Srobert   if (hwcap2 & HWCAP2_RNG)
1214*810390e3Srobert     setCPUFeature(FEAT_RNG);
1215*810390e3Srobert   if (hwcap2 & HWCAP2_I8MM)
1216*810390e3Srobert     setCPUFeature(FEAT_I8MM);
1217*810390e3Srobert   if (hwcap2 & HWCAP2_EBF16)
1218*810390e3Srobert     setCPUFeature(FEAT_EBF16);
1219*810390e3Srobert   if (hwcap2 & HWCAP2_SVE_EBF16)
1220*810390e3Srobert     setCPUFeature(FEAT_SVE_EBF16);
1221*810390e3Srobert   if (hwcap2 & HWCAP2_DGH)
1222*810390e3Srobert     setCPUFeature(FEAT_DGH);
1223*810390e3Srobert   if (hwcap2 & HWCAP2_FRINT)
1224*810390e3Srobert     setCPUFeature(FEAT_FRINTTS);
1225*810390e3Srobert   if (hwcap2 & HWCAP2_SVEI8MM)
1226*810390e3Srobert     setCPUFeature(FEAT_SVE_I8MM);
1227*810390e3Srobert   if (hwcap2 & HWCAP2_SVEF32MM)
1228*810390e3Srobert     setCPUFeature(FEAT_SVE_F32MM);
1229*810390e3Srobert   if (hwcap2 & HWCAP2_SVEF64MM)
1230*810390e3Srobert     setCPUFeature(FEAT_SVE_F64MM);
1231*810390e3Srobert   if (hwcap2 & HWCAP2_BTI)
1232*810390e3Srobert     setCPUFeature(FEAT_BTI);
1233*810390e3Srobert   if (hwcap2 & HWCAP2_RPRES)
1234*810390e3Srobert     setCPUFeature(FEAT_RPRES);
1235*810390e3Srobert   if (hwcap2 & HWCAP2_WFXT)
1236*810390e3Srobert     setCPUFeature(FEAT_WFXT);
1237*810390e3Srobert   if (hwcap2 & HWCAP2_SME)
1238*810390e3Srobert     setCPUFeature(FEAT_SME);
1239*810390e3Srobert   if (hwcap2 & HWCAP2_SME_I16I64)
1240*810390e3Srobert     setCPUFeature(FEAT_SME_I64);
1241*810390e3Srobert   if (hwcap2 & HWCAP2_SME_F64F64)
1242*810390e3Srobert     setCPUFeature(FEAT_SME_F64);
1243*810390e3Srobert   if (hwcap & HWCAP_CPUID) {
1244*810390e3Srobert     unsigned long ftr;
1245*810390e3Srobert     getCPUFeature(ID_AA64PFR1_EL1, ftr);
1246*810390e3Srobert     // ID_AA64PFR1_EL1.MTE >= 0b0001
1247*810390e3Srobert     if (extractBits(ftr, 8, 4) >= 0x1)
1248*810390e3Srobert       setCPUFeature(FEAT_MEMTAG);
1249*810390e3Srobert     // ID_AA64PFR1_EL1.SSBS == 0b0001
1250*810390e3Srobert     if (extractBits(ftr, 4, 4) == 0x1)
1251*810390e3Srobert       setCPUFeature(FEAT_SSBS);
1252*810390e3Srobert     // ID_AA64PFR1_EL1.SME == 0b0010
1253*810390e3Srobert     if (extractBits(ftr, 24, 4) == 0x2)
1254*810390e3Srobert       setCPUFeature(FEAT_SME2);
1255*810390e3Srobert     getCPUFeature(ID_AA64PFR0_EL1, ftr);
1256*810390e3Srobert     // ID_AA64PFR0_EL1.FP != 0b1111
1257*810390e3Srobert     if (extractBits(ftr, 16, 4) != 0xF) {
1258*810390e3Srobert       setCPUFeature(FEAT_FP);
1259*810390e3Srobert       // ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP
1260*810390e3Srobert       setCPUFeature(FEAT_SIMD);
1261*810390e3Srobert     }
1262*810390e3Srobert     // ID_AA64PFR0_EL1.SVE != 0b0000
1263*810390e3Srobert     if (extractBits(ftr, 32, 4) != 0x0) {
1264*810390e3Srobert       // get ID_AA64ZFR0_EL1, that name supported
1265*810390e3Srobert       // if sve enabled only
1266*810390e3Srobert       getCPUFeature(S3_0_C0_C4_4, ftr);
1267*810390e3Srobert       // ID_AA64ZFR0_EL1.SVEver == 0b0000
1268*810390e3Srobert       if (extractBits(ftr, 0, 4) == 0x0)
1269*810390e3Srobert         setCPUFeature(FEAT_SVE);
1270*810390e3Srobert       // ID_AA64ZFR0_EL1.SVEver == 0b0001
1271*810390e3Srobert       if (extractBits(ftr, 0, 4) == 0x1)
1272*810390e3Srobert         setCPUFeature(FEAT_SVE2);
1273*810390e3Srobert       // ID_AA64ZFR0_EL1.BF16 != 0b0000
1274*810390e3Srobert       if (extractBits(ftr, 20, 4) != 0x0)
1275*810390e3Srobert         setCPUFeature(FEAT_SVE_BF16);
1276*810390e3Srobert     }
1277*810390e3Srobert     getCPUFeature(ID_AA64ISAR0_EL1, ftr);
1278*810390e3Srobert     // ID_AA64ISAR0_EL1.SHA3 != 0b0000
1279*810390e3Srobert     if (extractBits(ftr, 32, 4) != 0x0)
1280*810390e3Srobert       setCPUFeature(FEAT_SHA3);
1281*810390e3Srobert     getCPUFeature(ID_AA64ISAR1_EL1, ftr);
1282*810390e3Srobert     // ID_AA64ISAR1_EL1.DPB >= 0b0001
1283*810390e3Srobert     if (extractBits(ftr, 0, 4) >= 0x1)
1284*810390e3Srobert       setCPUFeature(FEAT_DPB);
1285*810390e3Srobert     // ID_AA64ISAR1_EL1.LRCPC != 0b0000
1286*810390e3Srobert     if (extractBits(ftr, 20, 4) != 0x0)
1287*810390e3Srobert       setCPUFeature(FEAT_RCPC);
1288*810390e3Srobert     // ID_AA64ISAR1_EL1.SPECRES == 0b0001
1289*810390e3Srobert     if (extractBits(ftr, 40, 4) == 0x2)
1290*810390e3Srobert       setCPUFeature(FEAT_PREDRES);
1291*810390e3Srobert     // ID_AA64ISAR1_EL1.BF16 != 0b0000
1292*810390e3Srobert     if (extractBits(ftr, 44, 4) != 0x0)
1293*810390e3Srobert       setCPUFeature(FEAT_BF16);
1294*810390e3Srobert     // ID_AA64ISAR1_EL1.LS64 >= 0b0001
1295*810390e3Srobert     if (extractBits(ftr, 60, 4) >= 0x1)
1296*810390e3Srobert       setCPUFeature(FEAT_LS64);
1297*810390e3Srobert     // ID_AA64ISAR1_EL1.LS64 >= 0b0010
1298*810390e3Srobert     if (extractBits(ftr, 60, 4) >= 0x2)
1299*810390e3Srobert       setCPUFeature(FEAT_LS64_V);
1300*810390e3Srobert     // ID_AA64ISAR1_EL1.LS64 >= 0b0011
1301*810390e3Srobert     if (extractBits(ftr, 60, 4) >= 0x3)
1302*810390e3Srobert       setCPUFeature(FEAT_LS64_ACCDATA);
1303*810390e3Srobert   } else {
1304*810390e3Srobert     // Set some features in case of no CPUID support
1305*810390e3Srobert     if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
1306*810390e3Srobert       setCPUFeature(FEAT_FP);
1307*810390e3Srobert       // FP and AdvSIMD fields have the same value
1308*810390e3Srobert       setCPUFeature(FEAT_SIMD);
1309*810390e3Srobert     }
1310*810390e3Srobert     if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
1311*810390e3Srobert       setCPUFeature(FEAT_DPB);
1312*810390e3Srobert     if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
1313*810390e3Srobert       setCPUFeature(FEAT_RCPC);
1314*810390e3Srobert     if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
1315*810390e3Srobert       setCPUFeature(FEAT_BF16);
1316*810390e3Srobert     if (hwcap2 & HWCAP2_SVEBF16)
1317*810390e3Srobert       setCPUFeature(FEAT_SVE_BF16);
1318*810390e3Srobert     if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE)
1319*810390e3Srobert       setCPUFeature(FEAT_SVE2);
1320*810390e3Srobert     if (hwcap & HWCAP_SHA3)
1321*810390e3Srobert       setCPUFeature(FEAT_SHA3);
1322*810390e3Srobert   }
1323*810390e3Srobert }
1324*810390e3Srobert 
init_cpu_features(void)1325*810390e3Srobert void CONSTRUCTOR_ATTRIBUTE init_cpu_features(void) {
1326*810390e3Srobert   unsigned long hwcap;
1327*810390e3Srobert   unsigned long hwcap2;
1328*810390e3Srobert   // CPU features already initialized.
1329*810390e3Srobert   if (__aarch64_cpu_features.features)
1330*810390e3Srobert     return;
1331*810390e3Srobert   setCPUFeature(FEAT_MAX);
1332*810390e3Srobert #if defined(__FreeBSD__)
1333*810390e3Srobert   int res = 0;
1334*810390e3Srobert   res = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
1335*810390e3Srobert   res |= elf_aux_info(AT_HWCAP2, &hwcap2, sizeof hwcap2);
1336*810390e3Srobert   if (res)
1337*810390e3Srobert     return;
1338*810390e3Srobert #else
1339*810390e3Srobert #if defined(__ANDROID__)
1340*810390e3Srobert   // Don't set any CPU features,
1341*810390e3Srobert   // detection could be wrong on Exynos 9810.
1342*810390e3Srobert   IF_EXYNOS9810 return;
1343*810390e3Srobert #endif // defined(__ANDROID__)
1344*810390e3Srobert   hwcap = getauxval(AT_HWCAP);
1345*810390e3Srobert   hwcap2 = getauxval(AT_HWCAP2);
1346*810390e3Srobert #endif // defined(__FreeBSD__)
1347*810390e3Srobert   init_cpu_features_resolver(hwcap, hwcap2);
1348*810390e3Srobert #undef extractBits
1349*810390e3Srobert #undef getCPUFeature
1350*810390e3Srobert #undef setCPUFeature
1351*810390e3Srobert #undef IF_EXYNOS9810
1352*810390e3Srobert }
1353*810390e3Srobert #endif // !defined(DISABLE_AARCH64_FMV)
1354d89ec533Spatrick #endif // defined(__has_include)
1355d89ec533Spatrick #endif // __has_include(<sys/auxv.h>)
1356*810390e3Srobert #endif // __has_include(<asm/hwcap.h>)
1357d89ec533Spatrick #endif // defined(__aarch64__)
1358