//===-- cpu_model.c - Support for __cpu_model builtin  -----------*- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is based on LLVM's lib/Support/Host.cpp.
// It implements the operating system Host concept and builtin
// __cpu_model for the compiler_rt library for x86 and
// __aarch64_have_lse_atomics for AArch64.
//
//===----------------------------------------------------------------------===//

// Define a fallback for __has_attribute before its first use below, so that
// compilers without the extension can still preprocess this file.
#ifndef __has_attribute
#define __has_attribute(attr) 0
#endif

#if defined(HAVE_INIT_PRIORITY)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif

#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||          \
     defined(_M_X64)) &&                                                      \
    (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))

#include <assert.h>

#define bool int
#define true 1
#define false 0

#ifdef _MSC_VER
#include <intrin.h>
#endif

enum VendorSignatures {
  SIG_INTEL = 0x756e6547, // Genu
  SIG_AMD = 0x68747541,   // Auth
};

enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD,
  VENDOR_OTHER,
  VENDOR_MAX
};

enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2,
  INTEL_COREI7,
  AMDFAM10H,
  AMDFAM15H,
  INTEL_SILVERMONT,
  INTEL_KNL,
  AMD_BTVER1,
  AMD_BTVER2,
  AMDFAM17H,
  INTEL_KNM,
  INTEL_GOLDMONT,
  INTEL_GOLDMONT_PLUS,
  INTEL_TREMONT,
  AMDFAM19H,
  CPU_TYPE_MAX
};

enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE,
  INTEL_COREI7_SANDYBRIDGE,
  AMDFAM10H_BARCELONA,
  AMDFAM10H_SHANGHAI,
  AMDFAM10H_ISTANBUL,
  AMDFAM15H_BDVER1,
  AMDFAM15H_BDVER2,
  AMDFAM15H_BDVER3,
  AMDFAM15H_BDVER4,
  AMDFAM17H_ZNVER1,
  INTEL_COREI7_IVYBRIDGE,
  INTEL_COREI7_HASWELL,
  INTEL_COREI7_BROADWELL,
  INTEL_COREI7_SKYLAKE,
  INTEL_COREI7_SKYLAKE_AVX512,
  INTEL_COREI7_CANNONLAKE,
  INTEL_COREI7_ICELAKE_CLIENT,
  INTEL_COREI7_ICELAKE_SERVER,
  AMDFAM17H_ZNVER2,
  INTEL_COREI7_CASCADELAKE,
  INTEL_COREI7_TIGERLAKE,
  INTEL_COREI7_COOPERLAKE,
  INTEL_COREI7_SAPPHIRERAPIDS,
  INTEL_COREI7_ALDERLAKE,
  AMDFAM19H_ZNVER3,
  INTEL_COREI7_ROCKETLAKE,
  CPU_SUBTYPE_MAX
};

enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX,
  FEATURE_POPCNT,
  FEATURE_SSE,
  FEATURE_SSE2,
  FEATURE_SSE3,
  FEATURE_SSSE3,
  FEATURE_SSE4_1,
  FEATURE_SSE4_2,
  FEATURE_AVX,
  FEATURE_AVX2,
  FEATURE_SSE4_A,
  FEATURE_FMA4,
  FEATURE_XOP,
  FEATURE_FMA,
  FEATURE_AVX512F,
  FEATURE_BMI,
  FEATURE_BMI2,
  FEATURE_AES,
  FEATURE_PCLMUL,
  FEATURE_AVX512VL,
  FEATURE_AVX512BW,
  FEATURE_AVX512DQ,
  FEATURE_AVX512CD,
  FEATURE_AVX512ER,
  FEATURE_AVX512PF,
  FEATURE_AVX512VBMI,
  FEATURE_AVX512IFMA,
  FEATURE_AVX5124VNNIW,
  FEATURE_AVX5124FMAPS,
  FEATURE_AVX512VPOPCNTDQ,
  FEATURE_AVX512VBMI2,
  FEATURE_GFNI,
  FEATURE_VPCLMULQDQ,
  FEATURE_AVX512VNNI,
  FEATURE_AVX512BITALG,
  FEATURE_AVX512BF16,
  FEATURE_AVX512VP2INTERSECT,
  CPU_FEATURE_MAX
};
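
// Each ProcessorFeatures value F above is recorded as bit (F % 32) of 32-bit
// word (F / 32) in the feature bitset built by getAvailableFeatures() below.
// Word 0 is published in __cpu_model.__cpu_features[0] and word 1 in
// __cpu_features2; for example, FEATURE_AVX512F (15) lands in word 0, bit 15,
// while FEATURE_AVX512BF16 (36) lands in word 1, bit 4.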

// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
static bool isCpuIdSupported() {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
  int __cpuid_supported;
  __asm__(" pushfl\n"
          " popl %%eax\n"
          " movl %%eax,%%ecx\n"
          " xorl $0x00200000,%%eax\n"
          " pushl %%eax\n"
          " popfl\n"
          " pushfl\n"
          " popl %%eax\n"
          " movl $0,%0\n"
          " cmpl %%eax,%%ecx\n"
          " je 1f\n"
          " movl $1,%0\n"
          "1:"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  if (!__cpuid_supported)
    return false;
#endif
  return true;
#endif
  return true;
}

// This code is copied from lib/Support/Host.cpp.
// Changes to either file should be mirrored in the other.

/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments. If we can't run cpuid on the host, return true.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}

/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
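
// Illustrative sketch (not compiled into this file): how the helpers above are
// typically used. Leaf 0 reports the maximum supported leaf in EAX and the
// vendor signature in EBX/EDX/ECX; __cpu_indicator_init below compares the EBX
// word against SIG_INTEL ("Genu") and SIG_AMD ("Auth"). The function name here
// is hypothetical.
#if 0
static int exampleIdentifyVendor(void) {
  unsigned MaxLeaf, VendorEBX, VendorECX, VendorEDX;
  // Bail out if CPUID itself is unavailable or leaf 0 cannot be read.
  if (!isCpuIdSupported() ||
      getX86CpuIDAndInfo(0, &MaxLeaf, &VendorEBX, &VendorECX, &VendorEDX))
    return VENDOR_OTHER;
  if (VendorEBX == SIG_INTEL)
    return VENDOR_INTEL;
  if (VendorEBX == SIG_AMD)
    return VENDOR_AMD;
  return VENDOR_OTHER;
}
#endif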

// Read control register 0 (XCR0). Used to detect features such as AVX.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}

static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7
  if (*Family == 6 || *Family == 0xf) {
    if (*Family == 0xf)
      // Examine extended family ID if family ID is F.
      *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.
    *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
  }
}
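
// Worked example (illustrative values): a leaf-1 EAX of 0x000806EA decodes to
// Family = 6 and Model = 0xE + (0x8 << 4) = 0x8E, because the extended model
// bits are folded in for family 6. An EAX of 0x00800F11 decodes to
// Family = 0xF + 0x8 = 0x17 and Model = 0x01.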

static const char *
getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                                const unsigned *Features,
                                unsigned *Type, unsigned *Subtype) {
#define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0

  // We select CPU strings to match the code in Host.cpp, but we don't use them
  // in compiler-rt.
  const char *CPU = 0;

  switch (Family) {
  case 6:
    switch (Model) {
    case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
               // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
               // mobile processor, Intel Core 2 Extreme processor, Intel
               // Pentium Dual-Core processor, Intel Xeon processor, model
               // 0Fh. All processors are manufactured using the 65 nm process.
    case 0x16: // Intel Celeron processor model 16h. All processors are
               // manufactured using the 65 nm process
      CPU = "core2";
      *Type = INTEL_CORE2;
      break;
    case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
               // 17h. All processors are manufactured using the 45 nm process.
               //
               // 45nm: Penryn , Wolfdale, Yorkfield (XE)
    case 0x1d: // Intel Xeon processor MP. All processors are manufactured
               // using the 45 nm process.
      CPU = "penryn";
      *Type = INTEL_CORE2;
      break;
    case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
               // processors are manufactured using the 45 nm process.
    case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
               // As found in a Summer 2010 model iMac.
    case 0x1f:
    case 0x2e: // Nehalem EX
      CPU = "nehalem";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_NEHALEM;
      break;
    case 0x25: // Intel Core i7, laptop version.
    case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
               // processors are manufactured using the 32 nm process.
    case 0x2f: // Westmere EX
      CPU = "westmere";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_WESTMERE;
      break;
    case 0x2a: // Intel Core i7 processor. All processors are manufactured
               // using the 32 nm process.
    case 0x2d:
      CPU = "sandybridge";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_SANDYBRIDGE;
      break;
    case 0x3a:
    case 0x3e: // Ivy Bridge EP
      CPU = "ivybridge";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_IVYBRIDGE;
      break;

    // Haswell:
    case 0x3c:
    case 0x3f:
    case 0x45:
    case 0x46:
      CPU = "haswell";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_HASWELL;
      break;

    // Broadwell:
    case 0x3d:
    case 0x47:
    case 0x4f:
    case 0x56:
      CPU = "broadwell";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_BROADWELL;
      break;

    // Skylake:
    case 0x4e: // Skylake mobile
    case 0x5e: // Skylake desktop
    case 0x8e: // Kaby Lake mobile
    case 0x9e: // Kaby Lake desktop
    case 0xa5: // Comet Lake-H/S
    case 0xa6: // Comet Lake-U
      CPU = "skylake";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_SKYLAKE;
      break;

    // Rocketlake:
    case 0xa7:
      CPU = "rocketlake";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_ROCKETLAKE;
      break;

    // Skylake Xeon:
    case 0x55:
      *Type = INTEL_COREI7;
      if (testFeature(FEATURE_AVX512BF16)) {
        CPU = "cooperlake";
        *Subtype = INTEL_COREI7_COOPERLAKE;
      } else if (testFeature(FEATURE_AVX512VNNI)) {
        CPU = "cascadelake";
        *Subtype = INTEL_COREI7_CASCADELAKE;
      } else {
        CPU = "skylake-avx512";
        *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
      }
      break;

    // Cannonlake:
    case 0x66:
      CPU = "cannonlake";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_CANNONLAKE;
      break;

    // Icelake:
    case 0x7d:
    case 0x7e:
      CPU = "icelake-client";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
      break;

    // Icelake Xeon:
    case 0x6a:
    case 0x6c:
      CPU = "icelake-server";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_ICELAKE_SERVER;
      break;

    // Sapphire Rapids:
    case 0x8f:
      CPU = "sapphirerapids";
      *Type = INTEL_COREI7;
      *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
      break;

    case 0x1c: // Most 45 nm Intel Atom processors
    case 0x26: // 45 nm Atom Lincroft
    case 0x27: // 32 nm Atom Medfield
    case 0x35: // 32 nm Atom Midview
    case 0x36: // 32 nm Atom Midview
      CPU = "bonnell";
      *Type = INTEL_BONNELL;
      break;

    // Atom Silvermont codes from the Intel software optimization guide.
    case 0x37:
    case 0x4a:
    case 0x4d:
    case 0x5a:
    case 0x5d:
    case 0x4c: // really airmont
      CPU = "silvermont";
      *Type = INTEL_SILVERMONT;
      break;
    // Goldmont:
    case 0x5c: // Apollo Lake
    case 0x5f: // Denverton
      CPU = "goldmont";
      *Type = INTEL_GOLDMONT;
      break; // "goldmont"
    case 0x7a:
      CPU = "goldmont-plus";
      *Type = INTEL_GOLDMONT_PLUS;
      break;
    case 0x86:
      CPU = "tremont";
      *Type = INTEL_TREMONT;
      break;

    case 0x57:
      CPU = "knl";
      *Type = INTEL_KNL;
      break;

    case 0x85:
      CPU = "knm";
      *Type = INTEL_KNM;
      break;

    default: // Unknown family 6 CPU.
      break;
    }
    break;
  default:
    break; // Unknown.
  }

  return CPU;
}

static const char *
getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                              const unsigned *Features,
                              unsigned *Type, unsigned *Subtype) {
  // We select CPU strings to match the code in Host.cpp, but we don't use them
  // in compiler-rt.
  const char *CPU = 0;

  switch (Family) {
  case 16:
    CPU = "amdfam10";
    *Type = AMDFAM10H;
    switch (Model) {
    case 2:
      *Subtype = AMDFAM10H_BARCELONA;
      break;
    case 4:
      *Subtype = AMDFAM10H_SHANGHAI;
      break;
    case 8:
      *Subtype = AMDFAM10H_ISTANBUL;
      break;
    }
    break;
  case 20:
    CPU = "btver1";
    *Type = AMD_BTVER1;
    break;
  case 21:
    CPU = "bdver1";
    *Type = AMDFAM15H;
    if (Model >= 0x60 && Model <= 0x7f) {
      CPU = "bdver4";
      *Subtype = AMDFAM15H_BDVER4;
      break; // 60h-7Fh: Excavator
    }
    if (Model >= 0x30 && Model <= 0x3f) {
      CPU = "bdver3";
      *Subtype = AMDFAM15H_BDVER3;
      break; // 30h-3Fh: Steamroller
    }
    if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
      CPU = "bdver2";
      *Subtype = AMDFAM15H_BDVER2;
      break; // 02h, 10h-1Fh: Piledriver
    }
    if (Model <= 0x0f) {
      *Subtype = AMDFAM15H_BDVER1;
      break; // 00h-0Fh: Bulldozer
    }
    break;
  case 22:
    CPU = "btver2";
    *Type = AMD_BTVER2;
    break;
  case 23:
    CPU = "znver1";
    *Type = AMDFAM17H;
    if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
      CPU = "znver2";
      *Subtype = AMDFAM17H_ZNVER2;
      break; // 30h-3fh, 71h: Zen2
    }
    if (Model <= 0x0f) {
      *Subtype = AMDFAM17H_ZNVER1;
      break; // 00h-0Fh: Zen1
    }
    break;
  case 25:
    CPU = "znver3";
    *Type = AMDFAM19H;
    if (Model <= 0x0f) {
      *Subtype = AMDFAM19H_ZNVER3;
      break; // 00h-0Fh: Zen3
    }
    break;
  default:
    break; // Unknown AMD CPU.
  }

  return CPU;
}

static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
                                 unsigned *Features) {
  unsigned EAX, EBX;

#define setFeature(F) Features[F / 32] |= 1U << (F % 32)

  if ((EDX >> 15) & 1)
    setFeature(FEATURE_CMOV);
  if ((EDX >> 23) & 1)
    setFeature(FEATURE_MMX);
  if ((EDX >> 25) & 1)
    setFeature(FEATURE_SSE);
  if ((EDX >> 26) & 1)
    setFeature(FEATURE_SSE2);

  if ((ECX >> 0) & 1)
    setFeature(FEATURE_SSE3);
  if ((ECX >> 1) & 1)
    setFeature(FEATURE_PCLMUL);
  if ((ECX >> 9) & 1)
    setFeature(FEATURE_SSSE3);
  if ((ECX >> 12) & 1)
    setFeature(FEATURE_FMA);
  if ((ECX >> 19) & 1)
    setFeature(FEATURE_SSE4_1);
  if ((ECX >> 20) & 1)
    setFeature(FEATURE_SSE4_2);
  if ((ECX >> 23) & 1)
    setFeature(FEATURE_POPCNT);
  if ((ECX >> 25) & 1)
    setFeature(FEATURE_AES);

  // If CPUID indicates support for XSAVE, XRSTOR and AVX, and XGETBV indicates
  // that the AVX registers will be saved and restored on context switch, then
  // we have full AVX support.
  const unsigned AVXBits = (1 << 27) | (1 << 28);
  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
                ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS
  // will save the AVX512 context if we use AVX512 instructions, even if the
  // bit is not set right now.
  bool HasAVX512Save = true;
#else
  // AVX512 requires additional context to be saved by the OS.
  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
#endif

  if (HasAVX)
    setFeature(FEATURE_AVX);

  bool HasLeaf7 =
      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  if (HasLeaf7 && ((EBX >> 3) & 1))
    setFeature(FEATURE_BMI);
  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
    setFeature(FEATURE_AVX2);
  if (HasLeaf7 && ((EBX >> 8) & 1))
    setFeature(FEATURE_BMI2);
  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512F);
  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512DQ);
  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512IFMA);
  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512PF);
  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512ER);
  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512CD);
  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512BW);
  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VL);

  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VBMI);
  if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VBMI2);
  if (HasLeaf7 && ((ECX >> 8) & 1))
    setFeature(FEATURE_GFNI);
  if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
    setFeature(FEATURE_VPCLMULQDQ);
  if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VNNI);
  if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512BITALG);
  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VPOPCNTDQ);

  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX5124VNNIW);
  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX5124FMAPS);
  if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VP2INTERSECT);

  bool HasLeaf7Subleaf1 =
      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
  if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512BF16);

  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  if (HasExtLeaf1 && ((ECX >> 6) & 1))
    setFeature(FEATURE_SSE4_A);
  if (HasExtLeaf1 && ((ECX >> 11) & 1))
    setFeature(FEATURE_XOP);
  if (HasExtLeaf1 && ((ECX >> 16) & 1))
    setFeature(FEATURE_FMA4);
#undef setFeature
}

#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;

#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
struct __processor_model {
  unsigned int __cpu_vendor;
  unsigned int __cpu_type;
  unsigned int __cpu_subtype;
  unsigned int __cpu_features[1];
} __cpu_model = {0, 0, 0, {0}};

#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
unsigned int __cpu_features2 = 0;
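
// Note (illustrative, not specific to this file): compilers lower
// __builtin_cpu_is and __builtin_cpu_supports to reads of the globals above.
// A check such as __builtin_cpu_supports("avx2") typically tests the
// FEATURE_AVX2 bit in __cpu_model.__cpu_features[0] (features numbered 32 and
// up live in __cpu_features2), and __builtin_cpu_is("nehalem") compares
// __cpu_model.__cpu_type / __cpu_model.__cpu_subtype against INTEL_COREI7 /
// INTEL_COREI7_NEHALEM.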

// A constructor function that sets __cpu_model and __cpu_features2 with
// the right values. This needs to run only once. This constructor is
// given the highest priority and it should run before constructors without
// the priority set. However, it still runs after ifunc initializers and
// needs to be called explicitly there.

int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
  unsigned EAX, EBX, ECX, EDX;
  unsigned MaxLeaf = 5;
  unsigned Vendor;
  unsigned Model, Family;
  unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};

  // This function needs to run just once.
  if (__cpu_model.__cpu_vendor)
    return 0;

  if (!isCpuIdSupported() ||
      getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
    __cpu_model.__cpu_vendor = VENDOR_OTHER;
    return -1;
  }

  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
  detectX86FamilyModel(EAX, &Family, &Model);

  // Find available features.
  getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);

  assert((sizeof(Features) / sizeof(Features[0])) == 2);
  __cpu_model.__cpu_features[0] = Features[0];
  __cpu_features2 = Features[1];

  if (Vendor == SIG_INTEL) {
    // Get CPU type.
    getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
                                    &(__cpu_model.__cpu_type),
                                    &(__cpu_model.__cpu_subtype));
    __cpu_model.__cpu_vendor = VENDOR_INTEL;
  } else if (Vendor == SIG_AMD) {
    // Get CPU type.
    getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
                                  &(__cpu_model.__cpu_type),
                                  &(__cpu_model.__cpu_subtype));
    __cpu_model.__cpu_vendor = VENDOR_AMD;
  } else
    __cpu_model.__cpu_vendor = VENDOR_OTHER;

  assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
  assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
  assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);

  return 0;
}
#elif defined(__aarch64__)
// LSE support detection for out-of-line atomics
// using HWCAP and Auxiliary vector
_Bool __aarch64_have_lse_atomics
    __attribute__((visibility("hidden"), nocommon));
#if defined(__has_include)
#if __has_include(<sys/auxv.h>)
#include <sys/auxv.h>
#ifndef AT_HWCAP
#define AT_HWCAP 16
#endif
#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1 << 8)
#endif
static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
  unsigned long hwcap = getauxval(AT_HWCAP);
  __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
}
#endif // __has_include(<sys/auxv.h>)
#endif // defined(__has_include)
#endif // defined(__aarch64__)
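
// Illustrative usage (not part of this file): code built with GCC or Clang can
// query the data initialized above through the CPU-model builtins, e.g.:
#if 0
#include <stdio.h>

int main(void) {
  __builtin_cpu_init(); // normally redundant; the constructor above already ran
  if (__builtin_cpu_supports("avx2"))
    printf("AVX2 is available\n");
  if (__builtin_cpu_is("znver2"))
    printf("running on an AMD Zen 2 part\n");
  return 0;
}
#endif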