xref: /netbsd-src/external/gpl3/gcc/dist/libphobos/libdruntime/core/cpuid.d (revision 0a3071956a3a9fdebdbf7f338cf2d439b45fc728)
1 /**
2  * Identify the characteristics of the host CPU, providing information
3  * about cache sizes and assembly optimisation hints. This module is
4  * provided primarily for assembly language programmers.
5  *
6  * References:
7  * Some of this information was extremely difficult to track down. Some of the
8  * documents below were found only in cached versions stored by search engines!
9  * This code relies on information found in:
10  *
11  * $(UL
12  * $(LI "Intel(R) 64 and IA-32 Architectures Software Developers Manual,
13  *    Volume 2A: Instruction Set Reference, A-M" (2007).
14  * )
15  * $(LI "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008).
16  * )
17  * $(LI "AMD Processor Recognition Application Note For Processors Prior to AMD
18  *    Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005).
19  * )
20  * $(LI "AMD Geode(TM) GX Processors Data Book",
21  *    Advanced Micro Devices, Publication ID 31505E, (2005).
22  * )
23  * $(LI "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000).
24  * )
25  * $(LI "Application note 106: Software Customization for the 6x86 Family",
26  *    Cyrix Corporation, Rev 1.5 (1998)
27  * )
28  * $(LI $(LINK http://www.datasheetcatalog.org/datasheet/nationalsemiconductor/GX1.pdf))
29  * $(LI "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution",
30  *   National Semiconductor, (2002)
31  * )
32  * $(LI "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008).
33  * )
34  * $(LI $(LINK http://www.sandpile.org/ia32/cpuid.htm))
35  * $(LI $(LINK http://www.akkadia.org/drepper/cpumemory.pdf))
36  * $(LI "What every programmer should know about memory",
37  *    Ulrich Drepper, Red Hat, Inc., (2007).
38  * )
39  * $(LI "CPU Identification by the Windows Kernel", G. Chappell (2009).
40  *   $(LINK http://www.geoffchappell.com/viewer.htm?doc=studies/windows/km/cpu/cx8.htm)
41  * )
42  * $(LI "Intel(R) Processor Identification and the CPUID Instruction, Application
43  *    Note 485" (2009).
44  * )
45  * )
46  *
47  * Bugs: Currently only works on x86 and Itanium CPUs.
48  *      Many processors have bugs in their microcode for the CPUID instruction,
49  *      so sometimes the cache information may be incorrect.
50  *
51  * Copyright: Copyright Don Clugston 2007 - 2009.
52  * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
53  * Authors:   Don Clugston, Tomas Lindquist Olsen <tomas@famolsen.dk>
54  * Source:    $(DRUNTIMESRC core/_cpuid.d)
55  */
56 
57 module core.cpuid;
58 
59 version (GNU) version = GNU_OR_LDC;
60 version (LDC) version = GNU_OR_LDC;
61 
62 @trusted:
63 nothrow:
64 @nogc:
65 
66 // If optimizing for a particular processor, it is generally better
67 // to identify based on features rather than model. NOTE: Normally
68 // it's only worthwhile to optimise for the latest Intel and AMD CPU,
69 // with a backup for other CPUs.
70 // Pentium    -- preferPentium1()
71 // PMMX       --   + mmx()
72 // PPro       -- default
73 // PII        --   + mmx()
74 // PIII       --   + mmx() + sse()
75 // PentiumM   --   + mmx() + sse() + sse2()
76 // Pentium4   -- preferPentium4()
77 // PentiumD   --   + isX86_64()
78 // Core2      -- default + isX86_64()
79 // AMD K5     -- preferPentium1()
80 // AMD K6     --   + mmx()
81 // AMD K6-II  --   + mmx() + 3dnow()
82 // AMD K7     -- preferAthlon()
83 // AMD K8     --   + sse2()
84 // AMD K10    --   + isX86_64()
85 // Cyrix 6x86 -- preferPentium1()
86 //    6x86MX  --   + mmx()
87 
88 // GDC support uses extended inline assembly:
89 //   https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html        (general information and hints)
90 //   https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html  (binding variables to registers)
91 //   https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html (x86 specific register short names)
92 
93 public:
94 
/// Describes one level of data cache: capacity, associativity and line size.
struct CacheInfo
{
    /// Capacity of the cache in kilobytes, per CPU.
    /// A unified (data + code) L1 cache is reported at half its physical
    /// size. Larger unified caches are not halved, since data size normally
    /// far exceeds code size in critical loops.
    size_t size;

    /// Associativity, expressed as a number of ways, eg:
    /// $(UL
    /// $(LI 1 = direct mapped)
    /// $(LI 2 = 2-way set associative)
    /// $(LI 3 = 3-way set associative)
    /// $(LI ubyte.max = fully associative)
    /// )
    ubyte associativity;

    /// Number of bytes brought into the cache when a cache miss occurs.
    uint lineSize;
}
114 
115 public:
116     /// $(RED Scheduled for deprecation. Please use $(D dataCaches) instead.)
117     // Note: When we deprecate it, we simply make it private.
118     __gshared CacheInfo[5] datacache;
119 
@property pure
{
    /// The data caches, one entry per level. If there are fewer than
    /// 5 physical cache levels, the remaining levels are set to
    /// size_t.max (== entire memory space).
    const(CacheInfo)[5] dataCaches() { return _dataCaches; }

    /// Returns the vendor string, for display purposes only.
    /// Do NOT use this to determine features!
    /// Note that some CPUs have programmable vendorIDs.
    string vendor() { return _vendor; }

    /// Returns the processor string, for display purposes only.
    string processor() { return _processor; }

    /// Does it have an x87 FPU on-chip?
    bool x87onChip() { return _x87onChip; }

    /// Is MMX supported?
    bool mmx() { return _mmx; }

    /// Is SSE supported?
    bool sse() { return _sse; }

    /// Is SSE2 supported?
    bool sse2() { return _sse2; }

    /// Is SSE3 supported?
    bool sse3() { return _sse3; }

    /// Is SSSE3 supported?
    bool ssse3() { return _ssse3; }

    /// Is SSE4.1 supported?
    bool sse41() { return _sse41; }

    /// Is SSE4.2 supported?
    bool sse42() { return _sse42; }

    /// Is SSE4a supported?
    bool sse4a() { return _sse4a; }

    /// Is AES supported?
    bool aes() { return _aes; }

    /// Is pclmulqdq supported?
    bool hasPclmulqdq() { return _hasPclmulqdq; }

    /// Is rdrand supported?
    bool hasRdrand() { return _hasRdrand; }

    /// Is AVX supported?
    bool avx() { return _avx; }

    /// Is VEX-encoded AES supported?
    bool vaes() { return _vaes; }

    /// Is vpclmulqdq supported?
    bool hasVpclmulqdq() { return _hasVpclmulqdq; }

    /// Is FMA supported?
    bool fma() { return _fma; }

    /// Is FP16C supported?
    bool fp16c() { return _fp16c; }

    /// Is AVX2 supported?
    bool avx2() { return _avx2; }

    /// Is HLE (hardware lock elision) supported?
    bool hle() { return _hle; }

    /// Is RTM (restricted transactional memory) supported?
    bool rtm() { return _rtm; }

    /// Is rdseed supported?
    bool hasRdseed() { return _hasRdseed; }

    /// Is SHA supported?
    bool hasSha() { return _hasSha; }

    /// Is AMD 3DNOW supported?
    bool amd3dnow() { return _amd3dnow; }

    /// Is AMD 3DNOW Ext supported?
    bool amd3dnowExt() { return _amd3dnowExt; }

    /// Are AMD extensions to MMX supported?
    bool amdMmx() { return _amdMmx; }

    /// Is fxsave/fxrstor supported?
    bool hasFxsr() { return _hasFxsr; }

    /// Is cmov supported?
    bool hasCmov() { return _hasCmov; }

    /// Is rdtsc supported?
    bool hasRdtsc() { return _hasRdtsc; }

    /// Is cmpxchg8b supported?
    bool hasCmpxchg8b() { return _hasCmpxchg8b; }

    /// Is cmpxchg16b supported?
    bool hasCmpxchg16b() { return _hasCmpxchg16b; }

    /// Is SYSENTER/SYSEXIT supported?
    bool hasSysEnterSysExit() { return _hasSysEnterSysExit; }

    /// Is 3DNow prefetch supported?
    bool has3dnowPrefetch() { return _has3dnowPrefetch; }

    /// Are LAHF and SAHF supported in 64-bit mode?
    bool hasLahfSahf() { return _hasLahfSahf; }

    /// Is POPCNT supported?
    bool hasPopcnt() { return _hasPopcnt; }

    /// Is LZCNT supported?
    bool hasLzcnt() { return _hasLzcnt; }

    /// Is this an Intel64 or AMD 64?
    bool isX86_64() { return _isX86_64; }

    /// Is this an IA64 (Itanium) processor?
    bool isItanium() { return _isItanium; }

    /// Is hyperthreading supported?
    bool hyperThreading() { return _hyperThreading; }

    /// Returns the number of threads per CPU.
    uint threadsPerCPU() { return _threadsPerCPU; }

    /// Returns the number of cores in the CPU.
    uint coresPerCPU() { return _coresPerCPU; }

    /// Optimisation hints for assembly code.
    ///
    /// For forward compatibility, the CPU is compared against different
    /// microarchitectures. For 32-bit x86, comparisons are made against
    /// the Intel PPro/PII/PIII/PM family.
    ///
    /// The major 32-bit x86 microarchitecture 'dynasties' have been:
    ///
    /// $(UL
    /// $(LI Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2). )
    /// $(LI AMD Athlon (K7, K8, K10). )
    /// $(LI Intel NetBurst (Pentium 4, Pentium D). )
    /// $(LI In-order Pentium (Pentium1, PMMX, Atom) )
    /// )
    ///
    /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta,
    /// Cyrix, Rise) were mostly in-order.
    ///
    /// Some new processors do not fit into the existing categories:
    ///
    /// $(UL
    /// $(LI Intel Atom 230/330 (family 6, model 0x1C) is an in-order core. )
    /// $(LI Centaur Isaiah = VIA Nano (family 6, model F) is an out-of-order core. )
    /// )
    ///
    /// Within each dynasty, the optimisation techniques are largely
    /// identical (eg, use instruction pairing for group 4). Major
    /// instruction set improvements occur within each dynasty.

    /// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code?
    bool preferAthlon() { return _preferAthlon; }

    /// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code?
    bool preferPentium4() { return _preferPentium4; }

    /// Does this CPU perform better on Pentium I code than Pentium Pro code?
    bool preferPentium1() { return _preferPentium1; }
}
252 
253 private immutable
254 {
255     /* These exist as immutables so that the query property functions can
256      * be backwards compatible with code that called them with ().
257      * Also, immutables can only be set by the static this().
258      */
259     const(CacheInfo)[5] _dataCaches;
260     string _vendor;
261     string _processor;
262     bool _x87onChip;
263     bool _mmx;
264     bool _sse;
265     bool _sse2;
266     bool _sse3;
267     bool _ssse3;
268     bool _sse41;
269     bool _sse42;
270     bool _sse4a;
271     bool _aes;
272     bool _hasPclmulqdq;
273     bool _hasRdrand;
274     bool _avx;
275     bool _vaes;
276     bool _hasVpclmulqdq;
277     bool _fma;
278     bool _fp16c;
279     bool _avx2;
280     bool _hle;
281     bool _rtm;
282     bool _hasRdseed;
283     bool _hasSha;
284     bool _amd3dnow;
285     bool _amd3dnowExt;
286     bool _amdMmx;
287     bool _hasFxsr;
288     bool _hasCmov;
289     bool _hasRdtsc;
290     bool _hasCmpxchg8b;
291     bool _hasCmpxchg16b;
292     bool _hasSysEnterSysExit;
293     bool _has3dnowPrefetch;
294     bool _hasLahfSahf;
295     bool _hasPopcnt;
296     bool _hasLzcnt;
297     bool _isX86_64;
298     bool _isItanium;
299     bool _hyperThreading;
300     uint _threadsPerCPU;
301     uint _coresPerCPU;
302     bool _preferAthlon;
303     bool _preferPentium4;
304     bool _preferPentium1;
305 }
306 
307 __gshared:
308     // All these values are set only once, and never subsequently modified.
309 public:
310     /// $(RED Warning: This field will be turned into a property in a future release.)
311     ///
312     /// Processor type (vendor-dependent).
313     /// This should be visible ONLY for display purposes.
314     uint stepping, model, family;
315     /// $(RED This field has been deprecated. Please use $(D cacheLevels) instead.)
316     uint numCacheLevels = 1;
317     /// The number of cache levels in the CPU.
cacheLevels()318     @property uint cacheLevels() { return numCacheLevels; }
319 private:
320 
// Raw identification data gathered from the CPUID queries: vendor guesses,
// name strings, feature-flag words and core/thread limits.
struct CpuFeatures
{
    // true = _probably_ an Intel processor, might be faking
    bool probablyIntel;
    // true = _probably_ an AMD or Hygon processor
    bool probablyAMD;
    string processorName;
    char[12] vendorID = 0;
    char[48] processorNameBuffer = 0;
    // mmx, sse, sse2, hyperthreading, etc
    uint features = 0;
    // sse3, etc.
    uint miscfeatures = 0;
    // HLE, AVX2, RTM, etc.
    uint extfeatures = 0;
    // 3DNow!, mmxext, etc
    uint amdfeatures = 0;
    // sse4a, sse5, svm, etc
    uint amdmiscfeatures = 0;
    // XFEATURES_ENABLED_MASK
    ulong xfeatures = 0;
    uint maxCores = 1;
    uint maxThreads = 1;
}
337 
338 CpuFeatures cpuFeatures;
339 
/* Returns the address of the global cpuFeatures record.
 *
 * The function is deliberately kept out of line: this hides from the
 * optimizer where cf (a register in the caller) comes from, so that cf
 * doesn't get "optimized away". The idea is to reference the global data
 * through cf so that not so many fixups are inserted into the executable
 * image.
 */
CpuFeatures* getCpuFeatures() @nogc nothrow
{
    pragma(inline, false);
    CpuFeatures* record = &cpuFeatures;
    return record;
}
350 
351     // Note that this may indicate multi-core rather than hyperthreading.
hyperThreadingBit()352     @property bool hyperThreadingBit()    { return (cpuFeatures.features&HTT_BIT)!=0;}
353 
354     // feature flags CPUID1_EDX
355     enum : uint
356     {
357         FPU_BIT = 1,
358         TIMESTAMP_BIT = 1<<4, // rdtsc
359         MDSR_BIT = 1<<5,      // RDMSR/WRMSR
360         CMPXCHG8B_BIT = 1<<8,
361         SYSENTERSYSEXIT_BIT = 1<<11,
362         CMOV_BIT = 1<<15,
363         MMX_BIT = 1<<23,
364         FXSR_BIT = 1<<24,
365         SSE_BIT = 1<<25,
366         SSE2_BIT = 1<<26,
367         HTT_BIT = 1<<28,
368         IA64_BIT = 1<<30
369     }
370     // feature flags misc CPUID1_ECX
371     enum : uint
372     {
373         SSE3_BIT = 1,
374         PCLMULQDQ_BIT = 1<<1, // from AVX
375         MWAIT_BIT = 1<<3,
376         SSSE3_BIT = 1<<9,
377         FMA_BIT = 1<<12,     // from AVX
378         CMPXCHG16B_BIT = 1<<13,
379         SSE41_BIT = 1<<19,
380         SSE42_BIT = 1<<20,
381         POPCNT_BIT = 1<<23,
382         AES_BIT = 1<<25, // AES instructions from AVX
383         OSXSAVE_BIT = 1<<27, // Used for AVX
384         AVX_BIT = 1<<28,
385         FP16C_BIT = 1<<29,
386         RDRAND_BIT = 1<<30,
387     }
388     // Feature flags for cpuid.{EAX = 7, ECX = 0}.EBX.
389     enum : uint
390     {
391         FSGSBASE_BIT = 1 << 0,
392         BMI1_BIT = 1 << 3,
393         HLE_BIT = 1 << 4,
394         AVX2_BIT = 1 << 5,
395         SMEP_BIT = 1 << 7,
396         BMI2_BIT = 1 << 8,
397         ERMS_BIT = 1 << 9,
398         INVPCID_BIT = 1 << 10,
399         RTM_BIT = 1 << 11,
400         RDSEED_BIT = 1 << 18,
401         SHA_BIT = 1 << 29,
402     }
403     // feature flags XFEATURES_ENABLED_MASK
404     enum : ulong
405     {
406         XF_FP_BIT  = 0x1,
407         XF_SSE_BIT = 0x2,
408         XF_YMM_BIT = 0x4,
409     }
410     // AMD feature flags CPUID80000001_EDX
411     enum : uint
412     {
413         AMD_MMX_BIT = 1<<22,
414 //      FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions.
415         FFXSR_BIT = 1<<25,
416         PAGE1GB_BIT = 1<<26, // support for 1GB pages
417         RDTSCP_BIT = 1<<27,
418         AMD64_BIT = 1<<29,
419         AMD_3DNOW_EXT_BIT = 1<<30,
420         AMD_3DNOW_BIT = 1<<31
421     }
422     // AMD misc feature flags CPUID80000001_ECX
423     enum : uint
424     {
425         LAHFSAHF_BIT = 1,
426         LZCNT_BIT = 1<<5,
427         SSE4A_BIT = 1<<6,
428         AMD_3DNOW_PREFETCH_BIT = 1<<8,
429     }
430 
431 
version(GNU_OR_LDC)432 version (GNU_OR_LDC) {
433     version (X86)
434         enum supportedX86 = true;
435     else version (X86_64)
436         enum supportedX86 = true;
437     else
438         enum supportedX86 = false;
439 } else version (D_InlineAsm_X86) {
440     enum supportedX86 = true;
version(D_InlineAsm_X86_64)441 } else version (D_InlineAsm_X86_64) {
442     enum supportedX86 = true;
443 } else {
444     enum supportedX86 = false;
445 }
446 
447 static if (supportedX86) {
448 // Note that this code will also work for Itanium in x86 mode.
449 
450 __gshared uint max_cpuid, max_extended_cpuid;
451 
// CPUID2: "cache and tlb information"
//
// Executes CPUID leaf 2 and, for every descriptor byte it recognises as a
// data (or unified) cache, records size, associativity and line size in the
// global `datacache` table. Unknown descriptor bytes are ignored.
void getcacheinfoCPUID2()
{
    // We are only interested in the data caches
    void decipherCpuid2(ubyte x) @nogc nothrow {
        if (x==0) return;
        // Values from http://www.sandpile.org/ia32/cpuid.htm.
        // Includes Itanium and non-Intel CPUs.
        //
        // ids, sizes and ways are parallel tables: ids[i] is the descriptor
        // byte, sizes[i] its size in KB and ways[i] its associativity.
        static immutable ubyte [63] ids = [
            // level 1 cache (9 entries)
            0x0A, 0x0C, 0x0D, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68,
            // level 2 cache (29 entries)
            0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F,
            0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E,
            0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81,
            // level 3 cache
            0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D,

            0xD0, 0xD1, 0xD2, 0xD6, 0xD7, 0xD8, 0xDC, 0xDD, 0xDE,
            0xE2, 0xE3, 0xE4, 0xEA, 0xEB, 0xEC
        ];
        static immutable uint [63] sizes = [
            8, 16, 16, 64, 16, 24, 8, 16, 32,
            128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512,
            256, 512, 1024, 2048, 512, 1024, 4096, 6*1024,
            128, 192, 128, 256, 384, 512, 3072, 512, 128,
            512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024,

            512, 1024, 2048, 1024, 2048, 4096, 1024+512, 3*1024, 6*1024,
            2*1024, 4*1024, 8*1024, 12*1024, 28*1024, 24*1024
        ];
    // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative
        static immutable ubyte [63] ways = [
            2, 4, 4, 8, 8, 6, 4, 4, 4,
            4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2,
            8, 8, 8, 8, 4, 8, 16, 24,
            4, 6, 2, 4, 6, 4, 12, 8, 8,
            4, 8, 8, 8, 4, 8, 12, 16, 12, 16,
            4, 4, 4, 8, 8, 8, 12, 12, 12,
            16, 16, 16, 24, 24, 24
        ];
        // Table index of the first level-2 and first level-3 descriptor.
        // These must track the row counts above: with values one too small
        // the last L1 entry (0x68) is filed as L2 and the last L2 entry
        // (0x81) as L3.
        enum { FIRSTDATA2 = 9, FIRSTDATA3 = 9+29 }
        static assert(ids[FIRSTDATA2] == 0x41 && ids[FIRSTDATA3] == 0x22,
                      "FIRSTDATA2/FIRSTDATA3 are out of sync with the ids table");
        for (size_t i=0; i< ids.length; ++i) {
            if (x!=ids[i]) continue;
            // datacache index: 0 = L1, 1 = L2, 2 = L3
            int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 1 : 2;
            // On family 0xF model 6, descriptor 0x49 is recorded as L3.
            if (x==0x49 && family==0xF && model==0x6) level=2;
            datacache[level].size=sizes[i];
            datacache[level].associativity=ways[i];
            // All L3 descriptors (index 2) and the listed L1/L2 descriptors
            // have 64-byte lines; everything else has 32-byte lines.
            if (level == 2 || x==0x2C || x==0x0D || (x>=0x48 && x<=0x80)
                               || x==0x86 || x==0x87
                               || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){
                datacache[level].lineSize = 64;
            } else datacache[level].lineSize = 32;
            break; // descriptor bytes are unique in ids
        }
    }

    uint[4] a;
    bool firstTime = true;
    // On a multi-core system, this could theoretically fail, but it's only used
    // for old single-core CPUs.
    uint numinfos = 1;
    do {
        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (a[0]), "=b" (a[1]), "=c" (a[2]), "=d" (a[3]) : "a" (2);
        } else asm pure nothrow @nogc {
            mov EAX, 2;
            cpuid;
            mov a+0, EAX;
            mov a+4, EBX;
            mov a+8, ECX;
            mov a+12, EDX;
        }
        if (firstTime) {
            if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) {
        // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080.
        // These are NOT standard Intel values
        // (TLB = 32 entry, 4 way associative, 4K pages)
        // (L1 cache = 16K, 4way, linesize16)
                datacache[0].size=8;
                datacache[0].associativity=4;
                datacache[0].lineSize=16;
                return;
            }
            // lsb of a is how many times to loop.
            numinfos = a[0] & 0xFF;
            // A count of 0 would make the unsigned `--numinfos` below wrap
            // around and spin for ~2^32 iterations; treat it as one pass.
            if (numinfos == 0) numinfos = 1;
            // and otherwise it should be ignored
            a[0] &= 0xFFFF_FF00;
            firstTime = false;
        }
        for (int c=0; c<4;++c) {
            // high bit set == no info.
            if (a[c] & 0x8000_0000) continue;
            decipherCpuid2(cast(ubyte)(a[c] & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF));
        }
    } while (--numinfos);
}
552 
// CPUID4: "Deterministic cache parameters" leaf
//
// Walks the sub-leaves of CPUID leaf 4 (ECX = 0, 1, 2, ...) until one reports
// cache type 0. For every data or unified cache it fills the matching
// `datacache` entry, and it raises cpuFeatures.maxCores to the largest core
// count seen.
void getcacheinfoCPUID4()
{
    int cachenum = 0;
    for (;;) {
        uint a, b, number_of_sets;
        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=b" (b), "=c" (number_of_sets) : "a" (4), "c" (cachenum) : "edx";
        } else asm pure nothrow @nogc {
            mov EAX, 4;
            mov ECX, cachenum;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov number_of_sets, ECX;
        }
        ++cachenum;
        // EAX[4:0] is the cache type: 0 = none, 1 = data, 3 = unified.
        immutable uint cacheType = a & 0x1F;
        if (cacheType==0) break; // no more caches
        immutable uint numthreads = ((a>>14) & 0xFFF)  + 1;
        immutable uint numcores = ((a>>26) & 0x3F) + 1;
        if (numcores > cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
        if (cacheType!=1 && cacheType!=3) continue; // we only want data & unified caches

        ++number_of_sets;
        // EAX[7:5] is the 1-based cache level; a raw value of 0 wraps to 255
        // here and is rejected by the bounds check below.
        immutable ubyte level = cast(ubyte)(((a>>5)&7)-1);
        // Must be `>=`: level == datacache.length would index one past the
        // end of the table.
        if (level >= datacache.length) continue; // ignore deep caches
        datacache[level].associativity = a & 0x200 ? ubyte.max :cast(ubyte)((b>>22)+1);
        datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size
        immutable uint line_partitions = ((b >> 12)& 0x3FF) + 1;
        // Size = number of sets * associativity * cachelinesize * linepartitions
        // and must convert to Kb, also dividing by the number of hyperthreads using this cache.
        immutable ulong sz = (datacache[level].associativity< ubyte.max)? number_of_sets *
            datacache[level].associativity : number_of_sets;
        datacache[level].size = cast(size_t)(
                (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024));
        if (level == 0 && cacheType==3) {
            // Halve the size for unified L1 caches
            datacache[level].size/=2;
        }
    }
}
594 
// CPUID8000_0005 & 6
//
// Fills `datacache[0]` from extended leaf 0x8000_0005 and, when extended
// leaf 0x8000_0006 is available, `datacache[1]` and `datacache[2]` as well.
// If leaf 0x8000_0008 is available its core count is used to turn the total
// L3 size into a per-core figure and to raise cpuFeatures.maxCores.
void getAMDcacheinfo()
{
    uint dummy, c5, c6, d6;
    version (GNU_OR_LDC) asm pure nothrow @nogc {
        "cpuid" : "=a" (dummy), "=c" (c5) : "a" (0x8000_0005) : "ebx", "edx";
    } else asm pure nothrow @nogc {
        mov EAX, 0x8000_0005; // L1 cache
        cpuid;
        // EAX has L1_TLB_4M.
        // EBX has L1_TLB_4K
        // EDX has L1 instruction cache
        mov c5, ECX;
    }

    datacache[0].size = ( (c5>>24) & 0xFF);
    datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF);
    datacache[0].lineSize = c5 & 0xFF;

    if (max_extended_cpuid >= 0x8000_0006) {
        // AMD K6-III or K6-2+ or later.
        // Kept as a uint: the low byte of ECX holds (count - 1), so a ubyte
        // counter would wrap to 0 when that byte is 0xFF and the division
        // for the L3 size below would then divide by zero.
        uint numcores = 1;
        if (max_extended_cpuid >= 0x8000_0008) {
            version (GNU_OR_LDC) asm pure nothrow @nogc {
                "cpuid" : "=a" (dummy), "=c" (numcores) : "a" (0x8000_0008) : "ebx", "edx";
            } else asm pure nothrow @nogc {
                mov EAX, 0x8000_0008;
                cpuid;
                mov numcores, ECX;
            }
            // Only ECX[7:0] carries the count; result is in 1 .. 256.
            numcores = (numcores & 0xFF) + 1;
            if (numcores>cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
        }

        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (dummy), "=c" (c6), "=d" (d6) : "a" (0x8000_0006) : "ebx";
        } else asm pure nothrow @nogc {
            mov EAX, 0x8000_0006; // L2/L3 cache
            cpuid;
            mov c6, ECX; // L2 cache info
            mov d6, EDX; // L3 cache info
        }

        // Maps the 4-bit associativity code in bits 15:12 to a way count
        // (0xFF = fully associative, 0 = disabled/reserved code).
        static immutable ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ];
        datacache[1].size = (c6>>16) & 0xFFFF;
        datacache[1].associativity = assocmap[(c6>>12)&0xF];
        datacache[1].lineSize = c6 & 0xFF;

        // The L3 cache value is TOTAL, not per core.
        datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1.
        datacache[2].associativity = assocmap[(d6>>12)&0xF];
        datacache[2].lineSize = d6 & 0xFF;
    }
}
649 
// For Intel CoreI7 and later, use function 0x0B
// to determine number of processors.
//
// Queries sub-leaves 0 and 1 of CPUID leaf 0x0B. The low 16 bits of EBX from
// sub-leaf 0 are taken as threads per core, those from sub-leaf 1 as
// cpuFeatures.maxThreads; cpuFeatures.maxCores is their quotient.
void getCpuInfo0B()
{
    uint threadsPerCore = 0;
    uint a, b, c, d;
    // I'm not sure about this. The docs state that there
    // are 2 hyperthreads per core if HT is factory enabled.
    for (int level = 0; level < 2; level++)
    {
        version (GNU_OR_LDC) asm pure nothrow @nogc {
            "cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (0x0B), "c" (level);
        } else asm pure nothrow @nogc {
            mov EAX, 0x0B;
            mov ECX, level;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov c, ECX;
            mov d, EDX;
        }
        if (b != 0)
        {
            if (level == 0)
                threadsPerCore = b & 0xFFFF;
            else if (level == 1)
            {
                cpuFeatures.maxThreads = b & 0xFFFF;
                // threadsPerCore is still 0 if sub-leaf 0 reported no usable
                // count; leave maxCores untouched rather than divide by zero.
                if (threadsPerCore != 0)
                    cpuFeatures.maxCores = cpuFeatures.maxThreads / threadsPerCore;
            }
        }
        // Got "invalid domain" returned from cpuid
        if (a == 0 && b == 0)
            break;
    }
}
686 
cpuidX86()687 void cpuidX86()
688 {
689     auto cf = getCpuFeatures();
690 
691     uint a, b, c, d;
692     uint* venptr = cast(uint*)cf.vendorID.ptr;
693     version (GNU_OR_LDC)
694     {
695         asm pure nothrow @nogc {
696             "cpuid" : "=a" (max_cpuid), "=b" (venptr[0]), "=d" (venptr[1]), "=c" (venptr[2]) : "a" (0);
697             "cpuid" : "=a" (max_extended_cpuid) : "a" (0x8000_0000) : "ebx", "ecx", "edx";
698         }
699     }
700     else
701     {
702         uint a2;
703         version (D_InlineAsm_X86)
704         {
705             asm pure nothrow @nogc {
706                 mov EAX, 0;
707                 cpuid;
708                 mov a, EAX;
709                 mov EAX, venptr;
710                 mov [EAX], EBX;
711                 mov [EAX + 4], EDX;
712                 mov [EAX + 8], ECX;
713             }
714         }
715         else version (D_InlineAsm_X86_64)
716         {
717             asm pure nothrow @nogc {
718                 mov EAX, 0;
719                 cpuid;
720                 mov a, EAX;
721                 mov RAX, venptr;
722                 mov [RAX], EBX;
723                 mov [RAX + 4], EDX;
724                 mov [RAX + 8], ECX;
725             }
726         }
727         asm pure nothrow @nogc {
728             mov EAX, 0x8000_0000;
729             cpuid;
730             mov a2, EAX;
731         }
732         max_cpuid = a;
733         max_extended_cpuid = a2;
734     }
735 
736 
737     cf.probablyIntel = cf.vendorID == "GenuineIntel";
738     cf.probablyAMD = (cf.vendorID == "AuthenticAMD" || cf.vendorID == "HygonGenuine");
739     uint apic = 0; // brand index, apic id
740     version (GNU_OR_LDC) asm pure nothrow @nogc {
741         "cpuid" : "=a" (a), "=b" (apic), "=c" (cf.miscfeatures), "=d" (cf.features) : "a" (1);
742     } else {
743         asm pure nothrow @nogc {
744             mov EAX, 1; // model, stepping
745             cpuid;
746             mov a, EAX;
747             mov apic, EBX;
748             mov c, ECX;
749             mov d, EDX;
750         }
751         cf.features = d;
752         cf.miscfeatures = c;
753     }
754     stepping = a & 0xF;
755     immutable uint fbase = (a >> 8) & 0xF;
756     immutable uint mbase = (a >> 4) & 0xF;
757     family = ((fbase == 0xF) || (fbase == 0)) ? fbase + (a >> 20) & 0xFF : fbase;
758     model = ((fbase == 0xF) || (fbase == 6 && cf.probablyIntel) ) ?
759          mbase + ((a >> 12) & 0xF0) : mbase;
760 
761     if (max_cpuid >= 7)
762     {
763         version (GNU_OR_LDC) asm pure nothrow @nogc {
764             "cpuid" : "=a" (a), "=b" (cf.extfeatures), "=c" (c) : "a" (7), "c" (0) : "edx";
765         } else {
766             uint ext;
767             asm pure nothrow @nogc {
768                 mov EAX, 7; // Structured extended feature leaf.
769                 mov ECX, 0; // Main leaf.
770                 cpuid;
771                 mov ext, EBX; // HLE, AVX2, RTM, etc.
772             }
773             cf.extfeatures = ext;
774         }
775     }
776 
777     if (cf.miscfeatures & OSXSAVE_BIT)
778     {
779         version (GNU_OR_LDC) asm pure nothrow @nogc {
780             /* Old assemblers do not recognize xgetbv, and there is no easy way
781              * to conditionally compile based on the assembler used, so use the
782              * raw .byte sequence instead.  */
783             ".byte 0x0f, 0x01, 0xd0" : "=a" (a), "=d" (d) : "c" (0);
784         } else asm pure nothrow @nogc {
785             mov ECX, 0;
786             xgetbv;
787             mov d, EDX;
788             mov a, EAX;
789         }
790         cf.xfeatures = cast(ulong)d << 32 | a;
791     }
792 
793     cf.amdfeatures = 0;
794     cf.amdmiscfeatures = 0;
795     if (max_extended_cpuid >= 0x8000_0001) {
796         version (GNU_OR_LDC) asm pure nothrow @nogc {
797             "cpuid" : "=a" (a), "=c" (cf.amdmiscfeatures), "=d" (cf.amdfeatures) : "a" (0x8000_0001) : "ebx";
798         } else {
799             asm pure nothrow @nogc {
800                 mov EAX, 0x8000_0001;
801                 cpuid;
802                 mov c, ECX;
803                 mov d, EDX;
804             }
805             cf.amdmiscfeatures = c;
806             cf.amdfeatures = d;
807         }
808     }
809     // Try to detect fraudulent vendorIDs
810     if (amd3dnow) cf.probablyIntel = false;
811 
812     if (!cf.probablyIntel && max_extended_cpuid >= 0x8000_0008) {
813         //http://support.amd.com/TechDocs/25481.pdf pg.36
814         cf.maxCores = 1;
815         if (hyperThreadingBit) {
816             // determine max number of cores for AMD
817             version (GNU_OR_LDC) asm pure nothrow @nogc {
818                 "cpuid" : "=a" (a), "=c" (c) : "a" (0x8000_0008) : "ebx", "edx";
819             } else asm pure nothrow @nogc {
820                 mov EAX, 0x8000_0008;
821                 cpuid;
822                 mov c, ECX;
823             }
824             cf.maxCores += c & 0xFF;
825         }
826     }
827 
828     if (max_extended_cpuid >= 0x8000_0004) {
829         uint* pnb = cast(uint*)cf.processorNameBuffer.ptr;
830         version (GNU_OR_LDC)
831         {
832             asm pure nothrow @nogc {
833                 "cpuid" : "=a" (pnb[0]), "=b" (pnb[1]), "=c" (pnb[ 2]), "=d" (pnb[ 3]) : "a" (0x8000_0002);
834                 "cpuid" : "=a" (pnb[4]), "=b" (pnb[5]), "=c" (pnb[ 6]), "=d" (pnb[ 7]) : "a" (0x8000_0003);
835                 "cpuid" : "=a" (pnb[8]), "=b" (pnb[9]), "=c" (pnb[10]), "=d" (pnb[11]) : "a" (0x8000_0004);
836             }
837         }
838         else version (D_InlineAsm_X86)
839         {
840             asm pure nothrow @nogc {
841                 push ESI;
842                 mov ESI, pnb;
843                 mov EAX, 0x8000_0002;
844                 cpuid;
845                 mov [ESI], EAX;
846                 mov [ESI+4], EBX;
847                 mov [ESI+8], ECX;
848                 mov [ESI+12], EDX;
849                 mov EAX, 0x8000_0003;
850                 cpuid;
851                 mov [ESI+16], EAX;
852                 mov [ESI+20], EBX;
853                 mov [ESI+24], ECX;
854                 mov [ESI+28], EDX;
855                 mov EAX, 0x8000_0004;
856                 cpuid;
857                 mov [ESI+32], EAX;
858                 mov [ESI+36], EBX;
859                 mov [ESI+40], ECX;
860                 mov [ESI+44], EDX;
861                 pop ESI;
862             }
863         }
864         else version (D_InlineAsm_X86_64)
865         {
866             asm pure nothrow @nogc {
867                 push RSI;
868                 mov RSI, pnb;
869                 mov EAX, 0x8000_0002;
870                 cpuid;
871                 mov [RSI], EAX;
872                 mov [RSI+4], EBX;
873                 mov [RSI+8], ECX;
874                 mov [RSI+12], EDX;
875                 mov EAX, 0x8000_0003;
876                 cpuid;
877                 mov [RSI+16], EAX;
878                 mov [RSI+20], EBX;
879                 mov [RSI+24], ECX;
880                 mov [RSI+28], EDX;
881                 mov EAX, 0x8000_0004;
882                 cpuid;
883                 mov [RSI+32], EAX;
884                 mov [RSI+36], EBX;
885                 mov [RSI+40], ECX;
886                 mov [RSI+44], EDX;
887                 pop RSI;
888             }
889         }
890         // Intel P4 and PM pad at front with spaces.
891         // Other CPUs pad at end with nulls.
892         int start = 0, end = 0;
893         while (cf.processorNameBuffer[start] == ' ') { ++start; }
894         while (cf.processorNameBuffer[cf.processorNameBuffer.length-end-1] == 0) { ++end; }
895         cf.processorName = cast(string)(cf.processorNameBuffer[start..$-end]);
896     } else {
897         cf.processorName = "Unknown CPU";
898     }
899     // Determine cache sizes
900 
901     // Intel docs specify that they return 0 for 0x8000_0005.
902     // AMD docs do not specify the behaviour for 0004 and 0002.
903     // Centaur/VIA and most other manufacturers use the AMD method,
904     // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2!
905     // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour
906     // for CPUID80000005. But Geode GX uses the AMD method
907 
908     // Deal with Geode GX1 - make it same as MediaGX MMX.
909     if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) {
910         max_extended_cpuid = 0x8000_0004;
911     }
912     // Therefore, we try the AMD method unless it's an Intel chip.
913     // If we still have no info, try the Intel methods.
914     datacache[0].size = 0;
915     if (max_cpuid<2 || !cf.probablyIntel) {
916         if (max_extended_cpuid >= 0x8000_0005) {
917             getAMDcacheinfo();
918         } else if (cf.probablyAMD) {
919             // According to AMDProcRecognitionAppNote, this means CPU
920             // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4)
921             // Am5x86 has 16Kb 4-way unified data & code cache.
922             datacache[0].size = 8;
923             datacache[0].associativity = 4;
924             datacache[0].lineSize = 32;
925         } else {
926             // Some obscure CPU.
927             // Values for Cyrix 6x86MX (family 6, model 0)
928             datacache[0].size = 64;
929             datacache[0].associativity = 4;
930             datacache[0].lineSize = 32;
931         }
932     }
933     if ((datacache[0].size == 0) && max_cpuid>=4) {
934         getcacheinfoCPUID4();
935     }
936     if ((datacache[0].size == 0) && max_cpuid>=2) {
937         getcacheinfoCPUID2();
938     }
939     if (datacache[0].size == 0) {
940         // Pentium, PMMX, late model 486, or an obscure CPU
941         if (mmx) { // Pentium MMX. Also has 8kB code cache.
942             datacache[0].size = 16;
943             datacache[0].associativity = 4;
944             datacache[0].lineSize = 32;
945         } else { // Pentium 1 (which also has 8kB code cache)
946                  // or 486.
947             // Cyrix 6x86: 16, 4way, 32 linesize
948             datacache[0].size = 8;
949             datacache[0].associativity = 2;
950             datacache[0].lineSize = 32;
951         }
952     }
953     if (cf.probablyIntel && max_cpuid >= 0x0B) {
954         // For Intel i7 and later, use function 0x0B to determine
955         // cores and hyperthreads.
956         getCpuInfo0B();
957     } else {
958         if (hyperThreadingBit) cf.maxThreads = (apic>>>16) & 0xFF;
959         else cf.maxThreads = cf.maxCores;
960 
961         if (cf.probablyAMD && max_extended_cpuid >= 0x8000_001E) {
962             version (GNU_OR_LDC) asm pure nothrow @nogc {
963                 "cpuid" : "=a" (a), "=b" (b) : "a" (0x8000_001E) : "ecx", "edx";
964             } else {
965                 asm pure nothrow @nogc {
966                     mov EAX, 0x8000_001e;
967                     cpuid;
968                     mov b, EBX;
969                 }
970             }
971             ubyte coresPerComputeUnit = ((b >> 8) & 3) + 1;
972             cf.maxCores = cf.maxThreads / coresPerComputeUnit;
973         }
974     }
975 }
976 
// Reports whether the cpuid instruction can be used on this processor.
// BUG(WONTFIX): Cyrix 6x86 and 6x86L report false here, so they end up
// being treated as 486 machines.
bool hasCPUID()
{
    version (X86_64)
    {
        // No probing is done for 64-bit builds; the answer is always yes.
        return true;
    }
    else
    {
        // EFLAGS bit that the probe below tries to flip (the "ID bit").
        enum uint idBitMask = 0x0020_0000;

        // Once the probe has run, holds the EFLAGS bits that really changed.
        // Stays 0 (=> false) when neither asm flavour below is compiled in.
        uint changed;

        version (GNU_OR_LDC)
        {
            // Same technique as described at
            // http://wiki.osdev.org/CPUID#Checking_CPUID_availability
            asm nothrow @nogc { "
                pushfl                    # Save EFLAGS
                pushfl                    # Store EFLAGS
                xorl $0x00200000, (%%esp) # Invert the ID bit in stored EFLAGS
                popfl                     # Load stored EFLAGS (with ID bit inverted)
                pushfl                    # Store EFLAGS again (ID bit may or may not be inverted)
                popl %%eax                # eax = modified EFLAGS (ID bit may or may not be inverted)
                xorl (%%esp), %%eax       # eax = whichever bits were changed
                popfl                     # Restore original EFLAGS
                " : "=a" (changed);
            }
        }
        else version (D_InlineAsm_X86)
        {
            asm nothrow @nogc {
                pushfd;                 // EAX = current EFLAGS
                pop EAX;
                mov changed, EAX;       // remember the original value
                xor EAX, 0x0020_0000;   // flip the ID bit
                push EAX;
                popfd;                  // try to load the modified EFLAGS
                pushfd;                 // read EFLAGS back
                pop EAX;
                xor changed, EAX;       // changed = original ^ read-back
            }
        }

        // The instruction is available only if the ID bit could be toggled.
        return (changed & idBitMask) != 0;
    }
}
1018 
1019 } else { // supported X86
1020 
// Stub for the branch where the x86 code above is not compiled in:
// the cpuid instruction is reported as unavailable.
bool hasCPUID()
{
    return false;
}
1022 
// Stub counterpart of cpuidX86() for the branch where the x86 code above is
// not compiled in: no CPU query is made, the first data cache level is
// simply filled with fixed values.
void cpuidX86()
{
    auto firstLevel = &datacache[0];
    firstLevel.size = 8;
    firstLevel.associativity = 2;
    firstLevel.lineSize = 32;
}
1029 }
1030 
1031 /*
1032 // TODO: Implement this function with OS support
1033 void cpuidPPC()
1034 {
1035     enum :int  { PPC601, PPC603, PPC603E, PPC604,
1036                  PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }
1037 
1038     // TODO:
1039     // asm { mfpvr; } returns the CPU version but unfortunately it can
1040     // only be used in kernel mode. So OS support is required.
1041     int cputype = PPC603;
1042 
1043     // 601 has a 8KB combined data & code L1 cache.
1044     uint sizes[] = [4, 8, 16, 16, 32, 32, 32, 32, 64];
1045     ubyte ways[] = [8, 2,  4,  4,  4,  8,  8,  8,  8];
1046     uint L2size[]= [0, 0,  0,  0,  0,  0,  0,  256,  512];
1047     uint L3size[]= [0, 0,  0,  0,  0,  0,  0,  2048,  0];
1048 
1049     datacache[0].size = sizes[cputype];
1050     datacache[0].associativity = ways[cputype];
1051     datacache[0].lineSize = (cputype==PPCG5)? 128 :
1052         (cputype == PPC620 || cputype == PPCG3)? 64 : 32;
1053     datacache[1].size = L2size[cputype];
1054     datacache[2].size = L3size[cputype];
1055     datacache[1].lineSize = datacache[0].lineSize;
1056     datacache[2].lineSize = datacache[0].lineSize;
1057 }
1058 
1059 // TODO: Implement this function with OS support
1060 void cpuidSparc()
1061 {
1062     // UltraSparcIIi : L1 = 16,  2way. L2 = 512, 4 way.
1063     // UltraSparcIII : L1 = 64,  4way. L2= 4096 or 8192.
1064     // UltraSparcIIIi: L1 = 64,  4way. L2= 1024, 4 way
1065     // UltraSparcIV  : L1 = 64,  4way. L2 = 16*1024.
1066     // UltraSparcIV+ : L1 = 64,  4way. L2 = 2048, L3=32*1024.
1067     // Sparc64V      : L1 = 128, 2way. L2 = 4096 4way.
1068 }
1069 */
1070 
// Module constructor: probes the CPU once at start-up and publishes the
// results in the module's underscore-prefixed variables.
shared static this()
{
    auto feat = getCpuFeatures();

    // Without cpuid (a 386 or 486, or a Cyrix 6x86) nothing is queried.
    // Such a machine probably still has an external cache.
    if (hasCPUID())
        cpuidX86();

    // No size known for the first data cache level: guess same as Pentium 1.
    if (datacache[0].size == 0)
    {
        datacache[0].size = 8;
        datacache[0].associativity = 2;
        datacache[0].lineSize = 32;
    }

    // Count the cache levels that carry a size, and fill up all the unused
    // levels with full memory space.
    numCacheLevels = 1;
    foreach (level; 1 .. datacache.length)
    {
        if (datacache[level].size != 0)
        {
            ++numCacheLevels;
            continue;
        }
        // Set this remaining level of cache equal to full address space.
        datacache[level].size = size_t.max/1024;
        datacache[level].associativity = 1;
        datacache[level].lineSize = datacache[level-1].lineSize;
    }

    // Set the immortals

    _dataCaches = datacache;
    _vendor     = cast(string)feat.vendorID;
    _processor  = feat.processorName;

    _x87onChip    = (feat.features & FPU_BIT) != 0;
    _mmx          = (feat.features & MMX_BIT) != 0;
    _sse          = (feat.features & SSE_BIT) != 0;
    _sse2         = (feat.features & SSE2_BIT) != 0;
    _sse3         = (feat.miscfeatures & SSE3_BIT) != 0;
    _ssse3        = (feat.miscfeatures & SSSE3_BIT) != 0;
    _sse41        = (feat.miscfeatures & SSE41_BIT) != 0;
    _sse42        = (feat.miscfeatures & SSE42_BIT) != 0;
    _sse4a        = (feat.amdmiscfeatures & SSE4A_BIT) != 0;
    _aes          = (feat.miscfeatures & AES_BIT) != 0;
    _hasPclmulqdq = (feat.miscfeatures & PCLMULQDQ_BIT) != 0;
    _hasRdrand    = (feat.miscfeatures & RDRAND_BIT) != 0;

    // AVX is reported only when both state bits are set in xfeatures and the
    // AVX feature bit itself is set.
    enum avxStateMask = XF_SSE_BIT|XF_YMM_BIT;
    _avx = (feat.xfeatures & avxStateMask) == avxStateMask
        && (feat.miscfeatures & AVX_BIT) != 0;

    // Everything below that is gated on `avx` relies on _avx (and _aes,
    // _hasPclmulqdq) having been assigned above.
    _vaes          = avx && aes;
    _hasVpclmulqdq = avx && hasPclmulqdq;
    _fma           = avx && (feat.miscfeatures & FMA_BIT) != 0;
    _fp16c         = avx && (feat.miscfeatures & FP16C_BIT) != 0;
    _avx2          = avx && (feat.extfeatures & AVX2_BIT) != 0;

    _hle       = (feat.extfeatures & HLE_BIT) != 0;
    _rtm       = (feat.extfeatures & RTM_BIT) != 0;
    _hasRdseed = (feat.extfeatures & RDSEED_BIT) != 0;
    _hasSha    = (feat.extfeatures & SHA_BIT) != 0;

    _amd3dnow    = (feat.amdfeatures & AMD_3DNOW_BIT) != 0;
    _amd3dnowExt = (feat.amdfeatures & AMD_3DNOW_EXT_BIT) != 0;
    _amdMmx      = (feat.amdfeatures & AMD_MMX_BIT) != 0;

    _hasFxsr       = (feat.features & FXSR_BIT) != 0;
    _hasCmov       = (feat.features & CMOV_BIT) != 0;
    _hasRdtsc      = (feat.features & TIMESTAMP_BIT) != 0;
    _hasCmpxchg8b  = (feat.features & CMPXCHG8B_BIT) != 0;
    _hasCmpxchg16b = (feat.miscfeatures & CMPXCHG16B_BIT) != 0;

    // The SYSENTER/SYSEXIT features were buggy on Pentium Pro and early
    // PentiumII (REF: www.geoffchappell.com), so the feature bit is ignored
    // for those Intel parts.
    const bool sysenterUnreliable = feat.probablyIntel
        && (family < 6
            || (family == 6 && (model < 3 || (model == 3 && stepping < 3))));
    _hasSysEnterSysExit = !sysenterUnreliable
        && (feat.features & SYSENTERSYSEXIT_BIT) != 0;

    _has3dnowPrefetch = (feat.amdmiscfeatures & AMD_3DNOW_PREFETCH_BIT) != 0;
    _hasLahfSahf      = (feat.amdmiscfeatures & LAHFSAHF_BIT) != 0;
    _hasPopcnt        = (feat.miscfeatures & POPCNT_BIT) != 0;
    _hasLzcnt         = (feat.amdmiscfeatures & LZCNT_BIT) != 0;
    _isX86_64         = (feat.amdfeatures & AMD64_BIT) != 0;
    _isItanium        = (feat.features & IA64_BIT) != 0;

    _hyperThreading = feat.maxThreads > feat.maxCores;
    _threadsPerCPU  = feat.maxThreads;
    _coresPerCPU    = feat.maxCores;

    // Optimisation hints derived from vendor guess, family and model.
    _preferAthlon   = feat.probablyAMD && family >= 6;
    _preferPentium4 = feat.probablyIntel && family == 0xF;
    _preferPentium1 = family < 6 || (family == 6 && model < 0xF && !feat.probablyIntel);
}
1157