/*
 * Copyright (c) 2018-2020, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#ifndef ZSTD_COMMON_CPU_H
#define ZSTD_COMMON_CPU_H

/**
 * Implementation taken from folly/CpuId.h
 * https://github.com/facebook/folly/blob/master/folly/CpuId.h
 */

#include "mem.h"

#ifdef _MSC_VER
#include <intrin.h>
#endif

25*a28cd43dSSascha Wildner typedef struct {
26*a28cd43dSSascha Wildner U32 f1c;
27*a28cd43dSSascha Wildner U32 f1d;
28*a28cd43dSSascha Wildner U32 f7b;
29*a28cd43dSSascha Wildner U32 f7c;
30*a28cd43dSSascha Wildner } ZSTD_cpuid_t;
31*a28cd43dSSascha Wildner
ZSTD_cpuid(void)32*a28cd43dSSascha Wildner MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
33*a28cd43dSSascha Wildner U32 f1c = 0;
34*a28cd43dSSascha Wildner U32 f1d = 0;
35*a28cd43dSSascha Wildner U32 f7b = 0;
36*a28cd43dSSascha Wildner U32 f7c = 0;
37*a28cd43dSSascha Wildner #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
38*a28cd43dSSascha Wildner int reg[4];
39*a28cd43dSSascha Wildner __cpuid((int*)reg, 0);
40*a28cd43dSSascha Wildner {
41*a28cd43dSSascha Wildner int const n = reg[0];
42*a28cd43dSSascha Wildner if (n >= 1) {
43*a28cd43dSSascha Wildner __cpuid((int*)reg, 1);
44*a28cd43dSSascha Wildner f1c = (U32)reg[2];
45*a28cd43dSSascha Wildner f1d = (U32)reg[3];
46*a28cd43dSSascha Wildner }
47*a28cd43dSSascha Wildner if (n >= 7) {
48*a28cd43dSSascha Wildner __cpuidex((int*)reg, 7, 0);
49*a28cd43dSSascha Wildner f7b = (U32)reg[1];
50*a28cd43dSSascha Wildner f7c = (U32)reg[2];
51*a28cd43dSSascha Wildner }
52*a28cd43dSSascha Wildner }
53*a28cd43dSSascha Wildner #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
54*a28cd43dSSascha Wildner /* The following block like the normal cpuid branch below, but gcc
55*a28cd43dSSascha Wildner * reserves ebx for use of its pic register so we must specially
56*a28cd43dSSascha Wildner * handle the save and restore to avoid clobbering the register
57*a28cd43dSSascha Wildner */
58*a28cd43dSSascha Wildner U32 n;
59*a28cd43dSSascha Wildner __asm__(
60*a28cd43dSSascha Wildner "pushl %%ebx\n\t"
61*a28cd43dSSascha Wildner "cpuid\n\t"
62*a28cd43dSSascha Wildner "popl %%ebx\n\t"
63*a28cd43dSSascha Wildner : "=a"(n)
64*a28cd43dSSascha Wildner : "a"(0)
65*a28cd43dSSascha Wildner : "ecx", "edx");
66*a28cd43dSSascha Wildner if (n >= 1) {
67*a28cd43dSSascha Wildner U32 f1a;
68*a28cd43dSSascha Wildner __asm__(
69*a28cd43dSSascha Wildner "pushl %%ebx\n\t"
70*a28cd43dSSascha Wildner "cpuid\n\t"
71*a28cd43dSSascha Wildner "popl %%ebx\n\t"
72*a28cd43dSSascha Wildner : "=a"(f1a), "=c"(f1c), "=d"(f1d)
73*a28cd43dSSascha Wildner : "a"(1));
74*a28cd43dSSascha Wildner }
75*a28cd43dSSascha Wildner if (n >= 7) {
76*a28cd43dSSascha Wildner __asm__(
77*a28cd43dSSascha Wildner "pushl %%ebx\n\t"
78*a28cd43dSSascha Wildner "cpuid\n\t"
79*a28cd43dSSascha Wildner "movl %%ebx, %%eax\n\t"
80*a28cd43dSSascha Wildner "popl %%ebx"
81*a28cd43dSSascha Wildner : "=a"(f7b), "=c"(f7c)
82*a28cd43dSSascha Wildner : "a"(7), "c"(0)
83*a28cd43dSSascha Wildner : "edx");
84*a28cd43dSSascha Wildner }
85*a28cd43dSSascha Wildner #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
86*a28cd43dSSascha Wildner U32 n;
87*a28cd43dSSascha Wildner __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
88*a28cd43dSSascha Wildner if (n >= 1) {
89*a28cd43dSSascha Wildner U32 f1a;
90*a28cd43dSSascha Wildner __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
91*a28cd43dSSascha Wildner }
92*a28cd43dSSascha Wildner if (n >= 7) {
93*a28cd43dSSascha Wildner U32 f7a;
94*a28cd43dSSascha Wildner __asm__("cpuid"
95*a28cd43dSSascha Wildner : "=a"(f7a), "=b"(f7b), "=c"(f7c)
96*a28cd43dSSascha Wildner : "a"(7), "c"(0)
97*a28cd43dSSascha Wildner : "edx");
98*a28cd43dSSascha Wildner }
99*a28cd43dSSascha Wildner #endif
100*a28cd43dSSascha Wildner {
101*a28cd43dSSascha Wildner ZSTD_cpuid_t cpuid;
102*a28cd43dSSascha Wildner cpuid.f1c = f1c;
103*a28cd43dSSascha Wildner cpuid.f1d = f1d;
104*a28cd43dSSascha Wildner cpuid.f7b = f7b;
105*a28cd43dSSascha Wildner cpuid.f7c = f7c;
106*a28cd43dSSascha Wildner return cpuid;
107*a28cd43dSSascha Wildner }
108*a28cd43dSSascha Wildner }
109*a28cd43dSSascha Wildner
110*a28cd43dSSascha Wildner #define X(name, r, bit) \
111*a28cd43dSSascha Wildner MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
112*a28cd43dSSascha Wildner return ((cpuid.r) & (1U << bit)) != 0; \
113*a28cd43dSSascha Wildner }
114*a28cd43dSSascha Wildner
115*a28cd43dSSascha Wildner /* cpuid(1): Processor Info and Feature Bits. */
116*a28cd43dSSascha Wildner #define C(name, bit) X(name, f1c, bit)
117*a28cd43dSSascha Wildner C(sse3, 0)
118*a28cd43dSSascha Wildner C(pclmuldq, 1)
119*a28cd43dSSascha Wildner C(dtes64, 2)
120*a28cd43dSSascha Wildner C(monitor, 3)
121*a28cd43dSSascha Wildner C(dscpl, 4)
122*a28cd43dSSascha Wildner C(vmx, 5)
123*a28cd43dSSascha Wildner C(smx, 6)
124*a28cd43dSSascha Wildner C(eist, 7)
125*a28cd43dSSascha Wildner C(tm2, 8)
126*a28cd43dSSascha Wildner C(ssse3, 9)
127*a28cd43dSSascha Wildner C(cnxtid, 10)
128*a28cd43dSSascha Wildner C(fma, 12)
129*a28cd43dSSascha Wildner C(cx16, 13)
130*a28cd43dSSascha Wildner C(xtpr, 14)
131*a28cd43dSSascha Wildner C(pdcm, 15)
132*a28cd43dSSascha Wildner C(pcid, 17)
133*a28cd43dSSascha Wildner C(dca, 18)
134*a28cd43dSSascha Wildner C(sse41, 19)
135*a28cd43dSSascha Wildner C(sse42, 20)
136*a28cd43dSSascha Wildner C(x2apic, 21)
137*a28cd43dSSascha Wildner C(movbe, 22)
138*a28cd43dSSascha Wildner C(popcnt, 23)
139*a28cd43dSSascha Wildner C(tscdeadline, 24)
140*a28cd43dSSascha Wildner C(aes, 25)
141*a28cd43dSSascha Wildner C(xsave, 26)
142*a28cd43dSSascha Wildner C(osxsave, 27)
143*a28cd43dSSascha Wildner C(avx, 28)
144*a28cd43dSSascha Wildner C(f16c, 29)
145*a28cd43dSSascha Wildner C(rdrand, 30)
146*a28cd43dSSascha Wildner #undef C
147*a28cd43dSSascha Wildner #define D(name, bit) X(name, f1d, bit)
148*a28cd43dSSascha Wildner D(fpu, 0)
149*a28cd43dSSascha Wildner D(vme, 1)
150*a28cd43dSSascha Wildner D(de, 2)
151*a28cd43dSSascha Wildner D(pse, 3)
152*a28cd43dSSascha Wildner D(tsc, 4)
153*a28cd43dSSascha Wildner D(msr, 5)
154*a28cd43dSSascha Wildner D(pae, 6)
155*a28cd43dSSascha Wildner D(mce, 7)
156*a28cd43dSSascha Wildner D(cx8, 8)
157*a28cd43dSSascha Wildner D(apic, 9)
158*a28cd43dSSascha Wildner D(sep, 11)
159*a28cd43dSSascha Wildner D(mtrr, 12)
160*a28cd43dSSascha Wildner D(pge, 13)
161*a28cd43dSSascha Wildner D(mca, 14)
162*a28cd43dSSascha Wildner D(cmov, 15)
163*a28cd43dSSascha Wildner D(pat, 16)
164*a28cd43dSSascha Wildner D(pse36, 17)
165*a28cd43dSSascha Wildner D(psn, 18)
166*a28cd43dSSascha Wildner D(clfsh, 19)
167*a28cd43dSSascha Wildner D(ds, 21)
168*a28cd43dSSascha Wildner D(acpi, 22)
169*a28cd43dSSascha Wildner D(mmx, 23)
170*a28cd43dSSascha Wildner D(fxsr, 24)
171*a28cd43dSSascha Wildner D(sse, 25)
172*a28cd43dSSascha Wildner D(sse2, 26)
173*a28cd43dSSascha Wildner D(ss, 27)
174*a28cd43dSSascha Wildner D(htt, 28)
175*a28cd43dSSascha Wildner D(tm, 29)
176*a28cd43dSSascha Wildner D(pbe, 31)
177*a28cd43dSSascha Wildner #undef D
178*a28cd43dSSascha Wildner
179*a28cd43dSSascha Wildner /* cpuid(7): Extended Features. */
180*a28cd43dSSascha Wildner #define B(name, bit) X(name, f7b, bit)
181*a28cd43dSSascha Wildner B(bmi1, 3)
182*a28cd43dSSascha Wildner B(hle, 4)
183*a28cd43dSSascha Wildner B(avx2, 5)
184*a28cd43dSSascha Wildner B(smep, 7)
185*a28cd43dSSascha Wildner B(bmi2, 8)
186*a28cd43dSSascha Wildner B(erms, 9)
187*a28cd43dSSascha Wildner B(invpcid, 10)
188*a28cd43dSSascha Wildner B(rtm, 11)
189*a28cd43dSSascha Wildner B(mpx, 14)
190*a28cd43dSSascha Wildner B(avx512f, 16)
191*a28cd43dSSascha Wildner B(avx512dq, 17)
192*a28cd43dSSascha Wildner B(rdseed, 18)
193*a28cd43dSSascha Wildner B(adx, 19)
194*a28cd43dSSascha Wildner B(smap, 20)
195*a28cd43dSSascha Wildner B(avx512ifma, 21)
196*a28cd43dSSascha Wildner B(pcommit, 22)
197*a28cd43dSSascha Wildner B(clflushopt, 23)
198*a28cd43dSSascha Wildner B(clwb, 24)
199*a28cd43dSSascha Wildner B(avx512pf, 26)
200*a28cd43dSSascha Wildner B(avx512er, 27)
201*a28cd43dSSascha Wildner B(avx512cd, 28)
202*a28cd43dSSascha Wildner B(sha, 29)
203*a28cd43dSSascha Wildner B(avx512bw, 30)
204*a28cd43dSSascha Wildner B(avx512vl, 31)
205*a28cd43dSSascha Wildner #undef B
206*a28cd43dSSascha Wildner #define C(name, bit) X(name, f7c, bit)
207*a28cd43dSSascha Wildner C(prefetchwt1, 0)
208*a28cd43dSSascha Wildner C(avx512vbmi, 1)
209*a28cd43dSSascha Wildner #undef C
210*a28cd43dSSascha Wildner
211*a28cd43dSSascha Wildner #undef X
212*a28cd43dSSascha Wildner
#endif /* ZSTD_COMMON_CPU_H */