1 /* Subroutines for the gcc driver.
2 Copyright (C) 2006-2020 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #define IN_TARGET_CODE 1
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26
27 const char *host_detect_local_cpu (int argc, const char **argv);
28
29 #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
30 #include "cpuid.h"
31
/* Geometry of a single cache level as filled in by the CPUID-based
   detection routines below.  */

struct cache_desc
{
  /* Total capacity of the cache, in kilobytes.  */
  unsigned sizekb;

  /* Associativity (number of ways).  Recorded, but not used when the
     --param options are generated.  */
  unsigned assoc;

  /* Size of one cache line, in bytes.  */
  unsigned line;
};
38
39 /* Returns command line parameters that describe size and
40 cache line size of the processor caches. */
41
42 static char *
describe_cache(struct cache_desc level1,struct cache_desc level2)43 describe_cache (struct cache_desc level1, struct cache_desc level2)
44 {
45 char size[100], line[100], size2[100];
46
47 /* At the moment, gcc does not use the information
48 about the associativity of the cache. */
49
50 snprintf (size, sizeof (size),
51 "--param l1-cache-size=%u ", level1.sizekb);
52 snprintf (line, sizeof (line),
53 "--param l1-cache-line-size=%u ", level1.line);
54
55 snprintf (size2, sizeof (size2),
56 "--param l2-cache-size=%u ", level2.sizekb);
57
58 return concat (size, line, size2, NULL);
59 }
60
61 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */
62
63 static void
detect_l2_cache(struct cache_desc * level2)64 detect_l2_cache (struct cache_desc *level2)
65 {
66 unsigned eax, ebx, ecx, edx;
67 unsigned assoc;
68
69 __cpuid (0x80000006, eax, ebx, ecx, edx);
70
71 level2->sizekb = (ecx >> 16) & 0xffff;
72 level2->line = ecx & 0xff;
73
74 assoc = (ecx >> 12) & 0xf;
75 if (assoc == 6)
76 assoc = 8;
77 else if (assoc == 8)
78 assoc = 16;
79 else if (assoc >= 0xa && assoc <= 0xc)
80 assoc = 32 + (assoc - 0xa) * 16;
81 else if (assoc >= 0xd && assoc <= 0xe)
82 assoc = 96 + (assoc - 0xd) * 32;
83
84 level2->assoc = assoc;
85 }
86
87 /* Returns the description of caches for an AMD processor. */
88
89 static const char *
detect_caches_amd(unsigned max_ext_level)90 detect_caches_amd (unsigned max_ext_level)
91 {
92 unsigned eax, ebx, ecx, edx;
93
94 struct cache_desc level1, level2 = {0, 0, 0};
95
96 if (max_ext_level < 0x80000005)
97 return "";
98
99 __cpuid (0x80000005, eax, ebx, ecx, edx);
100
101 level1.sizekb = (ecx >> 24) & 0xff;
102 level1.assoc = (ecx >> 16) & 0xff;
103 level1.line = ecx & 0xff;
104
105 if (max_ext_level >= 0x80000006)
106 detect_l2_cache (&level2);
107
108 return describe_cache (level1, level2);
109 }
110
111 /* Decodes the size, the associativity and the cache line size of
112 L1/L2 caches of an Intel processor. Values are based on
113 "Intel Processor Identification and the CPUID Instruction"
114 [Application Note 485], revision -032, December 2007. */
115
116 static void
decode_caches_intel(unsigned reg,bool xeon_mp,struct cache_desc * level1,struct cache_desc * level2)117 decode_caches_intel (unsigned reg, bool xeon_mp,
118 struct cache_desc *level1, struct cache_desc *level2)
119 {
120 int i;
121
122 for (i = 24; i >= 0; i -= 8)
123 switch ((reg >> i) & 0xff)
124 {
125 case 0x0a:
126 level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
127 break;
128 case 0x0c:
129 level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
130 break;
131 case 0x0d:
132 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
133 break;
134 case 0x0e:
135 level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
136 break;
137 case 0x21:
138 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
139 break;
140 case 0x24:
141 level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
142 break;
143 case 0x2c:
144 level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
145 break;
146 case 0x39:
147 level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
148 break;
149 case 0x3a:
150 level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
151 break;
152 case 0x3b:
153 level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
154 break;
155 case 0x3c:
156 level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
157 break;
158 case 0x3d:
159 level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
160 break;
161 case 0x3e:
162 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
163 break;
164 case 0x41:
165 level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
166 break;
167 case 0x42:
168 level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
169 break;
170 case 0x43:
171 level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
172 break;
173 case 0x44:
174 level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
175 break;
176 case 0x45:
177 level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
178 break;
179 case 0x48:
180 level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
181 break;
182 case 0x49:
183 if (xeon_mp)
184 break;
185 level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
186 break;
187 case 0x4e:
188 level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
189 break;
190 case 0x60:
191 level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
192 break;
193 case 0x66:
194 level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
195 break;
196 case 0x67:
197 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
198 break;
199 case 0x68:
200 level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
201 break;
202 case 0x78:
203 level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
204 break;
205 case 0x79:
206 level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
207 break;
208 case 0x7a:
209 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
210 break;
211 case 0x7b:
212 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
213 break;
214 case 0x7c:
215 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
216 break;
217 case 0x7d:
218 level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
219 break;
220 case 0x7f:
221 level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
222 break;
223 case 0x80:
224 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
225 break;
226 case 0x82:
227 level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
228 break;
229 case 0x83:
230 level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
231 break;
232 case 0x84:
233 level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
234 break;
235 case 0x85:
236 level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
237 break;
238 case 0x86:
239 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
240 break;
241 case 0x87:
242 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
243
244 default:
245 break;
246 }
247 }
248
/* Detect cache parameters using CPUID function 2.

   The low bits of EAX returned by function 2 hold the number of times
   the instruction has to be executed to obtain all descriptors; the
   remaining bytes of EAX, EBX, ECX and EDX are one-byte descriptors
   which are handed to decode_caches_intel.  A register whose bit 31 is
   set, or which is zero, carries no descriptors and is skipped.

   XEON_MP is forwarded to decode_caches_intel; *LEVEL1 and *LEVEL2 are
   updated with whatever the descriptors describe.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
                      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int nreps, i;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  nreps = regs[0] & 0x0f;

  while (--nreps >= 0)
    {
      /* The iteration count is not a cache descriptor.  Strip it after
         every query, not only after the first one, so that it is never
         passed to the descriptor decoder.  */
      regs[0] &= ~0x0f;

      for (i = 0; i < 4; i++)
        if (regs[i] && !((regs[i] >> 31) & 1))
          decode_caches_intel (regs[i], xeon_mp, level1, level2);

      if (nreps)
        __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
273
274 /* Detect cache parameters using CPUID function 4. This
275 method doesn't require hardcoded tables. */
276
/* Values of the cache type field (EAX bits 4:0) returned by CPUID
   function 4, as interpreted by detect_caches_cpuid4.  */

enum cache_type
{
  /* No more caches to enumerate.  */
  CACHE_END = 0,
  /* Data cache.  */
  CACHE_DATA,
  /* Instruction cache.  */
  CACHE_INST,
  /* Unified (data and instruction) cache.  */
  CACHE_UNIFIED
};
284
285 static void
detect_caches_cpuid4(struct cache_desc * level1,struct cache_desc * level2,struct cache_desc * level3)286 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
287 struct cache_desc *level3)
288 {
289 struct cache_desc *cache;
290
291 unsigned eax, ebx, ecx, edx;
292 int count;
293
294 for (count = 0;; count++)
295 {
296 __cpuid_count(4, count, eax, ebx, ecx, edx);
297 switch (eax & 0x1f)
298 {
299 case CACHE_END:
300 return;
301 case CACHE_DATA:
302 case CACHE_UNIFIED:
303 {
304 switch ((eax >> 5) & 0x07)
305 {
306 case 1:
307 cache = level1;
308 break;
309 case 2:
310 cache = level2;
311 break;
312 case 3:
313 cache = level3;
314 break;
315 default:
316 cache = NULL;
317 }
318
319 if (cache)
320 {
321 unsigned sets = ecx + 1;
322 unsigned part = ((ebx >> 12) & 0x03ff) + 1;
323
324 cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
325 cache->line = (ebx & 0x0fff) + 1;
326
327 cache->sizekb = (cache->assoc * part
328 * cache->line * sets) / 1024;
329 }
330 }
331 default:
332 break;
333 }
334 }
335 }
336
337 /* Returns the description of caches for an Intel processor. */
338
339 static const char *
detect_caches_intel(bool xeon_mp,unsigned max_level,unsigned max_ext_level,unsigned * l2sizekb)340 detect_caches_intel (bool xeon_mp, unsigned max_level,
341 unsigned max_ext_level, unsigned *l2sizekb)
342 {
343 struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
344
345 if (max_level >= 4)
346 detect_caches_cpuid4 (&level1, &level2, &level3);
347 else if (max_level >= 2)
348 detect_caches_cpuid2 (xeon_mp, &level1, &level2);
349 else
350 return "";
351
352 if (level1.sizekb == 0)
353 return "";
354
355 /* Let the L3 replace the L2. This assumes inclusive caches
356 and single threaded program for now. */
357 if (level3.sizekb)
358 level2 = level3;
359
360 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
361 method if other methods fail to provide L2 cache parameters. */
362 if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
363 detect_l2_cache (&level2);
364
365 *l2sizekb = level2.sizekb;
366
367 return describe_cache (level1, level2);
368 }
369
370 /* This will be called by the spec parser in gcc.c when it sees
371 a %:local_cpu_detect(args) construct. Currently it will be called
372 with either "arch" or "tune" as argument depending on if -march=native
373 or -mtune=native is to be substituted.
374
375 It returns a string containing new command line parameters to be
376 put at the place of the above two options, depending on what CPU
377 this is executed. E.g. "-march=k8" on an AMD64 machine
378 for -march=native.
379
380 ARGC and ARGV are set depending on the actual arguments given
381 in the spec. */
382
host_detect_local_cpu(int argc,const char ** argv)383 const char *host_detect_local_cpu (int argc, const char **argv)
384 {
385 enum processor_type processor = PROCESSOR_I386;
386 const char *cpu = "i386";
387
388 const char *cache = "";
389 const char *options = "";
390
391 unsigned int eax, ebx, ecx, edx;
392
393 unsigned int max_level, ext_level;
394
395 unsigned int vendor;
396 unsigned int model, family;
397
398 unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
399 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
400
401 /* Extended features */
402 unsigned int has_lahf_lm = 0, has_sse4a = 0;
403 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
404 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
405 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
406 unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
407 unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
408 unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
409 unsigned int has_hle = 0, has_rtm = 0, has_sgx = 0;
410 unsigned int has_pconfig = 0, has_wbnoinvd = 0;
411 unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
412 unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
413 unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
414 unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
415 unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
416 unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
417 unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
418 unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
419 unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0;
420 unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
421 unsigned int has_gfni = 0, has_avx512vbmi2 = 0;
422 unsigned int has_avx512bitalg = 0;
423 unsigned int has_avx512vpopcntdq = 0;
424 unsigned int has_shstk = 0;
425 unsigned int has_avx512vnni = 0, has_vaes = 0;
426 unsigned int has_vpclmulqdq = 0;
427 unsigned int has_avx512vp2intersect = 0;
428 unsigned int has_movdiri = 0, has_movdir64b = 0;
429 unsigned int has_enqcmd = 0;
430 unsigned int has_waitpkg = 0;
431 unsigned int has_cldemote = 0;
432 unsigned int has_avx512bf16 = 0;
433
434 unsigned int has_ptwrite = 0;
435
436 bool arch;
437
438 unsigned int l2sizekb = 0;
439
440 if (argc < 1)
441 return NULL;
442
443 arch = !strcmp (argv[0], "arch");
444
445 if (!arch && strcmp (argv[0], "tune"))
446 return NULL;
447
448 max_level = __get_cpuid_max (0, &vendor);
449 if (max_level < 1)
450 goto done;
451
452 __cpuid (1, eax, ebx, ecx, edx);
453
454 model = (eax >> 4) & 0x0f;
455 family = (eax >> 8) & 0x0f;
456 if (vendor == signature_INTEL_ebx
457 || vendor == signature_AMD_ebx)
458 {
459 unsigned int extended_model, extended_family;
460
461 extended_model = (eax >> 12) & 0xf0;
462 extended_family = (eax >> 20) & 0xff;
463 if (family == 0x0f)
464 {
465 family += extended_family;
466 model += extended_model;
467 }
468 else if (family == 0x06)
469 model += extended_model;
470 }
471
472 has_sse3 = ecx & bit_SSE3;
473 has_ssse3 = ecx & bit_SSSE3;
474 has_sse4_1 = ecx & bit_SSE4_1;
475 has_sse4_2 = ecx & bit_SSE4_2;
476 has_avx = ecx & bit_AVX;
477 has_osxsave = ecx & bit_OSXSAVE;
478 has_cmpxchg16b = ecx & bit_CMPXCHG16B;
479 has_movbe = ecx & bit_MOVBE;
480 has_popcnt = ecx & bit_POPCNT;
481 has_aes = ecx & bit_AES;
482 has_pclmul = ecx & bit_PCLMUL;
483 has_fma = ecx & bit_FMA;
484 has_f16c = ecx & bit_F16C;
485 has_rdrnd = ecx & bit_RDRND;
486 has_xsave = ecx & bit_XSAVE;
487
488 has_cmpxchg8b = edx & bit_CMPXCHG8B;
489 has_cmov = edx & bit_CMOV;
490 has_mmx = edx & bit_MMX;
491 has_fxsr = edx & bit_FXSAVE;
492 has_sse = edx & bit_SSE;
493 has_sse2 = edx & bit_SSE2;
494
495 if (max_level >= 7)
496 {
497 __cpuid_count (7, 0, eax, ebx, ecx, edx);
498
499 has_bmi = ebx & bit_BMI;
500 has_sgx = ebx & bit_SGX;
501 has_hle = ebx & bit_HLE;
502 has_rtm = ebx & bit_RTM;
503 has_avx2 = ebx & bit_AVX2;
504 has_bmi2 = ebx & bit_BMI2;
505 has_fsgsbase = ebx & bit_FSGSBASE;
506 has_rdseed = ebx & bit_RDSEED;
507 has_adx = ebx & bit_ADX;
508 has_avx512f = ebx & bit_AVX512F;
509 has_avx512er = ebx & bit_AVX512ER;
510 has_avx512pf = ebx & bit_AVX512PF;
511 has_avx512cd = ebx & bit_AVX512CD;
512 has_sha = ebx & bit_SHA;
513 has_clflushopt = ebx & bit_CLFLUSHOPT;
514 has_clwb = ebx & bit_CLWB;
515 has_avx512dq = ebx & bit_AVX512DQ;
516 has_avx512bw = ebx & bit_AVX512BW;
517 has_avx512vl = ebx & bit_AVX512VL;
518 has_avx512ifma = ebx & bit_AVX512IFMA;
519
520 has_prefetchwt1 = ecx & bit_PREFETCHWT1;
521 has_avx512vbmi = ecx & bit_AVX512VBMI;
522 has_pku = ecx & bit_OSPKE;
523 has_avx512vbmi2 = ecx & bit_AVX512VBMI2;
524 has_avx512vnni = ecx & bit_AVX512VNNI;
525 has_rdpid = ecx & bit_RDPID;
526 has_gfni = ecx & bit_GFNI;
527 has_vaes = ecx & bit_VAES;
528 has_vpclmulqdq = ecx & bit_VPCLMULQDQ;
529 has_avx512bitalg = ecx & bit_AVX512BITALG;
530 has_avx512vpopcntdq = ecx & bit_AVX512VPOPCNTDQ;
531 has_movdiri = ecx & bit_MOVDIRI;
532 has_movdir64b = ecx & bit_MOVDIR64B;
533 has_enqcmd = ecx & bit_ENQCMD;
534 has_cldemote = ecx & bit_CLDEMOTE;
535
536 has_avx5124vnniw = edx & bit_AVX5124VNNIW;
537 has_avx5124fmaps = edx & bit_AVX5124FMAPS;
538 has_avx512vp2intersect = edx & bit_AVX512VP2INTERSECT;
539
540 has_shstk = ecx & bit_SHSTK;
541 has_pconfig = edx & bit_PCONFIG;
542 has_waitpkg = ecx & bit_WAITPKG;
543
544 __cpuid_count (7, 1, eax, ebx, ecx, edx);
545 has_avx512bf16 = eax & bit_AVX512BF16;
546 }
547
548 if (max_level >= 13)
549 {
550 __cpuid_count (13, 1, eax, ebx, ecx, edx);
551
552 has_xsaveopt = eax & bit_XSAVEOPT;
553 has_xsavec = eax & bit_XSAVEC;
554 has_xsaves = eax & bit_XSAVES;
555 }
556
557 if (max_level >= 0x14)
558 {
559 __cpuid_count (0x14, 0, eax, ebx, ecx, edx);
560
561 has_ptwrite = ebx & bit_PTWRITE;
562 }
563
564 /* Check cpuid level of extended features. */
565 __cpuid (0x80000000, ext_level, ebx, ecx, edx);
566
567 if (ext_level >= 0x80000001)
568 {
569 __cpuid (0x80000001, eax, ebx, ecx, edx);
570
571 has_lahf_lm = ecx & bit_LAHF_LM;
572 has_sse4a = ecx & bit_SSE4a;
573 has_abm = ecx & bit_ABM;
574 has_lwp = ecx & bit_LWP;
575 has_fma4 = ecx & bit_FMA4;
576 has_xop = ecx & bit_XOP;
577 has_tbm = ecx & bit_TBM;
578 has_lzcnt = ecx & bit_LZCNT;
579 has_prfchw = ecx & bit_PRFCHW;
580
581 has_longmode = edx & bit_LM;
582 has_3dnowp = edx & bit_3DNOWP;
583 has_3dnow = edx & bit_3DNOW;
584 has_mwaitx = ecx & bit_MWAITX;
585 }
586
587 if (ext_level >= 0x80000008)
588 {
589 __cpuid (0x80000008, eax, ebx, ecx, edx);
590 has_clzero = ebx & bit_CLZERO;
591 has_wbnoinvd = ebx & bit_WBNOINVD;
592 }
593
594 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
595 #define XCR_XFEATURE_ENABLED_MASK 0x0
596 #define XSTATE_FP 0x1
597 #define XSTATE_SSE 0x2
598 #define XSTATE_YMM 0x4
599 #define XSTATE_OPMASK 0x20
600 #define XSTATE_ZMM 0x40
601 #define XSTATE_HI_ZMM 0x80
602
603 #define XCR_AVX_ENABLED_MASK \
604 (XSTATE_SSE | XSTATE_YMM)
605 #define XCR_AVX512F_ENABLED_MASK \
606 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
607
608 if (has_osxsave)
609 asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
610 : "=a" (eax), "=d" (edx)
611 : "c" (XCR_XFEATURE_ENABLED_MASK));
612 else
613 eax = 0;
614
615 /* Check if AVX registers are supported. */
616 if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK)
617 {
618 has_avx = 0;
619 has_avx2 = 0;
620 has_fma = 0;
621 has_fma4 = 0;
622 has_f16c = 0;
623 has_xop = 0;
624 has_xsave = 0;
625 has_xsaveopt = 0;
626 has_xsaves = 0;
627 has_xsavec = 0;
628 }
629
630 /* Check if AVX512F registers are supported. */
631 if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK)
632 {
633 has_avx512f = 0;
634 has_avx512er = 0;
635 has_avx512pf = 0;
636 has_avx512cd = 0;
637 has_avx512dq = 0;
638 has_avx512bw = 0;
639 has_avx512vl = 0;
640 }
641
642 if (!arch)
643 {
644 if (vendor == signature_AMD_ebx
645 || vendor == signature_CENTAUR_ebx
646 || vendor == signature_CYRIX_ebx
647 || vendor == signature_NSC_ebx)
648 cache = detect_caches_amd (ext_level);
649 else if (vendor == signature_INTEL_ebx)
650 {
651 bool xeon_mp = (family == 15 && model == 6);
652 cache = detect_caches_intel (xeon_mp, max_level,
653 ext_level, &l2sizekb);
654 }
655 }
656
657 if (vendor == signature_AMD_ebx)
658 {
659 unsigned int name;
660
661 /* Detect geode processor by its processor signature. */
662 if (ext_level >= 0x80000002)
663 __cpuid (0x80000002, name, ebx, ecx, edx);
664 else
665 name = 0;
666
667 if (name == signature_NSC_ebx)
668 processor = PROCESSOR_GEODE;
669 else if (has_movbe && family == 22)
670 processor = PROCESSOR_BTVER2;
671 else if (has_vaes)
672 processor = PROCESSOR_ZNVER3;
673 else if (has_clwb)
674 processor = PROCESSOR_ZNVER2;
675 else if (has_clzero)
676 processor = PROCESSOR_ZNVER1;
677 else if (has_avx2)
678 processor = PROCESSOR_BDVER4;
679 else if (has_xsaveopt)
680 processor = PROCESSOR_BDVER3;
681 else if (has_bmi)
682 processor = PROCESSOR_BDVER2;
683 else if (has_xop)
684 processor = PROCESSOR_BDVER1;
685 else if (has_sse4a && has_ssse3)
686 processor = PROCESSOR_BTVER1;
687 else if (has_sse4a)
688 processor = PROCESSOR_AMDFAM10;
689 else if (has_sse2 || has_longmode)
690 processor = PROCESSOR_K8;
691 else if (has_3dnowp && family == 6)
692 processor = PROCESSOR_ATHLON;
693 else if (has_mmx)
694 processor = PROCESSOR_K6;
695 else
696 processor = PROCESSOR_PENTIUM;
697 }
698 else if (vendor == signature_CENTAUR_ebx)
699 {
700 processor = PROCESSOR_GENERIC;
701
702 switch (family)
703 {
704 default:
705 /* We have no idea. */
706 break;
707
708 case 5:
709 if (has_3dnow || has_mmx)
710 processor = PROCESSOR_I486;
711 break;
712
713 case 6:
714 if (has_longmode)
715 processor = PROCESSOR_K8;
716 else if (model >= 9)
717 processor = PROCESSOR_PENTIUMPRO;
718 else if (model >= 6)
719 processor = PROCESSOR_I486;
720 }
721 }
722 else
723 {
724 switch (family)
725 {
726 case 4:
727 processor = PROCESSOR_I486;
728 break;
729 case 5:
730 processor = PROCESSOR_PENTIUM;
731 break;
732 case 6:
733 processor = PROCESSOR_PENTIUMPRO;
734 break;
735 case 15:
736 processor = PROCESSOR_PENTIUM4;
737 break;
738 default:
739 /* We have no idea. */
740 processor = PROCESSOR_GENERIC;
741 }
742 }
743
744 switch (processor)
745 {
746 case PROCESSOR_I386:
747 /* Default. */
748 break;
749 case PROCESSOR_I486:
750 if (arch && vendor == signature_CENTAUR_ebx)
751 {
752 if (model >= 6)
753 cpu = "c3";
754 else if (has_3dnow)
755 cpu = "winchip2";
756 else
757 /* Assume WinChip C6. */
758 cpu = "winchip-c6";
759 }
760 else
761 cpu = "i486";
762 break;
763 case PROCESSOR_PENTIUM:
764 if (arch && has_mmx)
765 cpu = "pentium-mmx";
766 else
767 cpu = "pentium";
768 break;
769 case PROCESSOR_PENTIUMPRO:
770 switch (model)
771 {
772 case 0x1c:
773 case 0x26:
774 /* Bonnell. */
775 cpu = "bonnell";
776 break;
777 case 0x37:
778 case 0x4a:
779 case 0x4d:
780 case 0x5d:
781 /* Silvermont. */
782 case 0x4c:
783 case 0x5a:
784 case 0x75:
785 /* Airmont. */
786 cpu = "silvermont";
787 break;
788 case 0x5c:
789 case 0x5f:
790 /* Goldmont. */
791 cpu = "goldmont";
792 break;
793 case 0x7a:
794 /* Goldmont Plus. */
795 cpu = "goldmont-plus";
796 break;
797 case 0x86:
798 case 0x96:
799 case 0x9c:
800 /* Tremont. */
801 cpu = "tremont";
802 break;
803 case 0x0f:
804 /* Merom. */
805 case 0x17:
806 case 0x1d:
807 /* Penryn. */
808 cpu = "core2";
809 break;
810 case 0x1a:
811 case 0x1e:
812 case 0x1f:
813 case 0x2e:
814 /* Nehalem. */
815 cpu = "nehalem";
816 break;
817 case 0x25:
818 case 0x2c:
819 case 0x2f:
820 /* Westmere. */
821 cpu = "westmere";
822 break;
823 case 0x2a:
824 case 0x2d:
825 /* Sandy Bridge. */
826 cpu = "sandybridge";
827 break;
828 case 0x3a:
829 case 0x3e:
830 /* Ivy Bridge. */
831 cpu = "ivybridge";
832 break;
833 case 0x3c:
834 case 0x3f:
835 case 0x45:
836 case 0x46:
837 /* Haswell. */
838 cpu = "haswell";
839 break;
840 case 0x3d:
841 case 0x47:
842 case 0x4f:
843 case 0x56:
844 /* Broadwell. */
845 cpu = "broadwell";
846 break;
847 case 0x4e:
848 case 0x5e:
849 /* Skylake. */
850 case 0x8e:
851 case 0x9e:
852 /* Kaby Lake. */
853 case 0xa5:
854 case 0xa6:
855 /* Comet Lake. */
856 cpu = "skylake";
857 break;
858 case 0x55:
859 if (has_avx512vnni)
860 /* Cascade Lake. */
861 cpu = "cascadelake";
862 else
863 /* Skylake with AVX-512. */
864 cpu = "skylake-avx512";
865 break;
866 case 0x6a:
867 case 0x6c:
868 /* Ice Lake server. */
869 cpu = "icelake-server";
870 break;
871 case 0x7e:
872 case 0x7d:
873 case 0x9d:
874 /* Ice Lake client. */
875 cpu = "icelake-client";
876 break;
877 case 0x8c:
878 case 0x8d:
879 /* Tiger Lake. */
880 cpu = "tigerlake";
881 break;
882 case 0x57:
883 /* Knights Landing. */
884 cpu = "knl";
885 break;
886 case 0x66:
887 /* Cannon Lake. */
888 cpu = "cannonlake";
889 break;
890 case 0x85:
891 /* Knights Mill. */
892 cpu = "knm";
893 break;
894 default:
895 if (arch)
896 {
897 /* This is unknown family 0x6 CPU. */
898 if (has_avx)
899 {
900 /* Assume Tiger Lake */
901 if (has_avx512vp2intersect)
902 cpu = "tigerlake";
903 /* Assume Cooper Lake */
904 else if (has_avx512bf16)
905 cpu = "cooperlake";
906 /* Assume Ice Lake Server. */
907 else if (has_wbnoinvd)
908 cpu = "icelake-server";
909 /* Assume Ice Lake. */
910 else if (has_avx512bitalg)
911 cpu = "icelake-client";
912 /* Assume Cannon Lake. */
913 else if (has_avx512vbmi)
914 cpu = "cannonlake";
915 /* Assume Knights Mill. */
916 else if (has_avx5124vnniw)
917 cpu = "knm";
918 /* Assume Knights Landing. */
919 else if (has_avx512er)
920 cpu = "knl";
921 /* Assume Skylake with AVX-512. */
922 else if (has_avx512f)
923 cpu = "skylake-avx512";
924 /* Assume Skylake. */
925 else if (has_clflushopt)
926 cpu = "skylake";
927 /* Assume Broadwell. */
928 else if (has_adx)
929 cpu = "broadwell";
930 else if (has_avx2)
931 /* Assume Haswell. */
932 cpu = "haswell";
933 else
934 /* Assume Sandy Bridge. */
935 cpu = "sandybridge";
936 }
937 else if (has_sse4_2)
938 {
939 if (has_gfni)
940 /* Assume Tremont. */
941 cpu = "tremont";
942 else if (has_sgx)
943 /* Assume Goldmont Plus. */
944 cpu = "goldmont-plus";
945 else if (has_xsave)
946 /* Assume Goldmont. */
947 cpu = "goldmont";
948 else if (has_movbe)
949 /* Assume Silvermont. */
950 cpu = "silvermont";
951 else
952 /* Assume Nehalem. */
953 cpu = "nehalem";
954 }
955 else if (has_ssse3)
956 {
957 if (has_movbe)
958 /* Assume Bonnell. */
959 cpu = "bonnell";
960 else
961 /* Assume Core 2. */
962 cpu = "core2";
963 }
964 else if (has_longmode)
965 /* Perhaps some emulator? Assume x86-64, otherwise gcc
966 -march=native would be unusable for 64-bit compilations,
967 as all the CPUs below are 32-bit only. */
968 cpu = "x86-64";
969 else if (has_sse3)
970 {
971 if (vendor == signature_CENTAUR_ebx)
972 /* C7 / Eden "Esther" */
973 cpu = "c7";
974 else
975 /* It is Core Duo. */
976 cpu = "pentium-m";
977 }
978 else if (has_sse2)
979 /* It is Pentium M. */
980 cpu = "pentium-m";
981 else if (has_sse)
982 {
983 if (vendor == signature_CENTAUR_ebx)
984 {
985 if (model >= 9)
986 /* Eden "Nehemiah" */
987 cpu = "nehemiah";
988 else
989 cpu = "c3-2";
990 }
991 else
992 /* It is Pentium III. */
993 cpu = "pentium3";
994 }
995 else if (has_mmx)
996 /* It is Pentium II. */
997 cpu = "pentium2";
998 else
999 /* Default to Pentium Pro. */
1000 cpu = "pentiumpro";
1001 }
1002 else
1003 /* For -mtune, we default to -mtune=generic. */
1004 cpu = "generic";
1005 break;
1006 }
1007 break;
1008 case PROCESSOR_PENTIUM4:
1009 if (has_sse3)
1010 {
1011 if (has_longmode)
1012 cpu = "nocona";
1013 else
1014 cpu = "prescott";
1015 }
1016 else
1017 cpu = "pentium4";
1018 break;
1019 case PROCESSOR_GEODE:
1020 cpu = "geode";
1021 break;
1022 case PROCESSOR_K6:
1023 if (arch && has_3dnow)
1024 cpu = "k6-3";
1025 else
1026 cpu = "k6";
1027 break;
1028 case PROCESSOR_ATHLON:
1029 if (arch && has_sse)
1030 cpu = "athlon-4";
1031 else
1032 cpu = "athlon";
1033 break;
1034 case PROCESSOR_K8:
1035 if (arch)
1036 {
1037 if (vendor == signature_CENTAUR_ebx)
1038 {
1039 if (has_sse4_1)
1040 /* Nano 3000 | Nano dual / quad core | Eden X4 */
1041 cpu = "nano-3000";
1042 else if (has_ssse3)
1043 /* Nano 1000 | Nano 2000 */
1044 cpu = "nano";
1045 else if (has_sse3)
1046 /* Eden X2 */
1047 cpu = "eden-x2";
1048 else
1049 /* Default to k8 */
1050 cpu = "k8";
1051 }
1052 else if (has_sse3)
1053 cpu = "k8-sse3";
1054 else
1055 cpu = "k8";
1056 }
1057 else
1058 /* For -mtune, we default to -mtune=k8 */
1059 cpu = "k8";
1060 break;
1061 case PROCESSOR_AMDFAM10:
1062 cpu = "amdfam10";
1063 break;
1064 case PROCESSOR_BDVER1:
1065 cpu = "bdver1";
1066 break;
1067 case PROCESSOR_BDVER2:
1068 cpu = "bdver2";
1069 break;
1070 case PROCESSOR_BDVER3:
1071 cpu = "bdver3";
1072 break;
1073 case PROCESSOR_BDVER4:
1074 cpu = "bdver4";
1075 break;
1076 case PROCESSOR_ZNVER1:
1077 cpu = "znver1";
1078 break;
1079 case PROCESSOR_ZNVER2:
1080 cpu = "znver2";
1081 break;
1082 case PROCESSOR_ZNVER3:
1083 cpu = "znver3";
1084 break;
1085 case PROCESSOR_BTVER1:
1086 cpu = "btver1";
1087 break;
1088 case PROCESSOR_BTVER2:
1089 cpu = "btver2";
1090 break;
1091
1092 default:
1093 /* Use something reasonable. */
1094 if (arch)
1095 {
1096 if (has_ssse3)
1097 cpu = "core2";
1098 else if (has_sse3)
1099 {
1100 if (has_longmode)
1101 cpu = "nocona";
1102 else
1103 cpu = "prescott";
1104 }
1105 else if (has_longmode)
1106 /* Perhaps some emulator? Assume x86-64, otherwise gcc
1107 -march=native would be unusable for 64-bit compilations,
1108 as all the CPUs below are 32-bit only. */
1109 cpu = "x86-64";
1110 else if (has_sse2)
1111 cpu = "pentium4";
1112 else if (has_cmov)
1113 cpu = "pentiumpro";
1114 else if (has_mmx)
1115 cpu = "pentium-mmx";
1116 else if (has_cmpxchg8b)
1117 cpu = "pentium";
1118 }
1119 else
1120 cpu = "generic";
1121 }
1122
1123 if (arch)
1124 {
1125 const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
1126 const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
1127 const char *sse = has_sse ? " -msse" : " -mno-sse";
1128 const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
1129 const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
1130 const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
1131 const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
1132 const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
1133 const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
1134 const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
1135 const char *aes = has_aes ? " -maes" : " -mno-aes";
1136 const char *sha = has_sha ? " -msha" : " -mno-sha";
1137 const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
1138 const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
1139 const char *abm = has_abm ? " -mabm" : " -mno-abm";
1140 const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
1141 const char *fma = has_fma ? " -mfma" : " -mno-fma";
1142 const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
1143 const char *xop = has_xop ? " -mxop" : " -mno-xop";
1144 const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
1145 const char *pconfig = has_pconfig ? " -mpconfig" : " -mno-pconfig";
1146 const char *wbnoinvd = has_wbnoinvd ? " -mwbnoinvd" : " -mno-wbnoinvd";
1147 const char *sgx = has_sgx ? " -msgx" : " -mno-sgx";
1148 const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
1149 const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
1150 const char *avx = has_avx ? " -mavx" : " -mno-avx";
1151 const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
1152 const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
1153 const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
1154 const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
1155 const char *hle = has_hle ? " -mhle" : " -mno-hle";
1156 const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
1157 const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
1158 const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
1159 const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
1160 const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
1161 const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
1162 const char *adx = has_adx ? " -madx" : " -mno-adx";
1163 const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
1164 const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
1165 const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
1166 const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
1167 const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
1168 const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
1169 const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
1170 const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
1171 const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
1172 const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
1173 const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
1174 const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
1175 const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
1176 const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
1177 const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
1178 const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
1179 const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw";
1180 const char *avx512vbmi2 = has_avx512vbmi2 ? " -mavx512vbmi2" : " -mno-avx512vbmi2";
1181 const char *avx512vnni = has_avx512vnni ? " -mavx512vnni" : " -mno-avx512vnni";
1182 const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps";
1183 const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
1184 const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";
1185 const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero";
1186 const char *pku = has_pku ? " -mpku" : " -mno-pku";
1187 const char *rdpid = has_rdpid ? " -mrdpid" : " -mno-rdpid";
1188 const char *gfni = has_gfni ? " -mgfni" : " -mno-gfni";
1189 const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk";
1190 const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
1191 const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq";
1192 const char *avx512vp2intersect = has_avx512vp2intersect ? " -mavx512vp2intersect" : " -mno-avx512vp2intersect";
1193 const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
1194 const char *avx512vpopcntdq = has_avx512vpopcntdq ? " -mavx512vpopcntdq" : " -mno-avx512vpopcntdq";
1195 const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri";
1196 const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b";
1197 const char *enqcmd = has_enqcmd ? " -menqcmd" : " -mno-enqcmd";
1198 const char *waitpkg = has_waitpkg ? " -mwaitpkg" : " -mno-waitpkg";
1199 const char *cldemote = has_cldemote ? " -mcldemote" : " -mno-cldemote";
1200 const char *ptwrite = has_ptwrite ? " -mptwrite" : " -mno-ptwrite";
1201 const char *avx512bf16 = has_avx512bf16 ? " -mavx512bf16" : " -mno-avx512bf16";
1202
1203 options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
1204 sse4a, cx16, sahf, movbe, aes, sha, pclmul,
1205 popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
1206 pconfig, wbnoinvd,
1207 tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
1208 hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
1209 fxsr, xsave, xsaveopt, avx512f, avx512er,
1210 avx512cd, avx512pf, prefetchwt1, clflushopt,
1211 xsavec, xsaves, avx512dq, avx512bw, avx512vl,
1212 avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
1213 clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
1214 avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
1215 avx512bitalg, avx512vpopcntdq, movdiri, movdir64b,
1216 waitpkg, cldemote, ptwrite, avx512bf16, enqcmd,
1217 avx512vp2intersect, NULL);
1218 }
1219
1220 done:
1221 return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
1222 }
1223 #else
1224
/* Fallback for host compilers on which the CPUID-based detection above
   is not compiled in (GCC where the %EBX register is fixed).  Both
   arguments are ignored and NULL is always returned, so the driver
   just ignores the "native" target of -march and -mtune and leaves it
   to the newly built compiler to generate code for its default
   target.  */

const char *
host_detect_local_cpu (int argc, const char **argv)
{
  (void) argc;
  (void) argv;

  return NULL;
}
1233 #endif /* __GNUC__ */
1234