1 //===--- X86.cpp - Implement X86 target feature support -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements X86 TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "X86.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/Diagnostic.h"
16 #include "clang/Basic/TargetBuiltins.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Support/X86TargetParser.h"
21 #include <optional>
22
23 namespace clang {
24 namespace targets {
25
// Table describing the X86 builtins, assembled with X-macros: every BUILTIN,
// TARGET_BUILTIN and TARGET_HEADER_BUILTIN entry in the included .def files
// expands to one brace-initialized Builtin::Info element. Entries coming from
// BuiltinsX86.def are laid out first, followed by those from
// BuiltinsX86_64.def.
static constexpr Builtin::Info BuiltinInfoX86[] = {
// BUILTIN: no required feature (nullptr), no header, all languages.
#define BUILTIN(ID, TYPE, ATTRS) \
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
// TARGET_BUILTIN: like BUILTIN, but records the FEATURE string it is given.
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
// TARGET_HEADER_BUILTIN: additionally records the declaring HEADER and the
// LANGS value supplied by the .def entry.
#define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE) \
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS},
#include "clang/Basic/BuiltinsX86.def"

// NOTE(review): the three macros are defined again before the second include,
// presumably because the .def file above #undefs them at its end -- confirm
// against BuiltinsX86.def.
#define BUILTIN(ID, TYPE, ATTRS) \
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE) \
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS},
#include "clang/Basic/BuiltinsX86_64.def"
};
43
// Register name table. The position of a name in this array matters: the
// numbers stored in AddlRegNames line up with positions in this table, so
// entries must never be reordered. Each row below holds one register family
// and is annotated with the index range it occupies.
static const char *const GCCRegNames[] = {
    // [0, 7] general-purpose registers.
    "ax", "dx", "cx", "bx", "si", "di", "bp", "sp",
    // [8, 15] x87 stack registers.
    "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
    // [16, 21] pseudo and status registers.
    "argp", "flags", "fpcr", "fpsr", "dirflag", "frame",
    // [22, 29] xmm0-xmm7.
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
    // [30, 37] mm0-mm7.
    "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
    // [38, 45] r8-r15.
    "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
    // [46, 53] xmm8-xmm15.
    "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
    // [54, 69] ymm0-ymm15.
    "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
    "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15",
    // [70, 85] xmm16-xmm31.
    "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23",
    "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31",
    // [86, 101] ymm16-ymm31.
    "ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23",
    "ymm24", "ymm25", "ymm26", "ymm27", "ymm28", "ymm29", "ymm30", "ymm31",
    // [102, 133] zmm0-zmm31.
    "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7",
    "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15",
    "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23",
    "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31",
    // [134, 141] k0-k7.
    "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7",
    // [142, 146] control registers.
    "cr0", "cr2", "cr3", "cr4", "cr8",
    // [147, 152] debug registers.
    "dr0", "dr1", "dr2", "dr3", "dr6", "dr7",
    // [153, 156] bnd0-bnd3.
    "bnd0", "bnd1", "bnd2", "bnd3",
    // [157, 164] tmm0-tmm7.
    "tmm0", "tmm1", "tmm2", "tmm3", "tmm4", "tmm5", "tmm6", "tmm7",
};
68
// Alternative spellings for registers that already appear in GCCRegNames.
// Each entry pairs a list of extra names with a number; the numbers used here
// line up with the position of the corresponding base name in GCCRegNames
// (noted at the end of each row).
const TargetInfo::AddlRegName AddlRegNames[] = {
    {{"al", "ah", "eax", "rax"}, 0},   // "ax"
    {{"bl", "bh", "ebx", "rbx"}, 3},   // "bx"
    {{"cl", "ch", "ecx", "rcx"}, 2},   // "cx"
    {{"dl", "dh", "edx", "rdx"}, 1},   // "dx"
    {{"esi", "rsi"}, 4},               // "si"
    {{"edi", "rdi"}, 5},               // "di"
    {{"esp", "rsp"}, 7},               // "sp"
    {{"ebp", "rbp"}, 6},               // "bp"
    {{"r8d", "r8w", "r8b"}, 38},       // "r8"
    {{"r9d", "r9w", "r9b"}, 39},       // "r9"
    {{"r10d", "r10w", "r10b"}, 40},    // "r10"
    {{"r11d", "r11w", "r11b"}, 41},    // "r11"
    {{"r12d", "r12w", "r12b"}, 42},    // "r12"
    {{"r13d", "r13w", "r13b"}, 43},    // "r13"
    {{"r14d", "r14w", "r14b"}, 44},    // "r14"
    {{"r15d", "r15w", "r15b"}, 45},    // "r15"
};
87
88 } // namespace targets
89 } // namespace clang
90
91 using namespace clang;
92 using namespace clang::targets;
93
setFPMath(StringRef Name)94 bool X86TargetInfo::setFPMath(StringRef Name) {
95 if (Name == "387") {
96 FPMath = FP_387;
97 return true;
98 }
99 if (Name == "sse") {
100 FPMath = FP_SSE;
101 return true;
102 }
103 return false;
104 }
105
initFeatureMap(llvm::StringMap<bool> & Features,DiagnosticsEngine & Diags,StringRef CPU,const std::vector<std::string> & FeaturesVec) const106 bool X86TargetInfo::initFeatureMap(
107 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
108 const std::vector<std::string> &FeaturesVec) const {
109 // FIXME: This *really* should not be here.
110 // X86_64 always has SSE2.
111 if (getTriple().getArch() == llvm::Triple::x86_64)
112 setFeatureEnabled(Features, "sse2", true);
113
114 using namespace llvm::X86;
115
116 SmallVector<StringRef, 16> CPUFeatures;
117 getFeaturesForCPU(CPU, CPUFeatures);
118 for (auto &F : CPUFeatures)
119 setFeatureEnabled(Features, F, true);
120
121 std::vector<std::string> UpdatedFeaturesVec;
122 for (const auto &Feature : FeaturesVec) {
123 // Expand general-regs-only to -x86, -mmx and -sse
124 if (Feature == "+general-regs-only") {
125 UpdatedFeaturesVec.push_back("-x87");
126 UpdatedFeaturesVec.push_back("-mmx");
127 UpdatedFeaturesVec.push_back("-sse");
128 continue;
129 }
130
131 UpdatedFeaturesVec.push_back(Feature);
132 }
133
134 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, UpdatedFeaturesVec))
135 return false;
136
137 // Can't do this earlier because we need to be able to explicitly enable
138 // or disable these features and the things that they depend upon.
139
140 // Enable popcnt if sse4.2 is enabled and popcnt is not explicitly disabled.
141 auto I = Features.find("sse4.2");
142 if (I != Features.end() && I->getValue() &&
143 !llvm::is_contained(UpdatedFeaturesVec, "-popcnt"))
144 Features["popcnt"] = true;
145
146 // Additionally, if SSE is enabled and mmx is not explicitly disabled,
147 // then enable MMX.
148 I = Features.find("sse");
149 if (I != Features.end() && I->getValue() &&
150 !llvm::is_contained(UpdatedFeaturesVec, "-mmx"))
151 Features["mmx"] = true;
152
153 // Enable xsave if avx is enabled and xsave is not explicitly disabled.
154 I = Features.find("avx");
155 if (I != Features.end() && I->getValue() &&
156 !llvm::is_contained(UpdatedFeaturesVec, "-xsave"))
157 Features["xsave"] = true;
158
159 // Enable CRC32 if SSE4.2 is enabled and CRC32 is not explicitly disabled.
160 I = Features.find("sse4.2");
161 if (I != Features.end() && I->getValue() &&
162 !llvm::is_contained(UpdatedFeaturesVec, "-crc32"))
163 Features["crc32"] = true;
164
165 return true;
166 }
167
setFeatureEnabled(llvm::StringMap<bool> & Features,StringRef Name,bool Enabled) const168 void X86TargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
169 StringRef Name, bool Enabled) const {
170 if (Name == "sse4") {
171 // We can get here via the __target__ attribute since that's not controlled
172 // via the -msse4/-mno-sse4 command line alias. Handle this the same way
173 // here - turn on the sse4.2 if enabled, turn off the sse4.1 level if
174 // disabled.
175 if (Enabled)
176 Name = "sse4.2";
177 else
178 Name = "sse4.1";
179 }
180
181 Features[Name] = Enabled;
182 llvm::X86::updateImpliedFeatures(Name, Enabled, Features);
183 }
184
/// handleTargetFeatures - Perform initialization based on the user
/// configured set of features.
///
/// Walks \p Features and, for every entry that begins with '+', sets the
/// matching Has* member and raises SSELevel, MMX3DNowLevel and XOPLevel when
/// the entry names one of those levels. After the walk, the FPMath selection
/// is checked against the final SSE level, SimdDefaultAlign is derived, and
/// long double is disabled when x87 is unavailable but the long double format
/// is x87 double-extended.
///
/// \returns false, after reporting err_target_unsupported_fpmath through
/// \p Diags, when FPMath does not match SSELevel; true otherwise.
bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
                                         DiagnosticsEngine &Diags) {
  for (const auto &Feature : Features) {
    // Only entries that start with '+' are processed; everything else is
    // skipped.
    if (Feature[0] != '+')
      continue;

    // One-to-one mapping from feature string to boolean member.
    if (Feature == "+aes") {
      HasAES = true;
    } else if (Feature == "+vaes") {
      HasVAES = true;
    } else if (Feature == "+pclmul") {
      HasPCLMUL = true;
    } else if (Feature == "+vpclmulqdq") {
      HasVPCLMULQDQ = true;
    } else if (Feature == "+lzcnt") {
      HasLZCNT = true;
    } else if (Feature == "+rdrnd") {
      HasRDRND = true;
    } else if (Feature == "+fsgsbase") {
      HasFSGSBASE = true;
    } else if (Feature == "+bmi") {
      HasBMI = true;
    } else if (Feature == "+bmi2") {
      HasBMI2 = true;
    } else if (Feature == "+popcnt") {
      HasPOPCNT = true;
    } else if (Feature == "+rtm") {
      HasRTM = true;
    } else if (Feature == "+prfchw") {
      HasPRFCHW = true;
    } else if (Feature == "+rdseed") {
      HasRDSEED = true;
    } else if (Feature == "+adx") {
      HasADX = true;
    } else if (Feature == "+tbm") {
      HasTBM = true;
    } else if (Feature == "+lwp") {
      HasLWP = true;
    } else if (Feature == "+fma") {
      HasFMA = true;
    } else if (Feature == "+f16c") {
      HasF16C = true;
    } else if (Feature == "+gfni") {
      HasGFNI = true;
    } else if (Feature == "+avx512cd") {
      HasAVX512CD = true;
    } else if (Feature == "+avx512vpopcntdq") {
      HasAVX512VPOPCNTDQ = true;
    } else if (Feature == "+avx512vnni") {
      HasAVX512VNNI = true;
    } else if (Feature == "+avx512bf16") {
      HasAVX512BF16 = true;
    } else if (Feature == "+avx512er") {
      HasAVX512ER = true;
    } else if (Feature == "+avx512fp16") {
      // avx512fp16 is the only entry that sets a second member as well.
      HasAVX512FP16 = true;
      HasLegalHalfType = true;
    } else if (Feature == "+avx512pf") {
      HasAVX512PF = true;
    } else if (Feature == "+avx512dq") {
      HasAVX512DQ = true;
    } else if (Feature == "+avx512bitalg") {
      HasAVX512BITALG = true;
    } else if (Feature == "+avx512bw") {
      HasAVX512BW = true;
    } else if (Feature == "+avx512vl") {
      HasAVX512VL = true;
    } else if (Feature == "+avx512vbmi") {
      HasAVX512VBMI = true;
    } else if (Feature == "+avx512vbmi2") {
      HasAVX512VBMI2 = true;
    } else if (Feature == "+avx512ifma") {
      HasAVX512IFMA = true;
    } else if (Feature == "+avx512vp2intersect") {
      HasAVX512VP2INTERSECT = true;
    } else if (Feature == "+sha") {
      HasSHA = true;
    } else if (Feature == "+shstk") {
      HasSHSTK = true;
    } else if (Feature == "+movbe") {
      HasMOVBE = true;
    } else if (Feature == "+sgx") {
      HasSGX = true;
    } else if (Feature == "+cx8") {
      HasCX8 = true;
    } else if (Feature == "+cx16") {
      HasCX16 = true;
    } else if (Feature == "+fxsr") {
      HasFXSR = true;
    } else if (Feature == "+xsave") {
      HasXSAVE = true;
    } else if (Feature == "+xsaveopt") {
      HasXSAVEOPT = true;
    } else if (Feature == "+xsavec") {
      HasXSAVEC = true;
    } else if (Feature == "+xsaves") {
      HasXSAVES = true;
    } else if (Feature == "+mwaitx") {
      HasMWAITX = true;
    } else if (Feature == "+pku") {
      HasPKU = true;
    } else if (Feature == "+clflushopt") {
      HasCLFLUSHOPT = true;
    } else if (Feature == "+clwb") {
      HasCLWB = true;
    } else if (Feature == "+wbnoinvd") {
      HasWBNOINVD = true;
    } else if (Feature == "+prefetchi") {
      HasPREFETCHI = true;
    } else if (Feature == "+prefetchwt1") {
      HasPREFETCHWT1 = true;
    } else if (Feature == "+clzero") {
      HasCLZERO = true;
    } else if (Feature == "+cldemote") {
      HasCLDEMOTE = true;
    } else if (Feature == "+rdpid") {
      HasRDPID = true;
    } else if (Feature == "+rdpru") {
      HasRDPRU = true;
    } else if (Feature == "+kl") {
      HasKL = true;
    } else if (Feature == "+widekl") {
      HasWIDEKL = true;
    } else if (Feature == "+retpoline-external-thunk") {
      HasRetpolineExternalThunk = true;
    } else if (Feature == "+sahf") {
      HasLAHFSAHF = true;
    } else if (Feature == "+waitpkg") {
      HasWAITPKG = true;
    } else if (Feature == "+movdiri") {
      HasMOVDIRI = true;
    } else if (Feature == "+movdir64b") {
      HasMOVDIR64B = true;
    } else if (Feature == "+pconfig") {
      HasPCONFIG = true;
    } else if (Feature == "+ptwrite") {
      HasPTWRITE = true;
    } else if (Feature == "+invpcid") {
      HasINVPCID = true;
    } else if (Feature == "+save-args") {
      HasSaveArgs = true;
    } else if (Feature == "+enqcmd") {
      HasENQCMD = true;
    } else if (Feature == "+hreset") {
      HasHRESET = true;
    } else if (Feature == "+amx-bf16") {
      HasAMXBF16 = true;
    } else if (Feature == "+amx-fp16") {
      HasAMXFP16 = true;
    } else if (Feature == "+amx-int8") {
      HasAMXINT8 = true;
    } else if (Feature == "+amx-tile") {
      HasAMXTILE = true;
    } else if (Feature == "+cmpccxadd") {
      HasCMPCCXADD = true;
    } else if (Feature == "+raoint") {
      HasRAOINT = true;
    } else if (Feature == "+avxifma") {
      HasAVXIFMA = true;
    } else if (Feature == "+avxneconvert") {
      HasAVXNECONVERT= true;
    } else if (Feature == "+avxvnni") {
      HasAVXVNNI = true;
    } else if (Feature == "+avxvnniint8") {
      HasAVXVNNIINT8 = true;
    } else if (Feature == "+serialize") {
      HasSERIALIZE = true;
    } else if (Feature == "+tsxldtrk") {
      HasTSXLDTRK = true;
    } else if (Feature == "+uintr") {
      HasUINTR = true;
    } else if (Feature == "+crc32") {
      HasCRC32 = true;
    } else if (Feature == "+x87") {
      HasX87 = true;
    }

    // Translate the entry into an SSE level (NoSSE when it names none) and
    // keep the highest level seen so far.
    X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
                           .Case("+avx512f", AVX512F)
                           .Case("+avx2", AVX2)
                           .Case("+avx", AVX)
                           .Case("+sse4.2", SSE42)
                           .Case("+sse4.1", SSE41)
                           .Case("+ssse3", SSSE3)
                           .Case("+sse3", SSE3)
                           .Case("+sse2", SSE2)
                           .Case("+sse", SSE1)
                           .Default(NoSSE);
    SSELevel = std::max(SSELevel, Level);

    // Both flags are recomputed for every '+' entry, so after the loop they
    // reflect the final SSELevel. They are not written at all when the list
    // contains no '+' entry.
    HasFloat16 = SSELevel >= SSE2;

    HasBFloat16 = SSELevel >= SSE2;

    // Same max-tracking scheme for the MMX / 3DNow! level.
    MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch<MMX3DNowEnum>(Feature)
                                      .Case("+3dnowa", AMD3DNowAthlon)
                                      .Case("+3dnow", AMD3DNow)
                                      .Case("+mmx", MMX)
                                      .Default(NoMMX3DNow);
    MMX3DNowLevel = std::max(MMX3DNowLevel, ThreeDNowLevel);

    // ... and for the SSE4A / FMA4 / XOP level.
    XOPEnum XLevel = llvm::StringSwitch<XOPEnum>(Feature)
                         .Case("+xop", XOP)
                         .Case("+fma4", FMA4)
                         .Case("+sse4a", SSE4A)
                         .Default(NoXOP);
    XOPLevel = std::max(XOPLevel, XLevel);
  }

  // LLVM doesn't have a separate switch for fpmath, so only accept it if it
  // matches the selected sse level.
  if ((FPMath == FP_SSE && SSELevel < SSE1) ||
      (FPMath == FP_387 && SSELevel >= SSE1)) {
    Diags.Report(diag::err_target_unsupported_fpmath)
        << (FPMath == FP_SSE ? "sse" : "387");
    return false;
  }

  // 512 with avx512f, 256 with avx, 128 otherwise.
  SimdDefaultAlign =
      hasFeature("avx512f") ? 512 : hasFeature("avx") ? 256 : 128;

  // FIXME: We should allow long double type on 32-bits to match with GCC.
  // This requires backend to be able to lower f80 without x87 first.
  if (!HasX87 && LongDoubleFormat == &llvm::APFloat::x87DoubleExtended())
    HasLongDouble = false;

  return true;
}
415
/// X86TargetInfo::getTargetDefines - Return the set of the X86-specific macro
/// definitions for this particular subtarget.
///
/// Macros are emitted through \p Builder in this order: inline-asm and code
/// model macros, architecture identification, segment qualifiers, macros keyed
/// on the selected CPU, one macro per enabled feature flag, the cumulative
/// SSE / MMX / XOP level macros, and finally the __GCC_HAVE_SYNC_COMPARE_AND_
/// SWAP_* and __SIZEOF_FLOAT128__ macros.
///
/// \param Opts    language options; read here for MicrosoftExt and passed on
///                to DefineStd.
/// \param Builder receives every macro definition.
void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
                                     MacroBuilder &Builder) const {
  // Inline assembly supports X86 flag outputs.
  Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__");

  // Emit __code_model_<name>__, with "default" spelled as "small".
  std::string CodeModel = getTargetOpts().CodeModel;
  if (CodeModel == "default")
    CodeModel = "small";
  Builder.defineMacro("__code_model_" + CodeModel + "__");

  // Target identification.
  if (getTriple().getArch() == llvm::Triple::x86_64) {
    Builder.defineMacro("__amd64__");
    Builder.defineMacro("__amd64");
    Builder.defineMacro("__x86_64");
    Builder.defineMacro("__x86_64__");
    if (getTriple().getArchName() == "x86_64h") {
      Builder.defineMacro("__x86_64h");
      Builder.defineMacro("__x86_64h__");
    }
  } else {
    DefineStd(Builder, "i386", Opts);
  }

  // Segment-register keywords, mapped onto address spaces 256 (gs) and
  // 257 (fs).
  Builder.defineMacro("__SEG_GS");
  Builder.defineMacro("__SEG_FS");
  Builder.defineMacro("__seg_gs", "__attribute__((address_space(256)))");
  Builder.defineMacro("__seg_fs", "__attribute__((address_space(257)))");

  // Subtarget options.
  // FIXME: We are hard-coding the tune parameters based on the CPU, but they
  // truly should be based on -mtune options.
  using namespace llvm::X86;
  switch (CPU) {
  case CK_None:
    break;
  case CK_i386:
    // The rest are coming from the i386 define above.
    Builder.defineMacro("__tune_i386__");
    break;
  case CK_i486:
  case CK_WinChipC6:
  case CK_WinChip2:
  case CK_C3:
    defineCPUMacros(Builder, "i486");
    break;
  case CK_PentiumMMX:
    // Pentium MMX gets its own macros and then the plain Pentium ones.
    Builder.defineMacro("__pentium_mmx__");
    Builder.defineMacro("__tune_pentium_mmx__");
    [[fallthrough]];
  case CK_i586:
  case CK_Pentium:
    defineCPUMacros(Builder, "i586");
    defineCPUMacros(Builder, "pentium");
    break;
  case CK_Pentium3:
  case CK_PentiumM:
    // Pentium 3 / M accumulate the pentium3, pentium2 and i686 macros.
    Builder.defineMacro("__tune_pentium3__");
    [[fallthrough]];
  case CK_Pentium2:
  case CK_C3_2:
    Builder.defineMacro("__tune_pentium2__");
    [[fallthrough]];
  case CK_PentiumPro:
  case CK_i686:
    defineCPUMacros(Builder, "i686");
    defineCPUMacros(Builder, "pentiumpro");
    break;
  case CK_Pentium4:
    defineCPUMacros(Builder, "pentium4");
    break;
  case CK_Yonah:
  case CK_Prescott:
  case CK_Nocona:
    defineCPUMacros(Builder, "nocona");
    break;
  case CK_Core2:
  case CK_Penryn:
    defineCPUMacros(Builder, "core2");
    break;
  case CK_Bonnell:
    defineCPUMacros(Builder, "atom");
    break;
  case CK_Silvermont:
    defineCPUMacros(Builder, "slm");
    break;
  case CK_Goldmont:
    defineCPUMacros(Builder, "goldmont");
    break;
  case CK_GoldmontPlus:
    defineCPUMacros(Builder, "goldmont_plus");
    break;
  case CK_Tremont:
    defineCPUMacros(Builder, "tremont");
    break;
  case CK_Nehalem:
  case CK_Westmere:
  case CK_SandyBridge:
  case CK_IvyBridge:
  case CK_Haswell:
  case CK_Broadwell:
  case CK_SkylakeClient:
  case CK_SkylakeServer:
  case CK_Cascadelake:
  case CK_Cooperlake:
  case CK_Cannonlake:
  case CK_IcelakeClient:
  case CK_Rocketlake:
  case CK_IcelakeServer:
  case CK_Tigerlake:
  case CK_SapphireRapids:
  case CK_Alderlake:
  case CK_Raptorlake:
  case CK_Meteorlake:
  case CK_Sierraforest:
  case CK_Grandridge:
  case CK_Graniterapids:
  case CK_Emeraldrapids:
    // FIXME: Historically, we defined this legacy name, it would be nice to
    // remove it at some point. We've never exposed fine-grained names for
    // recent primary x86 CPUs, and we should keep it that way.
    defineCPUMacros(Builder, "corei7");
    break;
  case CK_KNL:
    defineCPUMacros(Builder, "knl");
    break;
  case CK_KNM:
    // No CPU-specific macros are defined for KNM.
    break;
  case CK_Lakemont:
    // Lakemont takes the i586/pentium macros without their tuning variants
    // and adds its own tune macro instead.
    defineCPUMacros(Builder, "i586", /*Tuning*/false);
    defineCPUMacros(Builder, "pentium", /*Tuning*/false);
    Builder.defineMacro("__tune_lakemont__");
    break;
  case CK_K6_2:
    Builder.defineMacro("__k6_2__");
    Builder.defineMacro("__tune_k6_2__");
    [[fallthrough]];
  case CK_K6_3:
    if (CPU != CK_K6_2) { // In case of fallthrough
      // FIXME: GCC may be enabling these in cases where some other k6
      // architecture is specified but -m3dnow is explicitly provided. The
      // exact semantics need to be determined and emulated here.
      Builder.defineMacro("__k6_3__");
      Builder.defineMacro("__tune_k6_3__");
    }
    [[fallthrough]];
  case CK_K6:
    defineCPUMacros(Builder, "k6");
    break;
  case CK_Athlon:
  case CK_AthlonXP:
    defineCPUMacros(Builder, "athlon");
    // The athlon_sse macros are added whenever any SSE level is enabled.
    if (SSELevel != NoSSE) {
      Builder.defineMacro("__athlon_sse__");
      Builder.defineMacro("__tune_athlon_sse__");
    }
    break;
  case CK_K8:
  case CK_K8SSE3:
  case CK_x86_64:
    defineCPUMacros(Builder, "k8");
    break;
  case CK_x86_64_v2:
  case CK_x86_64_v3:
  case CK_x86_64_v4:
    // The x86-64 micro-architecture levels define no CPU-specific macros.
    break;
  case CK_AMDFAM10:
    defineCPUMacros(Builder, "amdfam10");
    break;
  case CK_BTVER1:
    defineCPUMacros(Builder, "btver1");
    break;
  case CK_BTVER2:
    defineCPUMacros(Builder, "btver2");
    break;
  case CK_BDVER1:
    defineCPUMacros(Builder, "bdver1");
    break;
  case CK_BDVER2:
    defineCPUMacros(Builder, "bdver2");
    break;
  case CK_BDVER3:
    defineCPUMacros(Builder, "bdver3");
    break;
  case CK_BDVER4:
    defineCPUMacros(Builder, "bdver4");
    break;
  case CK_ZNVER1:
    defineCPUMacros(Builder, "znver1");
    break;
  case CK_ZNVER2:
    defineCPUMacros(Builder, "znver2");
    break;
  case CK_ZNVER3:
    defineCPUMacros(Builder, "znver3");
    break;
  case CK_ZNVER4:
    defineCPUMacros(Builder, "znver4");
    break;
  case CK_Geode:
    defineCPUMacros(Builder, "geode");
    break;
  }

  // Target properties.
  Builder.defineMacro("__REGISTER_PREFIX__", "");

  // Define __NO_MATH_INLINES on linux/x86 so that we don't get inline
  // functions in glibc header files that use FP Stack inline asm which the
  // backend can't deal with (PR879).
  // NOTE(review): the macro is defined unconditionally here, not only when
  // targeting Linux.
  Builder.defineMacro("__NO_MATH_INLINES");

  // One macro per enabled feature flag.
  if (HasAES)
    Builder.defineMacro("__AES__");

  if (HasVAES)
    Builder.defineMacro("__VAES__");

  if (HasPCLMUL)
    Builder.defineMacro("__PCLMUL__");

  if (HasVPCLMULQDQ)
    Builder.defineMacro("__VPCLMULQDQ__");

  // Note, in 32-bit mode, GCC does not define the macro if -mno-sahf. In LLVM,
  // the feature flag only applies to 64-bit mode.
  if (HasLAHFSAHF || getTriple().getArch() == llvm::Triple::x86)
    Builder.defineMacro("__LAHF_SAHF__");

  if (HasLZCNT)
    Builder.defineMacro("__LZCNT__");

  if (HasRDRND)
    Builder.defineMacro("__RDRND__");

  if (HasFSGSBASE)
    Builder.defineMacro("__FSGSBASE__");

  if (HasBMI)
    Builder.defineMacro("__BMI__");

  if (HasBMI2)
    Builder.defineMacro("__BMI2__");

  if (HasPOPCNT)
    Builder.defineMacro("__POPCNT__");

  if (HasRTM)
    Builder.defineMacro("__RTM__");

  if (HasPRFCHW)
    Builder.defineMacro("__PRFCHW__");

  if (HasRDSEED)
    Builder.defineMacro("__RDSEED__");

  if (HasADX)
    Builder.defineMacro("__ADX__");

  if (HasTBM)
    Builder.defineMacro("__TBM__");

  if (HasLWP)
    Builder.defineMacro("__LWP__");

  if (HasMWAITX)
    Builder.defineMacro("__MWAITX__");

  if (HasMOVBE)
    Builder.defineMacro("__MOVBE__");

  // Cumulative: XOP also defines the FMA4 and SSE4A macros, FMA4 also defines
  // the SSE4A macro.
  switch (XOPLevel) {
  case XOP:
    Builder.defineMacro("__XOP__");
    [[fallthrough]];
  case FMA4:
    Builder.defineMacro("__FMA4__");
    [[fallthrough]];
  case SSE4A:
    Builder.defineMacro("__SSE4A__");
    [[fallthrough]];
  case NoXOP:
    break;
  }

  if (HasFMA)
    Builder.defineMacro("__FMA__");

  if (HasF16C)
    Builder.defineMacro("__F16C__");

  if (HasGFNI)
    Builder.defineMacro("__GFNI__");

  if (HasAVX512CD)
    Builder.defineMacro("__AVX512CD__");
  if (HasAVX512VPOPCNTDQ)
    Builder.defineMacro("__AVX512VPOPCNTDQ__");
  if (HasAVX512VNNI)
    Builder.defineMacro("__AVX512VNNI__");
  if (HasAVX512BF16)
    Builder.defineMacro("__AVX512BF16__");
  if (HasAVX512ER)
    Builder.defineMacro("__AVX512ER__");
  if (HasAVX512FP16)
    Builder.defineMacro("__AVX512FP16__");
  if (HasAVX512PF)
    Builder.defineMacro("__AVX512PF__");
  if (HasAVX512DQ)
    Builder.defineMacro("__AVX512DQ__");
  if (HasAVX512BITALG)
    Builder.defineMacro("__AVX512BITALG__");
  if (HasAVX512BW)
    Builder.defineMacro("__AVX512BW__");
  if (HasAVX512VL)
    Builder.defineMacro("__AVX512VL__");
  if (HasAVX512VBMI)
    Builder.defineMacro("__AVX512VBMI__");
  if (HasAVX512VBMI2)
    Builder.defineMacro("__AVX512VBMI2__");
  if (HasAVX512IFMA)
    Builder.defineMacro("__AVX512IFMA__");
  if (HasAVX512VP2INTERSECT)
    Builder.defineMacro("__AVX512VP2INTERSECT__");
  if (HasSHA)
    Builder.defineMacro("__SHA__");

  if (HasFXSR)
    Builder.defineMacro("__FXSR__");
  if (HasXSAVE)
    Builder.defineMacro("__XSAVE__");
  if (HasXSAVEOPT)
    Builder.defineMacro("__XSAVEOPT__");
  if (HasXSAVEC)
    Builder.defineMacro("__XSAVEC__");
  if (HasXSAVES)
    Builder.defineMacro("__XSAVES__");
  if (HasPKU)
    Builder.defineMacro("__PKU__");
  if (HasCLFLUSHOPT)
    Builder.defineMacro("__CLFLUSHOPT__");
  if (HasCLWB)
    Builder.defineMacro("__CLWB__");
  if (HasWBNOINVD)
    Builder.defineMacro("__WBNOINVD__");
  if (HasSHSTK)
    Builder.defineMacro("__SHSTK__");
  if (HasSGX)
    Builder.defineMacro("__SGX__");
  if (HasPREFETCHI)
    Builder.defineMacro("__PREFETCHI__");
  if (HasPREFETCHWT1)
    Builder.defineMacro("__PREFETCHWT1__");
  if (HasCLZERO)
    Builder.defineMacro("__CLZERO__");
  if (HasKL)
    Builder.defineMacro("__KL__");
  if (HasWIDEKL)
    Builder.defineMacro("__WIDEKL__");
  if (HasRDPID)
    Builder.defineMacro("__RDPID__");
  if (HasRDPRU)
    Builder.defineMacro("__RDPRU__");
  if (HasCLDEMOTE)
    Builder.defineMacro("__CLDEMOTE__");
  if (HasWAITPKG)
    Builder.defineMacro("__WAITPKG__");
  if (HasMOVDIRI)
    Builder.defineMacro("__MOVDIRI__");
  if (HasMOVDIR64B)
    Builder.defineMacro("__MOVDIR64B__");
  if (HasPCONFIG)
    Builder.defineMacro("__PCONFIG__");
  if (HasPTWRITE)
    Builder.defineMacro("__PTWRITE__");
  if (HasINVPCID)
    Builder.defineMacro("__INVPCID__");
  if (HasENQCMD)
    Builder.defineMacro("__ENQCMD__");
  if (HasHRESET)
    Builder.defineMacro("__HRESET__");
  if (HasAMXTILE)
    Builder.defineMacro("__AMX_TILE__");
  if (HasAMXINT8)
    Builder.defineMacro("__AMX_INT8__");
  if (HasAMXBF16)
    Builder.defineMacro("__AMX_BF16__");
  if (HasAMXFP16)
    Builder.defineMacro("__AMX_FP16__");
  if (HasCMPCCXADD)
    Builder.defineMacro("__CMPCCXADD__");
  if (HasRAOINT)
    Builder.defineMacro("__RAOINT__");
  if (HasAVXIFMA)
    Builder.defineMacro("__AVXIFMA__");
  if (HasAVXNECONVERT)
    Builder.defineMacro("__AVXNECONVERT__");
  if (HasAVXVNNI)
    Builder.defineMacro("__AVXVNNI__");
  if (HasAVXVNNIINT8)
    Builder.defineMacro("__AVXVNNIINT8__");
  if (HasSERIALIZE)
    Builder.defineMacro("__SERIALIZE__");
  if (HasTSXLDTRK)
    Builder.defineMacro("__TSXLDTRK__");
  if (HasUINTR)
    Builder.defineMacro("__UINTR__");
  if (HasCRC32)
    Builder.defineMacro("__CRC32__");

  // Each case falls through to the previous one here.
  switch (SSELevel) {
  case AVX512F:
    Builder.defineMacro("__AVX512F__");
    [[fallthrough]];
  case AVX2:
    Builder.defineMacro("__AVX2__");
    [[fallthrough]];
  case AVX:
    Builder.defineMacro("__AVX__");
    [[fallthrough]];
  case SSE42:
    Builder.defineMacro("__SSE4_2__");
    [[fallthrough]];
  case SSE41:
    Builder.defineMacro("__SSE4_1__");
    [[fallthrough]];
  case SSSE3:
    Builder.defineMacro("__SSSE3__");
    [[fallthrough]];
  case SSE3:
    Builder.defineMacro("__SSE3__");
    [[fallthrough]];
  case SSE2:
    Builder.defineMacro("__SSE2__");
    Builder.defineMacro("__SSE2_MATH__"); // -mfp-math=sse always implied.
    [[fallthrough]];
  case SSE1:
    Builder.defineMacro("__SSE__");
    Builder.defineMacro("__SSE_MATH__"); // -mfp-math=sse always implied.
    [[fallthrough]];
  case NoSSE:
    break;
  }

  // With Microsoft extensions on 32-bit x86, also define _M_IX86_FP:
  // 2 for SSE2 and above, 1 for SSE1, 0 otherwise.
  if (Opts.MicrosoftExt && getTriple().getArch() == llvm::Triple::x86) {
    switch (SSELevel) {
    case AVX512F:
    case AVX2:
    case AVX:
    case SSE42:
    case SSE41:
    case SSSE3:
    case SSE3:
    case SSE2:
      Builder.defineMacro("_M_IX86_FP", Twine(2));
      break;
    case SSE1:
      Builder.defineMacro("_M_IX86_FP", Twine(1));
      break;
    default:
      Builder.defineMacro("_M_IX86_FP", Twine(0));
      break;
    }
  }

  // Each case falls through to the previous one here.
  switch (MMX3DNowLevel) {
  case AMD3DNowAthlon:
    Builder.defineMacro("__3dNOW_A__");
    [[fallthrough]];
  case AMD3DNow:
    Builder.defineMacro("__3dNOW__");
    [[fallthrough]];
  case MMX:
    Builder.defineMacro("__MMX__");
    [[fallthrough]];
  case NoMMX3DNow:
    break;
  }

  // The 1-, 2- and 4-byte compare-and-swap macros are defined for every CPU
  // from i486 upwards and when no CPU was selected.
  if (CPU >= CK_i486 || CPU == CK_None) {
    Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
    Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
    Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
  }
  // The 8-byte variant needs cx8; the 16-byte variant needs cx16 and x86_64.
  if (HasCX8)
    Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
  if (HasCX16 && getTriple().getArch() == llvm::Triple::x86_64)
    Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16");

  if (HasFloat128)
    Builder.defineMacro("__SIZEOF_FLOAT128__", "16");
}
912
isValidFeatureName(StringRef Name) const913 bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
914 return llvm::StringSwitch<bool>(Name)
915 .Case("3dnow", true)
916 .Case("3dnowa", true)
917 .Case("adx", true)
918 .Case("aes", true)
919 .Case("amx-bf16", true)
920 .Case("amx-fp16", true)
921 .Case("amx-int8", true)
922 .Case("amx-tile", true)
923 .Case("avx", true)
924 .Case("avx2", true)
925 .Case("avx512f", true)
926 .Case("avx512cd", true)
927 .Case("avx512vpopcntdq", true)
928 .Case("avx512vnni", true)
929 .Case("avx512bf16", true)
930 .Case("avx512er", true)
931 .Case("avx512fp16", true)
932 .Case("avx512pf", true)
933 .Case("avx512dq", true)
934 .Case("avx512bitalg", true)
935 .Case("avx512bw", true)
936 .Case("avx512vl", true)
937 .Case("avx512vbmi", true)
938 .Case("avx512vbmi2", true)
939 .Case("avx512ifma", true)
940 .Case("avx512vp2intersect", true)
941 .Case("avxifma", true)
942 .Case("avxneconvert", true)
943 .Case("avxvnni", true)
944 .Case("avxvnniint8", true)
945 .Case("bmi", true)
946 .Case("bmi2", true)
947 .Case("cldemote", true)
948 .Case("clflushopt", true)
949 .Case("clwb", true)
950 .Case("clzero", true)
951 .Case("cmpccxadd", true)
952 .Case("crc32", true)
953 .Case("cx16", true)
954 .Case("enqcmd", true)
955 .Case("f16c", true)
956 .Case("fma", true)
957 .Case("fma4", true)
958 .Case("fsgsbase", true)
959 .Case("fxsr", true)
960 .Case("general-regs-only", true)
961 .Case("gfni", true)
962 .Case("hreset", true)
963 .Case("invpcid", true)
964 .Case("kl", true)
965 .Case("widekl", true)
966 .Case("lwp", true)
967 .Case("lzcnt", true)
968 .Case("mmx", true)
969 .Case("movbe", true)
970 .Case("movdiri", true)
971 .Case("movdir64b", true)
972 .Case("mwaitx", true)
973 .Case("pclmul", true)
974 .Case("pconfig", true)
975 .Case("pku", true)
976 .Case("popcnt", true)
977 .Case("prefetchi", true)
978 .Case("prefetchwt1", true)
979 .Case("prfchw", true)
980 .Case("ptwrite", true)
981 .Case("raoint", true)
982 .Case("rdpid", true)
983 .Case("rdpru", true)
984 .Case("rdrnd", true)
985 .Case("rdseed", true)
986 .Case("rtm", true)
987 .Case("sahf", true)
988 .Case("serialize", true)
989 .Case("sgx", true)
990 .Case("sha", true)
991 .Case("shstk", true)
992 .Case("sse", true)
993 .Case("sse2", true)
994 .Case("sse3", true)
995 .Case("ssse3", true)
996 .Case("sse4", true)
997 .Case("sse4.1", true)
998 .Case("sse4.2", true)
999 .Case("sse4a", true)
1000 .Case("tbm", true)
1001 .Case("tsxldtrk", true)
1002 .Case("uintr", true)
1003 .Case("vaes", true)
1004 .Case("vpclmulqdq", true)
1005 .Case("wbnoinvd", true)
1006 .Case("waitpkg", true)
1007 .Case("x87", true)
1008 .Case("xop", true)
1009 .Case("xsave", true)
1010 .Case("xsavec", true)
1011 .Case("xsaves", true)
1012 .Case("xsaveopt", true)
1013 .Default(false);
1014 }
1015
hasFeature(StringRef Feature) const1016 bool X86TargetInfo::hasFeature(StringRef Feature) const {
1017 return llvm::StringSwitch<bool>(Feature)
1018 .Case("adx", HasADX)
1019 .Case("aes", HasAES)
1020 .Case("amx-bf16", HasAMXBF16)
1021 .Case("amx-fp16", HasAMXFP16)
1022 .Case("amx-int8", HasAMXINT8)
1023 .Case("amx-tile", HasAMXTILE)
1024 .Case("avx", SSELevel >= AVX)
1025 .Case("avx2", SSELevel >= AVX2)
1026 .Case("avx512f", SSELevel >= AVX512F)
1027 .Case("avx512cd", HasAVX512CD)
1028 .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
1029 .Case("avx512vnni", HasAVX512VNNI)
1030 .Case("avx512bf16", HasAVX512BF16)
1031 .Case("avx512er", HasAVX512ER)
1032 .Case("avx512fp16", HasAVX512FP16)
1033 .Case("avx512pf", HasAVX512PF)
1034 .Case("avx512dq", HasAVX512DQ)
1035 .Case("avx512bitalg", HasAVX512BITALG)
1036 .Case("avx512bw", HasAVX512BW)
1037 .Case("avx512vl", HasAVX512VL)
1038 .Case("avx512vbmi", HasAVX512VBMI)
1039 .Case("avx512vbmi2", HasAVX512VBMI2)
1040 .Case("avx512ifma", HasAVX512IFMA)
1041 .Case("avx512vp2intersect", HasAVX512VP2INTERSECT)
1042 .Case("avxifma", HasAVXIFMA)
1043 .Case("avxneconvert", HasAVXNECONVERT)
1044 .Case("avxvnni", HasAVXVNNI)
1045 .Case("avxvnniint8", HasAVXVNNIINT8)
1046 .Case("bmi", HasBMI)
1047 .Case("bmi2", HasBMI2)
1048 .Case("cldemote", HasCLDEMOTE)
1049 .Case("clflushopt", HasCLFLUSHOPT)
1050 .Case("clwb", HasCLWB)
1051 .Case("clzero", HasCLZERO)
1052 .Case("cmpccxadd", HasCMPCCXADD)
1053 .Case("crc32", HasCRC32)
1054 .Case("cx8", HasCX8)
1055 .Case("cx16", HasCX16)
1056 .Case("enqcmd", HasENQCMD)
1057 .Case("f16c", HasF16C)
1058 .Case("fma", HasFMA)
1059 .Case("fma4", XOPLevel >= FMA4)
1060 .Case("fsgsbase", HasFSGSBASE)
1061 .Case("fxsr", HasFXSR)
1062 .Case("gfni", HasGFNI)
1063 .Case("hreset", HasHRESET)
1064 .Case("invpcid", HasINVPCID)
1065 .Case("kl", HasKL)
1066 .Case("widekl", HasWIDEKL)
1067 .Case("lwp", HasLWP)
1068 .Case("lzcnt", HasLZCNT)
1069 .Case("mm3dnow", MMX3DNowLevel >= AMD3DNow)
1070 .Case("mm3dnowa", MMX3DNowLevel >= AMD3DNowAthlon)
1071 .Case("mmx", MMX3DNowLevel >= MMX)
1072 .Case("movbe", HasMOVBE)
1073 .Case("movdiri", HasMOVDIRI)
1074 .Case("movdir64b", HasMOVDIR64B)
1075 .Case("save-args", HasSaveArgs)
1076 .Case("mwaitx", HasMWAITX)
1077 .Case("pclmul", HasPCLMUL)
1078 .Case("pconfig", HasPCONFIG)
1079 .Case("pku", HasPKU)
1080 .Case("popcnt", HasPOPCNT)
1081 .Case("prefetchi", HasPREFETCHI)
1082 .Case("prefetchwt1", HasPREFETCHWT1)
1083 .Case("prfchw", HasPRFCHW)
1084 .Case("ptwrite", HasPTWRITE)
1085 .Case("raoint", HasRAOINT)
1086 .Case("rdpid", HasRDPID)
1087 .Case("rdpru", HasRDPRU)
1088 .Case("rdrnd", HasRDRND)
1089 .Case("rdseed", HasRDSEED)
1090 .Case("retpoline-external-thunk", HasRetpolineExternalThunk)
1091 .Case("rtm", HasRTM)
1092 .Case("sahf", HasLAHFSAHF)
1093 .Case("serialize", HasSERIALIZE)
1094 .Case("sgx", HasSGX)
1095 .Case("sha", HasSHA)
1096 .Case("shstk", HasSHSTK)
1097 .Case("sse", SSELevel >= SSE1)
1098 .Case("sse2", SSELevel >= SSE2)
1099 .Case("sse3", SSELevel >= SSE3)
1100 .Case("ssse3", SSELevel >= SSSE3)
1101 .Case("sse4.1", SSELevel >= SSE41)
1102 .Case("sse4.2", SSELevel >= SSE42)
1103 .Case("sse4a", XOPLevel >= SSE4A)
1104 .Case("tbm", HasTBM)
1105 .Case("tsxldtrk", HasTSXLDTRK)
1106 .Case("uintr", HasUINTR)
1107 .Case("vaes", HasVAES)
1108 .Case("vpclmulqdq", HasVPCLMULQDQ)
1109 .Case("wbnoinvd", HasWBNOINVD)
1110 .Case("waitpkg", HasWAITPKG)
1111 .Case("x86", true)
1112 .Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
1113 .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)
1114 .Case("x87", HasX87)
1115 .Case("xop", XOPLevel >= XOP)
1116 .Case("xsave", HasXSAVE)
1117 .Case("xsavec", HasXSAVEC)
1118 .Case("xsaves", HasXSAVES)
1119 .Case("xsaveopt", HasXSAVEOPT)
1120 .Default(false);
1121 }
1122
1123 // We can't use a generic validation scheme for the features accepted here
1124 // versus subtarget features accepted in the target attribute because the
1125 // bitfield structure that's initialized in the runtime only supports the
1126 // below currently rather than the full range of subtarget features. (See
1127 // X86TargetInfo::hasFeature for a somewhat comprehensive list).
validateCpuSupports(StringRef FeatureStr) const1128 bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const {
1129 return llvm::StringSwitch<bool>(FeatureStr)
1130 #define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) .Case(STR, true)
1131 #include "llvm/TargetParser/X86TargetParser.def"
1132 .Default(false);
1133 }
1134
getFeature(StringRef Name)1135 static llvm::X86::ProcessorFeatures getFeature(StringRef Name) {
1136 return llvm::StringSwitch<llvm::X86::ProcessorFeatures>(Name)
1137 #define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) \
1138 .Case(STR, llvm::X86::FEATURE_##ENUM)
1139
1140 #include "llvm/TargetParser/X86TargetParser.def"
1141 ;
1142 // Note, this function should only be used after ensuring the value is
1143 // correct, so it asserts if the value is out of range.
1144 }
1145
multiVersionSortPriority(StringRef Name) const1146 unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const {
1147 // Valid CPUs have a 'key feature' that compares just better than its key
1148 // feature.
1149 using namespace llvm::X86;
1150 CPUKind Kind = parseArchX86(Name);
1151 if (Kind != CK_None) {
1152 ProcessorFeatures KeyFeature = getKeyFeature(Kind);
1153 return (getFeaturePriority(KeyFeature) << 1) + 1;
1154 }
1155
1156 // Now we know we have a feature, so get its priority and shift it a few so
1157 // that we have sufficient room for the CPUs (above).
1158 return getFeaturePriority(getFeature(Name)) << 1;
1159 }
1160
validateCPUSpecificCPUDispatch(StringRef Name) const1161 bool X86TargetInfo::validateCPUSpecificCPUDispatch(StringRef Name) const {
1162 return llvm::StringSwitch<bool>(Name)
1163 #define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, true)
1164 #define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, true)
1165 #include "llvm/TargetParser/X86TargetParser.def"
1166 .Default(false);
1167 }
1168
CPUSpecificCPUDispatchNameDealias(StringRef Name)1169 static StringRef CPUSpecificCPUDispatchNameDealias(StringRef Name) {
1170 return llvm::StringSwitch<StringRef>(Name)
1171 #define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, NAME)
1172 #include "llvm/TargetParser/X86TargetParser.def"
1173 .Default(Name);
1174 }
1175
CPUSpecificManglingCharacter(StringRef Name) const1176 char X86TargetInfo::CPUSpecificManglingCharacter(StringRef Name) const {
1177 return llvm::StringSwitch<char>(CPUSpecificCPUDispatchNameDealias(Name))
1178 #define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, MANGLING)
1179 #include "llvm/TargetParser/X86TargetParser.def"
1180 .Default(0);
1181 }
1182
getCPUSpecificCPUDispatchFeatures(StringRef Name,llvm::SmallVectorImpl<StringRef> & Features) const1183 void X86TargetInfo::getCPUSpecificCPUDispatchFeatures(
1184 StringRef Name, llvm::SmallVectorImpl<StringRef> &Features) const {
1185 StringRef WholeList =
1186 llvm::StringSwitch<StringRef>(CPUSpecificCPUDispatchNameDealias(Name))
1187 #define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, FEATURES)
1188 #include "llvm/TargetParser/X86TargetParser.def"
1189 .Default("");
1190 WholeList.split(Features, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1191 }
1192
getCPUSpecificTuneName(StringRef Name) const1193 StringRef X86TargetInfo::getCPUSpecificTuneName(StringRef Name) const {
1194 return llvm::StringSwitch<StringRef>(Name)
1195 #define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, TUNE_NAME)
1196 #define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, TUNE_NAME)
1197 #include "llvm/TargetParser/X86TargetParser.def"
1198 .Default("");
1199 }
1200
// We can't use a generic validation scheme for the cpus accepted here
// versus subtarget cpus accepted in the target attribute because the
// variables initialized by the runtime only support the below currently
// rather than the full range of cpus.
validateCpuIs(StringRef FeatureStr) const1205 bool X86TargetInfo::validateCpuIs(StringRef FeatureStr) const {
1206 return llvm::StringSwitch<bool>(FeatureStr)
1207 #define X86_VENDOR(ENUM, STRING) .Case(STRING, true)
1208 #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true)
1209 #define X86_CPU_TYPE(ENUM, STR) .Case(STR, true)
1210 #define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true)
1211 #define X86_CPU_SUBTYPE(ENUM, STR) .Case(STR, true)
1212 #include "llvm/TargetParser/X86TargetParser.def"
1213 .Default(false);
1214 }
1215
matchAsmCCConstraint(const char * & Name)1216 static unsigned matchAsmCCConstraint(const char *&Name) {
1217 auto RV = llvm::StringSwitch<unsigned>(Name)
1218 .Case("@cca", 4)
1219 .Case("@ccae", 5)
1220 .Case("@ccb", 4)
1221 .Case("@ccbe", 5)
1222 .Case("@ccc", 4)
1223 .Case("@cce", 4)
1224 .Case("@ccz", 4)
1225 .Case("@ccg", 4)
1226 .Case("@ccge", 5)
1227 .Case("@ccl", 4)
1228 .Case("@ccle", 5)
1229 .Case("@ccna", 5)
1230 .Case("@ccnae", 6)
1231 .Case("@ccnb", 5)
1232 .Case("@ccnbe", 6)
1233 .Case("@ccnc", 5)
1234 .Case("@ccne", 5)
1235 .Case("@ccnz", 5)
1236 .Case("@ccng", 5)
1237 .Case("@ccnge", 6)
1238 .Case("@ccnl", 5)
1239 .Case("@ccnle", 6)
1240 .Case("@ccno", 5)
1241 .Case("@ccnp", 5)
1242 .Case("@ccns", 5)
1243 .Case("@cco", 4)
1244 .Case("@ccp", 4)
1245 .Case("@ccs", 4)
1246 .Default(0);
1247 return RV;
1248 }
1249
validateAsmConstraint(const char * & Name,TargetInfo::ConstraintInfo & Info) const1250 bool X86TargetInfo::validateAsmConstraint(
1251 const char *&Name, TargetInfo::ConstraintInfo &Info) const {
1252 switch (*Name) {
1253 default:
1254 return false;
1255 // Constant constraints.
1256 case 'e': // 32-bit signed integer constant for use with sign-extending x86_64
1257 // instructions.
1258 case 'Z': // 32-bit unsigned integer constant for use with zero-extending
1259 // x86_64 instructions.
1260 case 's':
1261 Info.setRequiresImmediate();
1262 return true;
1263 case 'I':
1264 Info.setRequiresImmediate(0, 31);
1265 return true;
1266 case 'J':
1267 Info.setRequiresImmediate(0, 63);
1268 return true;
1269 case 'K':
1270 Info.setRequiresImmediate(-128, 127);
1271 return true;
1272 case 'L':
1273 Info.setRequiresImmediate({int(0xff), int(0xffff), int(0xffffffff)});
1274 return true;
1275 case 'M':
1276 Info.setRequiresImmediate(0, 3);
1277 return true;
1278 case 'N':
1279 Info.setRequiresImmediate(0, 255);
1280 return true;
1281 case 'O':
1282 Info.setRequiresImmediate(0, 127);
1283 return true;
1284 // Register constraints.
1285 case 'Y': // 'Y' is the first character for several 2-character constraints.
1286 // Shift the pointer to the second character of the constraint.
1287 Name++;
1288 switch (*Name) {
1289 default:
1290 return false;
1291 case 'z': // First SSE register.
1292 case '2':
1293 case 't': // Any SSE register, when SSE2 is enabled.
1294 case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled.
1295 case 'm': // Any MMX register, when inter-unit moves enabled.
1296 case 'k': // AVX512 arch mask registers: k1-k7.
1297 Info.setAllowsRegister();
1298 return true;
1299 }
1300 case 'f': // Any x87 floating point stack register.
1301 // Constraint 'f' cannot be used for output operands.
1302 if (Info.ConstraintStr[0] == '=')
1303 return false;
1304 Info.setAllowsRegister();
1305 return true;
1306 case 'a': // eax.
1307 case 'b': // ebx.
1308 case 'c': // ecx.
1309 case 'd': // edx.
1310 case 'S': // esi.
1311 case 'D': // edi.
1312 case 'A': // edx:eax.
1313 case 't': // Top of floating point stack.
1314 case 'u': // Second from top of floating point stack.
1315 case 'q': // Any register accessible as [r]l: a, b, c, and d.
1316 case 'y': // Any MMX register.
1317 case 'v': // Any {X,Y,Z}MM register (Arch & context dependent)
1318 case 'x': // Any SSE register.
1319 case 'k': // Any AVX512 mask register (same as Yk, additionally allows k0
1320 // for intermideate k reg operations).
1321 case 'Q': // Any register accessible as [r]h: a, b, c, and d.
1322 case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp.
1323 case 'l': // "Index" registers: any general register that can be used as an
1324 // index in a base+index memory access.
1325 Info.setAllowsRegister();
1326 return true;
1327 // Floating point constant constraints.
1328 case 'C': // SSE floating point constant.
1329 case 'G': // x87 floating point constant.
1330 return true;
1331 case '@':
1332 // CC condition changes.
1333 if (auto Len = matchAsmCCConstraint(Name)) {
1334 Name += Len - 1;
1335 Info.setAllowsRegister();
1336 return true;
1337 }
1338 return false;
1339 }
1340 }
1341
1342 // Below is based on the following information:
1343 // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
1344 // | Processor Name | Cache Line Size (Bytes) | Source |
1345 // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
1346 // | i386 | 64 | https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf |
// | i486 | 16 | "four doublewords" (doubleword = 32 bits, 4 * 32 bits = 16 bytes) https://en.wikichip.org/w/images/d/d3/i486_MICROPROCESSOR_HARDWARE_REFERENCE_MANUAL_%281990%29.pdf and http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.126.4216&rep=rep1&type=pdf (page 29) |
1348 // | i586/Pentium MMX | 32 | https://www.7-cpu.com/cpu/P-MMX.html |
1349 // | i686/Pentium | 32 | https://www.7-cpu.com/cpu/P6.html |
1350 // | Netburst/Pentium4 | 64 | https://www.7-cpu.com/cpu/P4-180.html |
1351 // | Atom | 64 | https://www.7-cpu.com/cpu/Atom.html |
1352 // | Westmere | 64 | https://en.wikichip.org/wiki/intel/microarchitectures/sandy_bridge_(client) "Cache Architecture" |
1353 // | Sandy Bridge | 64 | https://en.wikipedia.org/wiki/Sandy_Bridge and https://www.7-cpu.com/cpu/SandyBridge.html |
1354 // | Ivy Bridge | 64 | https://blog.stuffedcow.net/2013/01/ivb-cache-replacement/ and https://www.7-cpu.com/cpu/IvyBridge.html |
1355 // | Haswell | 64 | https://www.7-cpu.com/cpu/Haswell.html |
// | Broadwell | 64 | https://www.7-cpu.com/cpu/Broadwell.html |
1357 // | Skylake (including skylake-avx512) | 64 | https://www.nas.nasa.gov/hecc/support/kb/skylake-processors_550.html "Cache Hierarchy" |
1358 // | Cascade Lake | 64 | https://www.nas.nasa.gov/hecc/support/kb/cascade-lake-processors_579.html "Cache Hierarchy" |
1359 // | Skylake | 64 | https://en.wikichip.org/wiki/intel/microarchitectures/kaby_lake "Memory Hierarchy" |
1360 // | Ice Lake | 64 | https://www.7-cpu.com/cpu/Ice_Lake.html |
1361 // | Knights Landing | 64 | https://software.intel.com/en-us/articles/intel-xeon-phi-processor-7200-family-memory-management-optimizations "The Intel® Xeon Phi™ Processor Architecture" |
1362 // | Knights Mill | 64 | https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf?countrylabel=Colombia "2.5.5.2 L1 DCache " |
1363 // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
getCPUCacheLineSize() const1364 std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
1365 using namespace llvm::X86;
1366 switch (CPU) {
1367 // i386
1368 case CK_i386:
1369 // i486
1370 case CK_i486:
1371 case CK_WinChipC6:
1372 case CK_WinChip2:
1373 case CK_C3:
1374 // Lakemont
1375 case CK_Lakemont:
1376 return 16;
1377
1378 // i586
1379 case CK_i586:
1380 case CK_Pentium:
1381 case CK_PentiumMMX:
1382 // i686
1383 case CK_PentiumPro:
1384 case CK_i686:
1385 case CK_Pentium2:
1386 case CK_Pentium3:
1387 case CK_PentiumM:
1388 case CK_C3_2:
1389 // K6
1390 case CK_K6:
1391 case CK_K6_2:
1392 case CK_K6_3:
1393 // Geode
1394 case CK_Geode:
1395 return 32;
1396
1397 // Netburst
1398 case CK_Pentium4:
1399 case CK_Prescott:
1400 case CK_Nocona:
1401 // Atom
1402 case CK_Bonnell:
1403 case CK_Silvermont:
1404 case CK_Goldmont:
1405 case CK_GoldmontPlus:
1406 case CK_Tremont:
1407
1408 case CK_Westmere:
1409 case CK_SandyBridge:
1410 case CK_IvyBridge:
1411 case CK_Haswell:
1412 case CK_Broadwell:
1413 case CK_SkylakeClient:
1414 case CK_SkylakeServer:
1415 case CK_Cascadelake:
1416 case CK_Nehalem:
1417 case CK_Cooperlake:
1418 case CK_Cannonlake:
1419 case CK_Tigerlake:
1420 case CK_SapphireRapids:
1421 case CK_IcelakeClient:
1422 case CK_Rocketlake:
1423 case CK_IcelakeServer:
1424 case CK_Alderlake:
1425 case CK_Raptorlake:
1426 case CK_Meteorlake:
1427 case CK_Sierraforest:
1428 case CK_Grandridge:
1429 case CK_Graniterapids:
1430 case CK_Emeraldrapids:
1431 case CK_KNL:
1432 case CK_KNM:
1433 // K7
1434 case CK_Athlon:
1435 case CK_AthlonXP:
1436 // K8
1437 case CK_K8:
1438 case CK_K8SSE3:
1439 case CK_AMDFAM10:
1440 // Bobcat
1441 case CK_BTVER1:
1442 case CK_BTVER2:
1443 // Bulldozer
1444 case CK_BDVER1:
1445 case CK_BDVER2:
1446 case CK_BDVER3:
1447 case CK_BDVER4:
1448 // Zen
1449 case CK_ZNVER1:
1450 case CK_ZNVER2:
1451 case CK_ZNVER3:
1452 case CK_ZNVER4:
1453 // Deprecated
1454 case CK_x86_64:
1455 case CK_x86_64_v2:
1456 case CK_x86_64_v3:
1457 case CK_x86_64_v4:
1458 case CK_Yonah:
1459 case CK_Penryn:
1460 case CK_Core2:
1461 return 64;
1462
1463 // The following currently have unknown cache line sizes (but they are probably all 64):
1464 // Core
1465 case CK_None:
1466 return std::nullopt;
1467 }
1468 llvm_unreachable("Unknown CPU kind");
1469 }
1470
validateOutputSize(const llvm::StringMap<bool> & FeatureMap,StringRef Constraint,unsigned Size) const1471 bool X86TargetInfo::validateOutputSize(const llvm::StringMap<bool> &FeatureMap,
1472 StringRef Constraint,
1473 unsigned Size) const {
1474 // Strip off constraint modifiers.
1475 while (Constraint[0] == '=' || Constraint[0] == '+' || Constraint[0] == '&')
1476 Constraint = Constraint.substr(1);
1477
1478 return validateOperandSize(FeatureMap, Constraint, Size);
1479 }
1480
validateInputSize(const llvm::StringMap<bool> & FeatureMap,StringRef Constraint,unsigned Size) const1481 bool X86TargetInfo::validateInputSize(const llvm::StringMap<bool> &FeatureMap,
1482 StringRef Constraint,
1483 unsigned Size) const {
1484 return validateOperandSize(FeatureMap, Constraint, Size);
1485 }
1486
validateOperandSize(const llvm::StringMap<bool> & FeatureMap,StringRef Constraint,unsigned Size) const1487 bool X86TargetInfo::validateOperandSize(const llvm::StringMap<bool> &FeatureMap,
1488 StringRef Constraint,
1489 unsigned Size) const {
1490 switch (Constraint[0]) {
1491 default:
1492 break;
1493 case 'k':
1494 // Registers k0-k7 (AVX512) size limit is 64 bit.
1495 case 'y':
1496 return Size <= 64;
1497 case 'f':
1498 case 't':
1499 case 'u':
1500 return Size <= 128;
1501 case 'Y':
1502 // 'Y' is the first character for several 2-character constraints.
1503 switch (Constraint[1]) {
1504 default:
1505 return false;
1506 case 'm':
1507 // 'Ym' is synonymous with 'y'.
1508 case 'k':
1509 return Size <= 64;
1510 case 'z':
1511 // XMM0/YMM/ZMM0
1512 if (hasFeatureEnabled(FeatureMap, "avx512f"))
1513 // ZMM0 can be used if target supports AVX512F.
1514 return Size <= 512U;
1515 else if (hasFeatureEnabled(FeatureMap, "avx"))
1516 // YMM0 can be used if target supports AVX.
1517 return Size <= 256U;
1518 else if (hasFeatureEnabled(FeatureMap, "sse"))
1519 return Size <= 128U;
1520 return false;
1521 case 'i':
1522 case 't':
1523 case '2':
1524 // 'Yi','Yt','Y2' are synonymous with 'x' when SSE2 is enabled.
1525 if (SSELevel < SSE2)
1526 return false;
1527 break;
1528 }
1529 break;
1530 case 'v':
1531 case 'x':
1532 if (hasFeatureEnabled(FeatureMap, "avx512f"))
1533 // 512-bit zmm registers can be used if target supports AVX512F.
1534 return Size <= 512U;
1535 else if (hasFeatureEnabled(FeatureMap, "avx"))
1536 // 256-bit ymm registers can be used if target supports AVX.
1537 return Size <= 256U;
1538 return Size <= 128U;
1539
1540 }
1541
1542 return true;
1543 }
1544
convertConstraint(const char * & Constraint) const1545 std::string X86TargetInfo::convertConstraint(const char *&Constraint) const {
1546 switch (*Constraint) {
1547 case '@':
1548 if (auto Len = matchAsmCCConstraint(Constraint)) {
1549 std::string Converted = "{" + std::string(Constraint, Len) + "}";
1550 Constraint += Len - 1;
1551 return Converted;
1552 }
1553 return std::string(1, *Constraint);
1554 case 'a':
1555 return std::string("{ax}");
1556 case 'b':
1557 return std::string("{bx}");
1558 case 'c':
1559 return std::string("{cx}");
1560 case 'd':
1561 return std::string("{dx}");
1562 case 'S':
1563 return std::string("{si}");
1564 case 'D':
1565 return std::string("{di}");
1566 case 'p': // Keep 'p' constraint (address).
1567 return std::string("p");
1568 case 't': // top of floating point stack.
1569 return std::string("{st}");
1570 case 'u': // second from top of floating point stack.
1571 return std::string("{st(1)}"); // second from top of floating point stack.
1572 case 'Y':
1573 switch (Constraint[1]) {
1574 default:
1575 // Break from inner switch and fall through (copy single char),
1576 // continue parsing after copying the current constraint into
1577 // the return string.
1578 break;
1579 case 'k':
1580 case 'm':
1581 case 'i':
1582 case 't':
1583 case 'z':
1584 case '2':
1585 // "^" hints llvm that this is a 2 letter constraint.
1586 // "Constraint++" is used to promote the string iterator
1587 // to the next constraint.
1588 return std::string("^") + std::string(Constraint++, 2);
1589 }
1590 [[fallthrough]];
1591 default:
1592 return std::string(1, *Constraint);
1593 }
1594 }
1595
fillValidCPUList(SmallVectorImpl<StringRef> & Values) const1596 void X86TargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {
1597 bool Only64Bit = getTriple().getArch() != llvm::Triple::x86;
1598 llvm::X86::fillValidCPUArchList(Values, Only64Bit);
1599 }
1600
fillValidTuneCPUList(SmallVectorImpl<StringRef> & Values) const1601 void X86TargetInfo::fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values) const {
1602 llvm::X86::fillValidTuneCPUList(Values);
1603 }
1604
getGCCRegNames() const1605 ArrayRef<const char *> X86TargetInfo::getGCCRegNames() const {
1606 return llvm::ArrayRef(GCCRegNames);
1607 }
1608
getGCCAddlRegNames() const1609 ArrayRef<TargetInfo::AddlRegName> X86TargetInfo::getGCCAddlRegNames() const {
1610 return llvm::ArrayRef(AddlRegNames);
1611 }
1612
getTargetBuiltins() const1613 ArrayRef<Builtin::Info> X86_32TargetInfo::getTargetBuiltins() const {
1614 return llvm::ArrayRef(BuiltinInfoX86, clang::X86::LastX86CommonBuiltin -
1615 Builtin::FirstTSBuiltin + 1);
1616 }
1617
getTargetBuiltins() const1618 ArrayRef<Builtin::Info> X86_64TargetInfo::getTargetBuiltins() const {
1619 return llvm::ArrayRef(BuiltinInfoX86,
1620 X86::LastTSBuiltin - Builtin::FirstTSBuiltin);
1621 }
1622