xref: /llvm-project/llvm/lib/Target/X86/X86.td (revision 90968794e26709957d49dd660e4e453235d393e8)
1//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is a target description file for the Intel i386 architecture, referred
10// to here as the "X86" architecture.
11//
12//===----------------------------------------------------------------------===//
13
14// Get the target-independent interfaces which we are implementing...
15//
16include "llvm/Target/Target.td"
17
18//===----------------------------------------------------------------------===//
19// X86 Subtarget state
20//
21// disregarding specific ABI / programming model
// Mode features. Each record below sets its own boolean field (Is64Bit,
// Is32Bit, Is16Bit) and lists no implied features.
22def Is64Bit : SubtargetFeature<"64bit-mode", "Is64Bit", "true",
23                               "64-bit mode (x86_64)">;
24def Is32Bit : SubtargetFeature<"32bit-mode", "Is32Bit", "true",
25                               "32-bit mode (80386)">;
26def Is16Bit : SubtargetFeature<"16bit-mode", "Is16Bit", "true",
27                               "16-bit mode (i8086)">;
28
29//===----------------------------------------------------------------------===//
30// X86 Subtarget ISA features
31//===----------------------------------------------------------------------===//
32
// Baseline ISA flags. Each record from x87 through xsave sets a dedicated
// boolean field and lists no implied features.
33def FeatureX87     : SubtargetFeature<"x87","HasX87", "true",
34                                      "Enable X87 float instructions">;
35
36def FeatureNOPL    : SubtargetFeature<"nopl", "HasNOPL", "true",
37                                      "Enable NOPL instruction (generally pentium pro+)">;
38
39def FeatureCMOV    : SubtargetFeature<"cmov","HasCMOV", "true",
40                                      "Enable conditional move instructions">;
41
42def FeatureCX8     : SubtargetFeature<"cx8", "HasCX8", "true",
43                                      "Support CMPXCHG8B instructions">;
44
// crc32 is its own flag and does not imply any SSE level here.
45def FeatureCRC32   : SubtargetFeature<"crc32", "HasCRC32", "true",
46                                      "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;
47
48def FeaturePOPCNT   : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
49                                       "Support POPCNT instruction">;
50
51def FeatureFXSR    : SubtargetFeature<"fxsr", "HasFXSR", "true",
52                                      "Support fxsave/fxrestore instructions">;
53
// XSAVE family: xsave is the base flag; xsaveopt, xsavec and xsaves each list
// FeatureXSAVE as their only implied feature.
54def FeatureXSAVE   : SubtargetFeature<"xsave", "HasXSAVE", "true",
55                                       "Support xsave instructions">;
56
57def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
58                                       "Support xsaveopt instructions",
59                                       [FeatureXSAVE]>;
60
61def FeatureXSAVEC  : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
62                                       "Support xsavec instructions",
63                                       [FeatureXSAVE]>;
64
65def FeatureXSAVES  : SubtargetFeature<"xsaves", "HasXSAVES", "true",
66                                       "Support xsaves instructions",
67                                       [FeatureXSAVE]>;
68
// SSE level chain. Unlike the boolean flags, these records all write the same
// field (X86SSELevel) with a different level value, and each one lists the
// previous level as its implied feature (sse has none).
69def FeatureSSE1    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
70                                      "Enable SSE instructions">;
71def FeatureSSE2    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
72                                      "Enable SSE2 instructions",
73                                      [FeatureSSE1]>;
74def FeatureSSE3    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
75                                      "Enable SSE3 instructions",
76                                      [FeatureSSE2]>;
77def FeatureSSSE3   : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
78                                      "Enable SSSE3 instructions",
79                                      [FeatureSSE3]>;
80def FeatureSSE41   : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
81                                      "Enable SSE 4.1 instructions",
82                                      [FeatureSSSE3]>;
83def FeatureSSE42   : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
84                                      "Enable SSE 4.2 instructions",
85                                      [FeatureSSE41]>;
86// The MMX subtarget feature is separate from the rest of the SSE features
87// because it's important (for odd compatibility reasons) to be able to
88// turn it off explicitly while allowing SSE+ to be on.
89def FeatureMMX     : SubtargetFeature<"mmx","HasMMX", "true",
90                                      "Enable MMX instructions">;
91// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
92// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
93// without disabling 64-bit mode. Nothing should imply this feature bit. It
94// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
95def FeatureX86_64   : SubtargetFeature<"64bit", "HasX86_64", "true",
96                                      "Support 64-bit instructions">;
// cx16 lists FeatureCX8 as implied.
97def FeatureCX16     : SubtargetFeature<"cx16", "HasCX16", "true",
98                                       "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
99                                       [FeatureCX8]>;
// sse4a is a separate boolean (HasSSE4A), not an X86SSELevel value; it lists
// FeatureSSE3 as implied.
100def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
101                                      "Support SSE 4a instructions",
102                                      [FeatureSSE3]>;
103
// avx and avx2 also write X86SSELevel: avx lists FeatureSSE42 as implied and
// avx2 lists FeatureAVX.
104def FeatureAVX     : SubtargetFeature<"avx", "X86SSELevel", "AVX",
105                                      "Enable AVX instructions",
106                                      [FeatureSSE42]>;
107def FeatureAVX2    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
108                                      "Enable AVX2 instructions",
109                                      [FeatureAVX]>;
// Three-operand FMA. Sets the HasFMA boolean and lists FeatureAVX as implied.
// The description is user-visible help text; it names the operation as fused
// multiply-add.
110def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
111                                      "Enable three-operand fused multiply-add",
112                                      [FeatureAVX]>;
// f16c sets its own boolean and lists FeatureAVX as implied.
113def FeatureF16C    : SubtargetFeature<"f16c", "HasF16C", "true",
114                       "Support 16-bit floating point conversion instructions",
115                       [FeatureAVX]>;
// evex512 is a standalone flag: it lists no implied features, and avx512f
// below does not list it either.
116def FeatureEVEX512  : SubtargetFeature<"evex512", "HasEVEX512", "true",
117                        "Support ZMM and 64-bit mask instructions">;
// avx512f writes X86SSELevel (value AVX512) and lists avx2, fma and f16c as
// implied features.
118def FeatureAVX512   : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
119                                      "Enable AVX-512 instructions",
120                                      [FeatureAVX2, FeatureFMA, FeatureF16C]>;
// The AVX-512 sub-features below are separate booleans. cd, vpopcntdq, dq, bw
// and vl list FeatureAVX512 as implied.
121def FeatureCDI      : SubtargetFeature<"avx512cd", "HasCDI", "true",
122                      "Enable AVX-512 Conflict Detection Instructions",
123                                      [FeatureAVX512]>;
124def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
125                       "true", "Enable AVX-512 Population Count Instructions",
126                                      [FeatureAVX512]>;
// prefetchi lists no implied features.
127def FeaturePREFETCHI  : SubtargetFeature<"prefetchi", "HasPREFETCHI",
128                                   "true",
129                                   "Prefetch instruction with T0 or T1 Hint">;
130def FeatureDQI     : SubtargetFeature<"avx512dq", "HasDQI", "true",
131                      "Enable AVX-512 Doubleword and Quadword Instructions",
132                                      [FeatureAVX512]>;
133def FeatureBWI     : SubtargetFeature<"avx512bw", "HasBWI", "true",
134                      "Enable AVX-512 Byte and Word Instructions",
135                                      [FeatureAVX512]>;
136def FeatureVLX     : SubtargetFeature<"avx512vl", "HasVLX", "true",
137                      "Enable AVX-512 Vector Length eXtensions",
138                                      [FeatureAVX512]>;
// vbmi and vbmi2 list FeatureBWI (not FeatureAVX512 directly) as implied.
139def FeatureVBMI     : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
140                      "Enable AVX-512 Vector Byte Manipulation Instructions",
141                                      [FeatureBWI]>;
142def FeatureVBMI2    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
143                      "Enable AVX-512 further Vector Byte Manipulation Instructions",
144                                      [FeatureBWI]>;
// avxifma lists only FeatureAVX2 as implied; it does not imply avx512f.
145def FeatureAVXIFMA    : SubtargetFeature<"avxifma", "HasAVXIFMA", "true",
146                           "Enable AVX-IFMA",
147                           [FeatureAVX2]>;
// AVX-512 IFMA. Sets the HasIFMA boolean and lists FeatureAVX512 as implied.
// The description is user-visible help text; it names the operation as
// integer fused multiply-add.
148def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
149                      "Enable AVX-512 Integer Fused Multiply-Add",
150                                      [FeatureAVX512]>;
// pku lists no implied features.
151def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
152                      "Enable protection keys">;
// Two VNNI flavours: avx512vnni lists FeatureAVX512 as implied, while avxvnni
// lists only FeatureAVX2.
153def FeatureVNNI    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
154                          "Enable AVX-512 Vector Neural Network Instructions",
155                                      [FeatureAVX512]>;
156def FeatureAVXVNNI    : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
157                           "Support AVX_VNNI encoding",
158                                      [FeatureAVX2]>;
// avx512bf16 and avx512bitalg list FeatureBWI as implied; avx512vp2intersect
// lists FeatureAVX512.
159def FeatureBF16    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
160                           "Support bfloat16 floating point",
161                                      [FeatureBWI]>;
162def FeatureBITALG  : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
163                       "Enable AVX-512 Bit Algorithms",
164                        [FeatureBWI]>;
165def FeatureVP2INTERSECT  : SubtargetFeature<"avx512vp2intersect",
166                                            "HasVP2INTERSECT", "true",
167                                            "Enable AVX-512 vp2intersect",
168                                            [FeatureAVX512]>;
169// FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
170// guarded under condition hasVLX. So we imply it in FeatureFP16 currently.
171// FIXME: FP16 conversion between f16 and i64 customize type v8i64, which is
172// supposed to be guarded under condition hasDQI. So we imply it in FeatureFP16
173// currently.
174def FeatureFP16    : SubtargetFeature<"avx512fp16", "HasFP16", "true",
175                           "Support 16-bit floating point",
176                           [FeatureBWI, FeatureVLX, FeatureDQI]>;
// avxvnniint8 and avxvnniint16 list only FeatureAVX2 as implied.
177def FeatureAVXVNNIINT8  : SubtargetFeature<"avxvnniint8",
178                             "HasAVXVNNIINT8", "true",
179                             "Enable AVX-VNNI-INT8",
180                             [FeatureAVX2]>;
181def FeatureAVXVNNIINT16 : SubtargetFeature<"avxvnniint16",
182                             "HasAVXVNNIINT16", "true",
183                             "Enable AVX-VNNI-INT16",
184                             [FeatureAVX2]>;
// pclmul and gfni list FeatureSSE2 as implied; vpclmulqdq lists both
// FeatureAVX and FeaturePCLMUL.
185def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
186                         "Enable packed carry-less multiplication instructions",
187                               [FeatureSSE2]>;
188def FeatureGFNI    : SubtargetFeature<"gfni", "HasGFNI", "true",
189                         "Enable Galois Field Arithmetic Instructions",
190                               [FeatureSSE2]>;
191def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
192                                         "Enable vpclmulqdq instructions",
193                                         [FeatureAVX, FeaturePCLMUL]>;
// Four-operand FMA. Sets the HasFMA4 boolean and lists FeatureAVX and
// FeatureSSE4A as implied. The description is user-visible help text; it
// names the operation as fused multiply-add.
194def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
195                                      "Enable four-operand fused multiply-add",
196                                      [FeatureAVX, FeatureSSE4A]>;
// xop lists FeatureFMA4 as its only implied feature.
197def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",
198                                      "Enable XOP instructions",
199                                      [FeatureFMA4]>;
// sse-unaligned-mem lists no implied features (it does not imply any SSE
// level).
200def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
201                                          "HasSSEUnalignedMem", "true",
202                      "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;
// aes lists FeatureSSE2 as implied; vaes lists FeatureAVX2 and FeatureAES.
203def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",
204                                      "Enable AES instructions",
205                                      [FeatureSSE2]>;
206def FeatureVAES    : SubtargetFeature<"vaes", "HasVAES", "true",
207                       "Promote selected AES instructions to AVX512/AVX registers",
208                        [FeatureAVX2, FeatureAES]>;
// tbm through adx: standalone boolean flags with no implied features.
209def FeatureTBM     : SubtargetFeature<"tbm", "HasTBM", "true",
210                                      "Enable TBM instructions">;
211def FeatureLWP     : SubtargetFeature<"lwp", "HasLWP", "true",
212                                      "Enable LWP instructions">;
213def FeatureMOVBE   : SubtargetFeature<"movbe", "HasMOVBE", "true",
214                                      "Support MOVBE instruction">;
// Note: the feature string is spelled rdrnd, while the record and field names
// use RDRAND.
215def FeatureRDRAND  : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
216                                      "Support RDRAND instruction">;
217def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
218                                       "Support FS/GS Base instructions">;
219def FeatureLZCNT   : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
220                                      "Support LZCNT instruction">;
221def FeatureBMI     : SubtargetFeature<"bmi", "HasBMI", "true",
222                                      "Support BMI instructions">;
223def FeatureBMI2    : SubtargetFeature<"bmi2", "HasBMI2", "true",
224                                      "Support BMI2 instructions">;
225def FeatureRTM     : SubtargetFeature<"rtm", "HasRTM", "true",
226                                      "Support RTM instructions">;
227def FeatureADX     : SubtargetFeature<"adx", "HasADX", "true",
228                                      "Support ADX instructions">;
// sha lists FeatureSSE2 as implied; sha512 lists FeatureAVX2.
229def FeatureSHA     : SubtargetFeature<"sha", "HasSHA", "true",
230                                      "Enable SHA instructions",
231                                      [FeatureSSE2]>;
232def FeatureSHA512  : SubtargetFeature<"sha512", "HasSHA512", "true",
233                                      "Support SHA512 instructions",
234                                      [FeatureAVX2]>;
235// Processor supports CET SHSTK - Control-Flow Enforcement Technology
236// using Shadow Stack
237def FeatureSHSTK   : SubtargetFeature<"shstk", "HasSHSTK", "true",
238                       "Support CET Shadow-Stack instructions">;
// sm3 lists FeatureAVX as implied; sm4 lists FeatureAVX2.
239def FeatureSM3     : SubtargetFeature<"sm3", "HasSM3", "true",
240                                      "Support SM3 instructions",
241                                      [FeatureAVX]>;
242def FeatureSM4     : SubtargetFeature<"sm4", "HasSM4", "true",
243                                      "Support SM4 instructions",
244                                      [FeatureAVX2]>;
// prfchw through ptwrite: standalone boolean flags with no implied features.
245def FeaturePRFCHW  : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
246                                      "Support PRFCHW instructions">;
247def FeatureRDSEED  : SubtargetFeature<"rdseed", "HasRDSEED", "true",
248                                      "Support RDSEED instruction">;
// Note: the feature string is just sahf, while the record and field are named
// LAHFSAHF64.
249def FeatureLAHFSAHF64 : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
250                           "Support LAHF and SAHF instructions in 64-bit mode">;
251def FeatureMWAITX  : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
252                                      "Enable MONITORX/MWAITX timer functionality">;
253def FeatureCLZERO  : SubtargetFeature<"clzero", "HasCLZERO", "true",
254                                      "Enable Cache Line Zero">;
255def FeatureCLDEMOTE  : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
256                                      "Enable Cache Line Demote">;
257def FeaturePTWRITE  : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
258                                      "Support ptwrite instruction">;
// AMX family. amx-tile is the base flag with no implied features; every other
// amx-* record in this group lists FeatureAMXTILE as its only implied feature.
259def FeatureAMXTILE     : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
260                                      "Support AMX-TILE instructions">;
261def FeatureAMXINT8     : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
262                                      "Support AMX-INT8 instructions",
263                                      [FeatureAMXTILE]>;
264def FeatureAMXBF16     : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
265                                      "Support AMX-BF16 instructions",
266                                      [FeatureAMXTILE]>;
267def FeatureAMXFP16     : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true",
268                                      "Support AMX amx-fp16 instructions",
269                                      [FeatureAMXTILE]>;
270def FeatureAMXCOMPLEX : SubtargetFeature<"amx-complex", "HasAMXCOMPLEX", "true",
271                                         "Support AMX-COMPLEX instructions",
272                                         [FeatureAMXTILE]>;
273def FeatureAMXFP8 : SubtargetFeature<"amx-fp8", "HasAMXFP8", "true",
274                                     "Support AMX-FP8 instructions",
275                                     [FeatureAMXTILE]>;
276def FeatureAMXMOVRS : SubtargetFeature<"amx-movrs", "HasAMXMOVRS", "true",
277                                       "Support AMX-MOVRS instructions",
278                                       [FeatureAMXTILE]>;
279def FeatureAMXTRANSPOSE : SubtargetFeature<"amx-transpose", "HasAMXTRANSPOSE", "true",
280                                           "Support AMX amx-transpose instructions",
281                                           [FeatureAMXTILE]>;
// Note: amx-avx512 lists only FeatureAMXTILE; no AVX-512 feature is implied
// by this record.
282def FeatureAMXAVX512 : SubtargetFeature<"amx-avx512",
283                                        "HasAMXAVX512", "true",
284                                        "Support AMX-AVX512 instructions",
285                                        [FeatureAMXTILE]>;
286def FeatureAMXTF32 : SubtargetFeature<"amx-tf32", "HasAMXTF32", "true",
287                                      "Support AMX-TF32 instructions",
288                                      [FeatureAMXTILE]>;
// Miscellaneous instruction-set flags. Unless noted, each record sets one
// boolean field and lists no implied features.
289def FeatureCMPCCXADD : SubtargetFeature<"cmpccxadd", "HasCMPCCXADD", "true",
290                                        "Support CMPCCXADD instructions">;
// raoint passes an explicit empty implied-feature list.
291def FeatureRAOINT : SubtargetFeature<"raoint", "HasRAOINT", "true",
292                                     "Support RAO-INT instructions",
293                                     []>;
// avxneconvert lists FeatureAVX2 as implied.
294def FeatureAVXNECONVERT : SubtargetFeature<"avxneconvert", "HasAVXNECONVERT", "true",
295                                           "Support AVX-NE-CONVERT instructions",
296                                           [FeatureAVX2]>;
297def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
298                                      "Invalidate Process-Context Identifier">;
299def FeatureSGX     : SubtargetFeature<"sgx", "HasSGX", "true",
300                                      "Enable Software Guard Extensions">;
301def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
302                                      "Flush A Cache Line Optimized">;
303def FeatureCLWB    : SubtargetFeature<"clwb", "HasCLWB", "true",
304                                      "Cache Line Write Back">;
305def FeatureWBNOINVD    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
306                                      "Write Back No Invalidate">;
307def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
308                                    "Support RDPID instructions">;
309def FeatureRDPRU : SubtargetFeature<"rdpru", "HasRDPRU", "true",
310                                    "Support RDPRU instructions">;
311def FeatureWAITPKG  : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
312                                      "Wait and pause enhancements">;
313def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
314                                     "Has ENQCMD instructions">;
// Key Locker: kl lists FeatureSSE2 as implied; widekl lists FeatureKL.
315def FeatureKL  : SubtargetFeature<"kl", "HasKL", "true",
316                                  "Support Key Locker kl Instructions",
317                                  [FeatureSSE2]>;
318def FeatureWIDEKL  : SubtargetFeature<"widekl", "HasWIDEKL", "true",
319                                      "Support Key Locker wide Instructions",
320                                      [FeatureKL]>;
321def FeatureHRESET : SubtargetFeature<"hreset", "HasHRESET", "true",
322                                      "Has hreset instruction">;
323def FeatureSERIALIZE : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
324                                        "Has serialize instruction">;
325def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
326                                       "Support TSXLDTRK instructions">;
327def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true",
328                                    "Has UINTR Instructions">;
329def FeatureUSERMSR : SubtargetFeature<"usermsr", "HasUSERMSR", "true",
330                                      "Support USERMSR instructions">;
331def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
332                                      "platform configuration instruction">;
// movdiri and movdir64b are independent flags; neither implies the other.
333def FeatureMOVDIRI  : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
334                                       "Support movdiri instruction (direct store integer)">;
335def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
336                                        "Support movdir64b instruction (direct store 64 bytes)">;
// AVX10 family.
//  - avx10.1-256 lists a bundle of AVX-512 sub-features plus vaes and
//    vpclmulqdq as implied; it does not list FeatureEVEX512.
//  - avx10.1-512 lists avx10.1-256 and FeatureEVEX512.
//  - avx10.2-256 lists avx10.1-256.
//  - avx10.2-512 lists avx10.2-256 and avx10.1-512.
337def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
338                                      "Support AVX10.1 up to 256-bit instruction",
339                                      [FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
340                                       FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2, FeatureBITALG,
341                                       FeatureVAES, FeatureVPCLMULQDQ, FeatureFP16]>;
342def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
343                                          "Support AVX10.1 up to 512-bit instruction",
344                                          [FeatureAVX10_1, FeatureEVEX512]>;
345def FeatureAVX10_2 : SubtargetFeature<"avx10.2-256", "HasAVX10_2", "true",
346                                      "Support AVX10.2 up to 256-bit instruction",
347                                      [FeatureAVX10_1]>;
348def FeatureAVX10_2_512 : SubtargetFeature<"avx10.2-512", "HasAVX10_2_512", "true",
349                                          "Support AVX10.2 up to 512-bit instruction",
350                                          [FeatureAVX10_2, FeatureAVX10_1_512]>;
// egpr through zu: standalone boolean flags with no implied features.
// NOTE(review): these look like the individual APX sub-features (the
// inline-asm record below mentions APX in its description) - confirm.
351def FeatureEGPR : SubtargetFeature<"egpr", "HasEGPR", "true",
352                                   "Support extended general purpose register">;
353def FeaturePush2Pop2 : SubtargetFeature<"push2pop2", "HasPush2Pop2", "true",
354                                        "Support PUSH2/POP2 instructions">;
355def FeaturePPX : SubtargetFeature<"ppx", "HasPPX", "true",
356                                  "Support Push-Pop Acceleration">;
357def FeatureNDD : SubtargetFeature<"ndd", "HasNDD", "true",
358                                  "Support non-destructive destination">;
359def FeatureCCMP : SubtargetFeature<"ccmp", "HasCCMP", "true",
360                                   "Support conditional cmp & test instructions">;
361def FeatureNF : SubtargetFeature<"nf", "HasNF", "true",
362                                 "Support status flags update suppression">;
363def FeatureCF : SubtargetFeature<"cf", "HasCF", "true",
364                                 "Support conditional faulting">;
365def FeatureZU : SubtargetFeature<"zu", "HasZU", "true",
366                                 "Support zero-upper SETcc/IMUL">;
// Unlike the Has* flags above, this record writes a Use* field
// (UseInlineAsmGPR32). It lists no implied features, so it does not turn on
// egpr by itself.
367def FeatureUseGPR32InInlineAsm
368    : SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
369                       "Enable use of GPR32 in inline assembly for APX">;
// movrs passes an explicit empty implied-feature list.
370def FeatureMOVRS   : SubtargetFeature<"movrs", "HasMOVRS", "true",
371                           "Enable MOVRS", []>;
372
373// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
374// "string operations"). See "REP String Enhancement" in the Intel Software
375// Development Manual. This feature essentially means that REP MOVSB will copy
376// using the largest available size instead of copying bytes one by one, making
377// it at least as fast as REPMOVS{W,D,Q}.
378def FeatureERMSB
379    : SubtargetFeature<
380          "ermsb", "HasERMSB", "true",
381          "REP MOVS/STOS are fast">;
382
383// Icelake and newer processors have Fast Short REP MOV.
// fsrm lists no implied features, so it does not imply ermsb.
384def FeatureFSRM
385    : SubtargetFeature<
386          "fsrm", "HasFSRM", "true",
387          "REP MOVSB of short lengths is faster">;
388
// soft-float writes the UseSoftFloat field and lists no implied features.
389def FeatureSoftFloat
390    : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
391                       "Use software floating point features">;
392
393//===----------------------------------------------------------------------===//
394// X86 Subtarget Security Mitigation features
395//===----------------------------------------------------------------------===//
396
397// Lower indirect calls using a special construct called a `retpoline` to
398// mitigate potential Spectre v2 attacks against them.
399def FeatureRetpolineIndirectCalls
400    : SubtargetFeature<
401          "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
402          "Remove speculation of indirect calls from the generated code">;
403
404// Lower indirect branches and switches either using conditional branch trees
405// or using a special construct called a `retpoline` to mitigate potential
406// Spectre v2 attacks against them.
407def FeatureRetpolineIndirectBranches
408    : SubtargetFeature<
409          "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
410          "Remove speculation of indirect branches from the generated code">;
411
412// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
413// `retpoline-indirect-branches` above.
// It writes its own field (DeprecatedUseRetpoline) and gets the two real
// features turned on through its implied-feature list.
414def FeatureRetpoline
415    : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
416                       "Remove speculation of indirect branches from the "
417                       "generated code, either by avoiding them entirely or "
418                       "lowering them with a speculation blocking construct",
419                       [FeatureRetpolineIndirectCalls,
420                        FeatureRetpolineIndirectBranches]>;
421
422// Rely on external thunks for the emitted retpoline calls. This allows users
423// to provide their own custom thunk definitions in highly specialized
424// environments such as a kernel that does boot-time hot patching.
// Note: only FeatureRetpolineIndirectCalls is listed as implied here;
// FeatureRetpolineIndirectBranches is not.
425def FeatureRetpolineExternalThunk
426    : SubtargetFeature<
427          "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
428          "When lowering an indirect call or branch using a `retpoline`, rely "
429          "on the specified user provided thunk rather than emitting one "
430          "ourselves. Only has effect when combined with some other retpoline "
431          "feature", [FeatureRetpolineIndirectCalls]>;
432
433// Mitigate LVI attacks against indirect calls/branches and call returns
// This record lists no implied features.
434def FeatureLVIControlFlowIntegrity
435    : SubtargetFeature<
436          "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
437          "Prevent indirect calls/branches from using a memory operand, and "
438          "precede all indirect calls/branches from a register with an "
439          "LFENCE instruction to serialize control flow. Also decompose RET "
440          "instructions into a POP+LFENCE+JMP sequence.">;
441
442// Enable SESES to mitigate speculative execution attacks
// seses lists FeatureLVIControlFlowIntegrity as implied, matching the last
// sentence of its description.
443def FeatureSpeculativeExecutionSideEffectSuppression
444    : SubtargetFeature<
445          "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
446          "Prevent speculative execution side channel timing attacks by "
447          "inserting a speculation barrier before memory reads, memory writes, "
448          "and conditional branches. Implies LVI Control Flow integrity.",
449          [FeatureLVIControlFlowIntegrity]>;
450
451// Mitigate LVI attacks against data loads
// Independent of lvi-cfi: no implied features are listed.
452def FeatureLVILoadHardening
453    : SubtargetFeature<
454          "lvi-load-hardening", "UseLVILoadHardening", "true",
455          "Insert LFENCE instructions to prevent data speculatively injected "
456          "into loads from being used maliciously.">;
457
// tagged-globals writes the AllowTaggedGlobals field and lists no implied
// features; the description string states the intended effect.
458def FeatureTaggedGlobals
459    : SubtargetFeature<
460          "tagged-globals", "AllowTaggedGlobals", "true",
461          "Use an instruction sequence for taking the address of a global "
462          "that allows a memory tag in the upper address bits.">;
463
464// Control codegen mitigation against Straight Line Speculation vulnerability.
// Two independent switches: one for RET and one for indirect JMP. Neither
// implies the other.
465def FeatureHardenSlsRet
466    : SubtargetFeature<
467          "harden-sls-ret", "HardenSlsRet", "true",
468          "Harden against straight line speculation across RET instructions.">;
469
470def FeatureHardenSlsIJmp
471    : SubtargetFeature<
472          "harden-sls-ijmp", "HardenSlsIJmp", "true",
473          "Harden against straight line speculation across indirect JMP instructions.">;
474
475//===----------------------------------------------------------------------===//
476// X86 Subtarget Tuning features
477//===----------------------------------------------------------------------===//
// Tuning records use the Tuning* name prefix instead of Feature*. Each one in
// this group sets a single boolean field and lists no implied features.
478def TuningPreferMovmskOverVTest : SubtargetFeature<"prefer-movmsk-over-vtest",
479                                       "PreferMovmskOverVTest", "true",
480                                       "Prefer movmsk over vtest instruction">;
481
482def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
483                                       "SHLD instruction is slow">;
484
485def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
486                                        "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;
487
488def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
489                                          "true",
490                                          "PMADDWD is slower than PMULLD">;
491
492// FIXME: This should not apply to CPUs that do not have SSE.
493def TuningSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
494                                "IsUnalignedMem16Slow", "true",
495                                "Slow unaligned 16-byte memory access">;
496
497def TuningSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
498                                "IsUnalignedMem32Slow", "true",
499                                "Slow unaligned 32-byte memory access">;
500
// Note: the feature string is lea-sp; the field it sets is UseLeaForSP.
501def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
502                                     "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;
503
504// True if 8-bit divisions are significantly faster than
505// 32-bit divisions and should be used when possible.
506def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb",
507                                     "HasSlowDivide32", "true",
508                                     "Use 8-bit divide for positive values less than 256">;
509
510// True if 32-bit divides are significantly faster than
511// 64-bit divisions and should be used when possible.
// Per the description string, the 32-bit divide is intended for positive
// values less than 2^32.
512def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl",
513                                     "HasSlowDivide64", "true",
514                                     "Use 32-bit divide for positive values less than 2^32">;
515
// Tuning flag "pad-short-functions": pad short functions so that returning
// too early does not cause a stall.
516def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions",
517                                     "PadShortFunctions", "true",
518                                     "Pad short functions (to prevent a stall when returning too early)">;
519
520// On some processors, instructions that implicitly take two memory operands
// are slow. In practice, this means that CALL, PUSH, and POP with memory
// operands should be avoided in favor of a MOV followed by a register
// CALL/PUSH/POP.
521//
522//
523def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
524                                     "SlowTwoMemOps", "true",
525                                     "Two memory operand instructions are slow">;
526
527// True if the LEA instruction inputs have to be ready at the address
528// generation (AG) stage.
529def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true",
530                                   "LEA instruction needs inputs at AG stage">;
531
// Tuning flag "slow-lea": LEA is slow for certain argument combinations. The
// specific combinations are not spelled out in this record.
532def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
533                                   "LEA instruction with certain arguments is slow">;
534
535// True if LEA is slow when it has all three source operands (base, index,
536// and offset), or when it uses base and index registers where the base is
537// EBP, RBP, or R13.
538def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
539                                   "LEA instruction with 3 ops or certain registers is slow">;
540
541// True if INC and DEC instructions are slow when writing to flags, i.e.
// slower than the equivalent ADD and SUB.
542def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
543                                   "INC and DEC instructions are slower than ADD and SUB">;
544
// Tuning flag "false-deps-popcnt": POPCNT has a false dependency on its
// destination register.
545def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
546                                     "HasPOPCNTFalseDeps", "true",
547                                     "POPCNT has a false dependency on dest register">;
548
// Tuning flag "false-deps-lzcnt-tzcnt": LZCNT and TZCNT have a false
// dependency on their destination register. One flag covers both
// instructions.
549def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
550                                     "HasLZCNTFalseDeps", "true",
551                                     "LZCNT/TZCNT have a false dependency on dest register">;
552
// Tuning flag "false-deps-mulc": the VFMULC/VFCMULC PH and SH instructions
// have a false dependency on the destination register.
553def TuningMULCFalseDeps : SubtargetFeature<"false-deps-mulc",
554                               "HasMULCFalseDeps", "true",
555                               "VF[C]MULCPH/SH has a false dependency on dest register">;
556
// Tuning flag "false-deps-perm": VPERMD/Q/PS/PD have a false dependency on
// the destination register.
557def TuningPERMFalseDeps : SubtargetFeature<"false-deps-perm",
558                               "HasPERMFalseDeps", "true",
559                               "VPERMD/Q/PS/PD has a false dependency on dest register">;
560
// Tuning flag "false-deps-range": VRANGEPD/PS/SD/SS have a false dependency
// on the destination register.
561def TuningRANGEFalseDeps : SubtargetFeature<"false-deps-range",
562                               "HasRANGEFalseDeps", "true",
563                               "VRANGEPD/PS/SD/SS has a false dependency on dest register">;
564
// Tuning flag "false-deps-getmant": the VGETMANT family (the scalar SS/SD/SH
// forms, and the memory-operand forms of the packed PS/PD variants, per the
// description string) has a false dependency on the destination register.
// The description is split across two adjacent string literals.
565def TuningGETMANTFalseDeps : SubtargetFeature<"false-deps-getmant",
566                               "HasGETMANTFalseDeps", "true",
567                               "VGETMANTSS/SD/SH and VGETMANTPS/PD(memory version) has a"
568                               " false dependency on dest register">;
569
// Tuning flag "false-deps-mullq": VPMULLQ has a false dependency on the
// destination register.
570def TuningMULLQFalseDeps : SubtargetFeature<"false-deps-mullq",
571                               "HasMULLQFalseDeps", "true",
572                               "VPMULLQ has a false dependency on dest register">;
573
// Tuning flag "sbb-dep-breaking": an SBB whose two operands are the same
// register has no dependency on that source register.
574def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking",
575                                     "HasSBBDepBreaking", "true",
576                                     "SBB with same register has no source dependency">;
577
578// On recent X86 (port-bound) processors, it is preferable to combine to a
579// single shuffle using a variable mask over multiple fixed shuffles.
// This record covers the cross-lane case.
580def TuningFastVariableCrossLaneShuffle
581    : SubtargetFeature<"fast-variable-crosslane-shuffle",
582                       "HasFastVariableCrossLaneShuffle",
583                       "true", "Cross-lane shuffles with variable masks are fast">;
// Tuning flag "fast-variable-perlane-shuffle": per-lane shuffles that take a
// variable mask are fast on the target.
584def TuningFastVariablePerLaneShuffle
585    : SubtargetFeature<"fast-variable-perlane-shuffle",
586                       "HasFastVariablePerLaneShuffle",
587                       "true", "Per-lane shuffles with variable masks are fast">;
588
589// Goldmont / Tremont (Atom in general) have no bypass delay when an
// instruction from the 'wrong' domain is used.
590def TuningNoDomainDelay : SubtargetFeature<"no-bypass-delay",
591                                   "NoDomainDelay","true",
592                                   "Has no bypass delay when using the 'wrong' domain">;
593
594// Many processors (Nehalem and newer on Intel) have no bypass delay when
595// using the wrong mov type.
596def TuningNoDomainDelayMov : SubtargetFeature<"no-bypass-delay-mov",
597                                   "NoDomainDelayMov","true",
598                                   "Has no bypass delay when using the 'wrong' mov type">;
599
600// Newer processors (Skylake and newer on Intel) have no bypass delay when
601// using the wrong blend type.
602def TuningNoDomainDelayBlend : SubtargetFeature<"no-bypass-delay-blend",
603                                   "NoDomainDelayBlend","true",
604                                   "Has no bypass delay when using the 'wrong' blend type">;
605
606// Newer processors (Haswell and newer on Intel) have no bypass delay when
607// using the wrong shuffle type.
608def TuningNoDomainDelayShuffle : SubtargetFeature<"no-bypass-delay-shuffle",
609                                   "NoDomainDelayShuffle","true",
610                                   "Has no bypass delay when using the 'wrong' shuffle type">;
611
612// Prefer lowering shuffles on AVX512 targets (e.g. Skylake Server) to
613// immediate shifts/rotates if they can use more ports than regular shuffles.
// Note that the record name, the feature string ("faster-shift-than-shuffle")
// and the field (PreferLowerShuffleAsShift) each use a different wording.
614def TuningPreferShiftShuffle : SubtargetFeature<"faster-shift-than-shuffle",
615                                   "PreferLowerShuffleAsShift", "true",
616                                   "Shifts are faster (or as fast) as shuffle">;
617
// Tuning flag for targets whose vector shifts are fast (2 per cycle) rather
// than slow (1 per cycle), per the description string.
// NOTE(review): unlike the other tuning records defined nearby, the feature
// string here carries a "tuning-" prefix. It is an externally visible name, so
// it is deliberately left unchanged.
618def TuningFastImmVectorShift : SubtargetFeature<"tuning-fast-imm-vector-shift",
619                                   "FastImmVectorShift", "true",
620                                   "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;
621
622// On some X86 processors, a vzeroupper instruction should be inserted after
623// using ymm/zmm registers and before executing code that may use SSE
// instructions. This flag requests that insertion.
624def TuningInsertVZEROUPPER
625    : SubtargetFeature<"vzeroupper",
626                       "InsertVZEROUPPER",
627                       "true", "Should insert vzeroupper instructions">;
628
629// TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
630// than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
631// vector FSQRT has higher throughput than the corresponding NR code.
632// The idea is that throughput bound code is likely to be vectorized, so for
633// vectorized code we should care about the throughput of SQRT operations.
634// But if the code is scalar that probably means that the code has some kind of
635// dependency and we should care more about reducing the latency.
636
637// True if the hardware SQRTSS instruction is at least as fast (in latency)
638// as RSQRTSS followed by a Newton-Raphson iteration. When set, the
// Newton-Raphson expansion is disabled for scalar SQRT.
639def TuningFastScalarFSQRT
640    : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
641                       "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
642// True if the hardware SQRTPS/VSQRTPS instructions are at least as fast (in
643// throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
// When set, the Newton-Raphson expansion is disabled for vector SQRT.
644def TuningFastVectorFSQRT
645    : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
646                       "true", "Vector SQRT is fast (disable Newton-Raphson)">;
647
648// If lzcnt has latency/throughput equivalent to most simple integer ops, it
649// can be used to replace test/set sequences.
650def TuningFastLZCNT
651    : SubtargetFeature<
652          "fast-lzcnt", "HasFastLZCNT", "true",
653          "LZCNT instructions are as fast as most simple integer ops">;
654
655// Set if the target can efficiently decode NOPs up to 7 bytes in length.
656def TuningFast7ByteNOP
657    : SubtargetFeature<
658          "fast-7bytenop", "HasFast7ByteNOP", "true",
659          "Target can quickly decode up to 7 byte NOPs">;
660
661// Set if the target can efficiently decode NOPs up to 11 bytes in length.
662def TuningFast11ByteNOP
663    : SubtargetFeature<
664          "fast-11bytenop", "HasFast11ByteNOP", "true",
665          "Target can quickly decode up to 11 byte NOPs">;
666
667// Set if the target can efficiently decode NOPs up to 15 bytes in length.
668def TuningFast15ByteNOP
669    : SubtargetFeature<
670          "fast-15bytenop", "HasFast15ByteNOP", "true",
671          "Target can quickly decode up to 15 byte NOPs">;
672
673// Sandy Bridge and newer processors can use SHLD with the same source on
674// both inputs to implement a rotate, which avoids the partial flag update of
675// the normal rotate instructions.
676def TuningFastSHLDRotate
677    : SubtargetFeature<
678          "fast-shld-rotate", "HasFastSHLDRotate", "true",
679          "SHLD can be used as a faster rotate">;
680
681// Bulldozer and newer processors can merge CMP/TEST (but not other
682// instructions) with conditional branches. Compare with "macrofusion", whose
// description covers a wider set of instructions.
683def TuningBranchFusion
684    : SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
685                 "CMP/TEST can be fused with conditional branches">;
686
687// Sandy Bridge and newer processors have many instructions that can be
688// fused with a conditional branch and then pass through the CPU as a single
689// operation.
690def TuningMacroFusion
691    : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
692                 "Various instructions can be fused with conditional branches">;
693
694// Gather is available since Haswell (AVX2 set), so technically gathers can
695// be generated on all AVX2 processors, but the overhead on HSW is high.
696// The Skylake Client processor has faster gathers than HSW, with performance
697// similar to Skylake Server (AVX-512).
698def TuningFastGather
699    : SubtargetFeature<"fast-gather", "HasFastGather", "true",
700                       "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;
701
702// Generate vpdpwssd instead of the vpmaddwd+vpaddd instruction sequence.
703def TuningFastDPWSSD
704    : SubtargetFeature<
705          "fast-dpwssd", "HasFastDPWSSD", "true",
706          "Prefer vpdpwssd instruction over vpmaddwd+vpaddd instruction sequence">;
707
// Tuning flag "prefer-no-gather": prefer not to use gather instructions.
// Note the inverted sense: the value this record gives for the PreferGather
// field is "false", unlike most records in this section, which use "true".
708def TuningPreferNoGather
709    : SubtargetFeature<"prefer-no-gather", "PreferGather", "false",
710                       "Prefer no gather instructions">;
// Tuning flag "prefer-no-scatter": prefer not to use scatter instructions.
// Note the inverted sense: the value this record gives for the PreferScatter
// field is "false", unlike most records in this section, which use "true".
711def TuningPreferNoScatter
712    : SubtargetFeature<"prefer-no-scatter", "PreferScatter", "false",
713                       "Prefer no scatter instructions">;
714
// Tuning flag "prefer-128-bit": prefer 128-bit AVX instructions.
715def TuningPrefer128Bit
716    : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
717                       "Prefer 128-bit AVX instructions">;
718
// Tuning flag "prefer-256-bit": prefer 256-bit AVX instructions.
719def TuningPrefer256Bit
720    : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
721                       "Prefer 256-bit AVX instructions">;
722
// Tuning flag "allow-light-256-bit": permit 256-bit loads/stores to be
// generated even when 128-bit instructions are otherwise preferred.
723def TuningAllowLight256Bit
724    : SubtargetFeature<"allow-light-256-bit", "AllowLight256Bit", "true",
725                       "Enable generation of 256-bit load/stores even if we prefer 128-bit">;
726
// Tuning flag "prefer-mask-registers": prefer AVX512 mask registers over
// PTEST/MOVMSK.
727def TuningPreferMaskRegisters
728    : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
729                       "Prefer AVX512 mask registers over PTEST/MOVMSK">;
730
// Tuning flag "fast-bextr": BEXTR is implemented as a single uop with good
// throughput. The description is split across two adjacent string literals.
731def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
732          "Indicates that the BEXTR instruction is implemented as a single uop "
733          "with good throughput">;
734
735// Combine vector math operations with shuffles into horizontal math
736// instructions if the CPU implements horizontal operations (introduced with
737// SSE3) with better latency/throughput than the alternative sequence.
// The description is split across two adjacent string literals.
738def TuningFastHorizontalOps
739    : SubtargetFeature<
740        "fast-hops", "HasFastHorizontalOps", "true",
741        "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
742        "normal vector instructions with shuffles">;
743
// Tuning flag "fast-scalar-shift-masks": for scalars, prefer a left/right
// logical shift pair over a shift followed by an AND.
744def TuningFastScalarShiftMasks
745    : SubtargetFeature<
746        "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
747        "Prefer a left/right scalar logical shift pair over a shift+and pair">;
748
// Tuning flag "fast-vector-shift-masks": for vectors, prefer a left/right
// logical shift pair over a shift followed by an AND.
749def TuningFastVectorShiftMasks
750    : SubtargetFeature<
751        "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
752        "Prefer a left/right vector logical shift pair over a shift+and pair">;
753
// Tuning flag "fast-movbe": prefer movbe over a single-use load followed by
// bswap, or a single-use bswap followed by a store.
754def TuningFastMOVBE
755    : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
756    "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
757
// Tuning flag "fast-imm16": prefer an i16 instruction with an i16 immediate
// over extending the operation to i32.
758def TuningFastImm16
759    : SubtargetFeature<"fast-imm16", "HasFastImm16", "true",
760    "Prefer a i16 instruction with i16 immediate over extension to i32">;
761
// Tuning flag "use-slm-arith-costs": use the Silvermont-specific arithmetic
// costs.
762def TuningUseSLMArithCosts
763    : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
764        "Use Silvermont specific arithmetic costs">;
765
// Tuning flag "use-glm-div-sqrt-costs": use the Goldmont-specific
// floating-point div/sqrt costs.
766def TuningUseGLMDivSqrtCosts
767    : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
768        "Use Goldmont specific floating point div/sqrt costs">;
769
770// Starting with the Redwood Cove architecture, branches support a
771// branch-taken hint (i.e., the instruction prefix 3EH).
772def TuningBranchHint: SubtargetFeature<"branch-hint", "HasBranchHint", "true",
773                                        "Target has branch hint feature">;
774
775//===----------------------------------------------------------------------===//
776// X86 CPU Families
777// TODO: Remove these - use general tuning features to determine codegen.
778//===----------------------------------------------------------------------===//
779
780// Bonnell
// CPU-family marker for Intel Atom. Its feature string is empty, unlike the
// tuning records earlier in this file.
// NOTE(review): presumably the empty name keeps it from being selected by
// name on the command line; confirm against the SubtargetFeature handling.
781def ProcIntelAtom : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">;
782
783//===----------------------------------------------------------------------===//
784// Register File Description
785//===----------------------------------------------------------------------===//
786
787include "X86RegisterInfo.td"
788include "X86RegisterBanks.td"
789
790//===----------------------------------------------------------------------===//
791// Instruction Descriptions
792//===----------------------------------------------------------------------===//
793
794include "X86Schedule.td"
795include "X86InstrInfo.td"
796include "X86SchedPredicates.td"
797
// Declares the X86InstrInfo record as a plain InstrInfo instance; no fields
// are overridden here.
798def X86InstrInfo : InstrInfo;
799
800//===----------------------------------------------------------------------===//
801// X86 Scheduler Models
802//===----------------------------------------------------------------------===//
803
804include "X86ScheduleAtom.td"
805include "X86SchedSandyBridge.td"
806include "X86SchedHaswell.td"
807include "X86SchedBroadwell.td"
808include "X86ScheduleSLM.td"
809include "X86ScheduleZnver1.td"
810include "X86ScheduleZnver2.td"
811include "X86ScheduleZnver3.td"
812include "X86ScheduleZnver4.td"
813include "X86ScheduleBdVer2.td"
814include "X86ScheduleBtVer2.td"
815include "X86SchedSkylakeClient.td"
816include "X86SchedSkylakeServer.td"
817include "X86SchedIceLake.td"
818include "X86SchedAlderlakeP.td"
819include "X86SchedSapphireRapids.td"
820
821//===----------------------------------------------------------------------===//
822// X86 Processor Feature Lists
823//===----------------------------------------------------------------------===//
824
825def ProcessorFeatures {
826  // x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
827  list<SubtargetFeature> X86_64V1Features = [
828    FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2,
829    FeatureFXSR, FeatureNOPL, FeatureX86_64,
830  ];
831  list<SubtargetFeature> X86_64V1Tuning = [
832    TuningMacroFusion,
833    TuningSlow3OpsLEA,
834    TuningSlowDivide64,
835    TuningSlowIncDec,
836    TuningInsertVZEROUPPER
837  ];
838
839  list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
840    FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT,
841    FeatureSSE42
842  ]);
843  list<SubtargetFeature> X86_64V2Tuning = [
844    TuningMacroFusion,
845    TuningSlow3OpsLEA,
846    TuningSlowDivide64,
847    TuningSlowUAMem32,
848    TuningFastScalarFSQRT,
849    TuningFastSHLDRotate,
850    TuningFast15ByteNOP,
851    TuningPOPCNTFalseDeps,
852    TuningInsertVZEROUPPER
853  ];
854
855  list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
856    FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
857    FeatureMOVBE, FeatureXSAVE
858  ]);
859  list<SubtargetFeature> X86_64V3Tuning = [
860    TuningMacroFusion,
861    TuningSlow3OpsLEA,
862    TuningSlowDivide64,
863    TuningFastScalarFSQRT,
864    TuningFastSHLDRotate,
865    TuningFast15ByteNOP,
866    TuningFastVariableCrossLaneShuffle,
867    TuningFastVariablePerLaneShuffle,
868    TuningPOPCNTFalseDeps,
869    TuningLZCNTFalseDeps,
870    TuningInsertVZEROUPPER,
871    TuningAllowLight256Bit
872  ];
873
874  list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
875    FeatureEVEX512,
876    FeatureBWI,
877    FeatureCDI,
878    FeatureDQI,
879    FeatureVLX,
880  ]);
881  list<SubtargetFeature> X86_64V4Tuning = [
882    TuningMacroFusion,
883    TuningSlow3OpsLEA,
884    TuningSlowDivide64,
885    TuningFastScalarFSQRT,
886    TuningFastVectorFSQRT,
887    TuningFastSHLDRotate,
888    TuningFast15ByteNOP,
889    TuningFastVariableCrossLaneShuffle,
890    TuningFastVariablePerLaneShuffle,
891    TuningPrefer256Bit,
892    TuningFastGather,
893    TuningPOPCNTFalseDeps,
894    TuningInsertVZEROUPPER,
895    TuningAllowLight256Bit
896  ];
897
898  // Nehalem
899  list<SubtargetFeature> NHMFeatures = X86_64V2Features;
900  list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
901                                      TuningSlowDivide64,
902                                      TuningInsertVZEROUPPER,
903                                      TuningNoDomainDelayMov];
904
905  // Westmere
906  list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
907  list<SubtargetFeature> WSMTuning = NHMTuning;
908  list<SubtargetFeature> WSMFeatures =
909    !listconcat(NHMFeatures, WSMAdditionalFeatures);
910
911  // Sandybridge
912  list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
913                                                  FeatureXSAVE,
914                                                  FeatureXSAVEOPT];
915  list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
916                                      TuningSlow3OpsLEA,
917                                      TuningSlowDivide64,
918                                      TuningSlowUAMem32,
919                                      TuningFastScalarFSQRT,
920                                      TuningFastSHLDRotate,
921                                      TuningFast15ByteNOP,
922                                      TuningPOPCNTFalseDeps,
923                                      TuningInsertVZEROUPPER,
924                                      TuningNoDomainDelayMov];
925  list<SubtargetFeature> SNBFeatures =
926    !listconcat(WSMFeatures, SNBAdditionalFeatures);
927
928  // Ivybridge
929  list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
930                                                  FeatureF16C,
931                                                  FeatureFSGSBase];
932  list<SubtargetFeature> IVBTuning = SNBTuning;
933  list<SubtargetFeature> IVBFeatures =
934    !listconcat(SNBFeatures, IVBAdditionalFeatures);
935
936  // Haswell
937  list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
938                                                  FeatureBMI,
939                                                  FeatureBMI2,
940                                                  FeatureERMSB,
941                                                  FeatureFMA,
942                                                  FeatureINVPCID,
943                                                  FeatureLZCNT,
944                                                  FeatureMOVBE];
945  list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
946                                      TuningSlow3OpsLEA,
947                                      TuningSlowDivide64,
948                                      TuningFastScalarFSQRT,
949                                      TuningFastSHLDRotate,
950                                      TuningFast15ByteNOP,
951                                      TuningFastVariableCrossLaneShuffle,
952                                      TuningFastVariablePerLaneShuffle,
953                                      TuningPOPCNTFalseDeps,
954                                      TuningLZCNTFalseDeps,
955                                      TuningInsertVZEROUPPER,
956                                      TuningAllowLight256Bit,
957                                      TuningNoDomainDelayMov,
958                                      TuningNoDomainDelayShuffle];
959  list<SubtargetFeature> HSWFeatures =
960    !listconcat(IVBFeatures, HSWAdditionalFeatures);
961
962  // Broadwell
963  list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
964                                                  FeatureRDSEED,
965                                                  FeaturePRFCHW];
966  list<SubtargetFeature> BDWTuning = HSWTuning;
967  list<SubtargetFeature> BDWFeatures =
968    !listconcat(HSWFeatures, BDWAdditionalFeatures);
969
970  // Skylake
971  list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
972                                                  FeatureXSAVEC,
973                                                  FeatureXSAVES,
974                                                  FeatureCLFLUSHOPT];
975  list<SubtargetFeature> SKLTuning = [TuningFastGather,
976                                      TuningMacroFusion,
977                                      TuningSlow3OpsLEA,
978                                      TuningSlowDivide64,
979                                      TuningFastScalarFSQRT,
980                                      TuningFastVectorFSQRT,
981                                      TuningFastSHLDRotate,
982                                      TuningFast15ByteNOP,
983                                      TuningFastVariableCrossLaneShuffle,
984                                      TuningFastVariablePerLaneShuffle,
985                                      TuningPOPCNTFalseDeps,
986                                      TuningInsertVZEROUPPER,
987                                      TuningAllowLight256Bit,
988                                      TuningNoDomainDelayMov,
989                                      TuningNoDomainDelayShuffle,
990                                      TuningNoDomainDelayBlend];
991  list<SubtargetFeature> SKLFeatures =
992    !listconcat(BDWFeatures, SKLAdditionalFeatures);
993
994  // Skylake-AVX512
995  list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES,
996                                                  FeatureXSAVEC,
997                                                  FeatureXSAVES,
998                                                  FeatureCLFLUSHOPT,
999                                                  FeatureAVX512,
1000                                                  FeatureEVEX512,
1001                                                  FeatureCDI,
1002                                                  FeatureDQI,
1003                                                  FeatureBWI,
1004                                                  FeatureVLX,
1005                                                  FeaturePKU,
1006                                                  FeatureCLWB];
1007  list<SubtargetFeature> SKXTuning = [TuningFastGather,
1008                                      TuningMacroFusion,
1009                                      TuningSlow3OpsLEA,
1010                                      TuningSlowDivide64,
1011                                      TuningFastScalarFSQRT,
1012                                      TuningFastVectorFSQRT,
1013                                      TuningFastSHLDRotate,
1014                                      TuningFast15ByteNOP,
1015                                      TuningFastVariableCrossLaneShuffle,
1016                                      TuningFastVariablePerLaneShuffle,
1017                                      TuningPrefer256Bit,
1018                                      TuningPOPCNTFalseDeps,
1019                                      TuningInsertVZEROUPPER,
1020                                      TuningAllowLight256Bit,
1021                                      TuningPreferShiftShuffle,
1022                                      TuningNoDomainDelayMov,
1023                                      TuningNoDomainDelayShuffle,
1024                                      TuningNoDomainDelayBlend,
1025                                      TuningFastImmVectorShift];
1026  list<SubtargetFeature> SKXFeatures =
1027    !listconcat(BDWFeatures, SKXAdditionalFeatures);
1028
1029  // Cascadelake
1030  list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
1031  list<SubtargetFeature> CLXTuning = SKXTuning;
1032  list<SubtargetFeature> CLXFeatures =
1033    !listconcat(SKXFeatures, CLXAdditionalFeatures);
1034
1035  // Cooperlake
1036  list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
1037  list<SubtargetFeature> CPXTuning = SKXTuning;
1038  list<SubtargetFeature> CPXFeatures =
1039    !listconcat(CLXFeatures, CPXAdditionalFeatures);
1040
1041  // Cannonlake
1042  list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
1043                                                  FeatureEVEX512,
1044                                                  FeatureCDI,
1045                                                  FeatureDQI,
1046                                                  FeatureBWI,
1047                                                  FeatureVLX,
1048                                                  FeaturePKU,
1049                                                  FeatureVBMI,
1050                                                  FeatureIFMA,
1051                                                  FeatureSHA];
1052  list<SubtargetFeature> CNLTuning = [TuningFastGather,
1053                                      TuningMacroFusion,
1054                                      TuningSlow3OpsLEA,
1055                                      TuningSlowDivide64,
1056                                      TuningFastScalarFSQRT,
1057                                      TuningFastVectorFSQRT,
1058                                      TuningFastSHLDRotate,
1059                                      TuningFast15ByteNOP,
1060                                      TuningFastVariableCrossLaneShuffle,
1061                                      TuningFastVariablePerLaneShuffle,
1062                                      TuningPrefer256Bit,
1063                                      TuningInsertVZEROUPPER,
1064                                      TuningAllowLight256Bit,
1065                                      TuningNoDomainDelayMov,
1066                                      TuningNoDomainDelayShuffle,
1067                                      TuningNoDomainDelayBlend,
1068                                      TuningFastImmVectorShift];
1069  list<SubtargetFeature> CNLFeatures =
1070    !listconcat(SKLFeatures, CNLAdditionalFeatures);
1071
1072  // Icelake
1073  list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
1074                                                  FeatureVAES,
1075                                                  FeatureVBMI2,
1076                                                  FeatureVNNI,
1077                                                  FeatureVPCLMULQDQ,
1078                                                  FeatureVPOPCNTDQ,
1079                                                  FeatureGFNI,
1080                                                  FeatureRDPID,
1081                                                  FeatureFSRM];
1082  list<SubtargetFeature> ICLTuning = [TuningFastGather,
1083                                      TuningMacroFusion,
1084                                      TuningSlowDivide64,
1085                                      TuningFastScalarFSQRT,
1086                                      TuningFastVectorFSQRT,
1087                                      TuningFastSHLDRotate,
1088                                      TuningFast15ByteNOP,
1089                                      TuningFastVariableCrossLaneShuffle,
1090                                      TuningFastVariablePerLaneShuffle,
1091                                      TuningPrefer256Bit,
1092                                      TuningInsertVZEROUPPER,
1093                                      TuningAllowLight256Bit,
1094                                      TuningNoDomainDelayMov,
1095                                      TuningNoDomainDelayShuffle,
1096                                      TuningNoDomainDelayBlend,
1097                                      TuningFastImmVectorShift];
1098  list<SubtargetFeature> ICLFeatures =
1099    !listconcat(CNLFeatures, ICLAdditionalFeatures);
1100
1101  // Icelake Server
1102  list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
1103                                                  FeatureCLWB,
1104                                                  FeatureWBNOINVD];
1105  list<SubtargetFeature> ICXTuning = ICLTuning;
1106  list<SubtargetFeature> ICXFeatures =
1107    !listconcat(ICLFeatures, ICXAdditionalFeatures);
1108
1109  // Tigerlake
1110  list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
1111                                                  FeatureCLWB,
1112                                                  FeatureMOVDIRI,
1113                                                  FeatureMOVDIR64B,
1114                                                  FeatureSHSTK];
1115  list<SubtargetFeature> TGLTuning = ICLTuning;
1116  list<SubtargetFeature> TGLFeatures =
1117    !listconcat(ICLFeatures, TGLAdditionalFeatures );
1118
1119  // Sapphirerapids
  // Both SPR lists extend the ICX lists: ISA = ICXFeatures + the additions
  // below, tuning = ICXTuning + the *FalseDeps tunings below.
1120  list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE,
1121                                                  FeatureAMXINT8,
1122                                                  FeatureAMXBF16,
1123                                                  FeatureBF16,
1124                                                  FeatureSERIALIZE,
1125                                                  FeatureCLDEMOTE,
1126                                                  FeatureWAITPKG,
1127                                                  FeaturePTWRITE,
1128                                                  FeatureFP16,
1129                                                  FeatureAVXVNNI,
1130                                                  FeatureTSXLDTRK,
1131                                                  FeatureENQCMD,
1132                                                  FeatureSHSTK,
1133                                                  FeatureMOVDIRI,
1134                                                  FeatureMOVDIR64B,
1135                                                  FeatureUINTR];
1136  list<SubtargetFeature> SPRAdditionalTuning = [TuningMULCFalseDeps,
1137                                                TuningPERMFalseDeps,
1138                                                TuningRANGEFalseDeps,
1139                                                TuningGETMANTFalseDeps,
1140                                                TuningMULLQFalseDeps];
1141  list<SubtargetFeature> SPRTuning = !listconcat(ICXTuning, SPRAdditionalTuning);
1142  list<SubtargetFeature> SPRFeatures =
1143    !listconcat(ICXFeatures, SPRAdditionalFeatures);
1144
1145  // Graniterapids
  // Both GNR lists extend the SPR lists (SPRFeatures / SPRTuning).
1146  list<SubtargetFeature> GNRAdditionalFeatures = [FeatureAMXFP16,
1147                                                  FeaturePREFETCHI];
1148  list<SubtargetFeature> GNRFeatures =
1149    !listconcat(SPRFeatures, GNRAdditionalFeatures);
1150  list<SubtargetFeature> GNRAdditionalTuning = [TuningBranchHint];
1151  list<SubtargetFeature> GNRTuning = !listconcat(SPRTuning, GNRAdditionalTuning);
1152
1153  // Graniterapids D
  // ISA list only: GNRFeatures plus FeatureAMXCOMPLEX. No GNRD-specific tuning
  // list is defined in this section.
1154  list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX];
1155  list<SubtargetFeature> GNRDFeatures =
1156    !listconcat(GNRFeatures, GNRDAdditionalFeatures);
1157
1158  // Diamond Rapids
  // ISA list only: GNRDFeatures plus the additions below. No DMR-specific
  // tuning list is defined in this section.
1159  list<SubtargetFeature> DMRAdditionalFeatures = [FeatureAVX10_2_512,
1160                                                  FeatureSM4,
1161                                                  FeatureCMPCCXADD,
1162                                                  FeatureAVXIFMA,
1163                                                  FeatureAVXNECONVERT,
1164                                                  FeatureAVXVNNIINT8,
1165                                                  FeatureAVXVNNIINT16,
1166                                                  FeatureUSERMSR,
1167                                                  FeatureSHA512,
1168                                                  FeatureSM3,
1169                                                  FeatureEGPR,
1170                                                  FeatureZU,
1171                                                  FeatureCCMP,
1172                                                  FeaturePush2Pop2,
1173                                                  FeaturePPX,
1174                                                  FeatureNDD,
1175                                                  FeatureNF,
1176                                                  FeatureCF,
1177                                                  FeatureMOVRS,
1178                                                  FeatureAMXMOVRS,
1179                                                  FeatureAMXAVX512,
1180                                                  FeatureAMXFP8,
1181                                                  FeatureAMXTF32,
1182                                                  FeatureAMXTRANSPOSE];
1183  list<SubtargetFeature> DMRFeatures =
1184    !listconcat(GNRDFeatures, DMRAdditionalFeatures);
1185
1186  // Atom
  // Standalone ISA and tuning lists. AtomFeatures is the base list that
  // SLMFeatures extends.
1187  list<SubtargetFeature> AtomFeatures = [FeatureX87,
1188                                         FeatureCX8,
1189                                         FeatureCMOV,
1190                                         FeatureMMX,
1191                                         FeatureSSSE3,
1192                                         FeatureFXSR,
1193                                         FeatureNOPL,
1194                                         FeatureX86_64,
1195                                         FeatureCX16,
1196                                         FeatureMOVBE,
1197                                         FeatureLAHFSAHF64];
1198  list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
1199                                       TuningSlowUAMem16,
1200                                       TuningLEAForSP,
1201                                       TuningSlowDivide32,
1202                                       TuningSlowDivide64,
1203                                       TuningSlowTwoMemOps,
1204                                       TuningFastImm16,
1205                                       TuningLEAUsesAG,
1206                                       TuningPadShortFunctions,
1207                                       TuningInsertVZEROUPPER,
1208                                       TuningNoDomainDelay];
1209
1210  // Silvermont
  // ISA = AtomFeatures + the additions below. SLMTuning is written out in
  // full rather than derived from AtomTuning.
1211  list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
1212                                                  FeatureCRC32,
1213                                                  FeaturePOPCNT,
1214                                                  FeaturePCLMUL,
1215                                                  FeaturePRFCHW,
1216                                                  FeatureRDRAND];
1217  list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts,
1218                                      TuningSlowTwoMemOps,
1219                                      TuningSlowLEA,
1220                                      TuningSlowIncDec,
1221                                      TuningSlowDivide64,
1222                                      TuningSlowPMULLD,
1223                                      TuningFast7ByteNOP,
1224                                      TuningFastMOVBE,
1225                                      TuningFastImm16,
1226                                      TuningPOPCNTFalseDeps,
1227                                      TuningInsertVZEROUPPER,
1228                                      TuningNoDomainDelay];
1229  list<SubtargetFeature> SLMFeatures =
1230    !listconcat(AtomFeatures, SLMAdditionalFeatures);
1231
1232  // Goldmont
  // ISA = SLMFeatures + the additions below. GLMTuning is written out in full
  // rather than derived from SLMTuning.
1233  list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
1234                                                  FeatureSHA,
1235                                                  FeatureRDSEED,
1236                                                  FeatureXSAVE,
1237                                                  FeatureXSAVEOPT,
1238                                                  FeatureXSAVEC,
1239                                                  FeatureXSAVES,
1240                                                  FeatureCLFLUSHOPT,
1241                                                  FeatureFSGSBase];
1242  list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
1243                                      TuningSlowTwoMemOps,
1244                                      TuningSlowLEA,
1245                                      TuningSlowIncDec,
1246                                      TuningFastMOVBE,
1247                                      TuningFastImm16,
1248                                      TuningPOPCNTFalseDeps,
1249                                      TuningInsertVZEROUPPER,
1250                                      TuningNoDomainDelay];
1251  list<SubtargetFeature> GLMFeatures =
1252    !listconcat(SLMFeatures, GLMAdditionalFeatures);
1253
1254  // Goldmont Plus
  // ISA = GLMFeatures + the additions below. GLPTuning lists the same entries
  // as GLMTuning except that TuningPOPCNTFalseDeps is omitted.
1255  list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
1256                                                  FeatureRDPID];
1257  list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
1258                                      TuningSlowTwoMemOps,
1259                                      TuningSlowLEA,
1260                                      TuningSlowIncDec,
1261                                      TuningFastMOVBE,
1262                                      TuningFastImm16,
1263                                      TuningInsertVZEROUPPER,
1264                                      TuningNoDomainDelay];
1265  list<SubtargetFeature> GLPFeatures =
1266    !listconcat(GLMFeatures, GLPAdditionalFeatures);
1267
1268  // Tremont
  // ISA = GLPFeatures + CLWB and GFNI; the tuning list is GLPTuning unchanged.
1269  list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
1270                                                  FeatureGFNI];
1271  list<SubtargetFeature> TRMTuning = GLPTuning;
1272  list<SubtargetFeature> TRMFeatures =
1273    !listconcat(GLPFeatures, TRMAdditionalFeatures);
1274
1275  // Alderlake
  // The two lists have different bases: the ISA list extends TRMFeatures,
  // while the tuning list extends SKLTuning.
1276  list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE,
1277                                                  FeaturePCONFIG,
1278                                                  FeatureSHSTK,
1279                                                  FeatureWIDEKL,
1280                                                  FeatureINVPCID,
1281                                                  FeatureADX,
1282                                                  FeatureFMA,
1283                                                  FeatureVAES,
1284                                                  FeatureVPCLMULQDQ,
1285                                                  FeatureF16C,
1286                                                  FeatureBMI,
1287                                                  FeatureBMI2,
1288                                                  FeatureLZCNT,
1289                                                  FeatureAVXVNNI,
1290                                                  FeaturePKU,
1291                                                  FeatureHRESET,
1292                                                  FeatureCLDEMOTE,
1293                                                  FeatureMOVDIRI,
1294                                                  FeatureMOVDIR64B,
1295                                                  FeatureWAITPKG];
1296  list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps,
1297                                                TuningPreferMovmskOverVTest,
1298                                                TuningFastImmVectorShift];
1299  list<SubtargetFeature> ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning);
1300  list<SubtargetFeature> ADLFeatures =
1301    !listconcat(TRMFeatures, ADLAdditionalFeatures);
1302
1303  // Gracemont
  // Tuning list only, written out in full; no GRT ISA list is defined in this
  // section.
1304  list<SubtargetFeature> GRTTuning = [TuningMacroFusion,
1305                                      TuningSlow3OpsLEA,
1306                                      TuningFastScalarFSQRT,
1307                                      TuningFastVectorFSQRT,
1308                                      TuningFast15ByteNOP,
1309                                      TuningFastVariablePerLaneShuffle,
1310                                      TuningPOPCNTFalseDeps,
1311                                      TuningInsertVZEROUPPER];
1312
1313  // Sierraforest
  // ISA list only: ADLFeatures plus the additions below.
1314  list<SubtargetFeature> SRFAdditionalFeatures = [FeatureCMPCCXADD,
1315                                                  FeatureAVXIFMA,
1316                                                  FeatureAVXNECONVERT,
1317                                                  FeatureENQCMD,
1318                                                  FeatureUINTR,
1319                                                  FeatureAVXVNNIINT8];
1320  list<SubtargetFeature> SRFFeatures =
1321    !listconcat(ADLFeatures, SRFAdditionalFeatures);
1322
1323  // Arrowlake S
  // ISA list only: SRFFeatures plus the additions below.
1324  list<SubtargetFeature> ARLSAdditionalFeatures = [FeatureAVXVNNIINT16,
1325                                                   FeatureSHA512,
1326                                                   FeatureSM3,
1327                                                   FeatureSM4];
1328  list<SubtargetFeature> ARLSFeatures =
1329    !listconcat(SRFFeatures, ARLSAdditionalFeatures);
1330
1331  // Pantherlake
  // ISA list only: ARLSFeatures plus FeaturePREFETCHI.
1332  list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
1333  list<SubtargetFeature> PTLFeatures =
1334    !listconcat(ARLSFeatures, PTLAdditionalFeatures);
1335
1336
1337  // Clearwaterforest
  // ISA list only. It is built on ARLSFeatures rather than PTLFeatures, which
  // is why FeaturePREFETCHI is listed again here.
1338  list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
1339                                                  FeatureUSERMSR];
1340  list<SubtargetFeature> CWFFeatures =
1341    !listconcat(ARLSFeatures, CWFAdditionalFeatures);
1342
1343  // Knights Landing
  // Standalone ISA and tuning lists, not derived from any other list here.
1344  list<SubtargetFeature> KNLFeatures = [FeatureX87,
1345                                        FeatureCX8,
1346                                        FeatureCMOV,
1347                                        FeatureMMX,
1348                                        FeatureFXSR,
1349                                        FeatureNOPL,
1350                                        FeatureX86_64,
1351                                        FeatureCX16,
1352                                        FeatureCRC32,
1353                                        FeaturePOPCNT,
1354                                        FeaturePCLMUL,
1355                                        FeatureXSAVE,
1356                                        FeatureXSAVEOPT,
1357                                        FeatureLAHFSAHF64,
1358                                        FeatureAES,
1359                                        FeatureRDRAND,
1360                                        FeatureF16C,
1361                                        FeatureFSGSBase,
1362                                        FeatureAVX512,
1363                                        FeatureEVEX512,
1364                                        FeatureCDI,
1365                                        FeatureADX,
1366                                        FeatureRDSEED,
1367                                        FeatureMOVBE,
1368                                        FeatureLZCNT,
1369                                        FeatureBMI,
1370                                        FeatureBMI2,
1371                                        FeatureFMA,
1372                                        FeaturePRFCHW];
1373  list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
1374                                      TuningSlow3OpsLEA,
1375                                      TuningSlowIncDec,
1376                                      TuningSlowTwoMemOps,
1377                                      TuningPreferMaskRegisters,
1378                                      TuningFastGather,
1379                                      TuningFastMOVBE,
1380                                      TuningFastImm16,
1381                                      TuningSlowPMADDWD];
1382  // TODO Add AVX5124FMAPS/AVX5124VNNIW features
  // KNMFeatures is KNLFeatures plus FeatureVPOPCNTDQ; no separate KNM tuning
  // list is defined in this section.
1383  list<SubtargetFeature> KNMFeatures =
1384    !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
1385
1386  // Barcelona
  // Standalone ISA and tuning lists, not derived from any other list here.
1387  list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
1388                                              FeatureCX8,
1389                                              FeatureSSE4A,
1390                                              FeatureFXSR,
1391                                              FeatureNOPL,
1392                                              FeatureCX16,
1393                                              FeaturePRFCHW,
1394                                              FeatureLZCNT,
1395                                              FeaturePOPCNT,
1396                                              FeatureLAHFSAHF64,
1397                                              FeatureCMOV,
1398                                              FeatureX86_64];
1399  list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
1400                                            TuningSlowDivide64,
1401                                            TuningSlowSHLD,
1402                                            TuningSBBDepBreaking,
1403                                            TuningInsertVZEROUPPER];
1404
1405  // Bobcat
  // Standalone ISA and tuning lists. BtVer1Features is the base list that
  // BtVer2Features extends.
1406  list<SubtargetFeature> BtVer1Features = [FeatureX87,
1407                                           FeatureCX8,
1408                                           FeatureCMOV,
1409                                           FeatureMMX,
1410                                           FeatureSSSE3,
1411                                           FeatureSSE4A,
1412                                           FeatureFXSR,
1413                                           FeatureNOPL,
1414                                           FeatureX86_64,
1415                                           FeatureCX16,
1416                                           FeaturePRFCHW,
1417                                           FeatureLZCNT,
1418                                           FeaturePOPCNT,
1419                                           FeatureLAHFSAHF64];
1420  list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
1421                                         TuningFastScalarShiftMasks,
1422                                         TuningFastVectorShiftMasks,
1423                                         TuningSlowDivide64,
1424                                         TuningSlowSHLD,
1425                                         TuningFastImm16,
1426                                         TuningSBBDepBreaking,
1427                                         TuningInsertVZEROUPPER];
1428
1429  // Jaguar
  // ISA = BtVer1Features + the additions below. BtVer2Tuning is written out in
  // full and, unlike BtVer1Tuning, does not list TuningInsertVZEROUPPER.
1430  list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
1431                                                     FeatureAES,
1432                                                     FeatureCRC32,
1433                                                     FeaturePCLMUL,
1434                                                     FeatureBMI,
1435                                                     FeatureF16C,
1436                                                     FeatureMOVBE,
1437                                                     FeatureXSAVE,
1438                                                     FeatureXSAVEOPT];
1439  list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
1440                                         TuningFastBEXTR,
1441                                         TuningFastHorizontalOps,
1442                                         TuningFast15ByteNOP,
1443                                         TuningFastScalarShiftMasks,
1444                                         TuningFastVectorShiftMasks,
1445                                         TuningFastMOVBE,
1446                                         TuningFastImm16,
1447                                         TuningSBBDepBreaking,
1448                                         TuningSlowDivide64,
1449                                         TuningSlowSHLD];
1450  list<SubtargetFeature> BtVer2Features =
1451    !listconcat(BtVer1Features, BtVer2AdditionalFeatures);
1452
1453  // Bulldozer
  // Standalone ISA and tuning lists; BdVer2 through BdVer4 are built
  // incrementally on top of these.
1454  list<SubtargetFeature> BdVer1Features = [FeatureX87,
1455                                           FeatureCX8,
1456                                           FeatureCMOV,
1457                                           FeatureXOP,
1458                                           FeatureX86_64,
1459                                           FeatureCX16,
1460                                           FeatureAES,
1461                                           FeatureCRC32,
1462                                           FeaturePRFCHW,
1463                                           FeaturePCLMUL,
1464                                           FeatureMMX,
1465                                           FeatureFXSR,
1466                                           FeatureNOPL,
1467                                           FeatureLZCNT,
1468                                           FeaturePOPCNT,
1469                                           FeatureXSAVE,
1470                                           FeatureLWP,
1471                                           FeatureLAHFSAHF64];
1472  list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
1473                                         TuningSlowDivide64,
1474                                         TuningFast11ByteNOP,
1475                                         TuningFastScalarShiftMasks,
1476                                         TuningBranchFusion,
1477                                         TuningSBBDepBreaking,
1478                                         TuningInsertVZEROUPPER];
1479
1480  // PileDriver
  // Both lists extend the BdVer1 lists.
  // NOTE(review): TuningFastMOVBE is added here, but FeatureMOVBE first
  // appears in BdVer4AdditionalFeatures (Excavator) - confirm this is intended.
1481  list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
1482                                                     FeatureBMI,
1483                                                     FeatureTBM,
1484                                                     FeatureFMA];
1485  list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
1486                                                   TuningFastMOVBE];
1487  list<SubtargetFeature> BdVer2Tuning =
1488    !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
1489  list<SubtargetFeature> BdVer2Features =
1490    !listconcat(BdVer1Features, BdVer2AdditionalFeatures);
1491
1492  // Steamroller
  // ISA = BdVer2Features + XSAVEOPT and FSGSBase; tuning is BdVer2Tuning
  // unchanged.
1493  list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
1494                                                     FeatureFSGSBase];
1495  list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
1496  list<SubtargetFeature> BdVer3Features =
1497    !listconcat(BdVer2Features, BdVer3AdditionalFeatures);
1498
1499  // Excavator
  // ISA = BdVer3Features + the additions below; tuning is BdVer3Tuning
  // unchanged.
1500  list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
1501                                                     FeatureBMI2,
1502                                                     FeatureMOVBE,
1503                                                     FeatureRDRAND,
1504                                                     FeatureMWAITX];
1505  list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
1506  list<SubtargetFeature> BdVer4Features =
1507    !listconcat(BdVer3Features, BdVer4AdditionalFeatures);
1508
1509
1510  // AMD Zen Processors common ISAs
  // ZNFeatures and ZNTuning are the base lists; the ZN2..ZN5 lists that follow
  // are each built by extending the previous generation's lists.
1511  list<SubtargetFeature> ZNFeatures = [FeatureADX,
1512                                       FeatureAES,
1513                                       FeatureAVX2,
1514                                       FeatureBMI,
1515                                       FeatureBMI2,
1516                                       FeatureCLFLUSHOPT,
1517                                       FeatureCLZERO,
1518                                       FeatureCMOV,
1519                                       FeatureX86_64,
1520                                       FeatureCX16,
1521                                       FeatureCRC32,
1522                                       FeatureF16C,
1523                                       FeatureFMA,
1524                                       FeatureFSGSBase,
1525                                       FeatureFXSR,
1526                                       FeatureNOPL,
1527                                       FeatureLAHFSAHF64,
1528                                       FeatureLZCNT,
1529                                       FeatureMMX,
1530                                       FeatureMOVBE,
1531                                       FeatureMWAITX,
1532                                       FeaturePCLMUL,
1533                                       FeaturePOPCNT,
1534                                       FeaturePRFCHW,
1535                                       FeatureRDRAND,
1536                                       FeatureRDSEED,
1537                                       FeatureSHA,
1538                                       FeatureSSE4A,
1539                                       FeatureX87,
1540                                       FeatureXSAVE,
1541                                       FeatureXSAVEC,
1542                                       FeatureXSAVEOPT,
1543                                       FeatureXSAVES];
1544  list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
1545                                     TuningFastBEXTR,
1546                                     TuningFast15ByteNOP,
1547                                     TuningBranchFusion,
1548                                     TuningFastScalarFSQRT,
1549                                     TuningFastVectorFSQRT,
1550                                     TuningFastScalarShiftMasks,
1551                                     TuningFastVariablePerLaneShuffle,
1552                                     TuningFastMOVBE,
1553                                     TuningFastImm16,
1554                                     TuningSlowDivide64,
1555                                     TuningSlowSHLD,
1556                                     TuningSBBDepBreaking,
1557                                     TuningInsertVZEROUPPER,
1558                                     TuningAllowLight256Bit];
1559  list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
1560                                                  FeatureRDPID,
1561                                                  FeatureRDPRU,
1562                                                  FeatureWBNOINVD];
  // ZN2: ISA = ZNFeatures + the additions above; tuning is ZNTuning unchanged.
1563  list<SubtargetFeature> ZN2Tuning = ZNTuning;
1564  list<SubtargetFeature> ZN2Features =
1565    !listconcat(ZNFeatures, ZN2AdditionalFeatures);
1566  list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM,
1567                                                  FeatureINVPCID,
1568                                                  FeaturePKU,
1569                                                  FeatureVAES,
1570                                                  FeatureVPCLMULQDQ];
  // ZN3: ISA = ZN2Features + the additions above; tuning = ZN2Tuning +
  // TuningMacroFusion.
1571  list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion];
1572  list<SubtargetFeature> ZN3Tuning =
1573    !listconcat(ZN2Tuning, ZN3AdditionalTuning);
1574  list<SubtargetFeature> ZN3Features =
1575    !listconcat(ZN2Features, ZN3AdditionalFeatures);
1576
1577  list<SubtargetFeature> ZN4AdditionalTuning = [TuningFastDPWSSD];
  // ZN4: tuning = ZN3Tuning + TuningFastDPWSSD; ISA = ZN3Features + the
  // AVX-512 related additions below.
1578  list<SubtargetFeature> ZN4Tuning =
1579    !listconcat(ZN3Tuning, ZN4AdditionalTuning);
1580  list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,
1581                                                  FeatureEVEX512,
1582                                                  FeatureCDI,
1583                                                  FeatureDQI,
1584                                                  FeatureBWI,
1585                                                  FeatureVLX,
1586                                                  FeatureVBMI,
1587                                                  FeatureVBMI2,
1588                                                  FeatureIFMA,
1589                                                  FeatureVNNI,
1590                                                  FeatureBITALG,
1591                                                  FeatureGFNI,
1592                                                  FeatureBF16,
1593                                                  FeatureSHSTK,
1594                                                  FeatureVPOPCNTDQ];
1595  list<SubtargetFeature> ZN4Features =
1596    !listconcat(ZN3Features, ZN4AdditionalFeatures);
1597
1598  list<SubtargetFeature> ZN5Tuning = ZN4Tuning;
  // ZN5: tuning is ZN4Tuning unchanged; ISA = ZN4Features + the additions
  // below.
  // NOTE(review): FeatureVNNI is already in ZN4AdditionalFeatures, so it ends
  // up in ZN5Features twice - presumably harmless; confirm before removing.
1599  list<SubtargetFeature> ZN5AdditionalFeatures = [FeatureVNNI,
1600                                                  FeatureMOVDIRI,
1601                                                  FeatureMOVDIR64B,
1602                                                  FeatureVP2INTERSECT,
1603                                                  FeaturePREFETCHI,
1604                                                  FeatureAVXVNNI
1605                                                  ];
1606  list<SubtargetFeature> ZN5Features =
1607    !listconcat(ZN4Features, ZN5AdditionalFeatures);
1608}
1609
1610//===----------------------------------------------------------------------===//
1611// X86 processors supported.
1612//===----------------------------------------------------------------------===//
1613
// Processor definition that always uses GenericModel as its scheduling model.
// Name, Features and TuneFeatures are forwarded to ProcessorModel unchanged.
1614class Proc<string Name, list<SubtargetFeature> Features,
1615           list<SubtargetFeature> TuneFeatures>
1616 : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;
1617
// Processor definition with an explicitly chosen scheduling model. All four
// arguments are forwarded to ProcessorModel unchanged.
1618class ProcModel<string Name, SchedMachineModel Model,
1619                list<SubtargetFeature> Features,
1620                list<SubtargetFeature> TuneFeatures>
1621 : ProcessorModel<Name, Model, Features, TuneFeatures>;
1622
1623// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
1624// if i386/i486 is specifically requested.
1625// NOTE: FeatureX86_64 is here because "generic" is the default llc CPU. The
1626// X86Subtarget constructor checks that any CPU used in 64-bit mode has
1627// FeatureX86_64 enabled. It has no effect on code generation.
1628// NOTE: As a default tuning, "generic" aims to produce code optimized for the
1629// most common X86 processors. The tunings might be changed over time. It is
1630// recommended to use "tune-cpu"="x86-64" in function attribute for consistency.
// "generic": scheduled with SandyBridgeModel. The first list is the ISA
// feature set, the second is the tuning feature set.
1631def : ProcModel<"generic", SandyBridgeModel,
1632                [FeatureX87, FeatureCX8, FeatureX86_64],
1633                [TuningSlow3OpsLEA,
1634                 TuningSlowDivide64,
1635                 TuningMacroFusion,
1636                 TuningFastScalarFSQRT,
1637                 TuningFast15ByteNOP,
1638                 TuningInsertVZEROUPPER]>;
1639
// i386 through pentium3. Every definition in this run uses Proc (and thus
// GenericModel) with the same tuning list; they differ only in their ISA
// feature lists. Each foreach emits one identical definition per listed name.
1640def : Proc<"i386",            [FeatureX87],
1641                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1642def : Proc<"i486",            [FeatureX87],
1643                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1644def : Proc<"i586",            [FeatureX87, FeatureCX8],
1645                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1646def : Proc<"pentium",         [FeatureX87, FeatureCX8],
1647                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1648foreach P = ["pentium-mmx", "pentium_mmx"] in {
1649  def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX],
1650                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1651}
1652def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV],
1653                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1654foreach P = ["pentiumpro", "pentium_pro"] in {
1655  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
1656                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1657}
1658foreach P = ["pentium2", "pentium_ii"] in {
1659  def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV,
1660                          FeatureFXSR, FeatureNOPL],
1661                        [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1662}
1663foreach P = ["pentium3", "pentium3m", "pentium_iii_no_xmm_regs", "pentium_iii"] in {
1664  def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX,
1665                 FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV],
1666                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1667}
1668
1669// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
1670// The intent is to enable it for pentium4 which is the current default
1671// processor in a vanilla 32-bit clang compilation when no specific
1672// architecture is specified.  This generally gives a nice performance
1673// increase on silvermont, with largely neutral behavior on other
1674// contemporary large core processors.
1675// pentium-m, pentium4m, prescott and nocona are included as a preventative
1676// measure to avoid performance surprises, in case clang's default cpu
1677// changes slightly.
1678
1679foreach P = ["pentium_m", "pentium-m"] in {
  // Both spellings get the same definition, scheduled with GenericPostRAModel.
1680  def : ProcModel<P, GenericPostRAModel,
1681                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
1682                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
1683                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1684}
1685
1686foreach P = ["pentium4", "pentium4m", "pentium_4"] in {
  // Same scheduling model, ISA list and tuning list as the pentium_m
  // definitions; only the processor names differ.
1687  def : ProcModel<P, GenericPostRAModel,
1688                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
1689                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
1690                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1691}
1692
1693// Intel Quark.
// Only FeatureCX8 is enabled; unlike the i386..pentium3 definitions above,
// FeatureX87 is not listed.
1694def : Proc<"lakemont", [FeatureCX8],
1695                       [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1696
1697// Intel Core Duo.
// Scheduled with SandyBridgeModel; the ISA list has FeatureSSE3 but not
// FeatureX86_64.
1698def : ProcModel<"yonah", SandyBridgeModel,
1699                [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
1700                 FeatureFXSR, FeatureNOPL, FeatureCMOV],
1701                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1702
// NetBurst.
foreach P = ["prescott", "pentium_4_sse3"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
// nocona carries the prescott feature set plus FeatureX86_64 and FeatureCX16;
// the tuning list is the same.
def : ProcModel<"nocona", GenericPostRAModel, [
  FeatureX87,
  FeatureCX8,
  FeatureCMOV,
  FeatureMMX,
  FeatureSSE3,
  FeatureFXSR,
  FeatureNOPL,
  FeatureX86_64,
  FeatureCX16
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;

// Intel Core 2 Solo/Duo.
// The core2 and penryn groups below are identical except for the SSE level
// (FeatureSSSE3 vs. FeatureSSE41); both are scheduled with SandyBridgeModel.
foreach P = ["core2", "core_2_duo_ssse3"] in {
  def : ProcModel<P, SandyBridgeModel, [
    FeatureX87,
    FeatureCX8,
    FeatureCMOV,
    FeatureMMX,
    FeatureSSSE3,
    FeatureFXSR,
    FeatureNOPL,
    FeatureX86_64,
    FeatureCX16,
    FeatureLAHFSAHF64
  ],
  [
    TuningMacroFusion,
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
foreach P = ["penryn", "core_2_duo_sse4_1"] in {
  def : ProcModel<P, SandyBridgeModel, [
    FeatureX87,
    FeatureCX8,
    FeatureCMOV,
    FeatureMMX,
    FeatureSSE41,
    FeatureFXSR,
    FeatureNOPL,
    FeatureX86_64,
    FeatureCX16,
    FeatureLAHFSAHF64
  ],
  [
    TuningMacroFusion,
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}

// Atom CPUs.
// bonnell/atom use AtomModel; every later entry in this group uses SLMModel.
// Feature and tuning lists come from the ProcessorFeatures record.
foreach P = ["bonnell", "atom"] in {
  def : ProcModel<P, AtomModel, ProcessorFeatures.AtomFeatures,
                  ProcessorFeatures.AtomTuning>;
}

foreach P = ["silvermont", "slm", "atom_sse4_2"] in {
  def : ProcModel<P, SLMModel, ProcessorFeatures.SLMFeatures,
                  ProcessorFeatures.SLMTuning>;
}

// atom_sse4_2_movbe pairs the GLM feature list with the SLM tuning list,
// whereas goldmont uses GLM for both.
def : ProcModel<"atom_sse4_2_movbe", SLMModel, ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.SLMTuning>;
def : ProcModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.GLMTuning>;
foreach P = ["goldmont_plus", "goldmont-plus"] in {
  def : ProcModel<P, SLMModel, ProcessorFeatures.GLPFeatures,
                  ProcessorFeatures.GLPTuning>;
}
def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures,
                ProcessorFeatures.TRMTuning>;

// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in {
  def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures,
                  ProcessorFeatures.NHMTuning>;
}

// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
foreach P = ["westmere", "core_aes_pclmulqdq"] in {
  def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.WSMFeatures,
                  ProcessorFeatures.WSMTuning>;
}

foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
  def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures,
                  ProcessorFeatures.SNBTuning>;
}

// ivybridge keeps SandyBridgeModel; only its feature/tuning lists differ.
foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
  def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures,
                  ProcessorFeatures.IVBTuning>;
}

foreach P = ["haswell", "core-avx2", "core_4th_gen_avx",
             "core_4th_gen_avx_tsx"] in {
  def : ProcModel<P, HaswellModel, ProcessorFeatures.HSWFeatures,
                  ProcessorFeatures.HSWTuning>;
}

foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"] in {
  def : ProcModel<P, BroadwellModel, ProcessorFeatures.BDWFeatures,
                  ProcessorFeatures.BDWTuning>;
}

def : ProcModel<"skylake", SkylakeClientModel, ProcessorFeatures.SKLFeatures,
                ProcessorFeatures.SKLTuning>;

// FIXME: define KNL scheduler model
// Until then knl/mic_avx512 and knm are scheduled with HaswellModel. knm has
// its own feature list but shares KNLTuning.
foreach P = ["knl", "mic_avx512"] in {
  def : ProcModel<P, HaswellModel, ProcessorFeatures.KNLFeatures,
                  ProcessorFeatures.KNLTuning>;
}
def : ProcModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures,
                ProcessorFeatures.KNLTuning>;

foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
  def : ProcModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures,
                  ProcessorFeatures.SKXTuning>;
}

// cascadelake, cooperlake and cannonlake are scheduled with
// SkylakeServerModel; each has its own feature and tuning lists.
def : ProcModel<"cascadelake", SkylakeServerModel,
                ProcessorFeatures.CLXFeatures, ProcessorFeatures.CLXTuning>;
def : ProcModel<"cooperlake", SkylakeServerModel,
                ProcessorFeatures.CPXFeatures, ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
                ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>;
foreach P = ["icelake-client", "icelake_client"] in {
  def : ProcModel<P, IceLakeModel,
                  ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
}
// rocketlake reuses the icelake-client feature and tuning lists unchanged.
def : ProcModel<"rocketlake", IceLakeModel,
                ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
foreach P = ["icelake-server", "icelake_server"] in {
  def : ProcModel<P, IceLakeModel,
                  ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
}
def : ProcModel<"tigerlake", IceLakeModel,
                ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids", SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
// Everything from alderlake through clearwaterforest below is scheduled with
// AlderlakePModel; the entries differ only in feature and tuning lists.
def : ProcModel<"alderlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
// FIXME: Use Gracemont Schedule Model when it is ready.
def : ProcModel<"gracemont", AlderlakePModel,
                ProcessorFeatures.ADLFeatures, ProcessorFeatures.GRTTuning>;
foreach P = ["sierraforest", "grandridge"] in {
  def : ProcModel<P, AlderlakePModel, ProcessorFeatures.SRFFeatures,
                  ProcessorFeatures.GRTTuning>;
}
// raptorlake and meteorlake reuse the alderlake feature and tuning lists.
def : ProcModel<"raptorlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"meteorlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
// arrowlake pairs the SRF feature list with ADL tuning.
def : ProcModel<"arrowlake", AlderlakePModel,
                ProcessorFeatures.SRFFeatures, ProcessorFeatures.ADLTuning>;
foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
  def : ProcModel<P, AlderlakePModel,
                  ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>;
}
def : ProcModel<"pantherlake", AlderlakePModel,
                ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"clearwaterforest", AlderlakePModel,
                ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
// emeraldrapids reuses the sapphirerapids lists; the graniterapids and
// diamondrapids entries share GNRTuning and SapphireRapidsModel.
def : ProcModel<"emeraldrapids", SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
def : ProcModel<"graniterapids", SapphireRapidsModel,
                ProcessorFeatures.GNRFeatures, ProcessorFeatures.GNRTuning>;
foreach P = ["graniterapids-d", "graniterapids_d"] in {
  def : ProcModel<P, SapphireRapidsModel,
                  ProcessorFeatures.GNRDFeatures, ProcessorFeatures.GNRTuning>;
}
def : ProcModel<"diamondrapids", SapphireRapidsModel,
                ProcessorFeatures.DMRFeatures, ProcessorFeatures.GNRTuning>;

// AMD CPUs.

// k6-2 and k6-3 share one feature list: the k6 list plus FeaturePRFCHW.
def : Proc<"k6",   [FeatureX87, FeatureCX8, FeatureMMX],
                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-2", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-3", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach P = ["athlon", "athlon-tbird"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeaturePRFCHW,
                 FeatureNOPL],
                [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// Same as the athlon group above, with FeatureSSE1 and FeatureFXSR added.
foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV,
                 FeatureSSE1, FeatureMMX, FeaturePRFCHW, FeatureFXSR, FeatureNOPL],
                [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE2, FeatureMMX, FeaturePRFCHW,
                 FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV],
                [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
                 TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
}

// The -sse3 variants swap FeatureSSE2 for FeatureSSE3 and add FeatureCX16;
// the tuning list matches the k8 group.
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
  def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE3, FeatureMMX, FeaturePRFCHW,
                 FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV,
                 FeatureX86_64],
                [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
                 TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
}

foreach P = ["amdfam10", "barcelona"] in {
  def : Proc<P, ProcessorFeatures.BarcelonaFeatures,
             ProcessorFeatures.BarcelonaTuning>;
}

// Bobcat
// Declared with Proc, so no scheduling model is named for btver1.
def : Proc<"btver1", ProcessorFeatures.BtVer1Features,
           ProcessorFeatures.BtVer1Tuning>;
// Jaguar
def : ProcModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features,
                ProcessorFeatures.BtVer2Tuning>;

// Bulldozer
// bdver1 is scheduled with BdVer2Model, the same model bdver2 uses.
def : ProcModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features,
                ProcessorFeatures.BdVer1Tuning>;
// Piledriver
def : ProcModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features,
                ProcessorFeatures.BdVer2Tuning>;
// Steamroller
// NOTE: BdVer2Model is only an approx model for Steamroller.
def : ProcModel<"bdver3", BdVer2Model, ProcessorFeatures.BdVer3Features,
                ProcessorFeatures.BdVer3Tuning>;
// Excavator
// NOTE: Znver1Model is only an approx model for Excavator (with AVX2).
def : ProcModel<"bdver4", Znver1Model, ProcessorFeatures.BdVer4Features,
                ProcessorFeatures.BdVer4Tuning>;

// Zen family. znver1 through znver4 each name a matching ZnverNModel; znver5
// is scheduled with Znver4Model but has its own feature and tuning lists.
def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures,
                ProcessorFeatures.ZNTuning>;
def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features,
                ProcessorFeatures.ZN2Tuning>;
def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
                ProcessorFeatures.ZN3Tuning>;
def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
                ProcessorFeatures.ZN4Tuning>;
def : ProcModel<"znver5", Znver4Model, ProcessorFeatures.ZN5Features,
                ProcessorFeatures.ZN5Tuning>;

// Remaining 32-bit processors. All share the same two-entry tuning list and
// are declared with Proc (no scheduling model named).
def : Proc<"geode",           [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// Note: winchip-c6, winchip2 and c3 do not list FeatureCX8.
def : Proc<"winchip-c6",      [FeatureX87, FeatureMMX],
                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"winchip2",        [FeatureX87, FeatureMMX, FeaturePRFCHW],
                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3",              [FeatureX87, FeatureMMX, FeaturePRFCHW],
                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3-2",            [FeatureX87, FeatureCX8, FeatureMMX,
                               FeatureSSE1, FeatureFXSR, FeatureCMOV],
                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
// modern 64-bit x86 chip, and enables features that are generally beneficial.
//
// We currently use the Sandy Bridge model as the default scheduling model as
// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
// covers a huge swath of x86 processors. If there are specific scheduling
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
def : ProcModel<"x86-64", SandyBridgeModel,
                ProcessorFeatures.X86_64V1Features,
                ProcessorFeatures.X86_64V1Tuning>;
// Close to Sandybridge.
def : ProcModel<"x86-64-v2", SandyBridgeModel,
                ProcessorFeatures.X86_64V2Features,
                ProcessorFeatures.X86_64V2Tuning>;
// Close to Haswell.
def : ProcModel<"x86-64-v3", HaswellModel,
                ProcessorFeatures.X86_64V3Features,
                ProcessorFeatures.X86_64V3Tuning>;
// Close to the AVX-512 level implemented by Xeon Scalable Processors.
def : ProcModel<"x86-64-v4", SkylakeServerModel,
                ProcessorFeatures.X86_64V4Features,
                ProcessorFeatures.X86_64V4Tuning>;

1999//===----------------------------------------------------------------------===//
2000// Calling Conventions
2001//===----------------------------------------------------------------------===//
2002
2003include "X86CallingConv.td"
2004
2005
2006//===----------------------------------------------------------------------===//
2007// Assembly Parser
2008//===----------------------------------------------------------------------===//
2009
2010def ATTAsmParserVariant : AsmParserVariant {
2011  int Variant = 0;
2012
2013  // Variant name.
2014  string Name = "att";
2015
2016  // Discard comments in assembly strings.
2017  string CommentDelimiter = "#";
2018
2019  // Recognize hard coded registers.
2020  string RegisterPrefix = "%";
2021}
2022
// Parser variant 1, named "intel": ';' starts a comment and register names
// have no prefix (RegisterPrefix is the empty string).
def IntelAsmParserVariant : AsmParserVariant {
  int Variant = 1;

  // Variant name.
  string Name = "intel";

  // Discard comments in assembly strings.
  string CommentDelimiter = ";";

  // Recognize hard coded registers.
  string RegisterPrefix = "";
}

//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//

// The X86 target supports two different syntaxes for emitting machine code.
// This is controlled by the -x86-asm-syntax={att|intel} option.
// Writer variant 0, implemented by the ATTInstPrinter class; the number
// matches ATTAsmParserVariant.
def ATTAsmWriter : AsmWriter {
  string AsmWriterClassName  = "ATTInstPrinter";
  int Variant = 0;
}
// Writer variant 1, implemented by the IntelInstPrinter class; the number
// matches IntelAsmParserVariant.
def IntelAsmWriter : AsmWriter {
  string AsmWriterClassName  = "IntelInstPrinter";
  int Variant = 1;
}

// Top-level target record: ties together the instruction set and the two
// assembly parser/writer variants defined earlier in this file.
def X86 : Target {
  // Information about the instructions...
  let InstructionSet = X86InstrInfo;
  // Order matters only in that each list holds the AT&T entry (variant 0)
  // before the Intel entry (variant 1).
  let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
  let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
  let AllowRegisterRenaming = 1;
}

2059//===----------------------------------------------------------------------===//
2060// Pfm Counters
2061//===----------------------------------------------------------------------===//
2062
2063include "X86PfmCounters.td"
2064