//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//

// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"

//===----------------------------------------------------------------------===//
// X86 Subtarget state
//
// Each SubtargetFeature below is written as
//   <feature name, subtarget field, value assigned to the field, help text
//    [, list of features implied by this one]>.

// Operating mode, disregarding specific ABI / programming model.
def Is64Bit : SubtargetFeature<
    "64bit-mode", "Is64Bit", "true",
    "64-bit mode (x86_64)">;
def Is32Bit : SubtargetFeature<
    "32bit-mode", "Is32Bit", "true",
    "32-bit mode (80386)">;
def Is16Bit : SubtargetFeature<
    "16bit-mode", "Is16Bit", "true",
    "16-bit mode (i8086)">;

//===----------------------------------------------------------------------===//
// X86 Subtarget ISA features
//===----------------------------------------------------------------------===//

def FeatureX87 : SubtargetFeature<
    "x87", "HasX87", "true",
    "Enable X87 float instructions">;

def FeatureNOPL : SubtargetFeature<
    "nopl", "HasNOPL", "true",
    "Enable NOPL instruction (generally pentium pro+)">;

def FeatureCMOV : SubtargetFeature<
    "cmov", "HasCMOV", "true",
    "Enable conditional move instructions">;

def FeatureCX8 : SubtargetFeature<
    "cx8", "HasCX8", "true",
    "Support CMPXCHG8B instructions">;

def FeatureCRC32 : SubtargetFeature<
    "crc32", "HasCRC32", "true",
    "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;

def FeaturePOPCNT : SubtargetFeature<
    "popcnt", "HasPOPCNT", "true",
    "Support POPCNT instruction">;

def FeatureFXSR : SubtargetFeature<
    "fxsr", "HasFXSR", "true",
    "Support fxsave/fxrestore instructions">;

// XSAVE family: every variant implies the base xsave feature.
def FeatureXSAVE : SubtargetFeature<
    "xsave", "HasXSAVE", "true",
    "Support xsave instructions">;

def FeatureXSAVEOPT : SubtargetFeature<
    "xsaveopt", "HasXSAVEOPT", "true",
    "Support xsaveopt instructions",
    [FeatureXSAVE]>;

def FeatureXSAVEC : SubtargetFeature<
    "xsavec", "HasXSAVEC", "true",
    "Support xsavec instructions",
    [FeatureXSAVE]>;

def FeatureXSAVES : SubtargetFeature<
    "xsaves", "HasXSAVES", "true",
    "Support xsaves instructions",
    [FeatureXSAVE]>;

// SSE levels all write the X86SSELevel field; each level implies the one
// before it.
def FeatureSSE1 : SubtargetFeature<
    "sse", "X86SSELevel", "SSE1",
    "Enable SSE instructions">;
def FeatureSSE2 : SubtargetFeature<
    "sse2", "X86SSELevel", "SSE2",
    "Enable SSE2 instructions",
    [FeatureSSE1]>;
def FeatureSSE3 : SubtargetFeature<
    "sse3", "X86SSELevel", "SSE3",
    "Enable SSE3 instructions",
    [FeatureSSE2]>;
def FeatureSSSE3 : SubtargetFeature<
    "ssse3", "X86SSELevel", "SSSE3",
    "Enable SSSE3 instructions",
    [FeatureSSE3]>;
def FeatureSSE41 : SubtargetFeature<
    "sse4.1", "X86SSELevel", "SSE41",
    "Enable SSE 4.1 instructions",
    [FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<
    "sse4.2", "X86SSELevel", "SSE42",
    "Enable SSE 4.2 instructions",
    [FeatureSSE41]>;
// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX : SubtargetFeature<
    "mmx", "HasMMX", "true",
    "Enable MMX instructions">;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode. Nothing should imply this feature bit. It
// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def FeatureX86_64 : SubtargetFeature<
    "64bit", "HasX86_64", "true",
    "Support 64-bit instructions">;
def FeatureCX16 : SubtargetFeature<
    "cx16", "HasCX16", "true",
    "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
    [FeatureCX8]>;
def FeatureSSE4A : SubtargetFeature<
    "sse4a", "HasSSE4A", "true",
    "Support SSE 4a instructions",
    [FeatureSSE3]>;

// AVX levels continue the X86SSELevel chain on top of SSE4.2.
def FeatureAVX : SubtargetFeature<
    "avx", "X86SSELevel", "AVX",
    "Enable AVX instructions",
    [FeatureSSE42]>;
def FeatureAVX2 : SubtargetFeature<
    "avx2", "X86SSELevel", "AVX2",
    "Enable AVX2 instructions",
    [FeatureAVX]>;
// Help text previously read "fused multiple-add"; the operation is a fused
// multiply-add.
def FeatureFMA : SubtargetFeature<
    "fma", "HasFMA", "true",
    "Enable three-operand fused multiply-add",
    [FeatureAVX]>;
def FeatureF16C : SubtargetFeature<
    "f16c", "HasF16C", "true",
    "Support 16-bit floating point conversion instructions",
    [FeatureAVX]>;
def FeatureEVEX512 : SubtargetFeature<
    "evex512", "HasEVEX512", "true",
    "Support ZMM and 64-bit mask instructions">;
def FeatureAVX512 : SubtargetFeature<
    "avx512f", "X86SSELevel", "AVX512",
    "Enable AVX-512 instructions",
    [FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureCDI : SubtargetFeature<
    "avx512cd", "HasCDI", "true",
    "Enable AVX-512 Conflict Detection Instructions",
    [FeatureAVX512]>;
def FeatureVPOPCNTDQ : SubtargetFeature<
    "avx512vpopcntdq", "HasVPOPCNTDQ", "true",
    "Enable AVX-512 Population Count Instructions",
    [FeatureAVX512]>;
def FeaturePREFETCHI : SubtargetFeature<
    "prefetchi", "HasPREFETCHI", "true",
    "Prefetch instruction with T0 or T1 Hint">;
def FeatureDQI : SubtargetFeature<
    "avx512dq", "HasDQI", "true",
    "Enable AVX-512 Doubleword and Quadword Instructions",
    [FeatureAVX512]>;
def FeatureBWI : SubtargetFeature<
    "avx512bw", "HasBWI", "true",
    "Enable AVX-512 Byte and Word Instructions",
    [FeatureAVX512]>;
def FeatureVLX : SubtargetFeature<
    "avx512vl", "HasVLX", "true",
    "Enable AVX-512 Vector Length eXtensions",
    [FeatureAVX512]>;
def FeatureVBMI : SubtargetFeature<
    "avx512vbmi", "HasVBMI", "true",
    "Enable AVX-512 Vector Byte Manipulation Instructions",
    [FeatureBWI]>;
def FeatureVBMI2 : SubtargetFeature<
    "avx512vbmi2", "HasVBMI2", "true",
    "Enable AVX-512 further Vector Byte Manipulation Instructions",
    [FeatureBWI]>;
def FeatureAVXIFMA : SubtargetFeature<
    "avxifma", "HasAVXIFMA", "true",
    "Enable AVX-IFMA",
    [FeatureAVX2]>;
// Help text previously read "Fused Multiple-Add"; corrected to Multiply-Add.
def FeatureIFMA : SubtargetFeature<
    "avx512ifma", "HasIFMA", "true",
    "Enable AVX-512 Integer Fused Multiply-Add",
    [FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<
    "pku", "HasPKU", "true",
    "Enable protection keys">;
def FeatureVNNI : SubtargetFeature<
    "avx512vnni", "HasVNNI", "true",
    "Enable AVX-512 Vector Neural Network Instructions",
    [FeatureAVX512]>;
def FeatureAVXVNNI : SubtargetFeature<
    "avxvnni", "HasAVXVNNI", "true",
    "Support AVX_VNNI encoding",
    [FeatureAVX2]>;
def FeatureBF16 : SubtargetFeature<
    "avx512bf16", "HasBF16", "true",
    "Support bfloat16 floating point",
    [FeatureBWI]>;
def FeatureBITALG : SubtargetFeature<
    "avx512bitalg", "HasBITALG", "true",
    "Enable AVX-512 Bit Algorithms",
    [FeatureBWI]>;
def FeatureVP2INTERSECT : SubtargetFeature<
    "avx512vp2intersect", "HasVP2INTERSECT", "true",
    "Enable AVX-512 vp2intersect",
    [FeatureAVX512]>;
// FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
// guarded under condition hasVLX. So we imply it in FeatureFP16 currently.
// FIXME: FP16 conversion between f16 and i64 customize type v8i64, which is
// supposed to be guarded under condition hasDQI. So we imply it in FeatureFP16
// currently.
def FeatureFP16 : SubtargetFeature<
    "avx512fp16", "HasFP16", "true",
    "Support 16-bit floating point",
    [FeatureBWI, FeatureVLX, FeatureDQI]>;
def FeatureAVXVNNIINT8 : SubtargetFeature<
    "avxvnniint8", "HasAVXVNNIINT8", "true",
    "Enable AVX-VNNI-INT8",
    [FeatureAVX2]>;
def FeatureAVXVNNIINT16 : SubtargetFeature<
    "avxvnniint16", "HasAVXVNNIINT16", "true",
    "Enable AVX-VNNI-INT16",
    [FeatureAVX2]>;
def FeaturePCLMUL : SubtargetFeature<
    "pclmul", "HasPCLMUL", "true",
    "Enable packed carry-less multiplication instructions",
    [FeatureSSE2]>;
def FeatureGFNI : SubtargetFeature<
    "gfni", "HasGFNI", "true",
    "Enable Galois Field Arithmetic Instructions",
    [FeatureSSE2]>;
def FeatureVPCLMULQDQ : SubtargetFeature<
    "vpclmulqdq", "HasVPCLMULQDQ", "true",
    "Enable vpclmulqdq instructions",
    [FeatureAVX, FeaturePCLMUL]>;
// Help text previously read "fused multiple-add"; the operation is a fused
// multiply-add.
def FeatureFMA4 : SubtargetFeature<
    "fma4", "HasFMA4", "true",
    "Enable four-operand fused multiply-add",
    [FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<
    "xop", "HasXOP", "true",
    "Enable XOP instructions",
    [FeatureFMA4]>;
def FeatureSSEUnalignedMem : SubtargetFeature<
    "sse-unaligned-mem", "HasSSEUnalignedMem", "true",
    "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;
def FeatureAES : SubtargetFeature<
    "aes", "HasAES", "true",
    "Enable AES instructions",
    [FeatureSSE2]>;
def FeatureVAES : SubtargetFeature<
    "vaes", "HasVAES", "true",
    "Promote selected AES instructions to AVX512/AVX registers",
    [FeatureAVX2, FeatureAES]>;
def FeatureTBM : SubtargetFeature<
    "tbm", "HasTBM", "true",
    "Enable TBM instructions">;
def FeatureLWP : SubtargetFeature<
    "lwp", "HasLWP", "true",
    "Enable LWP instructions">;
def FeatureMOVBE : SubtargetFeature<
    "movbe", "HasMOVBE", "true",
    "Support MOVBE instruction">;
// Note: the feature string is spelled "rdrnd", not "rdrand".
def FeatureRDRAND : SubtargetFeature<
    "rdrnd", "HasRDRAND", "true",
    "Support RDRAND instruction">;
def FeatureFSGSBase : SubtargetFeature<
    "fsgsbase", "HasFSGSBase", "true",
    "Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<
    "lzcnt", "HasLZCNT", "true",
    "Support LZCNT instruction">;
def FeatureBMI : SubtargetFeature<
    "bmi", "HasBMI", "true",
    "Support BMI instructions">;
def FeatureBMI2 : SubtargetFeature<
    "bmi2", "HasBMI2", "true",
    "Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<
    "rtm", "HasRTM", "true",
    "Support RTM instructions">;
def FeatureADX : SubtargetFeature<
    "adx", "HasADX", "true",
    "Support ADX instructions">;
def FeatureSHA : SubtargetFeature<
    "sha", "HasSHA", "true",
    "Enable SHA instructions",
    [FeatureSSE2]>;
def FeatureSHA512 : SubtargetFeature<
    "sha512", "HasSHA512", "true",
    "Support SHA512 instructions",
    [FeatureAVX2]>;
// Processor supports CET SHSTK - Control-Flow Enforcement Technology
// using Shadow Stack
def FeatureSHSTK : SubtargetFeature<
    "shstk", "HasSHSTK", "true",
    "Support CET Shadow-Stack instructions">;
def FeatureSM3 : SubtargetFeature<
    "sm3", "HasSM3", "true",
    "Support SM3 instructions",
    [FeatureAVX]>;
def FeatureSM4 : SubtargetFeature<
    "sm4", "HasSM4", "true",
    "Support SM4 instructions",
    [FeatureAVX2]>;
def FeaturePRFCHW : SubtargetFeature<
    "prfchw", "HasPRFCHW", "true",
    "Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<
    "rdseed", "HasRDSEED", "true",
    "Support RDSEED instruction">;
// Note: the feature string is "sahf" although the record covers LAHF and SAHF.
def FeatureLAHFSAHF64 : SubtargetFeature<
    "sahf", "HasLAHFSAHF64", "true",
    "Support LAHF and SAHF instructions in 64-bit mode">;
def FeatureMWAITX : SubtargetFeature<
    "mwaitx", "HasMWAITX", "true",
    "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO : SubtargetFeature<
    "clzero", "HasCLZERO", "true",
    "Enable Cache Line Zero">;
def FeatureCLDEMOTE : SubtargetFeature<
    "cldemote", "HasCLDEMOTE", "true",
    "Enable Cache Line Demote">;
def FeaturePTWRITE : SubtargetFeature<
    "ptwrite", "HasPTWRITE", "true",
    "Support ptwrite instruction">;

// AMX family: amx-tile is the base feature and every other AMX extension
// below implies it.
def FeatureAMXTILE : SubtargetFeature<
    "amx-tile", "HasAMXTILE", "true",
    "Support AMX-TILE instructions">;
def FeatureAMXINT8 : SubtargetFeature<
    "amx-int8", "HasAMXINT8", "true",
    "Support AMX-INT8 instructions",
    [FeatureAMXTILE]>;
def FeatureAMXBF16 : SubtargetFeature<
    "amx-bf16", "HasAMXBF16", "true",
    "Support AMX-BF16 instructions",
    [FeatureAMXTILE]>;
def FeatureAMXFP16 : SubtargetFeature<
    "amx-fp16", "HasAMXFP16", "true",
    "Support AMX amx-fp16 instructions",
    [FeatureAMXTILE]>;
def FeatureAMXCOMPLEX : SubtargetFeature<
    "amx-complex", "HasAMXCOMPLEX", "true",
    "Support AMX-COMPLEX instructions",
    [FeatureAMXTILE]>;
def FeatureAMXFP8 : SubtargetFeature<
    "amx-fp8", "HasAMXFP8", "true",
    "Support AMX-FP8 instructions",
    [FeatureAMXTILE]>;
def FeatureAMXMOVRS : SubtargetFeature<
    "amx-movrs", "HasAMXMOVRS", "true",
    "Support AMX-MOVRS instructions",
    [FeatureAMXTILE]>;
def FeatureAMXTRANSPOSE : SubtargetFeature<
    "amx-transpose", "HasAMXTRANSPOSE", "true",
    "Support AMX amx-transpose instructions",
    [FeatureAMXTILE]>;
def FeatureAMXAVX512 : SubtargetFeature<
    "amx-avx512", "HasAMXAVX512", "true",
    "Support AMX-AVX512 instructions",
    [FeatureAMXTILE]>;
def FeatureAMXTF32 : SubtargetFeature<
    "amx-tf32", "HasAMXTF32", "true",
    "Support AMX-TF32 instructions",
    [FeatureAMXTILE]>;

def FeatureCMPCCXADD : SubtargetFeature<
    "cmpccxadd", "HasCMPCCXADD", "true",
    "Support CMPCCXADD instructions">;
// The implied-feature list is spelled out explicitly and is empty.
def FeatureRAOINT : SubtargetFeature<
    "raoint", "HasRAOINT", "true",
    "Support RAO-INT instructions",
    []>;
def FeatureAVXNECONVERT : SubtargetFeature<
    "avxneconvert", "HasAVXNECONVERT", "true",
    "Support AVX-NE-CONVERT instructions",
    [FeatureAVX2]>;
def FeatureINVPCID : SubtargetFeature<
    "invpcid", "HasINVPCID", "true",
    "Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<
    "sgx", "HasSGX", "true",
    "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<
    "clflushopt", "HasCLFLUSHOPT", "true",
    "Flush A Cache Line Optimized">;
def FeatureCLWB : SubtargetFeature<
    "clwb", "HasCLWB", "true",
    "Cache Line Write Back">;
def FeatureWBNOINVD : SubtargetFeature<
    "wbnoinvd", "HasWBNOINVD", "true",
    "Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<
    "rdpid", "HasRDPID", "true",
    "Support RDPID instructions">;
def FeatureRDPRU : SubtargetFeature<
    "rdpru", "HasRDPRU", "true",
    "Support RDPRU instructions">;
def FeatureWAITPKG : SubtargetFeature<
    "waitpkg", "HasWAITPKG", "true",
    "Wait and pause enhancements">;
def FeatureENQCMD : SubtargetFeature<
    "enqcmd", "HasENQCMD", "true",
    "Has ENQCMD instructions">;
// Key Locker: the wide variant implies the base kl feature.
def FeatureKL : SubtargetFeature<
    "kl", "HasKL", "true",
    "Support Key Locker kl Instructions",
    [FeatureSSE2]>;
def FeatureWIDEKL : SubtargetFeature<
    "widekl", "HasWIDEKL", "true",
    "Support Key Locker wide Instructions",
    [FeatureKL]>;
def FeatureHRESET : SubtargetFeature<
    "hreset", "HasHRESET", "true",
    "Has hreset instruction">;
def FeatureSERIALIZE : SubtargetFeature<
    "serialize", "HasSERIALIZE", "true",
    "Has serialize instruction">;
def FeatureTSXLDTRK : SubtargetFeature<
    "tsxldtrk", "HasTSXLDTRK", "true",
    "Support TSXLDTRK instructions">;
def FeatureUINTR : SubtargetFeature<
    "uintr", "HasUINTR", "true",
    "Has UINTR Instructions">;
def FeatureUSERMSR : SubtargetFeature<
    "usermsr", "HasUSERMSR", "true",
    "Support USERMSR instructions">;
def FeaturePCONFIG : SubtargetFeature<
    "pconfig", "HasPCONFIG", "true",
    "platform configuration instruction">;
def FeatureMOVDIRI : SubtargetFeature<
    "movdiri", "HasMOVDIRI", "true",
    "Support movdiri instruction (direct store integer)">;
def FeatureMOVDIR64B : SubtargetFeature<
    "movdir64b", "HasMOVDIR64B", "true",
    "Support movdir64b instruction (direct store 64 bytes)">;

// AVX10 levels. The 512-bit variant of each version implies the 256-bit
// variant; avx10.2 implies avx10.1.
def FeatureAVX10_1 : SubtargetFeature<
    "avx10.1-256", "HasAVX10_1", "true",
    "Support AVX10.1 up to 256-bit instruction",
    [FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
     FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2, FeatureBITALG,
     FeatureVAES, FeatureVPCLMULQDQ, FeatureFP16]>;
def FeatureAVX10_1_512 : SubtargetFeature<
    "avx10.1-512", "HasAVX10_1_512", "true",
    "Support AVX10.1 up to 512-bit instruction",
    [FeatureAVX10_1, FeatureEVEX512]>;
def FeatureAVX10_2 : SubtargetFeature<
    "avx10.2-256", "HasAVX10_2", "true",
    "Support AVX10.2 up to 256-bit instruction",
    [FeatureAVX10_1]>;
def FeatureAVX10_2_512 : SubtargetFeature<
    "avx10.2-512", "HasAVX10_2_512", "true",
    "Support AVX10.2 up to 512-bit instruction",
    [FeatureAVX10_2, FeatureAVX10_1_512]>;

def FeatureEGPR : SubtargetFeature<
    "egpr", "HasEGPR", "true",
    "Support extended general purpose register">;
def FeaturePush2Pop2 : SubtargetFeature<
    "push2pop2", "HasPush2Pop2", "true",
    "Support PUSH2/POP2 instructions">;
def FeaturePPX : SubtargetFeature<
    "ppx", "HasPPX", "true",
    "Support Push-Pop Acceleration">;
def FeatureNDD : SubtargetFeature<
    "ndd", "HasNDD", "true",
    "Support non-destructive destination">;
def FeatureCCMP : SubtargetFeature<
    "ccmp", "HasCCMP", "true",
    "Support conditional cmp & test instructions">;
def FeatureNF : SubtargetFeature<
    "nf", "HasNF", "true",
    "Support status flags update suppression">;
def FeatureCF : SubtargetFeature<
    "cf", "HasCF", "true",
    "Support conditional faulting">;
def FeatureZU : SubtargetFeature<
    "zu", "HasZU", "true",
    "Support zero-upper SETcc/IMUL">;
def FeatureUseGPR32InInlineAsm : SubtargetFeature<
    "inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
    "Enable use of GPR32 in inline assembly for APX">;
// The implied-feature list is spelled out explicitly and is empty.
def FeatureMOVRS : SubtargetFeature<
    "movrs", "HasMOVRS", "true",
    "Enable MOVRS",
    []>;

// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
// Development Manual. This feature essentially means that REP MOVSB will copy
// using the largest available size instead of copying bytes one by one, making
// it at least as fast as REPMOVS{W,D,Q}.
def FeatureERMSB : SubtargetFeature<
    "ermsb", "HasERMSB", "true",
    "REP MOVS/STOS are fast">;

// Icelake and newer processors have Fast Short REP MOV.
def FeatureFSRM : SubtargetFeature<
    "fsrm", "HasFSRM", "true",
    "REP MOVSB of short lengths is faster">;

def FeatureSoftFloat : SubtargetFeature<
    "soft-float", "UseSoftFloat", "true",
    "Use software floating point features">;

//===----------------------------------------------------------------------===//
// X86 Subtarget Security Mitigation features
//===----------------------------------------------------------------------===//

// Lower indirect calls using a special construct called a `retpoline` to
// mitigate potential Spectre v2 attacks against them.
def FeatureRetpolineIndirectCalls : SubtargetFeature<
    "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
    "Remove speculation of indirect calls from the generated code">;

// Lower indirect branches and switches either using conditional branch trees
// or using a special construct called a `retpoline` to mitigate potential
// Spectre v2 attacks against them.
def FeatureRetpolineIndirectBranches : SubtargetFeature<
    "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
    "Remove speculation of indirect branches from the generated code">;

// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
// `retpoline-indirect-branches` above.
def FeatureRetpoline : SubtargetFeature<
    "retpoline", "DeprecatedUseRetpoline", "true",
    "Remove speculation of indirect branches from the "
    "generated code, either by avoiding them entirely or "
    "lowering them with a speculation blocking construct",
    [FeatureRetpolineIndirectCalls, FeatureRetpolineIndirectBranches]>;

// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
// environments such as a kernel that does boot-time hot patching.
def FeatureRetpolineExternalThunk : SubtargetFeature<
    "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
    "When lowering an indirect call or branch using a `retpoline`, rely "
    "on the specified user provided thunk rather than emitting one "
    "ourselves. Only has effect when combined with some other retpoline "
    "feature",
    [FeatureRetpolineIndirectCalls]>;

// Mitigate LVI attacks against indirect calls/branches and call returns
def FeatureLVIControlFlowIntegrity : SubtargetFeature<
    "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
    "Prevent indirect calls/branches from using a memory operand, and "
    "precede all indirect calls/branches from a register with an "
    "LFENCE instruction to serialize control flow. Also decompose RET "
    "instructions into a POP+LFENCE+JMP sequence.">;

// Enable SESES to mitigate speculative execution attacks
def FeatureSpeculativeExecutionSideEffectSuppression : SubtargetFeature<
    "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
    "Prevent speculative execution side channel timing attacks by "
    "inserting a speculation barrier before memory reads, memory writes, "
    "and conditional branches. Implies LVI Control Flow integrity.",
    [FeatureLVIControlFlowIntegrity]>;

// Mitigate LVI attacks against data loads
def FeatureLVILoadHardening : SubtargetFeature<
    "lvi-load-hardening", "UseLVILoadHardening", "true",
    "Insert LFENCE instructions to prevent data speculatively injected "
    "into loads from being used maliciously.">;

def FeatureTaggedGlobals : SubtargetFeature<
    "tagged-globals", "AllowTaggedGlobals", "true",
    "Use an instruction sequence for taking the address of a global "
    "that allows a memory tag in the upper address bits.">;

// Control codegen mitigation against Straight Line Speculation vulnerability.
def FeatureHardenSlsRet : SubtargetFeature<
    "harden-sls-ret", "HardenSlsRet", "true",
    "Harden against straight line speculation across RET instructions.">;

def FeatureHardenSlsIJmp : SubtargetFeature<
    "harden-sls-ijmp", "HardenSlsIJmp", "true",
    "Harden against straight line speculation across indirect JMP instructions.">;

//===----------------------------------------------------------------------===//
// X86 Subtarget Tuning features
//===----------------------------------------------------------------------===//
def TuningPreferMovmskOverVTest : SubtargetFeature<
    "prefer-movmsk-over-vtest", "PreferMovmskOverVTest", "true",
    "Prefer movmsk over vtest instruction">;

def TuningSlowSHLD : SubtargetFeature<
    "slow-shld", "IsSHLDSlow", "true",
    "SHLD instruction is slow">;

def TuningSlowPMULLD : SubtargetFeature<
    "slow-pmulld", "IsPMULLDSlow", "true",
    "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;

def TuningSlowPMADDWD : SubtargetFeature<
    "slow-pmaddwd", "IsPMADDWDSlow", "true",
    "PMADDWD is slower than PMULLD">;

// FIXME: This should not apply to CPUs that do not have SSE.
def TuningSlowUAMem16 : SubtargetFeature<
    "slow-unaligned-mem-16", "IsUnalignedMem16Slow", "true",
    "Slow unaligned 16-byte memory access">;

def TuningSlowUAMem32 : SubtargetFeature<
    "slow-unaligned-mem-32", "IsUnalignedMem32Slow", "true",
    "Slow unaligned 32-byte memory access">;

def TuningLEAForSP : SubtargetFeature<
    "lea-sp", "UseLeaForSP", "true",
    "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;

// True if 8-bit divisions are significantly faster than
// 32-bit divisions and should be used when possible.
def TuningSlowDivide32 : SubtargetFeature<
    "idivl-to-divb", "HasSlowDivide32", "true",
    "Use 8-bit divide for positive values less than 256">;

// True if 32-bit divides are significantly faster than
// 64-bit divisions and should be used when possible.
def TuningSlowDivide64 : SubtargetFeature<
    "idivq-to-divl", "HasSlowDivide64", "true",
    "Use 32-bit divide for positive values less than 2^32">;

def TuningPadShortFunctions : SubtargetFeature<
    "pad-short-functions", "PadShortFunctions", "true",
    "Pad short functions (to prevent a stall when returning too early)">;

// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def TuningSlowTwoMemOps : SubtargetFeature<
    "slow-two-mem-ops", "SlowTwoMemOps", "true",
    "Two memory operand instructions are slow">;

// True if the LEA instruction inputs have to be ready at address generation
// (AG) time.
def TuningLEAUsesAG : SubtargetFeature<
    "lea-uses-ag", "LeaUsesAG", "true",
    "LEA instruction needs inputs at AG stage">;

def TuningSlowLEA : SubtargetFeature<
    "slow-lea", "SlowLEA", "true",
    "LEA instruction with certain arguments is slow">;

// True if the LEA instruction has all three source operands: base, index,
// and offset or if the LEA instruction uses base and index registers where
// the base is EBP, RBP, or R13.
def TuningSlow3OpsLEA : SubtargetFeature<
    "slow-3ops-lea", "Slow3OpsLEA", "true",
    "LEA instruction with 3 ops or certain registers is slow">;

// True if INC and DEC instructions are slow when writing to flags
def TuningSlowIncDec : SubtargetFeature<
    "slow-incdec", "SlowIncDec", "true",
    "INC and DEC instructions are slower than ADD and SUB">;

// False-dependency tunings: each records that the named instruction(s) have a
// false dependency on the destination register.
def TuningPOPCNTFalseDeps : SubtargetFeature<
    "false-deps-popcnt", "HasPOPCNTFalseDeps", "true",
    "POPCNT has a false dependency on dest register">;

def TuningLZCNTFalseDeps : SubtargetFeature<
    "false-deps-lzcnt-tzcnt", "HasLZCNTFalseDeps", "true",
    "LZCNT/TZCNT have a false dependency on dest register">;

def TuningMULCFalseDeps : SubtargetFeature<
    "false-deps-mulc", "HasMULCFalseDeps", "true",
    "VF[C]MULCPH/SH has a false dependency on dest register">;

def TuningPERMFalseDeps : SubtargetFeature<
    "false-deps-perm", "HasPERMFalseDeps", "true",
    "VPERMD/Q/PS/PD has a false dependency on dest register">;

def TuningRANGEFalseDeps : SubtargetFeature<
    "false-deps-range", "HasRANGEFalseDeps", "true",
    "VRANGEPD/PS/SD/SS has a false dependency on dest register">;

// Help text previously named "VGETMANDPS", which is not an instruction; the
// packed forms are VGETMANTPS/PD.
def TuningGETMANTFalseDeps : SubtargetFeature<
    "false-deps-getmant", "HasGETMANTFalseDeps", "true",
    "VGETMANTSS/SD/SH and VGETMANTPS/PD(memory version) has a"
    " false dependency on dest register">;

def TuningMULLQFalseDeps : SubtargetFeature<
    "false-deps-mullq", "HasMULLQFalseDeps", "true",
    "VPMULLQ has a false dependency on dest register">;

def TuningSBBDepBreaking : SubtargetFeature<
    "sbb-dep-breaking", "HasSBBDepBreaking", "true",
    "SBB with same register has no source dependency">;

// On recent X86 (port bound) processors, it's preferable to combine to a
// single shuffle using a variable mask over multiple fixed shuffles.
def TuningFastVariableCrossLaneShuffle : SubtargetFeature<
    "fast-variable-crosslane-shuffle", "HasFastVariableCrossLaneShuffle", "true",
    "Cross-lane shuffles with variable masks are fast">;
def TuningFastVariablePerLaneShuffle : SubtargetFeature<
    "fast-variable-perlane-shuffle", "HasFastVariablePerLaneShuffle", "true",
    "Per-lane shuffles with variable masks are fast">;

// Goldmont / Tremont (atom in general) has no bypass delay
def TuningNoDomainDelay : SubtargetFeature<
    "no-bypass-delay", "NoDomainDelay", "true",
    "Has no bypass delay when using the 'wrong' domain">;

// Many processors (Nehalem+ on Intel) have no bypass delay when
// using the wrong mov type.
def TuningNoDomainDelayMov : SubtargetFeature<
    "no-bypass-delay-mov", "NoDomainDelayMov", "true",
    "Has no bypass delay when using the 'wrong' mov type">;

// Newer processors (Skylake+ on Intel) have no bypass delay when
// using the wrong blend type.
def TuningNoDomainDelayBlend : SubtargetFeature<
    "no-bypass-delay-blend", "NoDomainDelayBlend", "true",
    "Has no bypass delay when using the 'wrong' blend type">;

// Newer processors (Haswell+ on Intel) have no bypass delay when
// using the wrong shuffle type.
def TuningNoDomainDelayShuffle : SubtargetFeature<
    "no-bypass-delay-shuffle", "NoDomainDelayShuffle", "true",
    "Has no bypass delay when using the 'wrong' shuffle type">;

// Prefer lowering shuffles on AVX512 targets (e.g. Skylake Server) to
// imm shifts/rotate if they can use more ports than regular shuffles.
def TuningPreferShiftShuffle : SubtargetFeature<
    "faster-shift-than-shuffle", "PreferLowerShuffleAsShift", "true",
    "Shifts are faster (or as fast) as shuffle">;

def TuningFastImmVectorShift : SubtargetFeature<
    "tuning-fast-imm-vector-shift", "FastImmVectorShift", "true",
    "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;

// On some X86 processors, a vzeroupper instruction should be inserted after
// using ymm/zmm registers before executing code that may use SSE instructions.
def TuningInsertVZEROUPPER : SubtargetFeature<
    "vzeroupper", "InsertVZEROUPPER", "true",
    "Should insert vzeroupper instructions">;

// TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.

// True if hardware SQRTSS instruction is at least as fast (latency) as
// RSQRTSS followed by a Newton-Raphson iteration.
def TuningFastScalarFSQRT : SubtargetFeature<
    "fast-scalar-fsqrt", "HasFastScalarFSQRT", "true",
    "Scalar SQRT is fast (disable Newton-Raphson)">;
// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
def TuningFastVectorFSQRT : SubtargetFeature<
    "fast-vector-fsqrt", "HasFastVectorFSQRT", "true",
    "Vector SQRT is fast (disable Newton-Raphson)">;

// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
650def TuningFastLZCNT 651 : SubtargetFeature< 652 "fast-lzcnt", "HasFastLZCNT", "true", 653 "LZCNT instructions are as fast as most simple integer ops">; 654 655// If the target can efficiently decode NOPs upto 7-bytes in length. 656def TuningFast7ByteNOP 657 : SubtargetFeature< 658 "fast-7bytenop", "HasFast7ByteNOP", "true", 659 "Target can quickly decode up to 7 byte NOPs">; 660 661// If the target can efficiently decode NOPs upto 11-bytes in length. 662def TuningFast11ByteNOP 663 : SubtargetFeature< 664 "fast-11bytenop", "HasFast11ByteNOP", "true", 665 "Target can quickly decode up to 11 byte NOPs">; 666 667// If the target can efficiently decode NOPs upto 15-bytes in length. 668def TuningFast15ByteNOP 669 : SubtargetFeature< 670 "fast-15bytenop", "HasFast15ByteNOP", "true", 671 "Target can quickly decode up to 15 byte NOPs">; 672 673// Sandy Bridge and newer processors can use SHLD with the same source on both 674// inputs to implement rotate to avoid the partial flag update of the normal 675// rotate instructions. 676def TuningFastSHLDRotate 677 : SubtargetFeature< 678 "fast-shld-rotate", "HasFastSHLDRotate", "true", 679 "SHLD can be used as a faster rotate">; 680 681// Bulldozer and newer processors can merge CMP/TEST (but not other 682// instructions) with conditional branches. 683def TuningBranchFusion 684 : SubtargetFeature<"branchfusion", "HasBranchFusion", "true", 685 "CMP/TEST can be fused with conditional branches">; 686 687// Sandy Bridge and newer processors have many instructions that can be 688// fused with conditional branches and pass through the CPU as a single 689// operation. 690def TuningMacroFusion 691 : SubtargetFeature<"macrofusion", "HasMacroFusion", "true", 692 "Various instructions can be fused with conditional branches">; 693 694// Gather is available since Haswell (AVX2 set). So technically, we can 695// generate Gathers on all AVX2 processors. But the overhead on HSW is high. 
// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).
def TuningFastGather
    : SubtargetFeature<"fast-gather", "HasFastGather", "true",
                       "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;

// Emit vpdpwssd in place of the vpmaddwd+vpaddd sequence.
def TuningFastDPWSSD
    : SubtargetFeature<"fast-dpwssd", "HasFastDPWSSD", "true",
                       "Prefer vpdpwssd instruction over vpmaddwd+vpaddd instruction sequence">;

// Note: these two features set their subtarget field to "false" when enabled.
def TuningPreferNoGather
    : SubtargetFeature<"prefer-no-gather", "PreferGather", "false",
                       "Prefer no gather instructions">;
def TuningPreferNoScatter
    : SubtargetFeature<"prefer-no-scatter", "PreferScatter", "false",
                       "Prefer no scatter instructions">;

// Preferred vector width controls.
def TuningPrefer128Bit
    : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
                       "Prefer 128-bit AVX instructions">;

def TuningPrefer256Bit
    : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
                       "Prefer 256-bit AVX instructions">;

def TuningAllowLight256Bit
    : SubtargetFeature<"allow-light-256-bit", "AllowLight256Bit", "true",
                       "Enable generation of 256-bit load/stores even if we prefer 128-bit">;

def TuningPreferMaskRegisters
    : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
                       "Prefer AVX512 mask registers over PTEST/MOVMSK">;

def TuningFastBEXTR
    : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
                       "Indicates that the BEXTR instruction is implemented as a single uop "
                       "with good throughput">;

// Combine vector math operations with shuffles into horizontal math
// instructions if a CPU implements horizontal operations (introduced with
// SSE3) with better latency/throughput than the alternative sequence.
def TuningFastHorizontalOps
    : SubtargetFeature<"fast-hops", "HasFastHorizontalOps", "true",
                       "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
                       "normal vector instructions with shuffles">;

// Shift-pair versus shift+and preferences, scalar and vector flavours.
def TuningFastScalarShiftMasks
    : SubtargetFeature<"fast-scalar-shift-masks", "HasFastScalarShiftMasks",
                       "true",
                       "Prefer a left/right scalar logical shift pair over a shift+and pair">;

def TuningFastVectorShiftMasks
    : SubtargetFeature<"fast-vector-shift-masks", "HasFastVectorShiftMasks",
                       "true",
                       "Prefer a left/right vector logical shift pair over a shift+and pair">;

def TuningFastMOVBE
    : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
                       "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;

def TuningFastImm16
    : SubtargetFeature<"fast-imm16", "HasFastImm16", "true",
                       "Prefer a i16 instruction with i16 immediate over extension to i32">;

// Microarchitecture-specific cost-table selectors.
def TuningUseSLMArithCosts
    : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
                       "Use Silvermont specific arithmetic costs">;

def TuningUseGLMDivSqrtCosts
    : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
                       "Use Goldmont specific floating point div/sqrt costs">;

// From the Redwood Cove architecture onwards, branches accept a branch-taken
// hint (i.e., instruction prefix 3EH).
def TuningBranchHint
    : SubtargetFeature<"branch-hint", "HasBranchHint", "true",
                       "Target has branch hint feature">;

//===----------------------------------------------------------------------===//
// X86 CPU Families
// TODO: Remove these - use general tuning features to determine codegen.
//===----------------------------------------------------------------------===//

// Bonnell
def ProcIntelAtom
    : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">;

//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//

include "X86RegisterInfo.td"
include "X86RegisterBanks.td"

//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//

include "X86Schedule.td"
include "X86InstrInfo.td"
include "X86SchedPredicates.td"

def X86InstrInfo : InstrInfo;

//===----------------------------------------------------------------------===//
// X86 Scheduler Models
//===----------------------------------------------------------------------===//

include "X86ScheduleAtom.td"
include "X86SchedSandyBridge.td"
include "X86SchedHaswell.td"
include "X86SchedBroadwell.td"
include "X86ScheduleSLM.td"
include "X86ScheduleZnver1.td"
include "X86ScheduleZnver2.td"
include "X86ScheduleZnver3.td"
include "X86ScheduleZnver4.td"
include "X86ScheduleBdVer2.td"
include "X86ScheduleBtVer2.td"
include "X86SchedSkylakeClient.td"
include "X86SchedSkylakeServer.td"
include "X86SchedIceLake.td"
include "X86SchedAlderlakeP.td"
include "X86SchedSapphireRapids.td"

//===----------------------------------------------------------------------===//
// X86 Processor Feature Lists
//===----------------------------------------------------------------------===//

// Container record for the per-CPU feature and tuning lists. The naming scheme
// is <CPU>Features (ISA list), <CPU>Tuning (tuning list) and, where a CPU is
// built on a predecessor, <CPU>AdditionalFeatures / <CPU>AdditionalTuning
// holding only the delta that is !listconcat'ed onto the predecessor's list.
def ProcessorFeatures {
  // x86-64 micro-architecture levels: x86-64 and x86-64-v[234].
  // Each level's ISA list extends the previous level's list.
  list<SubtargetFeature> X86_64V1Features = [
    FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2, FeatureFXSR,
    FeatureNOPL, FeatureX86_64,
  ];
  list<SubtargetFeature> X86_64V1Tuning = [
    TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64, TuningSlowIncDec,
    TuningInsertVZEROUPPER
  ];

  list<SubtargetFeature> X86_64V2Features =
    !listconcat(X86_64V1Features,
                [FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT,
                 FeatureSSE42]);
  list<SubtargetFeature> X86_64V2Tuning = [
    TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64, TuningSlowUAMem32,
    TuningFastScalarFSQRT, TuningFastSHLDRotate, TuningFast15ByteNOP,
    TuningPOPCNTFalseDeps, TuningInsertVZEROUPPER
  ];

  list<SubtargetFeature> X86_64V3Features =
    !listconcat(X86_64V2Features,
                [FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA,
                 FeatureLZCNT, FeatureMOVBE, FeatureXSAVE]);
  list<SubtargetFeature> X86_64V3Tuning = [
    TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
    TuningFastScalarFSQRT, TuningFastSHLDRotate, TuningFast15ByteNOP,
    TuningFastVariableCrossLaneShuffle, TuningFastVariablePerLaneShuffle,
    TuningPOPCNTFalseDeps, TuningLZCNTFalseDeps, TuningInsertVZEROUPPER,
    TuningAllowLight256Bit
  ];

  list<SubtargetFeature> X86_64V4Features =
    !listconcat(X86_64V3Features,
                [FeatureEVEX512, FeatureBWI, FeatureCDI, FeatureDQI,
                 FeatureVLX, ]);
  list<SubtargetFeature> X86_64V4Tuning = [
    TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
    TuningFastScalarFSQRT, TuningFastVectorFSQRT, TuningFastSHLDRotate,
    TuningFast15ByteNOP, TuningFastVariableCrossLaneShuffle,
    TuningFastVariablePerLaneShuffle, TuningPrefer256Bit, TuningFastGather,
    TuningPOPCNTFalseDeps, TuningInsertVZEROUPPER, TuningAllowLight256Bit
  ];

  // Nehalem (ISA list is identical to x86-64-v2).
  list<SubtargetFeature> NHMFeatures = X86_64V2Features;
  list<SubtargetFeature> NHMTuning = [
    TuningMacroFusion, TuningSlowDivide64, TuningInsertVZEROUPPER,
    TuningNoDomainDelayMov
  ];

  // Westmere (extends Nehalem; tuning shared with Nehalem).
  list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
  list<SubtargetFeature> WSMTuning = NHMTuning;
  list<SubtargetFeature> WSMFeatures =
    !listconcat(NHMFeatures, WSMAdditionalFeatures);

  // Sandybridge (extends Westmere).
  list<SubtargetFeature> SNBAdditionalFeatures = [
    FeatureAVX, FeatureXSAVE, FeatureXSAVEOPT
  ];
  list<SubtargetFeature> SNBTuning = [
    TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64, TuningSlowUAMem32,
    TuningFastScalarFSQRT, TuningFastSHLDRotate, TuningFast15ByteNOP,
    TuningPOPCNTFalseDeps, TuningInsertVZEROUPPER, TuningNoDomainDelayMov
  ];
  list<SubtargetFeature> SNBFeatures =
    !listconcat(WSMFeatures, SNBAdditionalFeatures);

  // Ivybridge (extends Sandybridge; tuning shared with Sandybridge).
  list<SubtargetFeature> IVBAdditionalFeatures = [
    FeatureRDRAND, FeatureF16C, FeatureFSGSBase
  ];
  list<SubtargetFeature> IVBTuning = SNBTuning;
  list<SubtargetFeature> IVBFeatures =
    !listconcat(SNBFeatures, IVBAdditionalFeatures);

  // Haswell (extends Ivybridge).
  list<SubtargetFeature> HSWAdditionalFeatures = [
    FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureERMSB, FeatureFMA,
    FeatureINVPCID, FeatureLZCNT, FeatureMOVBE
  ];
  list<SubtargetFeature> HSWTuning = [
    TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
    TuningFastScalarFSQRT, TuningFastSHLDRotate, TuningFast15ByteNOP,
    TuningFastVariableCrossLaneShuffle, TuningFastVariablePerLaneShuffle,
    TuningPOPCNTFalseDeps, TuningLZCNTFalseDeps, TuningInsertVZEROUPPER,
    TuningAllowLight256Bit, TuningNoDomainDelayMov, TuningNoDomainDelayShuffle
  ];
  list<SubtargetFeature> HSWFeatures =
    !listconcat(IVBFeatures, HSWAdditionalFeatures);

  // Broadwell (extends Haswell; tuning shared with Haswell).
  list<SubtargetFeature> BDWAdditionalFeatures = [
    FeatureADX, FeatureRDSEED, FeaturePRFCHW
  ];
  list<SubtargetFeature> BDWTuning = HSWTuning;
  list<SubtargetFeature> BDWFeatures =
    !listconcat(HSWFeatures, BDWAdditionalFeatures);

  // Skylake (extends Broadwell).
  list<SubtargetFeature> SKLAdditionalFeatures = [
    FeatureAES, FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT
  ];
  list<SubtargetFeature> SKLTuning = [
    TuningFastGather, TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
    TuningFastScalarFSQRT, TuningFastVectorFSQRT, TuningFastSHLDRotate,
    TuningFast15ByteNOP, TuningFastVariableCrossLaneShuffle,
    TuningFastVariablePerLaneShuffle, TuningPOPCNTFalseDeps,
    TuningInsertVZEROUPPER, TuningAllowLight256Bit, TuningNoDomainDelayMov,
    TuningNoDomainDelayShuffle, TuningNoDomainDelayBlend
  ];
  list<SubtargetFeature> SKLFeatures =
    !listconcat(BDWFeatures, SKLAdditionalFeatures);

  // Skylake-AVX512 (extends Broadwell, not Skylake client).
  list<SubtargetFeature> SKXAdditionalFeatures = [
    FeatureAES, FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT, FeatureAVX512,
    FeatureEVEX512, FeatureCDI, FeatureDQI, FeatureBWI, FeatureVLX, FeaturePKU,
    FeatureCLWB
  ];
  list<SubtargetFeature> SKXTuning = [
    TuningFastGather, TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
    TuningFastScalarFSQRT, TuningFastVectorFSQRT, TuningFastSHLDRotate,
    TuningFast15ByteNOP, TuningFastVariableCrossLaneShuffle,
    TuningFastVariablePerLaneShuffle, TuningPrefer256Bit, TuningPOPCNTFalseDeps,
    TuningInsertVZEROUPPER, TuningAllowLight256Bit, TuningPreferShiftShuffle,
    TuningNoDomainDelayMov, TuningNoDomainDelayShuffle,
    TuningNoDomainDelayBlend, TuningFastImmVectorShift
  ];
  list<SubtargetFeature> SKXFeatures =
    !listconcat(BDWFeatures, SKXAdditionalFeatures);

  // Cascadelake (extends Skylake-AVX512; tuning shared with it).
  list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
  list<SubtargetFeature> CLXTuning = SKXTuning;
  list<SubtargetFeature> CLXFeatures =
    !listconcat(SKXFeatures, CLXAdditionalFeatures);

  // Cooperlake (extends Cascadelake; tuning shared with Skylake-AVX512).
  list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
  list<SubtargetFeature> CPXTuning = SKXTuning;
  list<SubtargetFeature> CPXFeatures =
    !listconcat(CLXFeatures, CPXAdditionalFeatures);

  // Cannonlake (extends Skylake client).
  list<SubtargetFeature> CNLAdditionalFeatures = [
    FeatureAVX512, FeatureEVEX512, FeatureCDI, FeatureDQI, FeatureBWI,
    FeatureVLX, FeaturePKU, FeatureVBMI, FeatureIFMA, FeatureSHA
  ];
  list<SubtargetFeature> CNLTuning = [
    TuningFastGather, TuningMacroFusion, TuningSlow3OpsLEA, TuningSlowDivide64,
    TuningFastScalarFSQRT, TuningFastVectorFSQRT, TuningFastSHLDRotate,
    TuningFast15ByteNOP, TuningFastVariableCrossLaneShuffle,
    TuningFastVariablePerLaneShuffle, TuningPrefer256Bit,
    TuningInsertVZEROUPPER, TuningAllowLight256Bit, TuningNoDomainDelayMov,
    TuningNoDomainDelayShuffle, TuningNoDomainDelayBlend,
    TuningFastImmVectorShift
  ];
  list<SubtargetFeature> CNLFeatures =
    !listconcat(SKLFeatures, CNLAdditionalFeatures);

  // Icelake (extends Cannonlake).
  list<SubtargetFeature> ICLAdditionalFeatures = [
    FeatureBITALG, FeatureVAES, FeatureVBMI2, FeatureVNNI, FeatureVPCLMULQDQ,
    FeatureVPOPCNTDQ, FeatureGFNI, FeatureRDPID, FeatureFSRM
  ];
  list<SubtargetFeature> ICLTuning = [
    TuningFastGather, TuningMacroFusion, TuningSlowDivide64,
    TuningFastScalarFSQRT, TuningFastVectorFSQRT, TuningFastSHLDRotate,
    TuningFast15ByteNOP, TuningFastVariableCrossLaneShuffle,
    TuningFastVariablePerLaneShuffle, TuningPrefer256Bit,
    TuningInsertVZEROUPPER, TuningAllowLight256Bit, TuningNoDomainDelayMov,
    TuningNoDomainDelayShuffle, TuningNoDomainDelayBlend,
    TuningFastImmVectorShift
  ];
  list<SubtargetFeature> ICLFeatures =
    !listconcat(CNLFeatures, ICLAdditionalFeatures);

  // Icelake Server (extends Icelake; tuning shared with Icelake).
  list<SubtargetFeature> ICXAdditionalFeatures = [
    FeaturePCONFIG, FeatureCLWB, FeatureWBNOINVD
  ];
  list<SubtargetFeature> ICXTuning = ICLTuning;
  list<SubtargetFeature> ICXFeatures =
    !listconcat(ICLFeatures, ICXAdditionalFeatures);

  // Tigerlake (extends Icelake; tuning shared with Icelake).
  list<SubtargetFeature> TGLAdditionalFeatures = [
    FeatureVP2INTERSECT, FeatureCLWB, FeatureMOVDIRI, FeatureMOVDIR64B,
    FeatureSHSTK
  ];
  list<SubtargetFeature> TGLTuning = ICLTuning;
  list<SubtargetFeature> TGLFeatures =
    !listconcat(ICLFeatures, TGLAdditionalFeatures);

  // Sapphirerapids (extends Icelake Server in both ISA and tuning).
  list<SubtargetFeature> SPRAdditionalFeatures = [
    FeatureAMXTILE, FeatureAMXINT8, FeatureAMXBF16, FeatureBF16,
    FeatureSERIALIZE, FeatureCLDEMOTE, FeatureWAITPKG, FeaturePTWRITE,
    FeatureFP16, FeatureAVXVNNI, FeatureTSXLDTRK, FeatureENQCMD, FeatureSHSTK,
    FeatureMOVDIRI, FeatureMOVDIR64B, FeatureUINTR
  ];
  list<SubtargetFeature> SPRAdditionalTuning = [
    TuningMULCFalseDeps, TuningPERMFalseDeps, TuningRANGEFalseDeps,
    TuningGETMANTFalseDeps, TuningMULLQFalseDeps
  ];
  list<SubtargetFeature> SPRTuning =
    !listconcat(ICXTuning, SPRAdditionalTuning);
  list<SubtargetFeature> SPRFeatures =
    !listconcat(ICXFeatures, SPRAdditionalFeatures);

  // Graniterapids (extends Sapphirerapids in both ISA and tuning).
  list<SubtargetFeature> GNRAdditionalFeatures = [
    FeatureAMXFP16, FeaturePREFETCHI
  ];
  list<SubtargetFeature> GNRFeatures =
    !listconcat(SPRFeatures, GNRAdditionalFeatures);
  list<SubtargetFeature> GNRAdditionalTuning = [TuningBranchHint];
  list<SubtargetFeature> GNRTuning =
    !listconcat(SPRTuning, GNRAdditionalTuning);

  // Graniterapids D (extends Graniterapids).
  list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX];
  list<SubtargetFeature> GNRDFeatures =
    !listconcat(GNRFeatures, GNRDAdditionalFeatures);

  // Diamond Rapids (extends Graniterapids D).
  list<SubtargetFeature> DMRAdditionalFeatures = [
    FeatureAVX10_2_512, FeatureSM4, FeatureCMPCCXADD, FeatureAVXIFMA,
    FeatureAVXNECONVERT, FeatureAVXVNNIINT8, FeatureAVXVNNIINT16,
    FeatureUSERMSR, FeatureSHA512, FeatureSM3, FeatureEGPR, FeatureZU,
    FeatureCCMP, FeaturePush2Pop2, FeaturePPX, FeatureNDD, FeatureNF, FeatureCF,
    FeatureMOVRS, FeatureAMXMOVRS, FeatureAMXAVX512, FeatureAMXFP8,
    FeatureAMXTF32, FeatureAMXTRANSPOSE
  ];
  list<SubtargetFeature> DMRFeatures =
    !listconcat(GNRDFeatures, DMRAdditionalFeatures);

  // Atom
  list<SubtargetFeature> AtomFeatures = [
    FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSSE3, FeatureFXSR,
    FeatureNOPL, FeatureX86_64, FeatureCX16, FeatureMOVBE, FeatureLAHFSAHF64
  ];
  list<SubtargetFeature> AtomTuning = [
    ProcIntelAtom, TuningSlowUAMem16, TuningLEAForSP, TuningSlowDivide32,
    TuningSlowDivide64, TuningSlowTwoMemOps, TuningFastImm16, TuningLEAUsesAG,
    TuningPadShortFunctions, TuningInsertVZEROUPPER, TuningNoDomainDelay
  ];

  // Silvermont (extends Atom).
  list<SubtargetFeature> SLMAdditionalFeatures = [
    FeatureSSE42, FeatureCRC32, FeaturePOPCNT, FeaturePCLMUL, FeaturePRFCHW,
    FeatureRDRAND
  ];
  list<SubtargetFeature> SLMTuning = [
    TuningUseSLMArithCosts, TuningSlowTwoMemOps, TuningSlowLEA,
    TuningSlowIncDec, TuningSlowDivide64, TuningSlowPMULLD, TuningFast7ByteNOP,
    TuningFastMOVBE, TuningFastImm16, TuningPOPCNTFalseDeps,
    TuningInsertVZEROUPPER, TuningNoDomainDelay
  ];
  list<SubtargetFeature> SLMFeatures =
    !listconcat(AtomFeatures, SLMAdditionalFeatures);

  // Goldmont (extends Silvermont).
  list<SubtargetFeature> GLMAdditionalFeatures = [
    FeatureAES, FeatureSHA, FeatureRDSEED, FeatureXSAVE, FeatureXSAVEOPT,
    FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT, FeatureFSGSBase
  ];
  list<SubtargetFeature> GLMTuning = [
    TuningUseGLMDivSqrtCosts, TuningSlowTwoMemOps, TuningSlowLEA,
    TuningSlowIncDec, TuningFastMOVBE, TuningFastImm16, TuningPOPCNTFalseDeps,
    TuningInsertVZEROUPPER, TuningNoDomainDelay
  ];
  list<SubtargetFeature> GLMFeatures =
    !listconcat(SLMFeatures, GLMAdditionalFeatures);

  // Goldmont Plus (extends Goldmont).
  list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE, FeatureRDPID];
  list<SubtargetFeature> GLPTuning = [
    TuningUseGLMDivSqrtCosts, TuningSlowTwoMemOps, TuningSlowLEA,
    TuningSlowIncDec, TuningFastMOVBE, TuningFastImm16, TuningInsertVZEROUPPER,
    TuningNoDomainDelay
  ];
  list<SubtargetFeature> GLPFeatures =
    !listconcat(GLMFeatures, GLPAdditionalFeatures);

  // Tremont (extends Goldmont Plus; tuning shared with Goldmont Plus).
  list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB, FeatureGFNI];
  list<SubtargetFeature> TRMTuning = GLPTuning;
  list<SubtargetFeature> TRMFeatures =
    !listconcat(GLPFeatures, TRMAdditionalFeatures);

  // Alderlake (ISA extends Tremont; tuning extends Skylake).
  list<SubtargetFeature> ADLAdditionalFeatures = [
    FeatureSERIALIZE, FeaturePCONFIG, FeatureSHSTK, FeatureWIDEKL,
    FeatureINVPCID, FeatureADX, FeatureFMA, FeatureVAES, FeatureVPCLMULQDQ,
    FeatureF16C, FeatureBMI, FeatureBMI2, FeatureLZCNT, FeatureAVXVNNI,
    FeaturePKU, FeatureHRESET, FeatureCLDEMOTE, FeatureMOVDIRI,
    FeatureMOVDIR64B, FeatureWAITPKG
  ];
  list<SubtargetFeature> ADLAdditionalTuning = [
    TuningPERMFalseDeps, TuningPreferMovmskOverVTest, TuningFastImmVectorShift
  ];
  list<SubtargetFeature> ADLTuning =
    !listconcat(SKLTuning, ADLAdditionalTuning);
  list<SubtargetFeature> ADLFeatures =
    !listconcat(TRMFeatures, ADLAdditionalFeatures);

  // Gracemont
  list<SubtargetFeature> GRTTuning = [
    TuningMacroFusion, TuningSlow3OpsLEA, TuningFastScalarFSQRT,
    TuningFastVectorFSQRT, TuningFast15ByteNOP,
    TuningFastVariablePerLaneShuffle, TuningPOPCNTFalseDeps,
    TuningInsertVZEROUPPER
  ];

  // Sierraforest (extends Alderlake).
  list<SubtargetFeature> SRFAdditionalFeatures = [
    FeatureCMPCCXADD, FeatureAVXIFMA, FeatureAVXNECONVERT, FeatureENQCMD,
    FeatureUINTR, FeatureAVXVNNIINT8
  ];
  list<SubtargetFeature> SRFFeatures =
    !listconcat(ADLFeatures, SRFAdditionalFeatures);

  // Arrowlake S (extends Sierraforest).
  list<SubtargetFeature> ARLSAdditionalFeatures = [
    FeatureAVXVNNIINT16, FeatureSHA512, FeatureSM3, FeatureSM4
  ];
  list<SubtargetFeature> ARLSFeatures =
    !listconcat(SRFFeatures, ARLSAdditionalFeatures);

  // Pantherlake (extends Arrowlake S).
  list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
  list<SubtargetFeature> PTLFeatures =
    !listconcat(ARLSFeatures, PTLAdditionalFeatures);


  // Clearwaterforest (extends Arrowlake S).
  list<SubtargetFeature> CWFAdditionalFeatures = [
    FeaturePREFETCHI, FeatureUSERMSR
  ];
  list<SubtargetFeature> CWFFeatures =
    !listconcat(ARLSFeatures, CWFAdditionalFeatures);

  // Knights Landing
  list<SubtargetFeature> KNLFeatures = [
    FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureFXSR, FeatureNOPL,
    FeatureX86_64, FeatureCX16, FeatureCRC32, FeaturePOPCNT, FeaturePCLMUL,
    FeatureXSAVE, FeatureXSAVEOPT, FeatureLAHFSAHF64, FeatureAES, FeatureRDRAND,
    FeatureF16C, FeatureFSGSBase, FeatureAVX512, FeatureEVEX512, FeatureCDI,
    FeatureADX, FeatureRDSEED, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
    FeatureBMI2, FeatureFMA, FeaturePRFCHW
  ];
  list<SubtargetFeature> KNLTuning = [
    TuningSlowDivide64, TuningSlow3OpsLEA, TuningSlowIncDec,
    TuningSlowTwoMemOps, TuningPreferMaskRegisters, TuningFastGather,
    TuningFastMOVBE, TuningFastImm16, TuningSlowPMADDWD
  ];
  // TODO Add AVX5124FMAPS/AVX5124VNNIW features
  list<SubtargetFeature> KNMFeatures =
    !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);

  // Barcelona
  list<SubtargetFeature> BarcelonaFeatures = [
    FeatureX87, FeatureCX8, FeatureSSE4A, FeatureFXSR, FeatureNOPL, FeatureCX16,
    FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, FeatureLAHFSAHF64, FeatureCMOV,
    FeatureX86_64
  ];
  list<SubtargetFeature> BarcelonaTuning = [
    TuningFastScalarShiftMasks, TuningSlowDivide64, TuningSlowSHLD,
    TuningSBBDepBreaking, TuningInsertVZEROUPPER
  ];

  // Bobcat
  list<SubtargetFeature> BtVer1Features = [
    FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSSE3, FeatureSSE4A,
    FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCX16, FeaturePRFCHW,
    FeatureLZCNT, FeaturePOPCNT, FeatureLAHFSAHF64
  ];
  list<SubtargetFeature> BtVer1Tuning = [
    TuningFast15ByteNOP, TuningFastScalarShiftMasks, TuningFastVectorShiftMasks,
    TuningSlowDivide64, TuningSlowSHLD, TuningFastImm16, TuningSBBDepBreaking,
    TuningInsertVZEROUPPER
  ];

  // Jaguar (extends Bobcat).
  list<SubtargetFeature> BtVer2AdditionalFeatures = [
    FeatureAVX, FeatureAES, FeatureCRC32, FeaturePCLMUL, FeatureBMI,
    FeatureF16C, FeatureMOVBE, FeatureXSAVE, FeatureXSAVEOPT
  ];
  list<SubtargetFeature> BtVer2Tuning = [
    TuningFastLZCNT, TuningFastBEXTR, TuningFastHorizontalOps,
    TuningFast15ByteNOP, TuningFastScalarShiftMasks, TuningFastVectorShiftMasks,
    TuningFastMOVBE, TuningFastImm16, TuningSBBDepBreaking, TuningSlowDivide64,
    TuningSlowSHLD
  ];
  list<SubtargetFeature> BtVer2Features =
    !listconcat(BtVer1Features, BtVer2AdditionalFeatures);

  // Bulldozer
  list<SubtargetFeature> BdVer1Features = [
    FeatureX87, FeatureCX8, FeatureCMOV, FeatureXOP, FeatureX86_64, FeatureCX16,
    FeatureAES, FeatureCRC32, FeaturePRFCHW, FeaturePCLMUL, FeatureMMX,
    FeatureFXSR, FeatureNOPL, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE,
    FeatureLWP, FeatureLAHFSAHF64
  ];
  list<SubtargetFeature> BdVer1Tuning = [
    TuningSlowSHLD, TuningSlowDivide64, TuningFast11ByteNOP,
    TuningFastScalarShiftMasks, TuningBranchFusion, TuningSBBDepBreaking,
    TuningInsertVZEROUPPER
  ];

  // PileDriver (extends Bulldozer in both ISA and tuning).
  list<SubtargetFeature> BdVer2AdditionalFeatures = [
    FeatureF16C, FeatureBMI, FeatureTBM, FeatureFMA
  ];
  list<SubtargetFeature> BdVer2AdditionalTuning = [
    TuningFastBEXTR, TuningFastMOVBE
  ];
  list<SubtargetFeature> BdVer2Tuning =
    !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
  list<SubtargetFeature> BdVer2Features =
    !listconcat(BdVer1Features, BdVer2AdditionalFeatures);

  // Steamroller (extends PileDriver; tuning shared with PileDriver).
  list<SubtargetFeature> BdVer3AdditionalFeatures = [
    FeatureXSAVEOPT, FeatureFSGSBase
  ];
  list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
  list<SubtargetFeature> BdVer3Features =
    !listconcat(BdVer2Features, BdVer3AdditionalFeatures);

  // Excavator (extends Steamroller; tuning shared with Steamroller).
  list<SubtargetFeature> BdVer4AdditionalFeatures = [
    FeatureAVX2, FeatureBMI2, FeatureMOVBE, FeatureRDRAND, FeatureMWAITX
  ];
  list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
  list<SubtargetFeature> BdVer4Features =
    !listconcat(BdVer3Features, BdVer4AdditionalFeatures);


  // AMD Zen Processors common ISAs
  list<SubtargetFeature> ZNFeatures = [
    FeatureADX, FeatureAES, FeatureAVX2, FeatureBMI, FeatureBMI2,
    FeatureCLFLUSHOPT, FeatureCLZERO, FeatureCMOV, FeatureX86_64, FeatureCX16,
    FeatureCRC32, FeatureF16C, FeatureFMA, FeatureFSGSBase, FeatureFXSR,
    FeatureNOPL, FeatureLAHFSAHF64, FeatureLZCNT, FeatureMMX, FeatureMOVBE,
    FeatureMWAITX, FeaturePCLMUL, FeaturePOPCNT, FeaturePRFCHW, FeatureRDRAND,
    FeatureRDSEED, FeatureSHA, FeatureSSE4A, FeatureX87, FeatureXSAVE,
    FeatureXSAVEC, FeatureXSAVEOPT, FeatureXSAVES
  ];
  list<SubtargetFeature> ZNTuning = [
    TuningFastLZCNT, TuningFastBEXTR, TuningFast15ByteNOP, TuningBranchFusion,
    TuningFastScalarFSQRT, TuningFastVectorFSQRT, TuningFastScalarShiftMasks,
    TuningFastVariablePerLaneShuffle, TuningFastMOVBE, TuningFastImm16,
    TuningSlowDivide64, TuningSlowSHLD, TuningSBBDepBreaking,
    TuningInsertVZEROUPPER, TuningAllowLight256Bit
  ];

  // ZN2: extends ZN; tuning shared with ZN.
  list<SubtargetFeature> ZN2AdditionalFeatures = [
    FeatureCLWB, FeatureRDPID, FeatureRDPRU, FeatureWBNOINVD
  ];
  list<SubtargetFeature> ZN2Tuning = ZNTuning;
  list<SubtargetFeature> ZN2Features =
    !listconcat(ZNFeatures, ZN2AdditionalFeatures);

  // ZN3: extends ZN2 in both ISA and tuning.
  list<SubtargetFeature> ZN3AdditionalFeatures = [
    FeatureFSRM, FeatureINVPCID, FeaturePKU, FeatureVAES, FeatureVPCLMULQDQ
  ];
  list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion];
  list<SubtargetFeature> ZN3Tuning =
    !listconcat(ZN2Tuning, ZN3AdditionalTuning);
  list<SubtargetFeature> ZN3Features =
    !listconcat(ZN2Features, ZN3AdditionalFeatures);

  // ZN4: extends ZN3 in both ISA and tuning.
  list<SubtargetFeature> ZN4AdditionalTuning = [TuningFastDPWSSD];
  list<SubtargetFeature> ZN4Tuning =
    !listconcat(ZN3Tuning, ZN4AdditionalTuning);
  list<SubtargetFeature> ZN4AdditionalFeatures = [
    FeatureAVX512, FeatureEVEX512, FeatureCDI, FeatureDQI, FeatureBWI,
    FeatureVLX, FeatureVBMI, FeatureVBMI2, FeatureIFMA, FeatureVNNI,
    FeatureBITALG, FeatureGFNI, FeatureBF16, FeatureSHSTK, FeatureVPOPCNTDQ
  ];
  list<SubtargetFeature> ZN4Features =
    !listconcat(ZN3Features, ZN4AdditionalFeatures);

  // ZN5: extends ZN4; tuning shared with ZN4.
  list<SubtargetFeature> ZN5Tuning = ZN4Tuning;
  list<SubtargetFeature> ZN5AdditionalFeatures = [
    FeatureVNNI, FeatureMOVDIRI, FeatureMOVDIR64B, FeatureVP2INTERSECT,
    FeaturePREFETCHI, FeatureAVXVNNI
  ];
  list<SubtargetFeature> ZN5Features =
    !listconcat(ZN4Features, ZN5AdditionalFeatures);
}

//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//

// A processor definition that uses GenericModel as its scheduling model.
class Proc<string Name, list<SubtargetFeature> Features,
           list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;

// A processor definition with an explicitly chosen scheduling model.
class ProcModel<string Name, SchedMachineModel Model,
                list<SubtargetFeature> Features,
                list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, Model, Features, TuneFeatures>;

// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
// if i386/i486 is specifically requested.
// NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget
// constructor checks that any CPU used in 64-bit mode has FeatureX86_64
// enabled. It has no effect on code generation.
// NOTE: As a default tuning, "generic" aims to produce code optimized for the
// most common X86 processors. The tunings might be changed over time. It is
// recommended to use "tune-cpu"="x86-64" in function attribute for consistency.
def : ProcModel<"generic", SandyBridgeModel,
                [FeatureX87, FeatureCX8, FeatureX86_64],
                [TuningSlow3OpsLEA, TuningSlowDivide64, TuningMacroFusion,
                 TuningFastScalarFSQRT, TuningFast15ByteNOP,
                 TuningInsertVZEROUPPER]>;

// Legacy 32-bit processors. All of them use the same two tuning flags and
// differ only in their ISA feature lists.
def : Proc<"i386",    [FeatureX87],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i486",    [FeatureX87],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i586",    [FeatureX87, FeatureCX8],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium", [FeatureX87, FeatureCX8],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach P = ["pentium-mmx", "pentium_mmx"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach P = ["pentiumpro", "pentium_pro"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

foreach P = ["pentium2", "pentium_ii"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV, FeatureFXSR,
              FeatureNOPL],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

foreach P = ["pentium3", "pentium3m", "pentium_iii_no_xmm_regs", "pentium_iii"] in {
  def : Proc<P,
             [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE1, FeatureFXSR,
              FeatureNOPL, FeatureCMOV],
             [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
// The intent is to enable it for pentium4 which is the current default
// processor in a vanilla 32-bit clang compilation when no specific
// architecture is specified. This generally gives a nice performance
// increase on silvermont, with largely neutral behavior on other
// contemporary large core processors.
// pentium-m, pentium4m, prescott and nocona are included as a preventative
// measure to avoid performance surprises, in case clang's default cpu
// changes slightly.

foreach P = ["pentium_m", "pentium-m"] in {
def : ProcModel<P, GenericPostRAModel,
                [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2, FeatureFXSR,
                 FeatureNOPL, FeatureCMOV],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

foreach P = ["pentium4", "pentium4m", "pentium_4"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// Intel Quark.
def : Proc<"lakemont",
           [FeatureCX8],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// Intel Core Duo.
def : ProcModel<"yonah", SandyBridgeModel,
                [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3, FeatureFXSR,
                 FeatureNOPL, FeatureCMOV],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// NetBurst.
foreach P = ["prescott", "pentium_4_sse3"] in {
  def : ProcModel<P, GenericPostRAModel,
                  [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
                  [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
def : ProcModel<"nocona", GenericPostRAModel,
                [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE3,
                 FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCX16, ],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// Intel Core 2 Solo/Duo.
foreach CPUName = ["core2", "core_2_duo_ssse3"] in {
  def : ProcModel<CPUName, SandyBridgeModel,
                  [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX,
                   FeatureSSSE3, FeatureFXSR, FeatureNOPL, FeatureX86_64,
                   FeatureCX16, FeatureLAHFSAHF64],
                  [TuningMacroFusion, TuningSlowUAMem16,
                   TuningInsertVZEROUPPER]>;
}
// Same lists as core2, except that FeatureSSSE3 is replaced by FeatureSSE41.
foreach CPUName = ["penryn", "core_2_duo_sse4_1"] in {
  def : ProcModel<CPUName, SandyBridgeModel,
                  [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX,
                   FeatureSSE41, FeatureFXSR, FeatureNOPL, FeatureX86_64,
                   FeatureCX16, FeatureLAHFSAHF64],
                  [TuningMacroFusion, TuningSlowUAMem16,
                   TuningInsertVZEROUPPER]>;
}

// Atom CPUs.
foreach CPUName = ["bonnell", "atom"] in {
  def : ProcModel<CPUName, AtomModel,
                  ProcessorFeatures.AtomFeatures,
                  ProcessorFeatures.AtomTuning>;
}

foreach CPUName = ["silvermont", "slm", "atom_sse4_2"] in {
  def : ProcModel<CPUName, SLMModel,
                  ProcessorFeatures.SLMFeatures,
                  ProcessorFeatures.SLMTuning>;
}

// atom_sse4_2_movbe pairs the GLM feature list with the SLM tuning list;
// goldmont uses the GLM lists for both.
def : ProcModel<"atom_sse4_2_movbe", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.SLMTuning>;
def : ProcModel<"goldmont", SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.GLMTuning>;
foreach CPUName = ["goldmont_plus", "goldmont-plus"] in {
  def : ProcModel<CPUName, SLMModel,
                  ProcessorFeatures.GLPFeatures,
                  ProcessorFeatures.GLPTuning>;
}
def : ProcModel<"tremont", SLMModel,
                ProcessorFeatures.TRMFeatures,
                ProcessorFeatures.TRMTuning>;

// "Arrandale" along with corei3 and corei5
foreach CPUName = ["nehalem", "corei7", "core_i7_sse4_2"] in {
  def : ProcModel<CPUName, SandyBridgeModel,
                  ProcessorFeatures.NHMFeatures,
                  ProcessorFeatures.NHMTuning>;
}

// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
foreach CPUName = ["westmere", "core_aes_pclmulqdq"] in {
  def : ProcModel<CPUName, SandyBridgeModel,
                  ProcessorFeatures.WSMFeatures,
                  ProcessorFeatures.WSMTuning>;
}

foreach CPUName = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
  def : ProcModel<CPUName, SandyBridgeModel,
                  ProcessorFeatures.SNBFeatures,
                  ProcessorFeatures.SNBTuning>;
}

foreach CPUName = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
  def : ProcModel<CPUName, SandyBridgeModel,
                  ProcessorFeatures.IVBFeatures,
                  ProcessorFeatures.IVBTuning>;
}

foreach CPUName = ["haswell", "core-avx2", "core_4th_gen_avx",
                   "core_4th_gen_avx_tsx"] in {
  def : ProcModel<CPUName, HaswellModel,
                  ProcessorFeatures.HSWFeatures,
                  ProcessorFeatures.HSWTuning>;
}

foreach CPUName = ["broadwell", "core_5th_gen_avx",
                   "core_5th_gen_avx_tsx"] in {
  def : ProcModel<CPUName, BroadwellModel,
                  ProcessorFeatures.BDWFeatures,
                  ProcessorFeatures.BDWTuning>;
}

def : ProcModel<"skylake", SkylakeClientModel,
                ProcessorFeatures.SKLFeatures,
                ProcessorFeatures.SKLTuning>;

// FIXME: define KNL scheduler model
foreach CPUName = ["knl", "mic_avx512"] in {
  def : ProcModel<CPUName, HaswellModel,
                  ProcessorFeatures.KNLFeatures,
                  ProcessorFeatures.KNLTuning>;
}
// knm has its own feature list but reuses the KNL tuning list.
def : ProcModel<"knm", HaswellModel,
                ProcessorFeatures.KNMFeatures,
                ProcessorFeatures.KNLTuning>;

foreach CPUName = ["skylake-avx512", "skx", "skylake_avx512"] in {
  def : ProcModel<CPUName, SkylakeServerModel,
                  ProcessorFeatures.SKXFeatures,
                  ProcessorFeatures.SKXTuning>;
}

def : ProcModel<"cascadelake", SkylakeServerModel,
                ProcessorFeatures.CLXFeatures,
                ProcessorFeatures.CLXTuning>;
def : ProcModel<"cooperlake", SkylakeServerModel,
                ProcessorFeatures.CPXFeatures,
                ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
                ProcessorFeatures.CNLFeatures,
                ProcessorFeatures.CNLTuning>;
foreach CPUName = ["icelake-client", "icelake_client"] in {
  def : ProcModel<CPUName, IceLakeModel,
                  ProcessorFeatures.ICLFeatures,
                  ProcessorFeatures.ICLTuning>;
}
// rocketlake uses the same model, features and tuning as icelake-client.
def : ProcModel<"rocketlake", IceLakeModel,
                ProcessorFeatures.ICLFeatures,
                ProcessorFeatures.ICLTuning>;
foreach CPUName = ["icelake-server", "icelake_server"] in {
  def : ProcModel<CPUName, IceLakeModel,
                  ProcessorFeatures.ICXFeatures,
                  ProcessorFeatures.ICXTuning>;
}
def : ProcModel<"tigerlake", IceLakeModel,
                ProcessorFeatures.TGLFeatures,
                ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids", SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;
def : ProcModel<"alderlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;
// FIXME: Use Gracemont Schedule Model when it is ready.
def : ProcModel<"gracemont", AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.GRTTuning>;
foreach CPUName = ["sierraforest", "grandridge"] in {
  def : ProcModel<CPUName, AlderlakePModel,
                  ProcessorFeatures.SRFFeatures,
                  ProcessorFeatures.GRTTuning>;
}
def : ProcModel<"raptorlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;
def : ProcModel<"meteorlake", AlderlakePModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;
// arrowlake pairs the SRF feature list with the ADL tuning list.
def : ProcModel<"arrowlake", AlderlakePModel,
                ProcessorFeatures.SRFFeatures,
                ProcessorFeatures.ADLTuning>;
foreach CPUName = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
  def : ProcModel<CPUName, AlderlakePModel,
                  ProcessorFeatures.ARLSFeatures,
                  ProcessorFeatures.ADLTuning>;
}
def : ProcModel<"pantherlake", AlderlakePModel,
                ProcessorFeatures.PTLFeatures,
                ProcessorFeatures.ADLTuning>;
def : ProcModel<"clearwaterforest", AlderlakePModel,
                ProcessorFeatures.CWFFeatures,
                ProcessorFeatures.ADLTuning>;
// emeraldrapids uses the same model, features and tuning as sapphirerapids.
def : ProcModel<"emeraldrapids", SapphireRapidsModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;
def : ProcModel<"graniterapids", SapphireRapidsModel,
                ProcessorFeatures.GNRFeatures,
                ProcessorFeatures.GNRTuning>;
foreach CPUName = ["graniterapids-d", "graniterapids_d"] in {
  def : ProcModel<CPUName, SapphireRapidsModel,
                  ProcessorFeatures.GNRDFeatures,
                  ProcessorFeatures.GNRTuning>;
}
def : ProcModel<"diamondrapids", SapphireRapidsModel,
                ProcessorFeatures.DMRFeatures,
                ProcessorFeatures.GNRTuning>;

// AMD CPUs.

def : Proc<"k6",
           [FeatureX87, FeatureCX8, FeatureMMX],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-2",
           [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-3",
           [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

foreach CPUName = ["athlon", "athlon-tbird"] in {
  def : Proc<CPUName,
             [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeaturePRFCHW,
              FeatureNOPL],
             [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

foreach CPUName = ["athlon-4", "athlon-xp", "athlon-mp"] in {
  def : Proc<CPUName,
             [FeatureX87, FeatureCX8, FeatureCMOV, FeatureSSE1, FeatureMMX,
              FeaturePRFCHW, FeatureFXSR, FeatureNOPL],
             [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

foreach CPUName = ["k8", "opteron", "athlon64", "athlon-fx"] in {
  def : Proc<CPUName,
             [FeatureX87, FeatureCX8, FeatureSSE2, FeatureMMX, FeaturePRFCHW,
              FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV],
             [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
              TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
}

foreach CPUName = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
  def : Proc<CPUName,
             [FeatureX87, FeatureCX8, FeatureSSE3, FeatureMMX, FeaturePRFCHW,
              FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV,
              FeatureX86_64],
             [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
              TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
}

foreach CPUName = ["amdfam10", "barcelona"] in {
  def : Proc<CPUName,
             ProcessorFeatures.BarcelonaFeatures,
             ProcessorFeatures.BarcelonaTuning>;
}

// Bobcat
def : Proc<"btver1",
           ProcessorFeatures.BtVer1Features,
           ProcessorFeatures.BtVer1Tuning>;
// Jaguar
def : ProcModel<"btver2", BtVer2Model,
                ProcessorFeatures.BtVer2Features,
                ProcessorFeatures.BtVer2Tuning>;

// Bulldozer
def : ProcModel<"bdver1", BdVer2Model,
                ProcessorFeatures.BdVer1Features,
                ProcessorFeatures.BdVer1Tuning>;
// Piledriver
def : ProcModel<"bdver2", BdVer2Model,
                ProcessorFeatures.BdVer2Features,
                ProcessorFeatures.BdVer2Tuning>;
// Steamroller
// NOTE: BdVer2Model is only an approx model for Steamroller.
def : ProcModel<"bdver3", BdVer2Model,
                ProcessorFeatures.BdVer3Features,
                ProcessorFeatures.BdVer3Tuning>;
// Excavator
// NOTE: Znver1Model is only an approx model for Excavator (with AVX2).
def : ProcModel<"bdver4", Znver1Model,
                ProcessorFeatures.BdVer4Features,
                ProcessorFeatures.BdVer4Tuning>;

def : ProcModel<"znver1", Znver1Model,
                ProcessorFeatures.ZNFeatures,
                ProcessorFeatures.ZNTuning>;
def : ProcModel<"znver2", Znver2Model,
                ProcessorFeatures.ZN2Features,
                ProcessorFeatures.ZN2Tuning>;
def : ProcModel<"znver3", Znver3Model,
                ProcessorFeatures.ZN3Features,
                ProcessorFeatures.ZN3Tuning>;
def : ProcModel<"znver4", Znver4Model,
                ProcessorFeatures.ZN4Features,
                ProcessorFeatures.ZN4Tuning>;
// znver5 has its own feature and tuning lists but is scheduled with
// Znver4Model.
def : ProcModel<"znver5", Znver4Model,
                ProcessorFeatures.ZN5Features,
                ProcessorFeatures.ZN5Tuning>;

def : Proc<"geode",
           [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

def : Proc<"winchip-c6",
           [FeatureX87, FeatureMMX],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"winchip2",
           [FeatureX87, FeatureMMX, FeaturePRFCHW],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3",
           [FeatureX87, FeatureMMX, FeaturePRFCHW],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3-2",
           [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE1, FeatureFXSR,
            FeatureCMOV],
           [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

// A generic, 64-bit specific x86 processor model is provided as well. It
// tries to be good for modern chips without enabling instruction set
// encodings past the basic SSE2 and 64-bit ones. It disables slow things from
// any mainstream and modern 64-bit x86 chip, and enables features that are
// generally beneficial.
//
// The Sandy Bridge model is currently the default scheduling model, since it
// is used across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge, which covers
// a huge swath of x86 processors. If there are specific scheduling knobs
// which need to be tuned differently for AMD chips, we might consider forming
// a common base for them.
def : ProcModel<"x86-64", SandyBridgeModel,
                ProcessorFeatures.X86_64V1Features,
                ProcessorFeatures.X86_64V1Tuning>;
// Close to Sandybridge.
def : ProcModel<"x86-64-v2", SandyBridgeModel,
                ProcessorFeatures.X86_64V2Features,
                ProcessorFeatures.X86_64V2Tuning>;
// Close to Haswell.
def : ProcModel<"x86-64-v3", HaswellModel,
                ProcessorFeatures.X86_64V3Features,
                ProcessorFeatures.X86_64V3Tuning>;
// Close to the AVX-512 level implemented by Xeon Scalable Processors.
def : ProcModel<"x86-64-v4", SkylakeServerModel,
                ProcessorFeatures.X86_64V4Features,
                ProcessorFeatures.X86_64V4Tuning>;

//===----------------------------------------------------------------------===//
// Calling Conventions
//===----------------------------------------------------------------------===//

include "X86CallingConv.td"


//===----------------------------------------------------------------------===//
// Assembly Parser
//===----------------------------------------------------------------------===//

// Parser variant 0: the "att" syntax.
def ATTAsmParserVariant : AsmParserVariant {
  int Variant = 0;

  // Name of this variant.
  string Name = "att";

  // Comments in assembly strings start with this delimiter and are discarded.
  string CommentDelimiter = "#";

  // Prefix used to recognize hard coded registers.
  string RegisterPrefix = "%";
}

// Parser variant 1: the "intel" syntax.
def IntelAsmParserVariant : AsmParserVariant {
  int Variant = 1;

  // Name of this variant.
  string Name = "intel";

  // Comments in assembly strings start with this delimiter and are discarded.
  string CommentDelimiter = ";";

  // Hard coded registers are recognized without any prefix.
  string RegisterPrefix = "";
}

//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//

// The X86 target can emit machine code in two different syntaxes. The choice
// is controlled by the -x86-asm-syntax={att|intel} option.
def ATTAsmWriter : AsmWriter {
  string AsmWriterClassName = "ATTInstPrinter";
  int Variant = 0;
}
def IntelAsmWriter : AsmWriter {
  string AsmWriterClassName = "IntelInstPrinter";
  int Variant = 1;
}

def X86 : Target {
  // Information about the instructions...
  let InstructionSet = X86InstrInfo;

  // Parser variants and writers are listed in the order of their Variant
  // numbers: att (0) first, intel (1) second.
  let AssemblyParserVariants = [
    ATTAsmParserVariant,
    IntelAsmParserVariant
  ];
  let AssemblyWriters = [
    ATTAsmWriter,
    IntelAsmWriter
  ];

  let AllowRegisterRenaming = 1;
}

//===----------------------------------------------------------------------===//
// Pfm Counters
//===----------------------------------------------------------------------===//

include "X86PfmCounters.td"