1//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===------------------------------------------------------------===// 8 9include "llvm/TableGen/SearchableTable.td" 10include "llvm/Target/Target.td" 11include "AMDGPUFeatures.td" 12include "AMDGPUPredicateControl.td" 13 14def p0 : PtrValueType<i64, 0>; 15def p1 : PtrValueType<i64, 1>; 16def p2 : PtrValueType<i32, 2>; 17def p3 : PtrValueType<i32, 3>; 18def p4 : PtrValueType<i64, 4>; 19def p5 : PtrValueType<i32, 5>; 20def p6 : PtrValueType<i32, 6>; 21 22//===------------------------------------------------------------===// 23// Subtarget Features (device properties) 24//===------------------------------------------------------------===// 25 26def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf", 27 "FastFMAF32", 28 "true", 29 "Assuming f32 fma is at least as fast as mul + add" 30>; 31 32def FeatureFastDenormalF32 : SubtargetFeature<"fast-denormal-f32", 33 "FastDenormalF32", 34 "true", 35 "Enabling denormals does not cause f32 instructions to run at f64 rates" 36>; 37 38def FeatureMIMG_R128 : SubtargetFeature<"mimg-r128", 39 "MIMG_R128", 40 "true", 41 "Support 128-bit texture resources" 42>; 43 44def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops", 45 "HalfRate64Ops", 46 "true", 47 "Most fp64 instructions are half rate instead of quarter" 48>; 49 50def FullRate64Ops : SubtargetFeature<"full-rate-64-ops", 51 "FullRate64Ops", 52 "true", 53 "Most fp64 instructions are full rate" 54>; 55 56def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", 57 "FlatAddressSpace", 58 "true", 59 "Support flat address space" 60>; 61 62def FeatureFlatInstOffsets : SubtargetFeature<"flat-inst-offsets", 63 "FlatInstOffsets", 64 "true", 65 "Flat instructions have immediate offset addressing mode" 66>; 67 68def FeatureFlatGlobalInsts : SubtargetFeature<"flat-global-insts", 69 "FlatGlobalInsts", 70 "true", 71 "Have global_* flat memory instructions" 72>; 73 74def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts", 75 "FlatScratchInsts", 76 "true", 77 "Have scratch_* flat memory instructions" 78>; 79 80def FeatureScalarFlatScratchInsts : SubtargetFeature<"scalar-flat-scratch-insts", 81 "ScalarFlatScratchInsts", 82 "true", 83 "Have s_scratch_* flat memory instructions" 84>; 85 86def FeatureEnableFlatScratch : SubtargetFeature<"enable-flat-scratch", 87 "EnableFlatScratch", 88 "true", 89 "Use scratch_* flat memory instructions to access scratch" 90>; 91 92def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts", 93 "AddNoCarryInsts", 94 "true", 95 "Have VALU add/sub instructions without carry out" 96>; 97 98def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access", 99 "UnalignedBufferAccess", 100 "true", 101 "Hardware supports unaligned global loads and stores" 102>; 103 104def FeatureTrapHandler: SubtargetFeature<"trap-handler", 105 "TrapHandler", 106 "true", 107 "Trap handler support" 108>; 109 110def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access", 111 "UnalignedScratchAccess", 112 "true", 113 "Support unaligned scratch loads and stores" 114>; 115 116def FeatureUnalignedDSAccess : SubtargetFeature<"unaligned-ds-access", 117 "UnalignedDSAccess", 118 "true", 119 "Hardware supports unaligned local and region loads and stores" 120>; 121 122def FeatureApertureRegs : SubtargetFeature<"aperture-regs", 123 "HasApertureRegs", 124 "true", 125 "Has Memory Aperture Base and Size Registers" 126>; 127 128def FeatureMadMixInsts : SubtargetFeature<"mad-mix-insts", 129 "HasMadMixInsts", 130 "true", 131 "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions" 132>; 133 134def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts", 135 "HasFmaMixInsts", 136 "true", 137 "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions" 138>; 139 140def FeatureMinimum3Maximum3F32 : SubtargetFeature<"minimum3-maximum3-f32", 141 "HasMinimum3Maximum3F32", 142 "true", 143 "Has v_minimum3_f32 and v_maximum3_f32 instructions" 144>; 145 146def FeatureMinimum3Maximum3F16 : SubtargetFeature<"minimum3-maximum3-f16", 147 "HasMinimum3Maximum3F16", 148 "true", 149 "Has v_minimum3_f16 and v_maximum3_f16 instructions" 150>; 151 152def FeatureMinimum3Maximum3PKF16 : SubtargetFeature<"minimum3-maximum3-pkf16", 153 "HasMinimum3Maximum3PKF16", 154 "true", 155 "Has v_pk_minimum3_f16 and v_pk_maximum3_f16 instructions" 156>; 157 158def FeatureSupportsXNACK : SubtargetFeature<"xnack-support", 159 "SupportsXNACK", 160 "true", 161 "Hardware supports XNACK" 162>; 163 164// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support 165// XNACK. The current default kernel driver setting is: 166// - graphics ring: XNACK disabled 167// - compute ring: XNACK enabled 168// 169// If XNACK is enabled, the VMEM latency can be worse. 170// If XNACK is disabled, the 2 SGPRs can be used for general purposes. 171def FeatureXNACK : SubtargetFeature<"xnack", 172 "EnableXNACK", 173 "true", 174 "Enable XNACK support" 175>; 176 177def FeatureTgSplit : SubtargetFeature<"tgsplit", 178 "EnableTgSplit", 179 "true", 180 "Enable threadgroup split execution" 181>; 182 183def FeatureCuMode : SubtargetFeature<"cumode", 184 "EnableCuMode", 185 "true", 186 "Enable CU wavefront execution mode" 187>; 188 189def FeaturePreciseMemory 190 : SubtargetFeature<"precise-memory", "EnablePreciseMemory", 191 "true", "Enable precise memory mode">; 192 193def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug", 194 "SGPRInitBug", 195 "true", 196 "VI SGPR initialization bug requiring a fixed SGPR allocation size" 197>; 198 199def FeatureUserSGPRInit16Bug : SubtargetFeature<"user-sgpr-init16-bug", 200 "UserSGPRInit16Bug", 201 "true", 202 "Bug requiring at least 16 user+system SGPRs to be enabled" 203>; 204 205def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug", 206 "LDSMisalignedBug", 207 "true", 208 "Some GFX10 bug with multi-dword LDS and flat access that is not naturally aligned in WGP mode" 209>; 210 211def FeatureMFMAInlineLiteralBug : SubtargetFeature<"mfma-inline-literal-bug", 212 "HasMFMAInlineLiteralBug", 213 "true", 214 "MFMA cannot use inline literal as SrcC" 215>; 216 217def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard", 218 "HasVcmpxPermlaneHazard", 219 "true", 220 "TODO: describe me" 221>; 222 223def FeatureVMEMtoScalarWriteHazard : SubtargetFeature<"vmem-to-scalar-write-hazard", 224 "HasVMEMtoScalarWriteHazard", 225 "true", 226 "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution." 227>; 228 229def FeatureSMEMtoVectorWriteHazard : SubtargetFeature<"smem-to-vector-write-hazard", 230 "HasSMEMtoVectorWriteHazard", 231 "true", 232 "s_load_dword followed by v_cmp page faults" 233>; 234 235def FeatureInstFwdPrefetchBug : SubtargetFeature<"inst-fwd-prefetch-bug", 236 "HasInstFwdPrefetchBug", 237 "true", 238 "S_INST_PREFETCH instruction causes shader to hang" 239>; 240 241def FeatureVcmpxExecWARHazard : SubtargetFeature<"vcmpx-exec-war-hazard", 242 "HasVcmpxExecWARHazard", 243 "true", 244 "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)" 245>; 246 247def FeatureLdsBranchVmemWARHazard : SubtargetFeature<"lds-branch-vmem-war-hazard", 248 "HasLdsBranchVmemWARHazard", 249 "true", 250 "Switching between LDS and VMEM-tex not waiting VM_VSRC=0" 251>; 252 253class FeatureMaxHardClauseLength<int size> : SubtargetFeature< 254 "max-hard-clause-length-"#size, 255 "MaxHardClauseLength", 256 !cast<string>(size), 257 "Maximum number of instructions in an explicit S_CLAUSE is "#size 258>; 259 260/// Work around a hardware bug on some chips that can be triggered 261/// under certain circumstances when clauses are longer than 32 operations. 262def FeatureMaxHardClauseLength32 : FeatureMaxHardClauseLength<32>; 263/// While the S_CLAUSE instruction permits encoding clause lengths up to 64, 264/// hardware documentation for gfx10+ indicates that 63 is the maximum 265/// permitted clause length. 266def FeatureMaxHardClauseLength63 : FeatureMaxHardClauseLength<63>; 267 268def FeatureNSAtoVMEMBug : SubtargetFeature<"nsa-to-vmem-bug", 269 "HasNSAtoVMEMBug", 270 "true", 271 "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero" 272>; 273 274def FeatureNSAClauseBug : SubtargetFeature<"nsa-clause-bug", 275 "HasNSAClauseBug", 276 "true", 277 "MIMG-NSA in a hard clause has unpredictable results on GFX10.1" 278>; 279 280def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug", 281 "HasFlatSegmentOffsetBug", 282 "true", 283 "GFX10 bug where inst_offset is ignored when flat instructions access global memory" 284>; 285 286def FeatureNegativeScratchOffsetBug : SubtargetFeature<"negative-scratch-offset-bug", 287 "NegativeScratchOffsetBug", 288 "true", 289 "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9" 290>; 291 292def FeatureNegativeUnalignedScratchOffsetBug : SubtargetFeature<"negative-unaligned-scratch-offset-bug", 293 "NegativeUnalignedScratchOffsetBug", 294 "true", 295 "Scratch instructions with a VGPR offset and a negative immediate offset that is not a multiple of 4 read wrong memory on GFX10" 296>; 297 298def FeatureOffset3fBug : SubtargetFeature<"offset-3f-bug", 299 "HasOffset3fBug", 300 "true", 301 "Branch offset of 3f hardware bug" 302>; 303 304def FeatureImageStoreD16Bug : SubtargetFeature<"image-store-d16-bug", 305 "HasImageStoreD16Bug", 306 "true", 307 "Image Store D16 hardware bug" 308>; 309 310def FeatureImageGather4D16Bug : SubtargetFeature<"image-gather4-d16-bug", 311 "HasImageGather4D16Bug", 312 "true", 313 "Image Gather4 D16 hardware bug" 314>; 315 316def FeatureMADIntraFwdBug : SubtargetFeature<"mad-intra-fwd-bug", 317 "HasMADIntraFwdBug", 318 "true", 319 "MAD_U64/I64 intra instruction forwarding bug" 320>; 321 322def FeatureMSAALoadDstSelBug : SubtargetFeature<"msaa-load-dst-sel-bug", 323 "HasMSAALoadDstSelBug", 324 "true", 325 "MSAA loads not honoring dst_sel bug" 326>; 327 328def FeaturePrivEnabledTrap2NopBug : SubtargetFeature<"priv-enabled-trap2-nop-bug", 329 "HasPrivEnabledTrap2NopBug", 330 "true", 331 "Hardware that runs with PRIV=1 interpreting 's_trap 2' as a nop bug" 332>; 333 334class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature < 335 "ldsbankcount"#Value, 336 "LDSBankCount", 337 !cast<string>(Value), 338 "The number of LDS banks per compute unit." 339>; 340 341def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>; 342def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>; 343 344def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding", 345 "GCN3Encoding", 346 "true", 347 "Encoding format for VI" 348>; 349 350def FeatureCIInsts : SubtargetFeature<"ci-insts", 351 "CIInsts", 352 "true", 353 "Additional instructions for CI+" 354>; 355 356def FeatureGFX8Insts : SubtargetFeature<"gfx8-insts", 357 "GFX8Insts", 358 "true", 359 "Additional instructions for GFX8+" 360>; 361 362def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts", 363 "GFX9Insts", 364 "true", 365 "Additional instructions for GFX9+" 366>; 367 368def FeatureGFX90AInsts : SubtargetFeature<"gfx90a-insts", 369 "GFX90AInsts", 370 "true", 371 "Additional instructions for GFX90A+" 372 // [HasAtomicFMinFMaxF64GlobalInsts, HasAtomicFMinFMaxF64FlatInsts] // TODO 373>; 374 375def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts", 376 "GFX940Insts", 377 "true", 378 "Additional instructions for GFX940+" 379>; 380 381def FeaturePermlane16Swap : SubtargetFeature<"permlane16-swap", 382 "HasPermlane16Swap", 383 "true", 384 "Has v_permlane16_swap_b32 instructions" 385>; 386 387def FeaturePermlane32Swap : SubtargetFeature<"permlane32-swap", 388 "HasPermlane32Swap", 389 "true", 390 "Has v_permlane32_swap_b32 instructions" 391>; 392 393def FeatureFP8ConversionScaleInsts : SubtargetFeature<"fp8-cvt-scale-insts", 394 "HasFP8ConversionScaleInsts", 395 "true", 396 "Has fp8 conversion scale instructions" 397>; 398 399def FeatureBF8ConversionScaleInsts : SubtargetFeature<"bf8-cvt-scale-insts", 400 "HasBF8ConversionScaleInsts", 401 "true", 402 "Has bf8 conversion scale instructions" 403>; 404 405def FeatureFP4ConversionScaleInsts : SubtargetFeature<"fp4-cvt-scale-insts", 406 "HasFP4ConversionScaleInsts", 407 "true", 408 "Has fp4 conversion scale instructions" 409>; 410 411def FeatureFP6BF6ConversionScaleInsts : SubtargetFeature<"fp6bf6-cvt-scale-insts", 412 "HasFP6BF6ConversionScaleInsts", 413 "true", 414 "Has fp6 and bf6 conversion scale instructions" 415>; 416 417def FeatureF16BF16ToFP6BF6ConversionScaleInsts : SubtargetFeature<"f16bf16-to-fp6bf6-cvt-scale-insts", 418 "HasF16BF16ToFP6BF6ConversionScaleInsts", 419 "true", 420 "Has f16bf16 to fp6bf6 conversion scale instructions" 421>; 422 423def FeatureF32ToF16BF16ConversionSRInsts : SubtargetFeature<"f32-to-f16bf16-cvt-sr-insts", 424 "HasF32ToF16BF16ConversionSRInsts", 425 "true", 426 "Has f32 to f16bf16 conversion scale instructions" 427>; 428 429def FeatureAshrPkInsts : SubtargetFeature<"ashr-pk-insts", 430 "HasAshrPkInsts", 431 "true", 432 "Has Arithmetic Shift Pack instructions" 433>; 434 435def FeatureCvtPkF16F32Inst : SubtargetFeature<"cvt-pk-f16-f32-inst", 436 "HasCvtPkF16F32Inst", 437 "true", 438 "Has cvt_pk_f16_f32 instruction" 439>; 440 441def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts", 442 "GFX950Insts", 443 "true", 444 "Additional instructions for GFX950+", 445 [FeaturePermlane16Swap, 446 FeaturePermlane32Swap, 447 FeatureAshrPkInsts, 448 FeatureFP8ConversionScaleInsts, 449 FeatureBF8ConversionScaleInsts, 450 FeatureFP4ConversionScaleInsts, 451 FeatureFP6BF6ConversionScaleInsts, 452 FeatureF16BF16ToFP6BF6ConversionScaleInsts, 453 FeatureF32ToF16BF16ConversionSRInsts, 454 FeatureCvtPkF16F32Inst, 455 FeatureMinimum3Maximum3F32, 456 FeatureMinimum3Maximum3PKF16, 457 ] 458>; 459 460def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts", 461 "GFX10Insts", 462 "true", 463 "Additional instructions for GFX10+" 464>; 465 466def FeatureGFX11Insts : SubtargetFeature<"gfx11-insts", 467 "GFX11Insts", 468 "true", 469 "Additional instructions for GFX11+" 470>; 471 472def FeatureGFX12Insts : SubtargetFeature<"gfx12-insts", 473 "GFX12Insts", 474 "true", 475 "Additional instructions for GFX12+" 476>; 477 478def FeatureGFX10_3Insts : SubtargetFeature<"gfx10-3-insts", 479 "GFX10_3Insts", 480 "true", 481 "Additional instructions for GFX10.3" 482>; 483 484def FeatureGFX7GFX8GFX9Insts : SubtargetFeature<"gfx7-gfx8-gfx9-insts", 485 "GFX7GFX8GFX9Insts", 486 "true", 487 "Instructions shared in GFX7, GFX8, GFX9" 488>; 489 490def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime", 491 "HasSMemRealTime", 492 "true", 493 "Has s_memrealtime instruction" 494>; 495 496def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm", 497 "HasInv2PiInlineImm", 498 "true", 499 "Has 1 / (2 * pi) as inline immediate" 500>; 501 502def Feature16BitInsts : SubtargetFeature<"16-bit-insts", 503 "Has16BitInsts", 504 "true", 505 "Has i16/f16 instructions" 506>; 507 508def FeatureTrue16BitInsts : SubtargetFeature<"true16", 509 "HasTrue16BitInsts", 510 "true", 511 "True 16-bit operand instructions" 512>; 513 514def FeatureRealTrue16Insts : SubtargetFeature<"real-true16", 515 "EnableRealTrue16Insts", 516 "true", 517 "Use true 16-bit registers" 518>; 519 520def FeatureBF16ConversionInsts : SubtargetFeature<"bf16-cvt-insts", 521 "HasBF16ConversionInsts", 522 "true", 523 "Has bf16 conversion instructions" 524>; 525 526def FeatureVOP3P : SubtargetFeature<"vop3p", 527 "HasVOP3PInsts", 528 "true", 529 "Has VOP3P packed instructions" 530>; 531 532def FeatureMovrel : SubtargetFeature<"movrel", 533 "HasMovrel", 534 "true", 535 "Has v_movrel*_b32 instructions" 536>; 537 538def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode", 539 "HasVGPRIndexMode", 540 "true", 541 "Has VGPR mode register indexing" 542>; 543 544def FeatureScalarDwordx3Loads : SubtargetFeature<"scalar-dwordx3-loads", 545 "HasScalarDwordx3Loads", 546 "true", 547 "Has 96-bit scalar load instructions" 548>; 549 550def FeatureScalarStores : SubtargetFeature<"scalar-stores", 551 "HasScalarStores", 552 "true", 553 "Has store scalar memory instructions" 554>; 555 556def FeatureScalarAtomics : SubtargetFeature<"scalar-atomics", 557 "HasScalarAtomics", 558 "true", 559 "Has atomic scalar memory instructions" 560>; 561 562def FeatureSDWA : SubtargetFeature<"sdwa", 563 "HasSDWA", 564 "true", 565 "Support SDWA (Sub-DWORD Addressing) extension" 566>; 567 568def FeatureSDWAOmod : SubtargetFeature<"sdwa-omod", 569 "HasSDWAOmod", 570 "true", 571 "Support OMod with SDWA (Sub-DWORD Addressing) extension" 572>; 573 574def FeatureSDWAScalar : SubtargetFeature<"sdwa-scalar", 575 "HasSDWAScalar", 576 "true", 577 "Support scalar register with SDWA (Sub-DWORD Addressing) extension" 578>; 579 580def FeatureSDWASdst : SubtargetFeature<"sdwa-sdst", 581 "HasSDWASdst", 582 "true", 583 "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension" 584>; 585 586def FeatureSDWAMac : SubtargetFeature<"sdwa-mav", 587 "HasSDWAMac", 588 "true", 589 "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension" 590>; 591 592def FeatureSDWAOutModsVOPC : SubtargetFeature<"sdwa-out-mods-vopc", 593 "HasSDWAOutModsVOPC", 594 "true", 595 "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension" 596>; 597 598def FeatureDPP : SubtargetFeature<"dpp", 599 "HasDPP", 600 "true", 601 "Support DPP (Data Parallel Primitives) extension" 602>; 603 604// DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes. 605def FeatureDPP8 : SubtargetFeature<"dpp8", 606 "HasDPP8", 607 "true", 608 "Support DPP8 (Data Parallel Primitives) extension" 609>; 610 611def FeatureDPALU_DPP : SubtargetFeature<"dpp-64bit", 612 "HasDPALU_DPP", 613 "true", 614 "Support DPP (Data Parallel Primitives) extension in DP ALU" 615>; 616 617def FeatureDPPSrc1SGPR : SubtargetFeature<"dpp-src1-sgpr", 618 "HasDPPSrc1SGPR", 619 "true", 620 "Support SGPR for Src1 of DPP instructions" 621>; 622 623def FeaturePackedFP32Ops : SubtargetFeature<"packed-fp32-ops", 624 "HasPackedFP32Ops", 625 "true", 626 "Support packed fp32 instructions" 627>; 628 629def FeatureR128A16 : SubtargetFeature<"r128-a16", 630 "HasR128A16", 631 "true", 632 "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128" 633>; 634 635def FeatureA16 : SubtargetFeature<"a16", 636 "HasA16", 637 "true", 638 "Support A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands" 639>; 640 641def FeatureG16 : SubtargetFeature<"g16", 642 "HasG16", 643 "true", 644 "Support G16 for 16-bit gradient image operands" 645>; 646 647def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding", 648 "HasNSAEncoding", 649 "true", 650 "Support NSA encoding for image instructions" 651>; 652 653def FeaturePartialNSAEncoding : SubtargetFeature<"partial-nsa-encoding", 654 "HasPartialNSAEncoding", 655 "true", 656 "Support partial NSA encoding for image instructions" 657>; 658 659def FeatureImageInsts : SubtargetFeature<"image-insts", 660 "HasImageInsts", 661 "true", 662 "Support image instructions" 663>; 664 665def FeatureExtendedImageInsts : SubtargetFeature<"extended-image-insts", 666 "HasExtendedImageInsts", 667 "true", 668 "Support mips != 0, lod != 0, gather4, and get_lod" 669>; 670 671def FeatureGFX10_AEncoding : SubtargetFeature<"gfx10_a-encoding", 672 "GFX10_AEncoding", 673 "true", 674 "Has BVH ray tracing instructions" 675>; 676 677def FeatureGFX10_BEncoding : SubtargetFeature<"gfx10_b-encoding", 678 "GFX10_BEncoding", 679 "true", 680 "Encoding format GFX10_B" 681>; 682 683def FeatureIntClamp : SubtargetFeature<"int-clamp-insts", 684 "HasIntClamp", 685 "true", 686 "Support clamp for integer destination" 687>; 688 689def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem", 690 "HasUnpackedD16VMem", 691 "true", 692 "Has unpacked d16 vmem instructions" 693>; 694 695def FeatureDLInsts : SubtargetFeature<"dl-insts", 696 "HasDLInsts", 697 "true", 698 "Has v_fmac_f32 and v_xnor_b32 instructions" 699>; 700 701def FeatureFmacF64Inst : SubtargetFeature<"fmacf64-inst", 702 "HasFmacF64Inst", 703 "true", 704 "Has v_fmac_f64 instruction" 705>; 706 707def FeatureDot1Insts : SubtargetFeature<"dot1-insts", 708 "HasDot1Insts", 709 "true", 710 "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions" 711>; 712 713def FeatureDot2Insts : SubtargetFeature<"dot2-insts", 714 "HasDot2Insts", 715 "true", 716 "Has v_dot2_i32_i16, v_dot2_u32_u16 instructions" 717>; 718 719def FeatureDot3Insts : SubtargetFeature<"dot3-insts", 720 "HasDot3Insts", 721 "true", 722 "Has v_dot8c_i32_i4 instruction" 723>; 724 725def FeatureDot4Insts : SubtargetFeature<"dot4-insts", 726 "HasDot4Insts", 727 "true", 728 "Has v_dot2c_i32_i16 instruction" 729>; 730 731def FeatureDot5Insts : SubtargetFeature<"dot5-insts", 732 "HasDot5Insts", 733 "true", 734 "Has v_dot2c_f32_f16 instruction" 735>; 736 737def FeatureDot6Insts : SubtargetFeature<"dot6-insts", 738 "HasDot6Insts", 739 "true", 740 "Has v_dot4c_i32_i8 instruction" 741>; 742 743def FeatureDot7Insts : SubtargetFeature<"dot7-insts", 744 "HasDot7Insts", 745 "true", 746 "Has v_dot4_u32_u8, v_dot8_u32_u4 instructions" 747>; 748 749def FeatureDot8Insts : SubtargetFeature<"dot8-insts", 750 "HasDot8Insts", 751 "true", 752 "Has v_dot4_i32_iu8, v_dot8_i32_iu4 instructions" 753>; 754 755def FeatureDot9Insts : SubtargetFeature<"dot9-insts", 756 "HasDot9Insts", 757 "true", 758 "Has v_dot2_f16_f16, v_dot2_bf16_bf16 instructions" 759>; 760 761def FeatureDot10Insts : SubtargetFeature<"dot10-insts", 762 "HasDot10Insts", 763 "true", 764 "Has v_dot2_f32_f16 instruction" 765>; 766 767def FeatureDot11Insts : SubtargetFeature<"dot11-insts", 768 "HasDot11Insts", 769 "true", 770 "Has v_dot4_f32_fp8_fp8, v_dot4_f32_fp8_bf8, v_dot4_f32_bf8_fp8, v_dot4_f32_bf8_bf8 instructions" 771>; 772 773def FeatureDot12Insts : SubtargetFeature<"dot12-insts", 774 "HasDot12Insts", 775 "true", 776 "Has v_dot2_f32_bf16 instructions" 777>; 778 779def FeatureDot13Insts : SubtargetFeature<"dot13-insts", 780 "HasDot13Insts", 781 "true", 782 "Has v_dot2c_f32_bf16 instructions" 783>; 784 785 786def FeatureMAIInsts : SubtargetFeature<"mai-insts", 787 "HasMAIInsts", 788 "true", 789 "Has mAI instructions" 790>; 791 792def FeatureFP8Insts : SubtargetFeature<"fp8-insts", 793 "HasFP8Insts", 794 "true", 795 "Has fp8 and bf8 instructions" 796>; 797 798def FeatureFP8ConversionInsts : SubtargetFeature<"fp8-conversion-insts", 799 "HasFP8ConversionInsts", 800 "true", 801 "Has fp8 and bf8 conversion instructions" 802>; 803 804def FeatureCvtFP8VOP1Bug : SubtargetFeature<"cvt-fp8-vop1-bug", 805 "HasCvtFP8Vop1Bug", 806 "true", 807 "FP8/BF8 VOP1 form of conversion to F32 is unreliable", 808 [FeatureFP8ConversionInsts] 809>; 810 811def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst", 812 "HasPkFmacF16Inst", 813 "true", 814 "Has v_pk_fmac_f16 instruction" 815>; 816 817def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts", 818 "HasAtomicDsPkAdd16Insts", 819 "true", 820 "Has ds_pk_add_bf16, ds_pk_add_f16, ds_pk_add_rtn_bf16, " 821 "ds_pk_add_rtn_f16 instructions" 822>; 823 824def FeatureAtomicFlatPkAdd16Insts : SubtargetFeature<"atomic-flat-pk-add-16-insts", 825 "HasAtomicFlatPkAdd16Insts", 826 "true", 827 "Has flat_atomic_pk_add_f16 and flat_atomic_pk_add_bf16 instructions" 828>; 829 830def FeatureAtomicFaddRtnInsts : SubtargetFeature<"atomic-fadd-rtn-insts", 831 "HasAtomicFaddRtnInsts", 832 "true", 833 "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that " 834 "return original value", 835 [FeatureFlatGlobalInsts] 836>; 837 838def FeatureAtomicFMinFMaxF32GlobalInsts : SubtargetFeature<"atomic-fmin-fmax-global-f32", 839 "HasAtomicFMinFMaxF32GlobalInsts", 840 "true", 841 "Has global/buffer instructions for atomicrmw fmin/fmax for float" 842>; 843 844def FeatureAtomicFMinFMaxF64GlobalInsts : SubtargetFeature<"atomic-fmin-fmax-global-f64", 845 "HasAtomicFMinFMaxF64GlobalInsts", 846 "true", 847 "Has global/buffer instructions for atomicrmw fmin/fmax for float" 848>; 849 850def FeatureAtomicFMinFMaxF32FlatInsts : SubtargetFeature<"atomic-fmin-fmax-flat-f32", 851 "HasAtomicFMinFMaxF32FlatInsts", 852 "true", 853 "Has flat memory instructions for atomicrmw fmin/fmax for float" 854>; 855 856def FeatureAtomicFMinFMaxF64FlatInsts : SubtargetFeature<"atomic-fmin-fmax-flat-f64", 857 "HasAtomicFMinFMaxF64FlatInsts", 858 "true", 859 "Has flat memory instructions for atomicrmw fmin/fmax for double" 860>; 861 862def FeatureAtomicFaddNoRtnInsts : SubtargetFeature<"atomic-fadd-no-rtn-insts", 863 "HasAtomicFaddNoRtnInsts", 864 "true", 865 "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that " 866 "don't return original value", 867 [FeatureFlatGlobalInsts] 868>; 869 870def FeatureAtomicBufferGlobalPkAddF16NoRtnInsts 871 : SubtargetFeature<"atomic-buffer-global-pk-add-f16-no-rtn-insts", 872 "HasAtomicBufferGlobalPkAddF16NoRtnInsts", 873 "true", 874 "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that " 875 "don't return original value", 876 [FeatureFlatGlobalInsts] 877>; 878 879def FeatureAtomicBufferGlobalPkAddF16Insts : SubtargetFeature<"atomic-buffer-global-pk-add-f16-insts", 880 "HasAtomicBufferGlobalPkAddF16Insts", 881 "true", 882 "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that " 883 "can return original value", 884 [FeatureFlatGlobalInsts] 885>; 886 887def FeatureAtomicGlobalPkAddBF16Inst : SubtargetFeature<"atomic-global-pk-add-bf16-inst", 888 "HasAtomicGlobalPkAddBF16Inst", 889 "true", 890 "Has global_atomic_pk_add_bf16 instruction", 891 [FeatureFlatGlobalInsts] 892>; 893 894def FeatureAtomicBufferPkAddBF16Inst : SubtargetFeature<"atomic-buffer-pk-add-bf16-inst", 895 "HasAtomicBufferPkAddBF16Inst", 896 "true", 897 "Has buffer_atomic_pk_add_bf16 instruction" 898>; 899 900def FeatureAtomicCSubNoRtnInsts : SubtargetFeature<"atomic-csub-no-rtn-insts", 901 "HasAtomicCSubNoRtnInsts", 902 "true", 903 "Has buffer_atomic_csub and global_atomic_csub instructions that don't " 904 "return original value" 905>; 906 907def FeatureFlatAtomicFaddF32Inst 908 : SubtargetFeature<"flat-atomic-fadd-f32-inst", 909 "HasFlatAtomicFaddF32Inst", 910 "true", 911 "Has flat_atomic_add_f32 instruction" 912>; 913 914def FeatureFlatBufferGlobalAtomicFaddF64Inst 915 : SubtargetFeature<"flat-buffer-global-fadd-f64-inst", 916 "HasFlatBufferGlobalAtomicFaddF64Inst", 917 "true", 918 "Has flat, buffer, and global instructions for f64 atomic fadd" 919>; 920 921def FeatureMemoryAtomicFAddF32DenormalSupport 922 : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support", 923 "HasMemoryAtomicFaddF32DenormalSupport", 924 "true", 925 "global/flat/buffer atomic fadd for float supports denormal handling" 926>; 927 928def FeatureAgentScopeFineGrainedRemoteMemoryAtomics 929 : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics", 930 "HasAgentScopeFineGrainedRemoteMemoryAtomics", 931 "true", 932 "Agent (device) scoped atomic operations, excluding those directly " 933 "supported by PCIe (i.e. integer atomic add, exchange, and " 934 "compare-and-swap), are functional for allocations in host or peer " 935 "device memory." 936>; 937 938def FeatureDefaultComponentZero : SubtargetFeature<"default-component-zero", 939 "HasDefaultComponentZero", 940 "true", 941 "BUFFER/IMAGE store instructions set unspecified components to zero (before GFX12)" 942>; 943 944def FeatureDefaultComponentBroadcast : SubtargetFeature<"default-component-broadcast", 945 "HasDefaultComponentBroadcast", 946 "true", 947 "BUFFER/IMAGE store instructions set unspecified components to x component (GFX12)" 948>; 949 950def FeatureSupportsSRAMECC : SubtargetFeature<"sramecc-support", 951 "SupportsSRAMECC", 952 "true", 953 "Hardware supports SRAMECC" 954>; 955 956def FeatureSRAMECC : SubtargetFeature<"sramecc", 957 "EnableSRAMECC", 958 "true", 959 "Enable SRAMECC" 960>; 961 962def FeatureNoSdstCMPX : SubtargetFeature<"no-sdst-cmpx", 963 "HasNoSdstCMPX", 964 "true", 965 "V_CMPX does not write VCC/SGPR in addition to EXEC" 966>; 967 968def FeatureVscnt : SubtargetFeature<"vscnt", 969 "HasVscnt", 970 "true", 971 "Has separate store vscnt counter" 972>; 973 974def FeatureGetWaveIdInst : SubtargetFeature<"get-wave-id-inst", 975 "HasGetWaveIdInst", 976 "true", 977 "Has s_get_waveid_in_workgroup instruction" 978>; 979 980def FeatureSMemTimeInst : SubtargetFeature<"s-memtime-inst", 981 "HasSMemTimeInst", 982 "true", 983 "Has s_memtime instruction" 984>; 985 986def FeatureShaderCyclesRegister : SubtargetFeature<"shader-cycles-register", 987 "HasShaderCyclesRegister", 988 "true", 989 "Has SHADER_CYCLES hardware register" 990>; 991 992def FeatureShaderCyclesHiLoRegisters : SubtargetFeature<"shader-cycles-hi-lo-registers", 993 "HasShaderCyclesHiLoRegisters", 994 "true", 995 "Has SHADER_CYCLES_HI/LO hardware registers" 996>; 997 998def FeatureMadMacF32Insts : SubtargetFeature<"mad-mac-f32-insts", 999 "HasMadMacF32Insts", 1000 "true", 1001 "Has v_mad_f32/v_mac_f32/v_madak_f32/v_madmk_f32 instructions" 1002>; 1003 1004def FeatureDsSrc2Insts : SubtargetFeature<"ds-src2-insts", 1005 "HasDsSrc2Insts", 1006 "true", 1007 "Has ds_*_src2 instructions" 1008>; 1009 1010def FeatureVOP3Literal : SubtargetFeature<"vop3-literal", 1011 "HasVOP3Literal", 1012 "true", 1013 "Can use one literal in VOP3" 1014>; 1015 1016def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard", 1017 "HasNoDataDepHazard", 1018 "true", 1019 "Does not need SW waitstates" 1020>; 1021 1022// Allocate 1536 VGPRs for wave32 and 768 VGPRs for wave64 1023// with allocation granularity 24 for wave32 and 12 for wave64 1024def Feature1_5xVGPRs : SubtargetFeature<"allocate1_5xvgprs", 1025 "Has1_5xVGPRs", 1026 "true", 1027 "Has 50% more physical VGPRs and 50% larger allocation granule" 1028>; 1029 1030 1031def FeatureVOPD : SubtargetFeature<"vopd", 1032 "HasVOPDInsts", 1033 "true", 1034 "Has VOPD dual issue wave32 instructions" 1035>; 1036 1037def FeatureVALUTransUseHazard : SubtargetFeature<"valu-trans-use-hazard", 1038 "HasVALUTransUseHazard", 1039 "true", 1040 "Hazard when TRANS instructions are closely followed by a use of the result" 1041>; 1042 1043def FeatureForceStoreSC0SC1 : SubtargetFeature<"force-store-sc0-sc1", 1044 "HasForceStoreSC0SC1", 1045 "true", 1046 "Has SC0 and SC1 on stores" 1047>; 1048 1049def FeatureSALUFloatInsts : SubtargetFeature<"salu-float", 1050 "HasSALUFloatInsts", 1051 "true", 1052 "Has SALU floating point instructions" 1053>; 1054 1055def FeaturePseudoScalarTrans : SubtargetFeature<"pseudo-scalar-trans", 1056 "HasPseudoScalarTrans", 1057 "true", 1058 "Has Pseudo Scalar Transcendental instructions" 1059>; 1060 1061def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset", 1062 "HasRestrictedSOffset", 1063 "true", 1064 "Has restricted SOffset (immediate not supported)." 1065>; 1066 1067def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority", 1068 "HasRequiredExportPriority", 1069 "true", 1070 "Export priority must be explicitly manipulated on GFX11.5" 1071>; 1072 1073def FeatureVmemWriteVgprInOrder : SubtargetFeature<"vmem-write-vgpr-in-order", 1074 "HasVmemWriteVgprInOrder", 1075 "true", 1076 "VMEM instructions of the same type write VGPR results in order" 1077>; 1078 1079def FeatureBitOp3Insts : SubtargetFeature<"bitop3-insts", 1080 "HasBitOp3Insts", 1081 "true", 1082 "Has v_bitop3_b32/v_bitop3_b16 instructions" 1083>; 1084 1085def FeaturePrngInst : SubtargetFeature<"prng-inst", 1086 "HasPrngInst", 1087 "true", 1088 "Has v_prng_b32 instruction" 1089>; 1090 1091//===------------------------------------------------------------===// 1092// Subtarget Features (options and debugging) 1093//===------------------------------------------------------------===// 1094 1095class FeatureMaxPrivateElementSize<int size> : SubtargetFeature< 1096 "max-private-element-size-"#size, 1097 "MaxPrivateElementSize", 1098 !cast<string>(size), 1099 "Maximum private access size may be "#size 1100>; 1101 1102def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>; 1103def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>; 1104def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>; 1105 1106def FeatureDumpCode : SubtargetFeature <"DumpCode", 1107 "DumpCode", 1108 "true", 1109 "Dump MachineInstrs in the CodeEmitter" 1110>; 1111 1112def FeatureDumpCodeLower : SubtargetFeature <"dumpcode", 1113 "DumpCode", 1114 "true", 1115 "Dump MachineInstrs in the CodeEmitter" 1116>; 1117 1118// XXX - This should probably be removed once enabled by default 1119def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt", 1120 "EnableLoadStoreOpt", 1121 "true", 1122 "Enable SI load/store optimizer pass" 1123>; 1124 1125// Performance debugging feature. Allow using DS instruction immediate 1126// offsets even if the base pointer can't be proven to be base. On SI, 1127// base pointer values that won't give the same result as a 16-bit add 1128// are not safe to fold, but this will override the conservative test 1129// for the base pointer. 1130def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature < 1131 "unsafe-ds-offset-folding", 1132 "EnableUnsafeDSOffsetFolding", 1133 "true", 1134 "Force using DS instruction immediate offsets on SI" 1135>; 1136 1137def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler", 1138 "EnableSIScheduler", 1139 "true", 1140 "Enable SI Machine Scheduler" 1141>; 1142 1143def FeatureEnableDS128 : SubtargetFeature<"enable-ds128", 1144 "EnableDS128", 1145 "true", 1146 "Use ds_{read|write}_b128" 1147>; 1148 1149// Sparse texture support requires that all result registers are zeroed when 1150// PRTStrictNull is set to true. This feature is turned on for all architectures 1151// but is enabled as a feature in case there are situations where PRTStrictNull 1152// is disabled by the driver. 1153def FeatureEnablePRTStrictNull : SubtargetFeature<"enable-prt-strict-null", 1154 "EnablePRTStrictNull", 1155 "true", 1156 "Enable zeroing of result registers for sparse texture fetches" 1157>; 1158 1159// Unless +-flat-for-global is specified, turn on FlatForGlobal for 1160// all OS-es on VI and newer hardware to avoid assertion failures due 1161// to missing ADDR64 variants of MUBUF instructions. 1162// FIXME: moveToVALU should be able to handle converting addr64 MUBUF 1163// instructions. 1164 1165def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", 1166 "FlatForGlobal", 1167 "true", 1168 "Force to generate flat instruction for global" 1169>; 1170 1171def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature < 1172 "auto-waitcnt-before-barrier", 1173 "AutoWaitcntBeforeBarrier", 1174 "true", 1175 "Hardware automatically inserts waitcnt before barrier" 1176>; 1177 1178def FeatureBackOffBarrier : SubtargetFeature <"back-off-barrier", 1179 "BackOffBarrier", 1180 "true", 1181 "Hardware supports backing off s_barrier if an exception occurs" 1182>; 1183 1184def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range", 1185 "HasTrigReducedRange", 1186 "true", 1187 "Requires use of fract on arguments to trig instructions" 1188>; 1189 1190def FeatureKernargPreload : SubtargetFeature <"kernarg-preload", 1191 "KernargPreload", 1192 "true", 1193 "Hardware supports preloading of kernel arguments in user SGPRs." 1194>; 1195 1196// Alignment enforcement is controlled by a configuration register: 1197// SH_MEM_CONFIG.alignment_mode 1198def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode", 1199 "UnalignedAccessMode", 1200 "true", 1201 "Enable unaligned global, local and region loads and stores if the hardware" 1202 " supports it" 1203>; 1204 1205def FeaturePackedTID : SubtargetFeature<"packed-tid", 1206 "HasPackedTID", 1207 "true", 1208 "Workitem IDs are packed into v0 at kernel launch" 1209>; 1210 1211def FeatureArchitectedFlatScratch : SubtargetFeature<"architected-flat-scratch", 1212 "HasArchitectedFlatScratch", 1213 "true", 1214 "Flat Scratch register is a readonly SPI initialized architected register" 1215>; 1216 1217def FeatureArchitectedSGPRs : SubtargetFeature<"architected-sgprs", 1218 "HasArchitectedSGPRs", 1219 "true", 1220 "Enable the architected SGPRs" 1221>; 1222 1223def FeatureGDS : SubtargetFeature<"gds", 1224 "HasGDS", 1225 "true", 1226 "Has Global Data Share" 1227>; 1228 1229def FeatureGWS : SubtargetFeature<"gws", 1230 "HasGWS", 1231 "true", 1232 "Has Global Wave Sync" 1233>; 1234 1235def FeatureRequiresCOV6 : SubtargetFeature<"requires-cov6", 1236 "RequiresCOV6", 1237 "true", 1238 "Target Requires Code Object V6" 1239>; 1240 1241def FeatureXF32Insts : SubtargetFeature<"xf32-insts", 1242 "HasXF32Insts", 1243 "true", 1244 "Has instructions that support xf32 format, such as " 1245 "v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32" 1246 >; 1247 1248// Dummy feature used to disable assembler instructions. 1249def FeatureDisable : SubtargetFeature<"", 1250 "FeatureDisable","true", 1251 "Dummy feature to disable assembler instructions" 1252>; 1253 1254//===----------------------------------------------------------------------===// 1255 1256class GCNSubtargetFeatureGeneration <string Value, 1257 string FeatureName, 1258 list<SubtargetFeature> Implies> : 1259 SubtargetFeatureGeneration <Value, FeatureName, "GCNSubtarget", Implies>; 1260 1261def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", 1262 "southern-islands", 1263 [FeatureFP64, FeatureAddressableLocalMemorySize32768, FeatureMIMG_R128, 1264 FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts, 1265 FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel, 1266 FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts, 1267 FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, 1268 FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, 1269 FeatureVmemWriteVgprInOrder 1270 ] 1271>; 1272 1273def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", 1274 "sea-islands", 1275 [FeatureFP64, FeatureAddressableLocalMemorySize65536, FeatureMIMG_R128, 1276 FeatureWavefrontSize64, FeatureFlatAddressSpace, 1277 FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange, 1278 FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, 1279 FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess, 1280 FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero, 1281 FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, 1282 FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, 1283 FeatureVmemWriteVgprInOrder 1284 ] 1285>; 1286 1287def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", 1288 "volcanic-islands", 1289 [FeatureFP64, FeatureAddressableLocalMemorySize65536, FeatureMIMG_R128, 1290 FeatureWavefrontSize64, FeatureFlatAddressSpace, 1291 FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, 1292 FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel, 1293 FeatureScalarStores, FeatureInv2PiInlineImm, 1294 FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP, 1295 FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts, 1296 FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, 1297 FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32, 1298 FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS, 1299 FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder 1300 ] 1301>; 1302 1303def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", 1304 "gfx9", 1305 [FeatureFP64, 1306 FeatureWavefrontSize64, FeatureFlatAddressSpace, 1307 FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, 1308 FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm, 1309 FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode, 1310 FeatureFastFMAF32, FeatureDPP, FeatureIntClamp, 1311 FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst, 1312 FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, 1313 FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, 1314 FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16, 1315 FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK, 1316 FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess, 1317 FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS, 1318 FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder 1319 ] 1320>; 1321 1322def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", 1323 "gfx10", 1324 [FeatureFP64, FeatureAddressableLocalMemorySize65536, FeatureMIMG_R128, 1325 FeatureFlatAddressSpace, 1326 FeatureCIInsts, Feature16BitInsts, 1327 FeatureSMemRealTime, FeatureInv2PiInlineImm, 1328 FeatureApertureRegs, FeatureGFX9Insts, FeatureGFX10Insts, FeatureVOP3P, 1329 FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp, 1330 FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst, 1331 FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, 1332 FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts, 1333 FeatureNoSdstCMPX, FeatureVscnt, 1334 FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts, 1335 FeatureNoDataDepHazard, FeaturePkFmacF16Inst, 1336 FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16, 1337 FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess, 1338 FeatureUnalignedDSAccess, FeatureImageInsts, FeatureGDS, FeatureGWS, 1339 FeatureDefaultComponentZero, FeatureMaxHardClauseLength63, 1340 FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts, 1341 FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts, 1342 FeatureVmemWriteVgprInOrder 1343 ] 1344>; 1345 1346def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11", 1347 "gfx11", 1348 [FeatureFP64, FeatureAddressableLocalMemorySize65536, FeatureMIMG_R128, 1349 FeatureFlatAddressSpace, Feature16BitInsts, 1350 FeatureInv2PiInlineImm, FeatureApertureRegs, 1351 FeatureCIInsts, FeatureGFX8Insts, FeatureGFX9Insts, FeatureGFX10Insts, 1352 FeatureGFX10_AEncoding, FeatureGFX10_BEncoding, FeatureGFX10_3Insts, 1353 FeatureGFX11Insts, FeatureVOP3P, FeatureVOPD, FeatureTrue16BitInsts, 1354 FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp, 1355 FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, 1356 FeatureAddNoCarryInsts, FeatureFmaMixInsts, 1357 FeatureNoSdstCMPX, FeatureVscnt, 1358 FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts, 1359 FeatureNoDataDepHazard, FeaturePkFmacF16Inst, 1360 FeatureA16, FeatureFastDenormalF32, FeatureG16, 1361 FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess, 1362 FeatureUnalignedDSAccess, FeatureGDS, FeatureGWS, 1363 FeatureDefaultComponentZero, FeatureMaxHardClauseLength32, 1364 FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, 1365 FeatureVmemWriteVgprInOrder 1366 ] 1367>; 1368 1369def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12", 1370 "gfx12", 1371 [FeatureFP64, FeatureAddressableLocalMemorySize65536, FeatureMIMG_R128, 1372 FeatureFlatAddressSpace, Feature16BitInsts, 1373 FeatureInv2PiInlineImm, FeatureApertureRegs, 1374 FeatureCIInsts, FeatureGFX8Insts, FeatureGFX9Insts, FeatureGFX10Insts, 1375 FeatureGFX10_AEncoding, FeatureGFX10_BEncoding, FeatureGFX10_3Insts, 1376 FeatureGFX11Insts, FeatureGFX12Insts, FeatureVOP3P, FeatureVOPD, 1377 FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp, 1378 FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, 1379 FeatureAddNoCarryInsts, FeatureFmaMixInsts, 1380 FeatureNoSdstCMPX, FeatureVscnt, 1381 FeatureVOP3Literal, FeatureDPP8, 1382 FeatureNoDataDepHazard, FeaturePkFmacF16Inst, 1383 FeatureA16, FeatureFastDenormalF32, FeatureG16, 1384 FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess, 1385 FeatureUnalignedDSAccess, FeatureTrue16BitInsts, 1386 FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32, 1387 FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts, 1388 FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16, 1389 FeatureAgentScopeFineGrainedRemoteMemoryAtomics 1390 ] 1391>; 1392 1393//===----------------------------------------------------------------------===// 1394 1395class FeatureSet<list<SubtargetFeature> Features_> { 1396 list<SubtargetFeature> Features = Features_; 1397} 1398 1399def FeatureISAVersion6_0_0 : FeatureSet<[FeatureSouthernIslands, 1400 FeatureFastFMAF32, 1401 HalfRate64Ops, 1402 FeatureLDSBankCount32]>; 1403 1404def FeatureISAVersion6_0_1 : FeatureSet< 1405 [FeatureSouthernIslands, 1406 FeatureLDSBankCount32]>; 1407 1408def FeatureISAVersion6_0_2 : FeatureSet< 1409 [FeatureSouthernIslands, 1410 FeatureLDSBankCount32]>; 1411 1412def FeatureISAVersion7_0_0 : FeatureSet< 1413 [FeatureSeaIslands, 1414 FeatureLDSBankCount32]>; 1415 1416def FeatureISAVersion7_0_1 : FeatureSet< 1417 [FeatureSeaIslands, 1418 HalfRate64Ops, 1419 FeatureLDSBankCount32, 1420 FeatureFastFMAF32]>; 1421 1422def FeatureISAVersion7_0_2 : FeatureSet< 1423 [FeatureSeaIslands, 1424 FeatureLDSBankCount16, 1425 FeatureFastFMAF32]>; 1426 1427def FeatureISAVersion7_0_3 : FeatureSet< 1428 [FeatureSeaIslands, 1429 FeatureLDSBankCount16]>; 1430 1431def FeatureISAVersion7_0_4 : FeatureSet< 1432 [FeatureSeaIslands, 1433 FeatureLDSBankCount32]>; 1434 1435def FeatureISAVersion7_0_5 : FeatureSet< 1436 [FeatureSeaIslands, 1437 FeatureLDSBankCount16]>; 1438 1439def FeatureISAVersion8_0_Common : FeatureSet< 1440 [FeatureVolcanicIslands, 1441 FeatureLDSBankCount32, 1442 FeatureUnpackedD16VMem]>; 1443 1444def FeatureISAVersion8_0_1 : FeatureSet< 1445 !listconcat(FeatureISAVersion8_0_Common.Features, 1446 [FeatureFastFMAF32, 1447 HalfRate64Ops, 1448 FeatureSupportsXNACK])>; 1449 1450def FeatureISAVersion8_0_2 : FeatureSet< 1451 !listconcat(FeatureISAVersion8_0_Common.Features, 1452 [FeatureSGPRInitBug])>; 1453 1454def FeatureISAVersion8_0_3 : FeatureSet< 1455 !listconcat(FeatureISAVersion8_0_Common.Features, 1456 [])>; 1457 1458def FeatureISAVersion8_0_5 : FeatureSet< 1459 !listconcat(FeatureISAVersion8_0_Common.Features, 1460 [FeatureSGPRInitBug])>; 1461 1462def FeatureISAVersion8_1_0 : FeatureSet< 1463 [FeatureVolcanicIslands, 1464 FeatureLDSBankCount16, 1465 FeatureSupportsXNACK, 1466 FeatureImageStoreD16Bug, 1467 FeatureImageGather4D16Bug]>; 1468 1469def FeatureISAVersion9_0_Common : FeatureSet< 1470 [FeatureGFX9, 1471 FeatureAddressableLocalMemorySize65536, 1472 FeatureLDSBankCount32, 1473 FeatureImageInsts, 1474 FeatureMadMacF32Insts]>; 1475 1476def FeatureISAVersion9_0_Consumer_Common : FeatureSet< 1477 !listconcat(FeatureISAVersion9_0_Common.Features, 1478 [FeatureImageGather4D16Bug, 1479 FeatureDsSrc2Insts, 1480 FeatureExtendedImageInsts, 1481 FeatureGDS])>; 1482 1483def FeatureISAVersion9_Generic : FeatureSet< 1484 !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, 1485 [FeatureRequiresCOV6])>; 1486 1487def FeatureISAVersion9_0_MI_Common : FeatureSet< 1488 !listconcat(FeatureISAVersion9_0_Common.Features, 1489 [FeatureAddressableLocalMemorySize65536, 1490 FeatureFmaMixInsts, 1491 FeatureDLInsts, 1492 FeatureDot1Insts, 1493 FeatureDot2Insts, 1494 FeatureDot3Insts, 1495 FeatureDot4Insts, 1496 FeatureDot5Insts, 1497 FeatureDot6Insts, 1498 FeatureDot7Insts, 1499 FeatureDot10Insts, 1500 FeatureMAIInsts, 1501 FeaturePkFmacF16Inst, 1502 FeatureAtomicFaddNoRtnInsts, 1503 FeatureSupportsSRAMECC])>; 1504 1505def FeatureISAVersion9_0_0 : FeatureSet< 1506 !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, 1507 [FeatureMadMixInsts])>; 1508 1509def FeatureISAVersion9_0_2 : FeatureSet< 1510 !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, 1511 [FeatureMadMixInsts])>; 1512 1513def FeatureISAVersion9_0_4 : FeatureSet< 1514 !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, 1515 [FeatureFmaMixInsts])>; 1516 1517def FeatureISAVersion9_0_6 : FeatureSet< 1518 !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, 1519 [HalfRate64Ops, 1520 FeatureFmaMixInsts, 1521 FeatureDLInsts, 1522 FeatureDot1Insts, 1523 FeatureDot2Insts, 1524 FeatureDot7Insts, 1525 FeatureDot10Insts, 1526 FeatureSupportsSRAMECC])>; 1527 1528def FeatureISAVersion9_0_8 : FeatureSet< 1529 !listconcat(FeatureISAVersion9_0_MI_Common.Features, 1530 [FeatureGDS, 1531 HalfRate64Ops, 1532 FeatureDsSrc2Insts, 1533 FeatureExtendedImageInsts, 1534 FeatureAtomicBufferGlobalPkAddF16NoRtnInsts, 1535 FeatureMFMAInlineLiteralBug, 1536 FeatureImageGather4D16Bug])>; 1537 1538def FeatureISAVersion9_0_9 : FeatureSet< 1539 !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, 1540 [FeatureMadMixInsts, 1541 FeatureImageInsts])>; 1542 1543def FeatureISAVersion9_0_A : FeatureSet< 1544 !listconcat(FeatureISAVersion9_0_MI_Common.Features, 1545 [FeatureGFX90AInsts, 1546 FeatureFmacF64Inst, 1547 FeatureDPALU_DPP, 1548 FeaturePackedFP32Ops, 1549 FeatureAtomicFaddRtnInsts, 1550 FeatureAtomicBufferGlobalPkAddF16Insts, 1551 FeaturePackedTID, 1552 FullRate64Ops, 1553 FeatureBackOffBarrier, 1554 FeatureKernargPreload, 1555 FeatureAtomicFMinFMaxF64GlobalInsts, 1556 FeatureAtomicFMinFMaxF64FlatInsts, 1557 FeatureFlatBufferGlobalAtomicFaddF64Inst 1558 ])>; 1559 1560def FeatureISAVersion9_0_C : FeatureSet< 1561 !listconcat(FeatureISAVersion9_0_Consumer_Common.Features, 1562 [FeatureMadMixInsts])>; 1563 1564def FeatureISAVersion9_4_Common : FeatureSet< 1565 [FeatureGFX9, 1566 FeatureGFX90AInsts, 1567 FeatureGFX940Insts, 1568 FeatureFmaMixInsts, 1569 FeatureLDSBankCount32, 1570 FeatureDLInsts, 1571 FeatureFmacF64Inst, 1572 FeatureDot1Insts, 1573 FeatureDot2Insts, 1574 FeatureDot3Insts, 1575 FeatureDot4Insts, 1576 FeatureDot5Insts, 1577 FeatureDot6Insts, 1578 FeatureDot7Insts, 1579 FeatureDot10Insts, 1580 FeatureAtomicDsPkAdd16Insts, 1581 FeatureAtomicFlatPkAdd16Insts, 1582 FeatureDPALU_DPP, 1583 FeaturePackedFP32Ops, 1584 FeatureMAIInsts, 1585 FeaturePkFmacF16Inst, 1586 FeatureAtomicFaddRtnInsts, 1587 FeatureAtomicFaddNoRtnInsts, 1588 FeatureAtomicBufferGlobalPkAddF16Insts, 1589 FeatureAtomicGlobalPkAddBF16Inst, 1590 FeatureFlatAtomicFaddF32Inst, 1591 FeatureSupportsSRAMECC, 1592 FeaturePackedTID, 1593 FeatureArchitectedFlatScratch, 1594 FullRate64Ops, 1595 FeatureBackOffBarrier, 1596 FeatureKernargPreload, 1597 FeatureAtomicFMinFMaxF64GlobalInsts, 1598 FeatureAtomicFMinFMaxF64FlatInsts, 1599 FeatureAgentScopeFineGrainedRemoteMemoryAtomics, 1600 FeatureMemoryAtomicFAddF32DenormalSupport, 1601 FeatureFlatBufferGlobalAtomicFaddF64Inst 1602 ]>; 1603 1604def FeatureISAVersion9_5_Common : FeatureSet< 1605 !listconcat(FeatureISAVersion9_4_Common.Features, 1606 [FeatureAddressableLocalMemorySize163840, 1607 FeatureFP8Insts, 1608 FeatureFP8ConversionInsts, 1609 FeatureGFX950Insts, 1610 FeaturePrngInst, 1611 FeatureBF16ConversionInsts, 1612 FeatureBitOp3Insts, 1613 FeatureFP8ConversionScaleInsts, 1614 FeatureBF8ConversionScaleInsts, 1615 FeatureFP4ConversionScaleInsts, 1616 FeatureFP6BF6ConversionScaleInsts, 1617 FeatureDot12Insts, 1618 FeatureDot13Insts, 1619 FeatureAtomicBufferPkAddBF16Inst 1620 ])>; 1621 1622def FeatureISAVersion9_4_0 : FeatureSet< 1623 !listconcat(FeatureISAVersion9_4_Common.Features, 1624 [ 1625 FeatureAddressableLocalMemorySize65536, 1626 FeatureForceStoreSC0SC1, 1627 FeatureFP8Insts, 1628 FeatureFP8ConversionInsts, 1629 FeatureCvtFP8VOP1Bug, 1630 FeatureXF32Insts 1631 ])>; 1632 1633def FeatureISAVersion9_4_1 : FeatureSet< 1634 !listconcat(FeatureISAVersion9_4_Common.Features, 1635 [ 1636 FeatureAddressableLocalMemorySize65536, 1637 FeatureForceStoreSC0SC1, 1638 FeatureFP8Insts, 1639 FeatureFP8ConversionInsts, 1640 FeatureCvtFP8VOP1Bug, 1641 FeatureXF32Insts 1642 ])>; 1643 1644def FeatureISAVersion9_4_2 : FeatureSet< 1645 !listconcat(FeatureISAVersion9_4_Common.Features, 1646 [ 1647 FeatureAddressableLocalMemorySize65536, 1648 FeatureFP8Insts, 1649 FeatureFP8ConversionInsts, 1650 FeatureCvtFP8VOP1Bug, 1651 FeatureXF32Insts 1652 ])>; 1653 1654def FeatureISAVersion9_4_Generic : FeatureSet< 1655 !listconcat(FeatureISAVersion9_4_Common.Features, 1656 [FeatureAddressableLocalMemorySize65536, 1657 FeatureRequiresCOV6])>; 1658 1659def FeatureISAVersion9_5_0 : FeatureSet<FeatureISAVersion9_5_Common.Features>; 1660 1661def FeatureISAVersion10_Common : FeatureSet< 1662 [FeatureGFX10, 1663 FeatureLDSBankCount32, 1664 FeatureDLInsts, 1665 FeatureNSAEncoding, 1666 FeatureBackOffBarrier]>; 1667 1668def FeatureISAVersion10_1_Common : FeatureSet< 1669 !listconcat(FeatureISAVersion10_Common.Features, 1670 [FeatureScalarStores, 1671 FeatureScalarAtomics, 1672 FeatureScalarFlatScratchInsts, 1673 FeatureGetWaveIdInst, 1674 FeatureMadMacF32Insts, 1675 FeatureDsSrc2Insts, 1676 FeatureLdsMisalignedBug, 1677 FeatureSupportsXNACK, 1678 // gfx101x bugs 1679 FeatureVcmpxPermlaneHazard, 1680 FeatureVMEMtoScalarWriteHazard, 1681 FeatureSMEMtoVectorWriteHazard, 1682 FeatureInstFwdPrefetchBug, 1683 FeatureVcmpxExecWARHazard, 1684 FeatureLdsBranchVmemWARHazard, 1685 FeatureNSAtoVMEMBug, 1686 FeatureNSAClauseBug, 1687 FeatureOffset3fBug, 1688 FeatureFlatSegmentOffsetBug, 1689 FeatureNegativeUnalignedScratchOffsetBug])>; 1690 1691def FeatureISAVersion10_1_Generic : FeatureSet< 1692 !listconcat(FeatureISAVersion10_1_Common.Features, 1693 [FeatureRequiresCOV6])>; 1694 1695def FeatureISAVersion10_1_0 : FeatureSet< 1696 !listconcat(FeatureISAVersion10_1_Common.Features, 1697 [])>; 1698 1699def FeatureISAVersion10_1_1 : FeatureSet< 1700 !listconcat(FeatureISAVersion10_1_Common.Features, 1701 [FeatureDot1Insts, 1702 FeatureDot2Insts, 1703 FeatureDot5Insts, 1704 FeatureDot6Insts, 1705 FeatureDot7Insts, 1706 FeatureDot10Insts])>; 1707 1708def FeatureISAVersion10_1_2 : FeatureSet< 1709 !listconcat(FeatureISAVersion10_1_Common.Features, 1710 [FeatureDot1Insts, 1711 FeatureDot2Insts, 1712 FeatureDot5Insts, 1713 FeatureDot6Insts, 1714 FeatureDot7Insts, 1715 FeatureDot10Insts])>; 1716 1717def FeatureISAVersion10_1_3 : FeatureSet< 1718 !listconcat(FeatureISAVersion10_1_Common.Features, 1719 [FeatureGFX10_AEncoding])>; 1720 1721def FeatureISAVersion10_3_0 : FeatureSet< 1722 !listconcat(FeatureISAVersion10_Common.Features, 1723 [FeatureGFX10_AEncoding, 1724 FeatureGFX10_BEncoding, 1725 FeatureGFX10_3Insts, 1726 FeatureDot1Insts, 1727 FeatureDot2Insts, 1728 FeatureDot5Insts, 1729 FeatureDot6Insts, 1730 FeatureDot7Insts, 1731 FeatureDot10Insts, 1732 FeatureShaderCyclesRegister])>; 1733 1734def FeatureISAVersion10_3_Generic: FeatureSet< 1735 !listconcat(FeatureISAVersion10_3_0.Features, 1736 [FeatureRequiresCOV6])>; 1737 1738def FeatureISAVersion11_Common : FeatureSet< 1739 [FeatureGFX11, 1740 FeatureLDSBankCount32, 1741 FeatureDLInsts, 1742 FeatureDot5Insts, 1743 FeatureDot7Insts, 1744 FeatureDot8Insts, 1745 FeatureDot9Insts, 1746 FeatureDot10Insts, 1747 FeatureDot12Insts, 1748 FeatureNSAEncoding, 1749 FeaturePartialNSAEncoding, 1750 FeatureShaderCyclesRegister, 1751 FeatureArchitectedFlatScratch, 1752 FeatureAtomicFaddRtnInsts, 1753 FeatureAtomicFaddNoRtnInsts, 1754 FeatureFlatAtomicFaddF32Inst, 1755 FeatureImageInsts, 1756 FeaturePackedTID, 1757 FeatureVcmpxPermlaneHazard, 1758 FeatureMemoryAtomicFAddF32DenormalSupport]>; 1759 1760// There are few workarounds that need to be 1761// added to all targets. This pessimizes codegen 1762// a bit on the generic GFX11 target. 1763def FeatureISAVersion11_Generic: FeatureSet< 1764 !listconcat(FeatureISAVersion11_Common.Features, 1765 [FeatureMSAALoadDstSelBug, 1766 FeatureVALUTransUseHazard, 1767 FeatureUserSGPRInit16Bug, 1768 FeatureMADIntraFwdBug, 1769 FeaturePrivEnabledTrap2NopBug, 1770 FeatureRequiresCOV6, 1771 FeatureRequiredExportPriority])>; 1772 1773def FeatureISAVersion11_0_Common : FeatureSet< 1774 !listconcat(FeatureISAVersion11_Common.Features, 1775 [FeatureMSAALoadDstSelBug, 1776 FeatureVALUTransUseHazard, 1777 FeatureMADIntraFwdBug, 1778 FeaturePrivEnabledTrap2NopBug])>; 1779 1780def FeatureISAVersion11_0_0 : FeatureSet< 1781 !listconcat(FeatureISAVersion11_0_Common.Features, 1782 [Feature1_5xVGPRs, 1783 FeatureUserSGPRInit16Bug])>; 1784 1785def FeatureISAVersion11_0_1 : FeatureSet< 1786 !listconcat(FeatureISAVersion11_0_Common.Features, 1787 [Feature1_5xVGPRs])>; 1788 1789def FeatureISAVersion11_0_2 : FeatureSet< 1790 !listconcat(FeatureISAVersion11_0_Common.Features, 1791 [FeatureUserSGPRInit16Bug])>; 1792 1793def FeatureISAVersion11_0_3 : FeatureSet< 1794 !listconcat(FeatureISAVersion11_0_Common.Features, 1795 [])>; 1796 1797def FeatureISAVersion11_5_0 : FeatureSet< 1798 !listconcat(FeatureISAVersion11_Common.Features, 1799 [FeatureSALUFloatInsts, 1800 FeatureDPPSrc1SGPR, 1801 FeatureRequiredExportPriority])>; 1802 1803def FeatureISAVersion11_5_1 : FeatureSet< 1804 !listconcat(FeatureISAVersion11_Common.Features, 1805 [FeatureSALUFloatInsts, 1806 FeatureDPPSrc1SGPR, 1807 Feature1_5xVGPRs, 1808 FeatureRequiredExportPriority])>; 1809 1810def FeatureISAVersion11_5_2 : FeatureSet< 1811 !listconcat(FeatureISAVersion11_Common.Features, 1812 [FeatureSALUFloatInsts, 1813 FeatureDPPSrc1SGPR, 1814 FeatureRequiredExportPriority])>; 1815 1816def FeatureISAVersion11_5_3 : FeatureSet< 1817 !listconcat(FeatureISAVersion11_Common.Features, 1818 [FeatureSALUFloatInsts, 1819 FeatureDPPSrc1SGPR, 1820 FeatureRequiredExportPriority])>; 1821 1822def FeatureISAVersion12 : FeatureSet< 1823 [FeatureGFX12, 1824 FeatureLDSBankCount32, 1825 FeatureDLInsts, 1826 FeatureDot7Insts, 1827 FeatureDot8Insts, 1828 FeatureDot9Insts, 1829 FeatureDot10Insts, 1830 FeatureDot11Insts, 1831 FeatureDot12Insts, 1832 FeatureNSAEncoding, 1833 FeaturePartialNSAEncoding, 1834 FeatureShaderCyclesHiLoRegisters, 1835 FeatureArchitectedFlatScratch, 1836 FeatureArchitectedSGPRs, 1837 FeatureAtomicFaddRtnInsts, 1838 FeatureAtomicFaddNoRtnInsts, 1839 FeatureAtomicDsPkAdd16Insts, 1840 FeatureAtomicFlatPkAdd16Insts, 1841 FeatureAtomicBufferGlobalPkAddF16Insts, 1842 FeatureAtomicGlobalPkAddBF16Inst, 1843 FeatureAtomicBufferPkAddBF16Inst, 1844 FeatureFlatAtomicFaddF32Inst, 1845 FeatureImageInsts, 1846 FeatureExtendedImageInsts, 1847 FeatureFP8ConversionInsts, 1848 FeaturePackedTID, 1849 FeatureVcmpxPermlaneHazard, 1850 FeatureSALUFloatInsts, 1851 FeaturePseudoScalarTrans, 1852 FeatureHasRestrictedSOffset, 1853 FeatureScalarDwordx3Loads, 1854 FeatureDPPSrc1SGPR, 1855 FeatureMaxHardClauseLength32, 1856 Feature1_5xVGPRs, 1857 FeatureMemoryAtomicFAddF32DenormalSupport 1858 ]>; 1859 1860def FeatureISAVersion12_Generic: FeatureSet< 1861 !listconcat(FeatureISAVersion12.Features, 1862 [FeatureRequiresCOV6])>; 1863 1864//===----------------------------------------------------------------------===// 1865 1866def AMDGPUInstrInfo : InstrInfo { 1867 let guessInstructionProperties = 1; 1868} 1869 1870def AMDGPUAsmParser : AsmParser { 1871 // Some of the R600 registers have the same name, so this crashes. 1872 // For example T0_XYZW and T0_XY both have the asm name T0. 1873 let ShouldEmitMatchRegisterName = 0; 1874 1875 // Call the custom operand parser for all operands. 1876 let OperandParserMethod = "parseCustomOperand"; 1877 let CallCustomParserForAllOperands = true; 1878} 1879 1880def AMDGPUAsmWriter : AsmWriter { 1881 int PassSubtarget = 1; 1882} 1883 1884def AMDGPUAsmVariants { 1885 string Default = "Default"; 1886 int Default_ID = 0; 1887 string VOP3 = "VOP3"; 1888 int VOP3_ID = 1; 1889 string SDWA = "SDWA"; 1890 int SDWA_ID = 2; 1891 string SDWA9 = "SDWA9"; 1892 int SDWA9_ID = 3; 1893 string DPP = "DPP"; 1894 int DPP_ID = 4; 1895 string VOP3_DPP = "VOP3_DPP"; 1896 int VOP3_DPP_ID = 5; 1897 string Disable = "Disable"; 1898 int Disable_ID = 6; 1899} 1900 1901def DefaultAMDGPUAsmParserVariant : AsmParserVariant { 1902 let Variant = AMDGPUAsmVariants.Default_ID; 1903 let Name = AMDGPUAsmVariants.Default; 1904} 1905 1906def VOP3AsmParserVariant : AsmParserVariant { 1907 let Variant = AMDGPUAsmVariants.VOP3_ID; 1908 let Name = AMDGPUAsmVariants.VOP3; 1909} 1910 1911def SDWAAsmParserVariant : AsmParserVariant { 1912 let Variant = AMDGPUAsmVariants.SDWA_ID; 1913 let Name = AMDGPUAsmVariants.SDWA; 1914} 1915 1916def SDWA9AsmParserVariant : AsmParserVariant { 1917 let Variant = AMDGPUAsmVariants.SDWA9_ID; 1918 let Name = AMDGPUAsmVariants.SDWA9; 1919} 1920 1921def DPPAsmParserVariant : AsmParserVariant { 1922 let Variant = AMDGPUAsmVariants.DPP_ID; 1923 let Name = AMDGPUAsmVariants.DPP; 1924} 1925 1926def VOP3_DPPAsmParserVariant : AsmParserVariant { 1927 let Variant = AMDGPUAsmVariants.VOP3_DPP_ID; 1928 let Name = AMDGPUAsmVariants.VOP3_DPP; 1929} 1930 1931def AMDGPU : Target { 1932 // Pull in Instruction Info: 1933 let InstructionSet = AMDGPUInstrInfo; 1934 let AssemblyParsers = [AMDGPUAsmParser]; 1935 let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant, 1936 VOP3AsmParserVariant, 1937 SDWAAsmParserVariant, 1938 SDWA9AsmParserVariant, 1939 DPPAsmParserVariant, 1940 VOP3_DPPAsmParserVariant]; 1941 let AssemblyWriters = [AMDGPUAsmWriter]; 1942 let AllowRegisterRenaming = 1; 1943} 1944 1945// Dummy Instruction itineraries for pseudo instructions 1946def ALU_NULL : FuncUnit; 1947def NullALU : InstrItinClass; 1948 1949//===----------------------------------------------------------------------===// 1950// Predicate helper class 1951//===----------------------------------------------------------------------===// 1952 1953def isGFX6 : 1954 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS">, 1955 AssemblerPredicate<(all_of FeatureSouthernIslands)>; 1956 1957def isGFX6GFX7 : 1958 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 1959 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">, 1960 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), (not FeatureGFX10Insts))>; 1961 1962def isGFX6GFX7GFX10 : 1963 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 1964 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1965 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 1966 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), (not FeatureGFX11Insts))>; 1967 1968def isGFX6GFX7GFX10Plus : 1969 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 1970 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1971 "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">, 1972 AssemblerPredicate<(all_of (not FeatureGCN3Encoding))>; 1973 1974def isGFX7Only : 1975 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">, 1976 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts, (not FeatureGFX10Insts))>; 1977 1978def isGFX7GFX10 : 1979 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1980 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 1981 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts, (not FeatureGFX11Insts))>; 1982 1983def isGFX7GFX10GFX11 : 1984 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1985 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||" 1986 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">, 1987 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts)>; 1988 1989def isGFX7GFX8GFX9 : 1990 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1991 "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1992 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, 1993 AssemblerPredicate<(all_of FeatureGFX7GFX8GFX9Insts)>; 1994 1995def isGFX6GFX7GFX8GFX9 : 1996 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 1997 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1998 "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1999 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, 2000 AssemblerPredicate<(all_of (not FeatureGFX10Insts))>; 2001 2002def isGFX6GFX7GFX8GFX9NotGFX90A : 2003 Predicate<"!Subtarget->hasGFX90AInsts() &&" 2004 "(Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 2005 " Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 2006 " Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 2007 " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, 2008 AssemblerPredicate<(all_of (not FeatureGFX10Insts), (not FeatureGFX90AInsts))>; 2009 2010def isGFX6GFX7GFX8GFX9GFX10 : 2011 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 2012 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 2013 "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 2014 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" 2015 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 2016 AssemblerPredicate<(all_of (not FeatureGFX11Insts))>; 2017 2018def isNotGFX12Plus : 2019 Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::GFX11">, 2020 AssemblerPredicate<(all_of (not FeatureGFX12Insts))>; 2021 2022def isGFX7GFX8GFX9GFX10 : 2023 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 2024 "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 2025 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" 2026 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 2027 AssemblerPredicate<(all_of FeatureCIInsts, (not FeatureGFX11Insts))>; 2028 2029def isGFX8GFX9GFX10GFX11 : 2030 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 2031 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" 2032 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||" 2033 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">, 2034 AssemblerPredicate<(all_of FeatureGFX8Insts, (not FeatureGFX12Insts))>; 2035 2036def isGFX7Plus : 2037 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">, 2038 AssemblerPredicate<(all_of FeatureCIInsts)>; 2039 2040def isGFX8Plus : 2041 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, 2042 AssemblerPredicate<(all_of FeatureGFX8Insts)>; 2043 2044def isGFX8Only : Predicate<"Subtarget->getGeneration() ==" 2045 "AMDGPUSubtarget::VOLCANIC_ISLANDS">, 2046 AssemblerPredicate <(all_of FeatureVolcanicIslands)>; 2047 2048def isGFX9Plus : 2049 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">, 2050 AssemblerPredicate<(all_of FeatureGFX9Insts)>; 2051 2052def isNotGFX9Plus : 2053 Predicate<"Subtarget->getGeneration() < AMDGPUSubtarget::GFX9">; 2054 2055def isGFX9Only : Predicate < 2056 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, 2057 AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureGFX9Insts)>; 2058 2059def isGCN3ExcludingGFX90A : 2060 Predicate<"Subtarget->isGCN3Encoding() && !Subtarget->hasGFX90AInsts()">, 2061 AssemblerPredicate<(all_of FeatureGCN3Encoding, (not FeatureGFX90AInsts))>; 2062 2063def isGFX90APlus : 2064 Predicate<"Subtarget->hasGFX90AInsts()">, 2065 AssemblerPredicate<(all_of FeatureGFX90AInsts)>; 2066 2067def isNotGFX90APlus : 2068 Predicate<"!Subtarget->hasGFX90AInsts()">, 2069 AssemblerPredicate<(all_of (not FeatureGFX90AInsts))>; 2070 2071def isGFX8GFX9NotGFX90A : 2072 Predicate<"!Subtarget->hasGFX90AInsts() &&" 2073 "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 2074 " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, 2075 AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>; 2076 2077def isGFX90AOnly : 2078 Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">, 2079 AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>; 2080 2081def isGFX908orGFX90A : 2082 Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX940Insts()">, 2083 AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX940Insts))>; 2084 2085def isGFX940Plus : 2086 Predicate<"Subtarget->hasGFX940Insts()">, 2087 AssemblerPredicate<(all_of FeatureGFX940Insts)>; 2088 2089def isNotGFX940Plus : 2090 Predicate<"!Subtarget->hasGFX940Insts()">, 2091 AssemblerPredicate<(all_of (not FeatureGFX940Insts))>; 2092 2093def HasGFX950Insts : 2094 Predicate<"Subtarget->hasGFX950Insts()">, 2095 AssemblerPredicate<(all_of FeatureGFX950Insts)>; 2096 2097def HasPermlane16Swap : 2098 Predicate<"Subtarget->hasPermlane16Swap()">, 2099 AssemblerPredicate<(all_of FeaturePermlane16Swap)>; 2100 2101def HasPermlane32Swap : 2102 Predicate<"Subtarget->hasPermlane32Swap()">, 2103 AssemblerPredicate<(all_of FeaturePermlane32Swap)>; 2104 2105def isGFX8GFX9NotGFX940 : 2106 Predicate<"!Subtarget->hasGFX940Insts() &&" 2107 "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 2108 " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, 2109 AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX940Insts))>; 2110 2111def isGFX8GFX9 : 2112 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 2113 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, 2114 AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding)>; 2115 2116def isGFX10Only : 2117 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 2118 AssemblerPredicate<(all_of FeatureGFX10Insts, (not FeatureGFX11Insts))>; 2119 2120def isGFX10Plus : 2121 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">, 2122 AssemblerPredicate<(all_of FeatureGFX10Insts)>; 2123 2124def isGFX10GFX11 : 2125 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||" 2126 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">, 2127 AssemblerPredicate<(all_of FeatureGFX10Insts, (not FeatureGFX12Insts))>; 2128 2129def isGFX10Before1030 : 2130 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 &&" 2131 "!Subtarget->hasGFX10_3Insts()">, 2132 AssemblerPredicate<(all_of FeatureGFX10Insts,(not FeatureGFX10_3Insts))>; 2133 2134def isGFX9GFX10 : 2135 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" 2136 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 2137 AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX11Insts))>; 2138 2139def isGFX9GFX10GFX11 : 2140 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9 &&" 2141 "Subtarget->getGeneration() < AMDGPUSubtarget::GFX12">, 2142 AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX12Insts))>; 2143 2144def isGFX8GFX9GFX10 : 2145 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 2146 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" 2147 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 2148 AssemblerPredicate<(all_of FeatureGFX8Insts, (not FeatureGFX11Insts))>; 2149 2150def isGFX11Only : 2151 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">, 2152 AssemblerPredicate<(all_of FeatureGFX11Insts, (not FeatureGFX12Insts))>; 2153 2154def isGFX11Plus : 2155 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">, 2156 AssemblerPredicate<(all_of FeatureGFX11Insts)>; 2157 2158def isGFX12Only : 2159 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX12">, 2160 AssemblerPredicate<(all_of FeatureGFX12Insts)>; 2161 2162def isGFX12Plus : 2163 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12">, 2164 AssemblerPredicate<(all_of FeatureGFX12Insts)>; 2165 2166def HasMinimum3Maximum3F32 : 2167 Predicate<"Subtarget->hasMinimum3Maximum3F32()">, 2168 AssemblerPredicate<(all_of FeatureMinimum3Maximum3F32)>; 2169 2170def HasMinimum3Maximum3F16 : 2171 Predicate<"Subtarget->hasMinimum3Maximum3F16()">, 2172 AssemblerPredicate<(all_of FeatureMinimum3Maximum3F16)>; 2173 2174def HasMinimum3Maximum3PKF16 : 2175 Predicate<"Subtarget->hasMinimum3Maximum3PKF16()">, 2176 AssemblerPredicate<(all_of FeatureMinimum3Maximum3PKF16)>; 2177 2178 2179def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, 2180 AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; 2181 2182def HasFlatBufferGlobalAtomicFaddF64Inst : 2183 Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">, 2184 AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>; 2185 2186def HasAtomicFMinFMaxF32GlobalInsts : 2187 Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">, 2188 AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF32GlobalInsts)>; 2189 2190def HasAtomicFMinFMaxF64GlobalInsts : 2191 Predicate<"Subtarget->hasAtomicFMinFMaxF64GlobalInsts()">, 2192 AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF64GlobalInsts)>; 2193 2194def HasAtomicFMinFMaxF32FlatInsts : 2195 Predicate<"Subtarget->hasAtomicFMinFMaxF32FlatInsts()">, 2196 AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF32FlatInsts)>; 2197 2198def HasAtomicFMinFMaxF64FlatInsts : 2199 Predicate<"Subtarget->hasAtomicFMinFMaxF64FlatInsts()">, 2200 AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF64FlatInsts)>; 2201 2202def HasLdsAtomicAddF64 : 2203 Predicate<"Subtarget->hasLdsAtomicAddF64()">, 2204 AssemblerPredicate<(any_of FeatureGFX90AInsts)>; 2205 2206def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">, 2207 AssemblerPredicate<(all_of FeatureFlatGlobalInsts)>; 2208def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">, 2209 AssemblerPredicate<(all_of FeatureFlatScratchInsts)>; 2210def HasScalarFlatScratchInsts : Predicate<"Subtarget->hasScalarFlatScratchInsts()">, 2211 AssemblerPredicate<(all_of FeatureScalarFlatScratchInsts)>; 2212def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">, 2213 AssemblerPredicate<(all_of FeatureGFX9Insts)>; 2214 2215def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">, 2216 AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX940Insts)>; 2217def HasFlatScratchSVSMode : Predicate<"Subtarget->hasFlatScratchSVSMode()">, 2218 AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>; 2219 2220def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">, 2221 AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>; 2222 2223def HasGFX10_BEncoding : Predicate<"Subtarget->hasGFX10_BEncoding()">, 2224 AssemblerPredicate<(all_of FeatureGFX10_BEncoding)>; 2225 2226def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">, 2227 AssemblerPredicate<(all_of FeatureUnpackedD16VMem)>; 2228def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">, 2229 AssemblerPredicate<(all_of (not FeatureUnpackedD16VMem))>; 2230 2231def HasRestrictedSOffset : Predicate<"Subtarget->hasRestrictedSOffset()">, 2232 AssemblerPredicate<(all_of FeatureHasRestrictedSOffset)>; 2233def HasUnrestrictedSOffset : Predicate<"!Subtarget->hasRestrictedSOffset()">, 2234 AssemblerPredicate<(all_of (not FeatureHasRestrictedSOffset))>; 2235 2236def D16PreservesUnusedBits : 2237 Predicate<"Subtarget->d16PreservesUnusedBits()">, 2238 AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureSRAMECC))>; 2239 2240def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">; 2241def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">; 2242 2243def HasExportInsts : Predicate<"Subtarget->hasExportInsts()">, 2244 AssemblerPredicate<(all_of (not FeatureGFX90AInsts))>; 2245 2246def HasVINTERPEncoding : Predicate<"Subtarget->hasVINTERPEncoding()">, 2247 AssemblerPredicate<(all_of FeatureGFX11Insts)>; 2248 2249def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">, 2250 AssemblerPredicate<(all_of FeatureGFX9Insts)>; 2251 2252def HasLDSFPAtomicAddF32 : Predicate<"Subtarget->hasLDSFPAtomicAddF32()">, 2253 AssemblerPredicate<(all_of FeatureGFX8Insts)>; 2254 2255def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">, 2256 AssemblerPredicate<(all_of FeatureAddNoCarryInsts)>; 2257 2258def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">; 2259 2260def HasXNACKEnabled : Predicate<"Subtarget->isXNACKEnabled()">; 2261 2262def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">, 2263 AssemblerPredicate<(all_of Feature16BitInsts)>; 2264 2265def HasTrue16BitInsts : Predicate<"Subtarget->hasTrue16BitInsts()">, 2266 AssemblerPredicate<(all_of FeatureTrue16BitInsts)>; 2267def NotHasTrue16BitInsts : True16PredicateClass<"!Subtarget->hasTrue16BitInsts()">, 2268 AssemblerPredicate<(all_of (not FeatureTrue16BitInsts))>; 2269 2270// Control use of True16 instructions. The real True16 instructions are 2271// True16 instructions as they are defined in the ISA. Fake True16 2272// instructions have the same encoding as real ones but syntactically 2273// only allow 32-bit registers in operands and use low halves thereof. 2274def UseRealTrue16Insts : True16PredicateClass<"Subtarget->useRealTrue16Insts()">, 2275 AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>; 2276def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() && " 2277 "!Subtarget->useRealTrue16Insts()">, 2278 AssemblerPredicate<(all_of FeatureTrue16BitInsts)>; 2279 // FIXME When we default to RealTrue16 instead of Fake, change the line as follows. 2280 // AssemblerPredicate<(all_of FeatureTrue16BitInsts, (not FeatureRealTrue16Insts))>; 2281 2282def HasBF16ConversionInsts : Predicate<"Subtarget->hasBF16ConversionInsts()">, 2283 AssemblerPredicate<(all_of FeatureBF16ConversionInsts)>; 2284 2285def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">, 2286 AssemblerPredicate<(all_of FeatureVOP3P)>; 2287 2288def NotHasMed3_16 : Predicate<"!Subtarget->hasMed3_16()">; 2289def HasMed3_16 : Predicate<"Subtarget->hasMed3_16()">; 2290 2291def HasMinMaxDenormModes : Predicate<"Subtarget->supportsMinMaxDenormModes()">; 2292def NotHasMinMaxDenormModes : Predicate<"!Subtarget->supportsMinMaxDenormModes()">; 2293 2294def HasFminFmaxLegacy : Predicate<"Subtarget->hasFminFmaxLegacy()">; 2295 2296def HasSDWA : Predicate<"Subtarget->hasSDWA()">; 2297 2298def HasSDWA8 : Predicate<"Subtarget->hasSDWA()">, 2299 AssemblerPredicate<(all_of (not FeatureGFX9Insts), FeatureSDWA)>; 2300 2301def HasSDWA9 : 2302 Predicate<"Subtarget->hasSDWA()">, 2303 AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureGFX9Insts,FeatureSDWA)>; 2304 2305def HasSDWA10 : 2306 Predicate<"Subtarget->hasSDWA()">, 2307 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureSDWA)>; 2308 2309def HasDPP : Predicate<"Subtarget->hasDPP()">, 2310 AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureDPP)>; 2311 2312def HasDPP8 : Predicate<"Subtarget->hasDPP8()">, 2313 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureDPP8)>; 2314 2315def HasDPALU_DPP : Predicate<"Subtarget->hasDPALU_DPP()">, 2316 AssemblerPredicate<(all_of FeatureDPALU_DPP)>; 2317 2318def HasPackedFP32Ops : Predicate<"Subtarget->hasPackedFP32Ops()">, 2319 AssemblerPredicate<(all_of FeaturePackedFP32Ops)>; 2320 2321def HasPkMovB32 : Predicate<"Subtarget->hasPkMovB32()">, 2322 AssemblerPredicate<(all_of FeatureGFX90AInsts)>; 2323 2324def HasFmaakFmamkF32Insts : 2325 Predicate<"Subtarget->hasFmaakFmamkF32Insts()">, 2326 AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>; 2327 2328def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">, 2329 AssemblerPredicate<(all_of FeatureImageInsts)>; 2330 2331def HasExtendedImageInsts : Predicate<"Subtarget->hasExtendedImageInsts()">, 2332 AssemblerPredicate<(all_of FeatureExtendedImageInsts)>; 2333 2334def HasR128A16 : Predicate<"Subtarget->hasR128A16()">, 2335 AssemblerPredicate<(all_of FeatureR128A16)>; 2336 2337def HasA16 : Predicate<"Subtarget->hasA16()">, 2338 AssemblerPredicate<(all_of FeatureA16)>; 2339 2340def HasG16 : Predicate<"Subtarget->hasG16()">, 2341 AssemblerPredicate<(all_of FeatureG16)>; 2342 2343def HasDPP16 : Predicate<"Subtarget->hasDPP()">, 2344 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureDPP)>; 2345 2346def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">, 2347 AssemblerPredicate<(all_of FeatureIntClamp)>; 2348 2349def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">, 2350 AssemblerPredicate<(all_of FeatureMadMixInsts)>; 2351 2352def HasScalarStores : Predicate<"Subtarget->hasScalarStores()">, 2353 AssemblerPredicate<(all_of FeatureScalarStores)>; 2354 2355def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">, 2356 AssemblerPredicate<(all_of FeatureScalarAtomics)>; 2357 2358def HasNoSdstCMPX : Predicate<"Subtarget->hasNoSdstCMPX()">, 2359 AssemblerPredicate<(all_of FeatureNoSdstCMPX)>; 2360 2361def HasSdstCMPX : Predicate<"!Subtarget->hasNoSdstCMPX()">, 2362 AssemblerPredicate<(all_of (not FeatureNoSdstCMPX))>; 2363 2364def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">; 2365def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">; 2366def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">, 2367 AssemblerPredicate<(all_of FeatureVGPRIndexMode)>; 2368def HasMovrel : Predicate<"Subtarget->hasMovrel()">, 2369 AssemblerPredicate<(all_of FeatureMovrel)>; 2370 2371def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">, 2372 AssemblerPredicate<(all_of FeatureFmaMixInsts)>; 2373 2374def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">, 2375 AssemblerPredicate<(all_of FeatureDLInsts)>; 2376 2377def HasFmacF64Inst : Predicate<"Subtarget->hasFmacF64Inst()">, 2378 AssemblerPredicate<(all_of FeatureFmacF64Inst)>; 2379 2380def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">, 2381 AssemblerPredicate<(all_of FeatureDot1Insts)>; 2382 2383def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">, 2384 AssemblerPredicate<(all_of FeatureDot2Insts)>; 2385 2386def HasDot3Insts : Predicate<"Subtarget->hasDot3Insts()">, 2387 AssemblerPredicate<(all_of FeatureDot3Insts)>; 2388 2389def HasDot4Insts : Predicate<"Subtarget->hasDot4Insts()">, 2390 AssemblerPredicate<(all_of FeatureDot4Insts)>; 2391 2392def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">, 2393 AssemblerPredicate<(all_of FeatureDot5Insts)>; 2394 2395def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">, 2396 AssemblerPredicate<(all_of FeatureDot6Insts)>; 2397 2398def HasDot7Insts : Predicate<"Subtarget->hasDot7Insts()">, 2399 AssemblerPredicate<(all_of FeatureDot7Insts)>; 2400 2401def HasDot8Insts : Predicate<"Subtarget->hasDot8Insts()">, 2402 AssemblerPredicate<(all_of FeatureDot8Insts)>; 2403 2404def HasDot9Insts : Predicate<"Subtarget->hasDot9Insts()">, 2405 AssemblerPredicate<(all_of FeatureDot9Insts)>; 2406 2407def HasDot10Insts : Predicate<"Subtarget->hasDot10Insts()">, 2408 AssemblerPredicate<(all_of FeatureDot10Insts)>; 2409 2410def HasDot11Insts : Predicate<"Subtarget->hasDot11Insts()">, 2411 AssemblerPredicate<(all_of FeatureDot11Insts)>; 2412 2413def HasDot12Insts : Predicate<"Subtarget->hasDot12Insts()">, 2414 AssemblerPredicate<(all_of FeatureDot12Insts)>; 2415 2416def HasDot13Insts : Predicate<"Subtarget->hasDot13Insts()">, 2417 AssemblerPredicate<(all_of FeatureDot13Insts)>; 2418 2419def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">, 2420 AssemblerPredicate<(all_of FeatureGetWaveIdInst)>; 2421 2422def HasMAIInsts : Predicate<"Subtarget->hasMAIInsts()">, 2423 AssemblerPredicate<(all_of FeatureMAIInsts)>; 2424 2425def HasSMemRealTime : Predicate<"Subtarget->hasSMemRealTime()">, 2426 AssemblerPredicate<(all_of FeatureSMemRealTime)>; 2427 2428def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">, 2429 AssemblerPredicate<(all_of FeatureSMemTimeInst)>; 2430 2431def HasShaderCyclesRegister : Predicate<"Subtarget->hasShaderCyclesRegister()">, 2432 AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>; 2433 2434def HasShaderCyclesHiLoRegisters : Predicate<"Subtarget->hasShaderCyclesHiLoRegisters()">; 2435 2436def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">, 2437 AssemblerPredicate<(all_of FeatureFP8Insts)>; 2438 2439def HasFP8ConversionInsts : Predicate<"Subtarget->hasFP8ConversionInsts()">, 2440 AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>; 2441 2442def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">, 2443 AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>; 2444 2445def HasMadMacF32Insts : Predicate<"Subtarget->hasMadMacF32Insts()">, 2446 AssemblerPredicate<(all_of FeatureMadMacF32Insts)>; 2447 2448def HasFmaLegacy32 : Predicate<"Subtarget->hasGFX10_3Insts()">, 2449 AssemblerPredicate<(any_of FeatureGFX10_3Insts)>; 2450 2451def HasAtomicDsPkAdd16Insts : Predicate<"Subtarget->hasAtomicDsPkAdd16Insts()">, 2452 AssemblerPredicate<(any_of FeatureAtomicDsPkAdd16Insts)>; 2453 2454def HasAtomicFlatPkAdd16Insts : Predicate<"Subtarget->hasAtomicFlatPkAdd16Insts()">, 2455 AssemblerPredicate<(any_of FeatureAtomicFlatPkAdd16Insts)>; 2456 2457def HasAtomicFaddRtnInsts : Predicate<"Subtarget->hasAtomicFaddRtnInsts()">, 2458 AssemblerPredicate<(all_of FeatureAtomicFaddRtnInsts)>; 2459def HasAtomicFaddNoRtnInsts : Predicate<"Subtarget->hasAtomicFaddNoRtnInsts()">, 2460 AssemblerPredicate<(all_of FeatureAtomicFaddNoRtnInsts)>; 2461def HasAtomicBufferGlobalPkAddF16NoRtnInsts 2462 : Predicate<"Subtarget->hasAtomicBufferGlobalPkAddF16NoRtnInsts() || Subtarget->hasAtomicBufferGlobalPkAddF16Insts()">, 2463 AssemblerPredicate<(any_of FeatureAtomicBufferGlobalPkAddF16NoRtnInsts, FeatureAtomicBufferGlobalPkAddF16Insts)>; 2464def HasAtomicBufferGlobalPkAddF16Insts 2465 : Predicate<"Subtarget->hasAtomicBufferGlobalPkAddF16Insts()">, 2466 AssemblerPredicate<(all_of FeatureAtomicBufferGlobalPkAddF16Insts)>; 2467def HasAtomicGlobalPkAddBF16Inst 2468 : Predicate<"Subtarget->hasAtomicGlobalPkAddBF16Inst()">, 2469 AssemblerPredicate<(all_of FeatureAtomicGlobalPkAddBF16Inst)>; 2470def HasAtomicBufferPkAddBF16Inst 2471 : Predicate<"Subtarget->hasAtomicBufferPkAddBF16Inst()">, 2472 AssemblerPredicate<(all_of FeatureAtomicBufferPkAddBF16Inst)>; 2473def HasFlatAtomicFaddF32Inst 2474 : Predicate<"Subtarget->hasFlatAtomicFaddF32Inst()">, 2475 AssemblerPredicate<(all_of FeatureFlatAtomicFaddF32Inst)>; 2476 2477def HasDefaultComponentZero 2478 : Predicate<"Subtarget->hasDefaultComponentZero()">, 2479 AssemblerPredicate<(all_of FeatureDefaultComponentZero)>; 2480def HasDefaultComponentBroadcast 2481 : Predicate<"Subtarget->hasDefaultComponentBroadcast()">, 2482 AssemblerPredicate<(all_of FeatureDefaultComponentBroadcast)>; 2483 2484def HasDsSrc2Insts : Predicate<"!Subtarget->hasDsSrc2Insts()">, 2485 AssemblerPredicate<(all_of FeatureDsSrc2Insts)>; 2486 2487def EnableFlatScratch : Predicate<"Subtarget->enableFlatScratch()">; 2488 2489def DisableFlatScratch : Predicate<"!Subtarget->enableFlatScratch()">; 2490 2491def HasUnalignedAccessMode : Predicate<"Subtarget->hasUnalignedAccessMode()">, 2492 AssemblerPredicate<(all_of FeatureUnalignedAccessMode)>; 2493 2494def HasMADIntraFwdBug : Predicate<"Subtarget->hasMADIntraFwdBug()">; 2495 2496def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">; 2497 2498def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">, 2499 AssemblerPredicate<(all_of FeatureSALUFloatInsts)>; 2500 2501def NotHasSALUFloatInsts : Predicate<"!Subtarget->hasSALUFloatInsts()">, 2502 AssemblerPredicate<(all_of (not FeatureSALUFloatInsts))>; 2503 2504def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">, 2505 AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>; 2506 2507def HasBitOp3Insts : Predicate<"Subtarget->hasBitOp3Insts()">, 2508 AssemblerPredicate<(all_of FeatureBitOp3Insts)>; 2509 2510def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">, 2511 AssemblerPredicate<(all_of FeaturePrngInst)>; 2512 2513def HasFP8ConversionScaleInsts : Predicate<"Subtarget->hasFP8ConversionScaleInsts()">, 2514 AssemblerPredicate<(all_of FeatureFP8ConversionScaleInsts)>; 2515 2516def HasBF8ConversionScaleInsts : Predicate<"Subtarget->hasBF8ConversionScaleInsts()">, 2517 AssemblerPredicate<(all_of FeatureBF8ConversionScaleInsts)>; 2518 2519def HasFP4ConversionScaleInsts : Predicate<"Subtarget->hasFP4ConversionScaleInsts()">, 2520 AssemblerPredicate<(all_of FeatureFP4ConversionScaleInsts)>; 2521 2522def HasFP6BF6ConversionScaleInsts : Predicate<"Subtarget->hasFP6BF6ConversionScaleInsts()">, 2523 AssemblerPredicate<(all_of FeatureFP6BF6ConversionScaleInsts)>; 2524 2525def HasF16BF16ToFP6BF6ConversionScaleInsts : Predicate<"Subtarget->hasF16BF16ToFP6BF6ConversionScaleInsts()">, 2526 AssemblerPredicate<(all_of FeatureF16BF16ToFP6BF6ConversionScaleInsts)>; 2527 2528def HasCvtPkF16F32Inst : Predicate<"Subtarget->hasCvtPkF16F32Inst()">, 2529 AssemblerPredicate<(all_of FeatureCvtPkF16F32Inst)>; 2530 2531def HasF32ToF16BF16ConversionSRInsts : Predicate<"Subtarget->hasF32ToF16BF16ConversionSRInsts()">, 2532 AssemblerPredicate<(all_of FeatureF32ToF16BF16ConversionSRInsts)>; 2533 2534def HasGDS : Predicate<"Subtarget->hasGDS()">; 2535 2536def HasGWS : Predicate<"Subtarget->hasGWS()">; 2537 2538def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">; 2539def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">; 2540 2541def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">; 2542 2543def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">; 2544 2545def HasXF32Insts : Predicate<"Subtarget->hasXF32Insts()">, 2546 AssemblerPredicate<(all_of FeatureXF32Insts)>; 2547 2548def HasAshrPkInsts : Predicate<"Subtarget->hasAshrPkInsts()">, 2549 AssemblerPredicate<(all_of FeatureAshrPkInsts)>; 2550 2551// Include AMDGPU TD files 2552include "SISchedule.td" 2553include "GCNProcessors.td" 2554include "AMDGPUInstrInfo.td" 2555include "SIRegisterInfo.td" 2556include "AMDGPURegisterBanks.td" 2557include "AMDGPUInstructions.td" 2558include "SIInstrInfo.td" 2559include "AMDGPUCallingConv.td" 2560include "AMDGPUSearchableTables.td" 2561