//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Alignment.h"
#include <array>
#include <functional>
#include <utility>

struct amd_kernel_code_t;

namespace llvm {

struct Align;
class Argument;
class Function;
class GlobalValue;
class MCInstrInfo;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;
class raw_ostream;

namespace AMDGPU {

struct AMDGPUMCKernelCodeT;
struct IsaVersion;

/// Generic target versions emitted by this version of LLVM.
///
/// These numbers are incremented every time a codegen breaking change occurs
/// within a generic family.
namespace GenericVersion {
static constexpr unsigned GFX9 = 1;
static constexpr unsigned GFX9_4 = 1;
static constexpr unsigned GFX10_1 = 1;
static constexpr unsigned GFX10_3 = 1;
static constexpr unsigned GFX11 = 1;
static constexpr unsigned GFX12 = 1;
} // namespace GenericVersion

// AMDHSA code object ABI versions understood by this version of LLVM.
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };

enum class FPType { None, FP4, FP8 };

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI);

/// \returns Code object version from the IR module flag.
64 unsigned getAMDHSACodeObjectVersion(const Module &M); 65 66 /// \returns Code object version from ELF's e_ident[EI_ABIVERSION]. 67 unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion); 68 69 /// \returns The default HSA code object version. This should only be used when 70 /// we lack a more accurate CodeObjectVersion value (e.g. from the IR module 71 /// flag or a .amdhsa_code_object_version directive) 72 unsigned getDefaultAMDHSACodeObjectVersion(); 73 74 /// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param 75 /// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion(). 76 uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion); 77 78 /// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr 79 unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV); 80 81 /// \returns The offset of the hostcall pointer argument from implicitarg_ptr 82 unsigned getHostcallImplicitArgPosition(unsigned COV); 83 84 unsigned getDefaultQueueImplicitArgPosition(unsigned COV); 85 unsigned getCompletionActionImplicitArgPosition(unsigned COV); 86 87 struct GcnBufferFormatInfo { 88 unsigned Format; 89 unsigned BitsPerComp; 90 unsigned NumComponents; 91 unsigned NumFormat; 92 unsigned DataFormat; 93 }; 94 95 struct MAIInstInfo { 96 uint16_t Opcode; 97 bool is_dgemm; 98 bool is_gfx940_xdl; 99 }; 100 101 struct MFMA_F8F6F4_Info { 102 unsigned Opcode; 103 unsigned F8F8Opcode; 104 uint8_t NumRegsSrcA; 105 uint8_t NumRegsSrcB; 106 }; 107 108 struct CvtScaleF32_F32F16ToF8F4_Info { 109 unsigned Opcode; 110 }; 111 112 #define GET_MIMGBaseOpcode_DECL 113 #define GET_MIMGDim_DECL 114 #define GET_MIMGEncoding_DECL 115 #define GET_MIMGLZMapping_DECL 116 #define GET_MIMGMIPMapping_DECL 117 #define GET_MIMGBiASMapping_DECL 118 #define GET_MAIInstInfoTable_DECL 119 #define GET_MAIInstInfoTable_DECL 120 #define GET_isMFMA_F8F6F4Table_DECL 121 #define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL 122 #include 
"AMDGPUGenSearchableTables.inc" 123 124 namespace IsaInfo { 125 126 enum { 127 // The closed Vulkan driver sets 96, which limits the wave count to 8 but 128 // doesn't spill SGPRs as much as when 80 is set. 129 FIXED_NUM_SGPRS_FOR_INIT_BUG = 96, 130 TRAP_NUM_SGPRS = 16 131 }; 132 133 enum class TargetIDSetting { 134 Unsupported, 135 Any, 136 Off, 137 On 138 }; 139 140 class AMDGPUTargetID { 141 private: 142 const MCSubtargetInfo &STI; 143 TargetIDSetting XnackSetting; 144 TargetIDSetting SramEccSetting; 145 146 public: 147 explicit AMDGPUTargetID(const MCSubtargetInfo &STI); 148 ~AMDGPUTargetID() = default; 149 150 /// \return True if the current xnack setting is not "Unsupported". 151 bool isXnackSupported() const { 152 return XnackSetting != TargetIDSetting::Unsupported; 153 } 154 155 /// \returns True if the current xnack setting is "On" or "Any". 156 bool isXnackOnOrAny() const { 157 return XnackSetting == TargetIDSetting::On || 158 XnackSetting == TargetIDSetting::Any; 159 } 160 161 /// \returns True if current xnack setting is "On" or "Off", 162 /// false otherwise. 163 bool isXnackOnOrOff() const { 164 return getXnackSetting() == TargetIDSetting::On || 165 getXnackSetting() == TargetIDSetting::Off; 166 } 167 168 /// \returns The current xnack TargetIDSetting, possible options are 169 /// "Unsupported", "Any", "Off", and "On". 170 TargetIDSetting getXnackSetting() const { 171 return XnackSetting; 172 } 173 174 /// Sets xnack setting to \p NewXnackSetting. 175 void setXnackSetting(TargetIDSetting NewXnackSetting) { 176 XnackSetting = NewXnackSetting; 177 } 178 179 /// \return True if the current sramecc setting is not "Unsupported". 180 bool isSramEccSupported() const { 181 return SramEccSetting != TargetIDSetting::Unsupported; 182 } 183 184 /// \returns True if the current sramecc setting is "On" or "Any". 
185 bool isSramEccOnOrAny() const { 186 return SramEccSetting == TargetIDSetting::On || 187 SramEccSetting == TargetIDSetting::Any; 188 } 189 190 /// \returns True if current sramecc setting is "On" or "Off", 191 /// false otherwise. 192 bool isSramEccOnOrOff() const { 193 return getSramEccSetting() == TargetIDSetting::On || 194 getSramEccSetting() == TargetIDSetting::Off; 195 } 196 197 /// \returns The current sramecc TargetIDSetting, possible options are 198 /// "Unsupported", "Any", "Off", and "On". 199 TargetIDSetting getSramEccSetting() const { 200 return SramEccSetting; 201 } 202 203 /// Sets sramecc setting to \p NewSramEccSetting. 204 void setSramEccSetting(TargetIDSetting NewSramEccSetting) { 205 SramEccSetting = NewSramEccSetting; 206 } 207 208 void setTargetIDFromFeaturesString(StringRef FS); 209 void setTargetIDFromTargetIDStream(StringRef TargetID); 210 211 /// \returns String representation of an object. 212 std::string toString() const; 213 }; 214 215 /// \returns Wavefront size for given subtarget \p STI. 216 unsigned getWavefrontSize(const MCSubtargetInfo *STI); 217 218 /// \returns Local memory size in bytes for given subtarget \p STI. 219 unsigned getLocalMemorySize(const MCSubtargetInfo *STI); 220 221 /// \returns Maximum addressable local memory size in bytes for given subtarget 222 /// \p STI. 223 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI); 224 225 /// \returns Number of execution units per compute unit for given subtarget \p 226 /// STI. 227 unsigned getEUsPerCU(const MCSubtargetInfo *STI); 228 229 /// \returns Maximum number of work groups per compute unit for given subtarget 230 /// \p STI and limited by given \p FlatWorkGroupSize. 231 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, 232 unsigned FlatWorkGroupSize); 233 234 /// \returns Minimum number of waves per execution unit for given subtarget \p 235 /// STI. 
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given \p
/// FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned
getVGPRAllocGranule(const MCSubtargetInfo *STI,
                    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(
    const MCSubtargetInfo *STI,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of architectural VGPRs for a given subtarget \p
/// STI.
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
/// subtarget \p STI.
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs);

/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
/// Overload for callers that already know the allocation parameters and do not
/// have an MCSubtargetInfo at hand.
unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                      unsigned MaxWaves,
                                      unsigned TotalNumVGPRs);

/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
/// Gen.
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
                                  AMDGPUSubtarget::Generation Gen);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used. We actually return the number of blocks -1, since
/// that's what we encode.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getEncodedNumVGPRBlocks(
    const MCSubtargetInfo *STI, unsigned NumVGPRs,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// \returns Number of VGPR blocks that need to be allocated for the given
/// subtarget \p STI when \p NumVGPRs are used.
unsigned getAllocatedNumVGPRBlocks(
    const MCSubtargetInfo *STI, unsigned NumVGPRs,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

} // end namespace IsaInfo

// Represents a field in an encoded value.
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");
  static constexpr unsigned Offset = LowBit;
  static constexpr unsigned Width = HighBit - LowBit + 1;

  using ValueType = unsigned;
  static constexpr ValueType Default = D;

  ValueType Value;
  constexpr EncodingField(ValueType Value) : Value(Value) {}

  constexpr uint64_t encode() const { return Value; }
  static ValueType decode(uint64_t Encoded) { return Encoded; }
};

// Represents a single bit in an encoded value.
template <unsigned Bit, unsigned D = 0>
using EncodingBit = EncodingField<Bit, Bit, D>;

// A helper for encoding and decoding multiple fields.
template <typename... Fields> struct EncodingFields {
  static constexpr uint64_t encode(Fields... Values) {
    // OR together each field's value shifted into its bit position.
    return ((Values.encode() << Values.Offset) | ...);
  }

  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    // Extract each field by shifting it down and masking to its width.
    return {Fields::decode((Encoded >> Fields::Offset) &
                           maxUIntN(Fields::Width))...};
  }
};

LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

// True if \p Opcode has an operand with the given named-operand index
// (i.e. getNamedOperandIdx does not return -1).
LLVM_READONLY
inline bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx) {
  return getNamedOperandIdx(Opcode, NamedIdx) != -1;
}

LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);

struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
  bool A16;
  bool NoReturn;
};

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool MSAA;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};

struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};

struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);

LLVM_READONLY
const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

LLVM_READONLY
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported);

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);
LLVM_READONLY
bool getMUBUFIsBufferInv(unsigned Opc);

LLVM_READONLY
bool getMUBUFTfe(unsigned Opc);

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
bool getVOP1IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP2IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP3IsSingle(unsigned Opc);

LLVM_READONLY
bool isVOPC64DPP(unsigned Opc);

LLVM_READONLY
bool isVOPCAsmOnly(unsigned Opc);

/// Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY
bool getMAIIsDGEMM(unsigned Opc);

LLVM_READONLY
bool getMAIIsGFX940XDL(unsigned Opc);

// Whether an opcode may appear as the X and/or Y component of a VOPD pair.
struct CanBeVOPD {
  bool X;
  bool Y;
};

/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
LLVM_READONLY
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);

LLVM_READONLY
CanBeVOPD getCanBeVOPD(unsigned Opc);

LLVM_READNONE
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);

LLVM_READONLY
const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
                                                      unsigned BLGP,
                                                      unsigned F8F8Opcode);

LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

LLVM_READONLY
unsigned getVOPDOpcode(unsigned Opc);

LLVM_READONLY
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily);

LLVM_READONLY
bool isVOPD(unsigned Opc);

LLVM_READNONE
bool isMAC(unsigned Opc);

LLVM_READNONE
bool isPermlane16(unsigned Opc);

LLVM_READNONE
bool isGenericAtomic(unsigned Opc);

LLVM_READNONE
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);

namespace VOPD {

// Component operand indices (DST/SRC0/...) and limits on their counts.
enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  DST_NUM = 1,
  MAX_SRC_NUM = 3,
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};

// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};

enum ComponentIndex : unsigned { X = 0, Y = 1 };
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
constexpr unsigned COMPONENTS_NUM = 2;

// Properties of VOPD components.
class ComponentProps {
private:
  unsigned SrcOperandsNum = 0;
  // ~0u means "no mandatory literal".
  unsigned MandatoryLiteralIdx = ~0u;
  bool HasSrc2Acc = false;

public:
  ComponentProps() = default;
  ComponentProps(const MCInstrDesc &OpDesc);

  // Return the total number of src operands this component has.
  unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }

  // Return the number of src operands of this component visible to the parser.
  unsigned getCompParsedSrcOperandsNum() const {
    return SrcOperandsNum - HasSrc2Acc;
  }

  // Return true iff this component has a mandatory literal.
  bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }

  // If this component has a mandatory literal, return component operand
  // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
  unsigned getMandatoryLiteralCompOperandIndex() const {
    assert(hasMandatoryLiteral());
    return MandatoryLiteralIdx;
  }

  // Return true iff this component has operand
  // with component index CompSrcIdx and this operand may be a register.
  bool hasRegSrcOperand(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
  }

  // Return true iff this component has tied src2.
  bool hasSrc2Acc() const { return HasSrc2Acc; }

private:
  bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
  }
};

enum ComponentKind : unsigned {
  SINGLE = 0,  // A single VOP1 or VOP2 instruction which may be used in VOPD.
  COMPONENT_X, // A VOPD instruction, X component.
  COMPONENT_Y, // A VOPD instruction, Y component.
  MAX = COMPONENT_Y
};

// Interface functions of this class map VOPD component operand indices
// to indices of operands in MachineInstr/MCInst or parsed operands array.
//
// Note that this class operates with 3 kinds of indices:
// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
// - MC operand indices (they refer operands in a MachineInstr/MCInst);
// - parsed operand indices (they refer operands in parsed operands array).
//
// For SINGLE components mapping between these indices is trivial.
// But things get more complicated for COMPONENT_X and
// COMPONENT_Y because these components share the same
// MachineInstr/MCInst and the same parsed operands array.
// Below is an example of component operand to parsed operand
// mapping for the following instruction:
//
//     v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
//
//                          PARSED        COMPONENT        PARSED
// COMPONENT               OPERANDS     OPERAND INDEX   OPERAND INDEX
// -------------------------------------------------------------------
//                     "v_dual_add_f32"                       0
// v_dual_add_f32            v255        0 (DST)    -->       1
//                           v4          1 (SRC0)   -->       2
//                           v5          2 (SRC1)   -->       3
//                          "::"                              4
//                     "v_dual_mov_b32"                       5
// v_dual_mov_b32            v6          0 (DST)    -->       6
//                           v1          1 (SRC0)   -->       7
// -------------------------------------------------------------------
//
class ComponentLayout {
private:
  // Regular MachineInstr/MCInst operands are ordered as follows:
  //   dst, src0 [, other src operands]
  // VOPD MachineInstr/MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};
  static constexpr unsigned FIRST_MC_SRC_IDX[] = {1, 2, 2 /* + OpX.MCSrcNum */};

  // Parsed operands of regular instructions are ordered as follows:
  //   Mnemo dst src0 [vsrc1 ...]
  // Parsed VOPD operands are ordered as follows:
  //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
  //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
                                                4 /* + OpX.ParsedSrcNum */};
  static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
      2, 2, 5 /* + OpX.ParsedSrcNum */};

private:
  const ComponentKind Kind;
  const ComponentProps PrevComp;

public:
  // Create layout for COMPONENT_X or SINGLE component.
  ComponentLayout(ComponentKind Kind) : Kind(Kind) {
    assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
  }

  // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentLayout(const ComponentProps &OpXProps)
      : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps) {}

public:
  // Return the index of dst operand in MCInst operands.
  unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }

  // Return the index of the specified src operand in MCInst operands.
  unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx;
  }

  // Return the index of dst operand in the parsed operands array.
  unsigned getIndexOfDstInParsedOperands() const {
    return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
  }

  // Return the index of the specified src operand in the parsed operands array.
  unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
  }

private:
  // Number of src operands of the preceding (X) component; zero unless this
  // layout was created for COMPONENT_Y.
  unsigned getPrevCompSrcNum() const {
    return PrevComp.getCompSrcOperandsNum();
  }
  unsigned getPrevCompParsedSrcNum() const {
    return PrevComp.getCompParsedSrcOperandsNum();
  }
};

// Layout and properties of VOPD components.
class ComponentInfo : public ComponentLayout, public ComponentProps {
public:
  // Create ComponentInfo for COMPONENT_X or SINGLE component.
  ComponentInfo(const MCInstrDesc &OpDesc,
                ComponentKind Kind = ComponentKind::SINGLE)
      : ComponentLayout(Kind), ComponentProps(OpDesc) {}

  // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps)
      : ComponentLayout(OpXProps), ComponentProps(OpDesc) {}

  // Map component operand index to parsed operand index.
  // Return 0 if the specified operand does not exist.
  unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
};

// Properties of VOPD instructions.
class InstInfo {
private:
  const ComponentInfo CompInfo[COMPONENTS_NUM];

public:
  using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>;

  InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
      : CompInfo{OpX, OpY} {}

  InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
      : CompInfo{OprInfoX, OprInfoY} {}

  const ComponentInfo &operator[](size_t ComponentIdx) const {
    assert(ComponentIdx < COMPONENTS_NUM);
    return CompInfo[ComponentIdx];
  }

  // Check VOPD operands constraints.
  // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
  // for the specified component and MC operand. The callback must return 0
  // if the operand is not a register or not a VGPR.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked.
  bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx,
                         bool SkipSrc = false) const {
    return getInvalidCompOperandIndex(GetRegIdx, SkipSrc).has_value();
  }

  // Check VOPD operands constraints.
  // Return the index of an invalid component operand, if any.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked.
  std::optional<unsigned> getInvalidCompOperandIndex(
      std::function<unsigned(unsigned, unsigned)> GetRegIdx,
      bool SkipSrc = false) const;

private:
  RegIndices
  getRegIndices(unsigned ComponentIdx,
                std::function<unsigned(unsigned, unsigned)> GetRegIdx) const;
};

} // namespace VOPD

LLVM_READONLY
std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);

LLVM_READONLY
// Get properties of 2 single VOP1/VOP2 instructions
// used as components to create a VOPD instruction.
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);

LLVM_READONLY
// Get properties of VOPD X and Y components.
VOPD::InstInfo
getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo);

LLVM_READONLY
bool isTrue16Inst(unsigned Opc);

LLVM_READONLY
FPType getFPDstSelType(unsigned Opc);

LLVM_READONLY
bool isInvalidSingleUseConsumerInst(unsigned Opc);

LLVM_READONLY
bool isInvalidSingleUseProducerInst(unsigned Opc);

bool isDPMACCInstruction(unsigned Opc);

LLVM_READONLY
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);

LLVM_READONLY
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);

void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
                               const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired = false);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p std::nullopt if attribute is not present.
///
/// \returns \p std::nullopt and emits error if one of the requested values
/// cannot be converted to integer, or \p OnlyFirstRequired is false and
/// "second" value is not present.
std::optional<std::pair<unsigned, std::optional<unsigned>>>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        bool OnlyFirstRequired = false);

/// \returns Generate a vector of integer values requested using \p F's \p Name
/// attribute.
///
/// \returns true if exactly Size (>2) number of integers are found in the
/// attribute.
///
/// \returns false if any error occurs.
SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
                                             unsigned Size,
                                             unsigned DefaultVal = 0);

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
967 struct Waitcnt { 968 unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12. 969 unsigned ExpCnt = ~0u; 970 unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12. 971 unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11. 972 unsigned SampleCnt = ~0u; // gfx12+ only. 973 unsigned BvhCnt = ~0u; // gfx12+ only. 974 unsigned KmCnt = ~0u; // gfx12+ only. 975 976 Waitcnt() = default; 977 // Pre-gfx12 constructor. 978 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt) 979 : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt), 980 SampleCnt(~0u), BvhCnt(~0u), KmCnt(~0u) {} 981 982 // gfx12+ constructor. 983 Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt, 984 unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt) 985 : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt), 986 SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt) {} 987 988 bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); } 989 990 bool hasWaitExceptStoreCnt() const { 991 return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u || 992 SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u; 993 } 994 995 bool hasWaitStoreCnt() const { return StoreCnt != ~0u; } 996 997 Waitcnt combined(const Waitcnt &Other) const { 998 // Does the right thing provided self and Other are either both pre-gfx12 999 // or both gfx12+. 1000 return Waitcnt( 1001 std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt), 1002 std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt), 1003 std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt), 1004 std::min(KmCnt, Other.KmCnt)); 1005 } 1006 }; 1007 1008 // The following methods are only meaningful on targets that support 1009 // S_WAITCNT. 1010 1011 /// \returns Vmcnt bit mask for given isa \p Version. 1012 unsigned getVmcntBitMask(const IsaVersion &Version); 1013 1014 /// \returns Expcnt bit mask for given isa \p Version. 
1015 unsigned getExpcntBitMask(const IsaVersion &Version); 1016 1017 /// \returns Lgkmcnt bit mask for given isa \p Version. 1018 unsigned getLgkmcntBitMask(const IsaVersion &Version); 1019 1020 /// \returns Waitcnt bit mask for given isa \p Version. 1021 unsigned getWaitcntBitMask(const IsaVersion &Version); 1022 1023 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. 1024 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt); 1025 1026 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. 1027 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt); 1028 1029 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. 1030 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt); 1031 1032 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa 1033 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and 1034 /// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction 1035 /// which needs it is deprecated 1036 /// 1037 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows: 1038 /// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9) 1039 /// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10) 1040 /// \p Vmcnt = \p Waitcnt[15:10] (gfx11) 1041 /// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11) 1042 /// \p Expcnt = \p Waitcnt[2:0] (gfx11) 1043 /// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10) 1044 /// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10) 1045 /// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11) 1046 /// 1047 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, 1048 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt); 1049 1050 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded); 1051 1052 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. 
1053 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, 1054 unsigned Vmcnt); 1055 1056 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. 1057 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, 1058 unsigned Expcnt); 1059 1060 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. 1061 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, 1062 unsigned Lgkmcnt); 1063 1064 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa 1065 /// \p Version. Should not be used on gfx12+, the instruction which needs 1066 /// it is deprecated 1067 /// 1068 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows: 1069 /// Waitcnt[2:0] = \p Expcnt (gfx11+) 1070 /// Waitcnt[3:0] = \p Vmcnt (pre-gfx9) 1071 /// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10) 1072 /// Waitcnt[6:4] = \p Expcnt (pre-gfx11) 1073 /// Waitcnt[9:4] = \p Lgkmcnt (gfx11) 1074 /// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10) 1075 /// Waitcnt[13:8] = \p Lgkmcnt (gfx10) 1076 /// Waitcnt[15:10] = \p Vmcnt (gfx11) 1077 /// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10) 1078 /// 1079 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given 1080 /// isa \p Version. 1081 /// 1082 unsigned encodeWaitcnt(const IsaVersion &Version, 1083 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt); 1084 1085 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded); 1086 1087 // The following methods are only meaningful on targets that support 1088 // S_WAIT_*CNT, introduced with gfx12. 1089 1090 /// \returns Loadcnt bit mask for given isa \p Version. 1091 /// Returns 0 for versions that do not support LOADcnt 1092 unsigned getLoadcntBitMask(const IsaVersion &Version); 1093 1094 /// \returns Samplecnt bit mask for given isa \p Version. 
1095 /// Returns 0 for versions that do not support SAMPLEcnt 1096 unsigned getSamplecntBitMask(const IsaVersion &Version); 1097 1098 /// \returns Bvhcnt bit mask for given isa \p Version. 1099 /// Returns 0 for versions that do not support BVHcnt 1100 unsigned getBvhcntBitMask(const IsaVersion &Version); 1101 1102 /// \returns Dscnt bit mask for given isa \p Version. 1103 /// Returns 0 for versions that do not support DScnt 1104 unsigned getDscntBitMask(const IsaVersion &Version); 1105 1106 /// \returns Dscnt bit mask for given isa \p Version. 1107 /// Returns 0 for versions that do not support KMcnt 1108 unsigned getKmcntBitMask(const IsaVersion &Version); 1109 1110 /// \return STOREcnt or VScnt bit mask for given isa \p Version. 1111 /// returns 0 for versions that do not support STOREcnt or VScnt. 1112 /// STOREcnt and VScnt are the same counter, the name used 1113 /// depends on the ISA version. 1114 unsigned getStorecntBitMask(const IsaVersion &Version); 1115 1116 // The following are only meaningful on targets that support 1117 // S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT. 1118 1119 /// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given 1120 /// isa \p Version. 1121 Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt); 1122 1123 /// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given 1124 /// isa \p Version. 1125 Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt); 1126 1127 /// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an 1128 /// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa 1129 /// \p Version. 1130 unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded); 1131 1132 /// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an 1133 /// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa 1134 /// \p Version. 
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);

namespace Hwreg {

// Bit fields of the s_getreg/s_setreg hwreg immediate operand, expressed via
// the EncodingField helper (template arguments appear to be the field's bit
// range and an optional default — confirm against EncodingField's definition).
using HwregId = EncodingField<5, 0>;
using HwregOffset = EncodingField<10, 6>;

// Size field of the hwreg operand. The hardware encoding is biased by one:
// an encoded value of 0 denotes a size of 1, so encode() subtracts 1 and
// decode() adds it back. The default (decoded) size is 32.
struct HwregSize : EncodingField<15, 11, 32> {
  using EncodingField::EncodingField;
  constexpr uint64_t encode() const { return Value - 1; }
  static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
};

// Full hwreg operand: id, offset and size packed together.
using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;

} // namespace Hwreg

namespace DepCtr {

// Helpers for the s_depctr instruction's immediate operand.

/// \returns the default depctr encoding for subtarget \p STI.
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);

/// Encodes named field \p Name with value \p Val into a depctr immediate,
/// recording the field's bits in \p UsedOprMask.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI);

/// \returns true if \p Code corresponds to a symbolic depctr encoding;
/// \p HasNonDefaultVal is set if any field differs from its default.
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI);

/// Decodes one field of depctr immediate \p Code into \p Id, \p Name and
/// \p Val; \p IsDefault reports whether the field has its default value.
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI);

/// \returns Decoded VaVdst from given immediate \p Encoded.
unsigned decodeFieldVaVdst(unsigned Encoded);

/// \returns Decoded VmVsrc from given immediate \p Encoded.
unsigned decodeFieldVmVsrc(unsigned Encoded);

/// \returns Decoded SaSdst from given immediate \p Encoded.
unsigned decodeFieldSaSdst(unsigned Encoded);

/// \returns \p VmVsrc as an encoded Depctr immediate.
unsigned encodeFieldVmVsrc(unsigned VmVsrc);

/// \returns \p Encoded combined with encoded \p VmVsrc.
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);

/// \returns \p VaVdst as an encoded Depctr immediate.
unsigned encodeFieldVaVdst(unsigned VaVdst);

/// \returns \p Encoded combined with encoded \p VaVdst.
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);

/// \returns \p SaSdst as an encoded Depctr immediate.
1184 unsigned encodeFieldSaSdst(unsigned SaSdst); 1185 1186 /// \returns \p Encoded combined with encoded \p SaSdst. 1187 unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst); 1188 1189 } // namespace DepCtr 1190 1191 namespace Exp { 1192 1193 bool getTgtName(unsigned Id, StringRef &Name, int &Index); 1194 1195 LLVM_READONLY 1196 unsigned getTgtId(const StringRef Name); 1197 1198 LLVM_READNONE 1199 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI); 1200 1201 } // namespace Exp 1202 1203 namespace MTBUFFormat { 1204 1205 LLVM_READNONE 1206 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt); 1207 1208 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt); 1209 1210 int64_t getDfmt(const StringRef Name); 1211 1212 StringRef getDfmtName(unsigned Id); 1213 1214 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI); 1215 1216 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI); 1217 1218 bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI); 1219 1220 bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI); 1221 1222 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI); 1223 1224 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI); 1225 1226 bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI); 1227 1228 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, 1229 const MCSubtargetInfo &STI); 1230 1231 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI); 1232 1233 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI); 1234 1235 } // namespace MTBUFFormat 1236 1237 namespace SendMsg { 1238 1239 LLVM_READNONE 1240 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI); 1241 1242 LLVM_READNONE 1243 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, 1244 bool Strict = true); 1245 1246 LLVM_READNONE 1247 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, 1248 const 
MCSubtargetInfo &STI, bool Strict = true); 1249 1250 LLVM_READNONE 1251 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI); 1252 1253 LLVM_READNONE 1254 bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI); 1255 1256 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, 1257 uint16_t &StreamId, const MCSubtargetInfo &STI); 1258 1259 LLVM_READNONE 1260 uint64_t encodeMsg(uint64_t MsgId, 1261 uint64_t OpId, 1262 uint64_t StreamId); 1263 1264 } // namespace SendMsg 1265 1266 1267 unsigned getInitialPSInputAddr(const Function &F); 1268 1269 bool getHasColorExport(const Function &F); 1270 1271 bool getHasDepthExport(const Function &F); 1272 1273 LLVM_READNONE 1274 bool isShader(CallingConv::ID CC); 1275 1276 LLVM_READNONE 1277 bool isGraphics(CallingConv::ID CC); 1278 1279 LLVM_READNONE 1280 bool isCompute(CallingConv::ID CC); 1281 1282 LLVM_READNONE 1283 bool isEntryFunctionCC(CallingConv::ID CC); 1284 1285 // These functions are considered entrypoints into the current module, i.e. they 1286 // are allowed to be called from outside the current module. This is different 1287 // from isEntryFunctionCC, which is only true for functions that are entered by 1288 // the hardware. Module entry points include all entry functions but also 1289 // include functions that can be called from other functions inside or outside 1290 // the current module. Module entry functions are allowed to allocate LDS. 
1291 LLVM_READNONE 1292 bool isModuleEntryFunctionCC(CallingConv::ID CC); 1293 1294 LLVM_READNONE 1295 bool isChainCC(CallingConv::ID CC); 1296 1297 bool isKernelCC(const Function *Func); 1298 1299 // FIXME: Remove this when calling conventions cleaned up 1300 LLVM_READNONE 1301 inline bool isKernel(CallingConv::ID CC) { 1302 switch (CC) { 1303 case CallingConv::AMDGPU_KERNEL: 1304 case CallingConv::SPIR_KERNEL: 1305 return true; 1306 default: 1307 return false; 1308 } 1309 } 1310 1311 bool hasXNACK(const MCSubtargetInfo &STI); 1312 bool hasSRAMECC(const MCSubtargetInfo &STI); 1313 bool hasMIMG_R128(const MCSubtargetInfo &STI); 1314 bool hasA16(const MCSubtargetInfo &STI); 1315 bool hasG16(const MCSubtargetInfo &STI); 1316 bool hasPackedD16(const MCSubtargetInfo &STI); 1317 bool hasGDS(const MCSubtargetInfo &STI); 1318 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false); 1319 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI); 1320 1321 bool isSI(const MCSubtargetInfo &STI); 1322 bool isCI(const MCSubtargetInfo &STI); 1323 bool isVI(const MCSubtargetInfo &STI); 1324 bool isGFX9(const MCSubtargetInfo &STI); 1325 bool isGFX9_GFX10(const MCSubtargetInfo &STI); 1326 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI); 1327 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI); 1328 bool isGFX8Plus(const MCSubtargetInfo &STI); 1329 bool isGFX9Plus(const MCSubtargetInfo &STI); 1330 bool isNotGFX9Plus(const MCSubtargetInfo &STI); 1331 bool isGFX10(const MCSubtargetInfo &STI); 1332 bool isGFX10_GFX11(const MCSubtargetInfo &STI); 1333 bool isGFX10Plus(const MCSubtargetInfo &STI); 1334 bool isNotGFX10Plus(const MCSubtargetInfo &STI); 1335 bool isGFX10Before1030(const MCSubtargetInfo &STI); 1336 bool isGFX11(const MCSubtargetInfo &STI); 1337 bool isGFX11Plus(const MCSubtargetInfo &STI); 1338 bool isGFX12(const MCSubtargetInfo &STI); 1339 bool isGFX12Plus(const MCSubtargetInfo &STI); 1340 bool isNotGFX12Plus(const MCSubtargetInfo &STI); 1341 bool 
isNotGFX11Plus(const MCSubtargetInfo &STI); 1342 bool isGCN3Encoding(const MCSubtargetInfo &STI); 1343 bool isGFX10_AEncoding(const MCSubtargetInfo &STI); 1344 bool isGFX10_BEncoding(const MCSubtargetInfo &STI); 1345 bool hasGFX10_3Insts(const MCSubtargetInfo &STI); 1346 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI); 1347 bool isGFX90A(const MCSubtargetInfo &STI); 1348 bool isGFX940(const MCSubtargetInfo &STI); 1349 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI); 1350 bool hasMAIInsts(const MCSubtargetInfo &STI); 1351 bool hasVOPD(const MCSubtargetInfo &STI); 1352 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI); 1353 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR); 1354 unsigned hasKernargPreload(const MCSubtargetInfo &STI); 1355 bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST); 1356 1357 /// Is Reg - scalar register 1358 bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI); 1359 1360 /// \returns if \p Reg occupies the high 16-bits of a 32-bit register. 1361 bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI); 1362 1363 /// If \p Reg is a pseudo reg, return the correct hardware register given 1364 /// \p STI otherwise return \p Reg. 1365 MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI); 1366 1367 /// Convert hardware register \p Reg to a pseudo register 1368 LLVM_READNONE 1369 MCRegister mc2PseudoReg(MCRegister Reg); 1370 1371 LLVM_READNONE 1372 bool isInlineValue(unsigned Reg); 1373 1374 /// Is this an AMDGPU specific source operand? These include registers, 1375 /// inline constants, literals and mandatory literals (KImm). 1376 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo); 1377 1378 /// Is this a KImm operand? 1379 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo); 1380 1381 /// Is this floating-point operand? 1382 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo); 1383 1384 /// Does this operand support only inlinable literals? 
1385 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo); 1386 1387 /// Get the size in bits of a register from the register class \p RC. 1388 unsigned getRegBitWidth(unsigned RCID); 1389 1390 /// Get the size in bits of a register from the register class \p RC. 1391 unsigned getRegBitWidth(const MCRegisterClass &RC); 1392 1393 /// Get size of register operand 1394 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, 1395 unsigned OpNo); 1396 1397 LLVM_READNONE 1398 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) { 1399 switch (OpInfo.OperandType) { 1400 case AMDGPU::OPERAND_REG_IMM_INT32: 1401 case AMDGPU::OPERAND_REG_IMM_FP32: 1402 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1403 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1404 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1405 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1406 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1407 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1408 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1409 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1410 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1411 case AMDGPU::OPERAND_KIMM32: 1412 case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4 1413 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 1414 return 4; 1415 1416 case AMDGPU::OPERAND_REG_IMM_INT64: 1417 case AMDGPU::OPERAND_REG_IMM_FP64: 1418 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1419 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1420 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1421 return 8; 1422 1423 case AMDGPU::OPERAND_REG_IMM_INT16: 1424 case AMDGPU::OPERAND_REG_IMM_BF16: 1425 case AMDGPU::OPERAND_REG_IMM_FP16: 1426 case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED: 1427 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1428 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1429 case AMDGPU::OPERAND_REG_INLINE_C_BF16: 1430 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1431 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1432 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: 1433 case 
AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1434 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1435 case AMDGPU::OPERAND_REG_INLINE_AC_BF16: 1436 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1437 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1438 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: 1439 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1440 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1441 case AMDGPU::OPERAND_REG_IMM_V2BF16: 1442 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1443 return 2; 1444 1445 default: 1446 llvm_unreachable("unhandled operand type"); 1447 } 1448 } 1449 1450 LLVM_READNONE 1451 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) { 1452 return getOperandSize(Desc.operands()[OpNo]); 1453 } 1454 1455 /// Is this literal inlinable, and not one of the values intended for floating 1456 /// point values. 1457 LLVM_READNONE 1458 inline bool isInlinableIntLiteral(int64_t Literal) { 1459 return Literal >= -16 && Literal <= 64; 1460 } 1461 1462 /// Is this literal inlinable 1463 LLVM_READNONE 1464 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi); 1465 1466 LLVM_READNONE 1467 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi); 1468 1469 LLVM_READNONE 1470 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi); 1471 1472 LLVM_READNONE 1473 bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi); 1474 1475 LLVM_READNONE 1476 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi); 1477 1478 LLVM_READNONE 1479 bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi); 1480 1481 LLVM_READNONE 1482 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal); 1483 1484 LLVM_READNONE 1485 std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal); 1486 1487 LLVM_READNONE 1488 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal); 1489 1490 LLVM_READNONE 1491 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType); 1492 1493 LLVM_READNONE 1494 bool isInlinableLiteralV2I16(uint32_t Literal); 1495 1496 
LLVM_READNONE 1497 bool isInlinableLiteralV2BF16(uint32_t Literal); 1498 1499 LLVM_READNONE 1500 bool isInlinableLiteralV2F16(uint32_t Literal); 1501 1502 LLVM_READNONE 1503 bool isValid32BitLiteral(uint64_t Val, bool IsFP64); 1504 1505 bool isArgPassedInSGPR(const Argument *Arg); 1506 1507 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo); 1508 1509 LLVM_READONLY 1510 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, 1511 int64_t EncodedOffset); 1512 1513 LLVM_READONLY 1514 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, 1515 int64_t EncodedOffset, 1516 bool IsBuffer); 1517 1518 /// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate 1519 /// offsets. 1520 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset); 1521 1522 /// \returns The encoding that will be used for \p ByteOffset in the 1523 /// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10 1524 /// S_LOAD instructions have a signed offset, on other subtargets it is 1525 /// unsigned. S_BUFFER has an unsigned offset for all subtargets. 1526 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, 1527 int64_t ByteOffset, bool IsBuffer, 1528 bool HasSOffset = false); 1529 1530 /// \return The encoding that can be used for a 32-bit literal offset in an SMRD 1531 /// instruction. This is only useful on CI.s 1532 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, 1533 int64_t ByteOffset); 1534 1535 /// For pre-GFX12 FLAT instructions the offset must be positive; 1536 /// MSB is ignored and forced to zero. 1537 /// 1538 /// \return The number of bits available for the signed offset field in flat 1539 /// instructions. Note that some forms of the instruction disallow negative 1540 /// offsets. 1541 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST); 1542 1543 /// \returns true if this offset is small enough to fit in the SMRD 1544 /// offset field. 
\p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

/// \returns true if DPP control value \p DC lies in the ROW_NEWBCAST range,
/// i.e. is a control that a DP ALU DPP instruction may legally use.
LLVM_READNONE
inline bool isLegalDPALU_DPPControl(unsigned DC) {
  return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
}

/// \returns true if an instruction may have a 64-bit VGPR operand.
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);

/// \returns true if an instruction is a DP ALU DPP.
bool isDPALU_DPP(const MCInstrDesc &OpDesc);

/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

/// \returns true if the intrinsic is uniform
bool isIntrinsicAlwaysUniform(unsigned IntrID);

/// \returns lds block size in terms of dwords.
/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
/// must be defined in terms of bytes.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);

} // end namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H