1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPUBaseInfo.h" 10 #include "AMDGPU.h" 11 #include "AMDGPUAsmUtils.h" 12 #include "AMDKernelCodeT.h" 13 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 14 #include "Utils/AMDKernelCodeTUtils.h" 15 #include "llvm/ADT/StringExtras.h" 16 #include "llvm/BinaryFormat/ELF.h" 17 #include "llvm/IR/Attributes.h" 18 #include "llvm/IR/Constants.h" 19 #include "llvm/IR/Function.h" 20 #include "llvm/IR/GlobalValue.h" 21 #include "llvm/IR/IntrinsicsAMDGPU.h" 22 #include "llvm/IR/IntrinsicsR600.h" 23 #include "llvm/IR/LLVMContext.h" 24 #include "llvm/MC/MCInstrInfo.h" 25 #include "llvm/MC/MCRegisterInfo.h" 26 #include "llvm/MC/MCSubtargetInfo.h" 27 #include "llvm/Support/CommandLine.h" 28 #include "llvm/TargetParser/TargetParser.h" 29 #include <optional> 30 31 #define GET_INSTRINFO_NAMED_OPS 32 #define GET_INSTRMAP_INFO 33 #include "AMDGPUGenInstrInfo.inc" 34 35 static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion( 36 "amdhsa-code-object-version", llvm::cl::Hidden, 37 llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5), 38 llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " 39 "or asm directive still take priority if present)")); 40 41 namespace { 42 43 /// \returns Bit mask for given bit \p Shift and bit \p Width. 44 unsigned getBitMask(unsigned Shift, unsigned Width) { 45 return ((1 << Width) - 1) << Shift; 46 } 47 48 /// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width. 49 /// 50 /// \returns Packed \p Dst. 51 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) { 52 unsigned Mask = getBitMask(Shift, Width); 53 return ((Src << Shift) & Mask) | (Dst & ~Mask); 54 } 55 56 /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width. 57 /// 58 /// \returns Unpacked bits. 59 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) { 60 return (Src & getBitMask(Shift, Width)) >> Shift; 61 } 62 63 /// \returns Vmcnt bit shift (lower bits). 64 unsigned getVmcntBitShiftLo(unsigned VersionMajor) { 65 return VersionMajor >= 11 ? 10 : 0; 66 } 67 68 /// \returns Vmcnt bit width (lower bits). 69 unsigned getVmcntBitWidthLo(unsigned VersionMajor) { 70 return VersionMajor >= 11 ? 6 : 4; 71 } 72 73 /// \returns Expcnt bit shift. 74 unsigned getExpcntBitShift(unsigned VersionMajor) { 75 return VersionMajor >= 11 ? 0 : 4; 76 } 77 78 /// \returns Expcnt bit width. 79 unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; } 80 81 /// \returns Lgkmcnt bit shift. 82 unsigned getLgkmcntBitShift(unsigned VersionMajor) { 83 return VersionMajor >= 11 ? 4 : 8; 84 } 85 86 /// \returns Lgkmcnt bit width. 87 unsigned getLgkmcntBitWidth(unsigned VersionMajor) { 88 return VersionMajor >= 10 ? 6 : 4; 89 } 90 91 /// \returns Vmcnt bit shift (higher bits). 92 unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; } 93 94 /// \returns Vmcnt bit width (higher bits). 95 unsigned getVmcntBitWidthHi(unsigned VersionMajor) { 96 return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0; 97 } 98 99 /// \returns Loadcnt bit width 100 unsigned getLoadcntBitWidth(unsigned VersionMajor) { 101 return VersionMajor >= 12 ? 6 : 0; 102 } 103 104 /// \returns Samplecnt bit width. 105 unsigned getSamplecntBitWidth(unsigned VersionMajor) { 106 return VersionMajor >= 12 ? 6 : 0; 107 } 108 109 /// \returns Bvhcnt bit width. 110 unsigned getBvhcntBitWidth(unsigned VersionMajor) { 111 return VersionMajor >= 12 ? 3 : 0; 112 } 113 114 /// \returns Dscnt bit width. 115 unsigned getDscntBitWidth(unsigned VersionMajor) { 116 return VersionMajor >= 12 ? 6 : 0; 117 } 118 119 /// \returns Dscnt bit shift in combined S_WAIT instructions. 120 unsigned getDscntBitShift(unsigned VersionMajor) { return 0; } 121 122 /// \returns Storecnt or Vscnt bit width, depending on VersionMajor. 123 unsigned getStorecntBitWidth(unsigned VersionMajor) { 124 return VersionMajor >= 10 ? 6 : 0; 125 } 126 127 /// \returns Kmcnt bit width. 128 unsigned getKmcntBitWidth(unsigned VersionMajor) { 129 return VersionMajor >= 12 ? 5 : 0; 130 } 131 132 /// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions. 133 unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) { 134 return VersionMajor >= 12 ? 8 : 0; 135 } 136 137 /// \returns VmVsrc bit width 138 inline unsigned getVmVsrcBitWidth() { return 3; } 139 140 /// \returns VmVsrc bit shift 141 inline unsigned getVmVsrcBitShift() { return 2; } 142 143 /// \returns VaVdst bit width 144 inline unsigned getVaVdstBitWidth() { return 4; } 145 146 /// \returns VaVdst bit shift 147 inline unsigned getVaVdstBitShift() { return 12; } 148 149 /// \returns SaSdst bit width 150 inline unsigned getSaSdstBitWidth() { return 1; } 151 152 /// \returns SaSdst bit shift 153 inline unsigned getSaSdstBitShift() { return 0; } 154 155 } // end anonymous namespace 156 157 namespace llvm { 158 159 namespace AMDGPU { 160 161 /// \returns true if the target supports signed immediate offset for SMRD 162 /// instructions. 163 bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) { 164 return isGFX9Plus(ST); 165 } 166 167 /// \returns True if \p STI is AMDHSA. 168 bool isHsaAbi(const MCSubtargetInfo &STI) { 169 return STI.getTargetTriple().getOS() == Triple::AMDHSA; 170 } 171 172 unsigned getAMDHSACodeObjectVersion(const Module &M) { 173 if (auto *Ver = mdconst::extract_or_null<ConstantInt>( 174 M.getModuleFlag("amdhsa_code_object_version"))) { 175 return (unsigned)Ver->getZExtValue() / 100; 176 } 177 178 return getDefaultAMDHSACodeObjectVersion(); 179 } 180 181 unsigned getDefaultAMDHSACodeObjectVersion() { 182 return DefaultAMDHSACodeObjectVersion; 183 } 184 185 unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) { 186 switch (ABIVersion) { 187 case ELF::ELFABIVERSION_AMDGPU_HSA_V4: 188 return 4; 189 case ELF::ELFABIVERSION_AMDGPU_HSA_V5: 190 return 5; 191 case ELF::ELFABIVERSION_AMDGPU_HSA_V6: 192 return 6; 193 default: 194 return getDefaultAMDHSACodeObjectVersion(); 195 } 196 } 197 198 uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) { 199 if (T.getOS() != Triple::AMDHSA) 200 return 0; 201 202 switch (CodeObjectVersion) { 203 case 4: 204 return ELF::ELFABIVERSION_AMDGPU_HSA_V4; 205 case 5: 206 return ELF::ELFABIVERSION_AMDGPU_HSA_V5; 207 case 6: 208 return ELF::ELFABIVERSION_AMDGPU_HSA_V6; 209 default: 210 report_fatal_error("Unsupported AMDHSA Code Object Version " + 211 Twine(CodeObjectVersion)); 212 } 213 } 214 215 unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) { 216 switch (CodeObjectVersion) { 217 case AMDHSA_COV4: 218 return 48; 219 case AMDHSA_COV5: 220 case AMDHSA_COV6: 221 default: 222 return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET; 223 } 224 } 225 226 227 // FIXME: All such magic numbers about the ABI should be in a 228 // central TD file. 229 unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) { 230 switch (CodeObjectVersion) { 231 case AMDHSA_COV4: 232 return 24; 233 case AMDHSA_COV5: 234 case AMDHSA_COV6: 235 default: 236 return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET; 237 } 238 } 239 240 unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) { 241 switch (CodeObjectVersion) { 242 case AMDHSA_COV4: 243 return 32; 244 case AMDHSA_COV5: 245 case AMDHSA_COV6: 246 default: 247 return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET; 248 } 249 } 250 251 unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) { 252 switch (CodeObjectVersion) { 253 case AMDHSA_COV4: 254 return 40; 255 case AMDHSA_COV5: 256 case AMDHSA_COV6: 257 default: 258 return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET; 259 } 260 } 261 262 #define GET_MIMGBaseOpcodesTable_IMPL 263 #define GET_MIMGDimInfoTable_IMPL 264 #define GET_MIMGInfoTable_IMPL 265 #define GET_MIMGLZMappingTable_IMPL 266 #define GET_MIMGMIPMappingTable_IMPL 267 #define GET_MIMGBiasMappingTable_IMPL 268 #define GET_MIMGOffsetMappingTable_IMPL 269 #define GET_MIMGG16MappingTable_IMPL 270 #define GET_MAIInstInfoTable_IMPL 271 #include "AMDGPUGenSearchableTables.inc" 272 273 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, 274 unsigned VDataDwords, unsigned VAddrDwords) { 275 const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, 276 VDataDwords, VAddrDwords); 277 return Info ? Info->Opcode : -1; 278 } 279 280 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) { 281 const MIMGInfo *Info = getMIMGInfo(Opc); 282 return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr; 283 } 284 285 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) { 286 const MIMGInfo *OrigInfo = getMIMGInfo(Opc); 287 const MIMGInfo *NewInfo = 288 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding, 289 NewChannels, OrigInfo->VAddrDwords); 290 return NewInfo ? NewInfo->Opcode : -1; 291 } 292 293 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, 294 const MIMGDimInfo *Dim, bool IsA16, 295 bool IsG16Supported) { 296 unsigned AddrWords = BaseOpcode->NumExtraArgs; 297 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) + 298 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 299 if (IsA16) 300 AddrWords += divideCeil(AddrComponents, 2); 301 else 302 AddrWords += AddrComponents; 303 304 // Note: For subtargets that support A16 but not G16, enabling A16 also 305 // enables 16 bit gradients. 306 // For subtargets that support A16 (operand) and G16 (done with a different 307 // instruction encoding), they are independent. 308 309 if (BaseOpcode->Gradients) { 310 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16) 311 // There are two gradients per coordinate, we pack them separately. 312 // For the 3d case, 313 // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv) 314 AddrWords += alignTo<2>(Dim->NumGradients / 2); 315 else 316 AddrWords += Dim->NumGradients; 317 } 318 return AddrWords; 319 } 320 321 struct MUBUFInfo { 322 uint16_t Opcode; 323 uint16_t BaseOpcode; 324 uint8_t elements; 325 bool has_vaddr; 326 bool has_srsrc; 327 bool has_soffset; 328 bool IsBufferInv; 329 bool tfe; 330 }; 331 332 struct MTBUFInfo { 333 uint16_t Opcode; 334 uint16_t BaseOpcode; 335 uint8_t elements; 336 bool has_vaddr; 337 bool has_srsrc; 338 bool has_soffset; 339 }; 340 341 struct SMInfo { 342 uint16_t Opcode; 343 bool IsBuffer; 344 }; 345 346 struct VOPInfo { 347 uint16_t Opcode; 348 bool IsSingle; 349 }; 350 351 struct VOPC64DPPInfo { 352 uint16_t Opcode; 353 }; 354 355 struct VOPCDPPAsmOnlyInfo { 356 uint16_t Opcode; 357 }; 358 359 struct VOP3CDPPAsmOnlyInfo { 360 uint16_t Opcode; 361 }; 362 363 struct VOPDComponentInfo { 364 uint16_t BaseVOP; 365 uint16_t VOPDOp; 366 bool CanBeVOPDX; 367 }; 368 369 struct VOPDInfo { 370 uint16_t Opcode; 371 uint16_t OpX; 372 uint16_t OpY; 373 uint16_t Subtarget; 374 }; 375 376 struct VOPTrue16Info { 377 uint16_t Opcode; 378 bool IsTrue16; 379 }; 380 381 #define GET_FP4FP8DstByteSelTable_DECL 382 #define GET_FP4FP8DstByteSelTable_IMPL 383 384 struct DPMACCInstructionInfo { 385 uint16_t Opcode; 386 bool IsDPMACCInstruction; 387 }; 388 389 struct FP4FP8DstByteSelInfo { 390 uint16_t Opcode; 391 bool HasFP8DstByteSel; 392 bool HasFP4DstByteSel; 393 }; 394 395 #define GET_MTBUFInfoTable_DECL 396 #define GET_MTBUFInfoTable_IMPL 397 #define GET_MUBUFInfoTable_DECL 398 #define GET_MUBUFInfoTable_IMPL 399 #define GET_SMInfoTable_DECL 400 #define GET_SMInfoTable_IMPL 401 #define GET_VOP1InfoTable_DECL 402 #define GET_VOP1InfoTable_IMPL 403 #define GET_VOP2InfoTable_DECL 404 #define GET_VOP2InfoTable_IMPL 405 #define GET_VOP3InfoTable_DECL 406 #define GET_VOP3InfoTable_IMPL 407 #define GET_VOPC64DPPTable_DECL 408 #define GET_VOPC64DPPTable_IMPL 409 #define GET_VOPC64DPP8Table_DECL 410 #define GET_VOPC64DPP8Table_IMPL 411 #define GET_VOPCAsmOnlyInfoTable_DECL 412 #define GET_VOPCAsmOnlyInfoTable_IMPL 413 #define GET_VOP3CAsmOnlyInfoTable_DECL 414 #define GET_VOP3CAsmOnlyInfoTable_IMPL 415 #define GET_VOPDComponentTable_DECL 416 #define GET_VOPDComponentTable_IMPL 417 #define GET_VOPDPairs_DECL 418 #define GET_VOPDPairs_IMPL 419 #define GET_VOPTrue16Table_DECL 420 #define GET_VOPTrue16Table_IMPL 421 #define GET_WMMAOpcode2AddrMappingTable_DECL 422 #define GET_WMMAOpcode2AddrMappingTable_IMPL 423 #define GET_WMMAOpcode3AddrMappingTable_DECL 424 #define GET_WMMAOpcode3AddrMappingTable_IMPL 425 #define GET_getMFMA_F8F6F4_WithSize_DECL 426 #define GET_getMFMA_F8F6F4_WithSize_IMPL 427 #define GET_isMFMA_F8F6F4Table_IMPL 428 #define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL 429 430 #include "AMDGPUGenSearchableTables.inc" 431 432 int getMTBUFBaseOpcode(unsigned Opc) { 433 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc); 434 return Info ? Info->BaseOpcode : -1; 435 } 436 437 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) { 438 const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements); 439 return Info ? Info->Opcode : -1; 440 } 441 442 int getMTBUFElements(unsigned Opc) { 443 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 444 return Info ? Info->elements : 0; 445 } 446 447 bool getMTBUFHasVAddr(unsigned Opc) { 448 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 449 return Info ? Info->has_vaddr : false; 450 } 451 452 bool getMTBUFHasSrsrc(unsigned Opc) { 453 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 454 return Info ? Info->has_srsrc : false; 455 } 456 457 bool getMTBUFHasSoffset(unsigned Opc) { 458 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 459 return Info ? Info->has_soffset : false; 460 } 461 462 int getMUBUFBaseOpcode(unsigned Opc) { 463 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc); 464 return Info ? Info->BaseOpcode : -1; 465 } 466 467 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) { 468 const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements); 469 return Info ? Info->Opcode : -1; 470 } 471 472 int getMUBUFElements(unsigned Opc) { 473 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 474 return Info ? Info->elements : 0; 475 } 476 477 bool getMUBUFHasVAddr(unsigned Opc) { 478 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 479 return Info ? Info->has_vaddr : false; 480 } 481 482 bool getMUBUFHasSrsrc(unsigned Opc) { 483 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 484 return Info ? Info->has_srsrc : false; 485 } 486 487 bool getMUBUFHasSoffset(unsigned Opc) { 488 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 489 return Info ? Info->has_soffset : false; 490 } 491 492 bool getMUBUFIsBufferInv(unsigned Opc) { 493 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 494 return Info ? Info->IsBufferInv : false; 495 } 496 497 bool getMUBUFTfe(unsigned Opc) { 498 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 499 return Info ? Info->tfe : false; 500 } 501 502 bool getSMEMIsBuffer(unsigned Opc) { 503 const SMInfo *Info = getSMEMOpcodeHelper(Opc); 504 return Info ? Info->IsBuffer : false; 505 } 506 507 bool getVOP1IsSingle(unsigned Opc) { 508 const VOPInfo *Info = getVOP1OpcodeHelper(Opc); 509 return Info ? Info->IsSingle : true; 510 } 511 512 bool getVOP2IsSingle(unsigned Opc) { 513 const VOPInfo *Info = getVOP2OpcodeHelper(Opc); 514 return Info ? Info->IsSingle : true; 515 } 516 517 bool getVOP3IsSingle(unsigned Opc) { 518 const VOPInfo *Info = getVOP3OpcodeHelper(Opc); 519 return Info ? Info->IsSingle : true; 520 } 521 522 bool isVOPC64DPP(unsigned Opc) { 523 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc); 524 } 525 526 bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); } 527 528 bool getMAIIsDGEMM(unsigned Opc) { 529 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); 530 return Info ? Info->is_dgemm : false; 531 } 532 533 bool getMAIIsGFX940XDL(unsigned Opc) { 534 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); 535 return Info ? Info->is_gfx940_xdl : false; 536 } 537 538 uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) { 539 switch (EncodingVal) { 540 case MFMAScaleFormats::FP6_E2M3: 541 case MFMAScaleFormats::FP6_E3M2: 542 return 6; 543 case MFMAScaleFormats::FP4_E2M1: 544 return 4; 545 case MFMAScaleFormats::FP8_E4M3: 546 case MFMAScaleFormats::FP8_E5M2: 547 default: 548 return 8; 549 } 550 551 llvm_unreachable("covered switch over mfma scale formats"); 552 } 553 554 const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, 555 unsigned BLGP, 556 unsigned F8F8Opcode) { 557 uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(CBSZ); 558 uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(BLGP); 559 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode); 560 } 561 562 unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) { 563 if (ST.hasFeature(AMDGPU::FeatureGFX12Insts)) 564 return SIEncodingFamily::GFX12; 565 if (ST.hasFeature(AMDGPU::FeatureGFX11Insts)) 566 return SIEncodingFamily::GFX11; 567 llvm_unreachable("Subtarget generation does not support VOPD!"); 568 } 569 570 CanBeVOPD getCanBeVOPD(unsigned Opc) { 571 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc); 572 if (Info) 573 return {Info->CanBeVOPDX, true}; 574 return {false, false}; 575 } 576 577 unsigned getVOPDOpcode(unsigned Opc) { 578 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc); 579 return Info ? Info->VOPDOp : ~0u; 580 } 581 582 bool isVOPD(unsigned Opc) { 583 return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X); 584 } 585 586 bool isMAC(unsigned Opc) { 587 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 588 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 589 Opc == AMDGPU::V_MAC_F32_e64_vi || 590 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 591 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 592 Opc == AMDGPU::V_MAC_F16_e64_vi || 593 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 594 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 595 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || 596 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 || 597 Opc == AMDGPU::V_FMAC_F32_e64_vi || 598 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 599 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || 600 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || 601 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 || 602 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 || 603 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi || 604 Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi || 605 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi || 606 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi || 607 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi; 608 } 609 610 bool isPermlane16(unsigned Opc) { 611 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 612 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 || 613 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 || 614 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 || 615 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 || 616 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 || 617 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 || 618 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12; 619 } 620 621 bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) { 622 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 || 623 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 || 624 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 || 625 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 || 626 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 || 627 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 || 628 Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 || 629 Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 || 630 Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 || 631 Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12; 632 } 633 634 bool isGenericAtomic(unsigned Opc) { 635 return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP || 636 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD || 637 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB || 638 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN || 639 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN || 640 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX || 641 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX || 642 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND || 643 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR || 644 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR || 645 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC || 646 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC || 647 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD || 648 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN || 649 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX || 650 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP || 651 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG; 652 } 653 654 bool isTrue16Inst(unsigned Opc) { 655 const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc); 656 return Info ? Info->IsTrue16 : false; 657 } 658 659 FPType getFPDstSelType(unsigned Opc) { 660 const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc); 661 if (!Info) 662 return FPType::None; 663 if (Info->HasFP8DstByteSel) 664 return FPType::FP8; 665 if (Info->HasFP4DstByteSel) 666 return FPType::FP4; 667 668 return FPType::None; 669 } 670 671 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) { 672 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc); 673 return Info ? Info->Opcode3Addr : ~0u; 674 } 675 676 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) { 677 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc); 678 return Info ? Info->Opcode2Addr : ~0u; 679 } 680 681 // Wrapper for Tablegen'd function. enum Subtarget is not defined in any 682 // header files, so we need to wrap it in a function that takes unsigned 683 // instead. 684 int getMCOpcode(uint16_t Opcode, unsigned Gen) { 685 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen)); 686 } 687 688 int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) { 689 const VOPDInfo *Info = 690 getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily); 691 return Info ? Info->Opcode : -1; 692 } 693 694 std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) { 695 const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode); 696 assert(Info); 697 const auto *OpX = getVOPDBaseFromComponent(Info->OpX); 698 const auto *OpY = getVOPDBaseFromComponent(Info->OpY); 699 assert(OpX && OpY); 700 return {OpX->BaseVOP, OpY->BaseVOP}; 701 } 702 703 namespace VOPD { 704 705 ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) { 706 assert(OpDesc.getNumDefs() == Component::DST_NUM); 707 708 assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1); 709 assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1); 710 auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO); 711 assert(TiedIdx == -1 || TiedIdx == Component::DST); 712 HasSrc2Acc = TiedIdx != -1; 713 714 SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs(); 715 assert(SrcOperandsNum <= Component::MAX_SRC_NUM); 716 717 auto OperandsNum = OpDesc.getNumOperands(); 718 unsigned CompOprIdx; 719 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) { 720 if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) { 721 MandatoryLiteralIdx = CompOprIdx; 722 break; 723 } 724 } 725 } 726 727 unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const { 728 assert(CompOprIdx < Component::MAX_OPR_NUM); 729 730 if (CompOprIdx == Component::DST) 731 return getIndexOfDstInParsedOperands(); 732 733 auto CompSrcIdx = CompOprIdx - Component::DST_NUM; 734 if (CompSrcIdx < getCompParsedSrcOperandsNum()) 735 return getIndexOfSrcInParsedOperands(CompSrcIdx); 736 737 // The specified operand does not exist. 738 return 0; 739 } 740 741 std::optional<unsigned> InstInfo::getInvalidCompOperandIndex( 742 std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const { 743 744 auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx); 745 auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx); 746 747 const unsigned CompOprNum = 748 SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM; 749 unsigned CompOprIdx; 750 for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) { 751 unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx]; 752 if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] && 753 ((OpXRegs[CompOprIdx] & BanksMasks) == 754 (OpYRegs[CompOprIdx] & BanksMasks))) 755 return CompOprIdx; 756 } 757 758 return {}; 759 } 760 761 // Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used 762 // by the specified component. If an operand is unused 763 // or is not a VGPR, the corresponding value is 0. 764 // 765 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index 766 // for the specified component and MC operand. The callback must return 0 767 // if the operand is not a register or not a VGPR. 768 InstInfo::RegIndices InstInfo::getRegIndices( 769 unsigned CompIdx, 770 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const { 771 assert(CompIdx < COMPONENTS_NUM); 772 773 const auto &Comp = CompInfo[CompIdx]; 774 InstInfo::RegIndices RegIndices; 775 776 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands()); 777 778 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) { 779 unsigned CompSrcIdx = CompOprIdx - DST_NUM; 780 RegIndices[CompOprIdx] = 781 Comp.hasRegSrcOperand(CompSrcIdx) 782 ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx)) 783 : 0; 784 } 785 return RegIndices; 786 } 787 788 } // namespace VOPD 789 790 VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) { 791 return VOPD::InstInfo(OpX, OpY); 792 } 793 794 VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode, 795 const MCInstrInfo *InstrInfo) { 796 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode); 797 const auto &OpXDesc = InstrInfo->get(OpX); 798 const auto &OpYDesc = InstrInfo->get(OpY); 799 VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X); 800 VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo); 801 return VOPD::InstInfo(OpXInfo, OpYInfo); 802 } 803 804 namespace IsaInfo { 805 806 AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI) 807 : STI(STI), XnackSetting(TargetIDSetting::Any), 808 SramEccSetting(TargetIDSetting::Any) { 809 if (!STI.getFeatureBits().test(FeatureSupportsXNACK)) 810 XnackSetting = TargetIDSetting::Unsupported; 811 if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC)) 812 SramEccSetting = TargetIDSetting::Unsupported; 813 } 814 815 void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) { 816 // Check if xnack or sramecc is explicitly enabled or disabled. In the 817 // absence of the target features we assume we must generate code that can run 818 // in any environment. 819 SubtargetFeatures Features(FS); 820 std::optional<bool> XnackRequested; 821 std::optional<bool> SramEccRequested; 822 823 for (const std::string &Feature : Features.getFeatures()) { 824 if (Feature == "+xnack") 825 XnackRequested = true; 826 else if (Feature == "-xnack") 827 XnackRequested = false; 828 else if (Feature == "+sramecc") 829 SramEccRequested = true; 830 else if (Feature == "-sramecc") 831 SramEccRequested = false; 832 } 833 834 bool XnackSupported = isXnackSupported(); 835 bool SramEccSupported = isSramEccSupported(); 836 837 if (XnackRequested) { 838 if (XnackSupported) { 839 XnackSetting = 840 *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off; 841 } else { 842 // If a specific xnack setting was requested and this GPU does not support 843 // xnack emit a warning. Setting will remain set to "Unsupported". 844 if (*XnackRequested) { 845 errs() << "warning: xnack 'On' was requested for a processor that does " 846 "not support it!\n"; 847 } else { 848 errs() << "warning: xnack 'Off' was requested for a processor that " 849 "does not support it!\n"; 850 } 851 } 852 } 853 854 if (SramEccRequested) { 855 if (SramEccSupported) { 856 SramEccSetting = 857 *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off; 858 } else { 859 // If a specific sramecc setting was requested and this GPU does not 860 // support sramecc emit a warning. Setting will remain set to 861 // "Unsupported". 862 if (*SramEccRequested) { 863 errs() << "warning: sramecc 'On' was requested for a processor that " 864 "does not support it!\n"; 865 } else { 866 errs() << "warning: sramecc 'Off' was requested for a processor that " 867 "does not support it!\n"; 868 } 869 } 870 } 871 } 872 873 static TargetIDSetting 874 getTargetIDSettingFromFeatureString(StringRef FeatureString) { 875 if (FeatureString.ends_with("-")) 876 return TargetIDSetting::Off; 877 if (FeatureString.ends_with("+")) 878 return TargetIDSetting::On; 879 880 llvm_unreachable("Malformed feature string"); 881 } 882 883 void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) { 884 SmallVector<StringRef, 3> TargetIDSplit; 885 TargetID.split(TargetIDSplit, ':'); 886 887 for (const auto &FeatureString : TargetIDSplit) { 888 if (FeatureString.starts_with("xnack")) 889 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString); 890 if (FeatureString.starts_with("sramecc")) 891 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString); 892 } 893 } 894 895 std::string AMDGPUTargetID::toString() const { 896 std::string StringRep; 897 raw_string_ostream StreamRep(StringRep); 898 899 auto TargetTriple = STI.getTargetTriple(); 900 auto Version = getIsaVersion(STI.getCPU()); 901 902 StreamRep << TargetTriple.getArchName() << '-' 903 << TargetTriple.getVendorName() << '-' 904 << TargetTriple.getOSName() << '-' 905 << TargetTriple.getEnvironmentName() << '-'; 906 907 std::string Processor; 908 // TODO: Following else statement is present here because we used various 909 // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803'). 910 // Remove once all aliases are removed from GCNProcessors.td. 911 if (Version.Major >= 9) 912 Processor = STI.getCPU().str(); 913 else 914 Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) + 915 Twine(Version.Stepping)) 916 .str(); 917 918 std::string Features; 919 if (STI.getTargetTriple().getOS() == Triple::AMDHSA) { 920 // sramecc. 921 if (getSramEccSetting() == TargetIDSetting::Off) 922 Features += ":sramecc-"; 923 else if (getSramEccSetting() == TargetIDSetting::On) 924 Features += ":sramecc+"; 925 // xnack. 926 if (getXnackSetting() == TargetIDSetting::Off) 927 Features += ":xnack-"; 928 else if (getXnackSetting() == TargetIDSetting::On) 929 Features += ":xnack+"; 930 } 931 932 StreamRep << Processor << Features; 933 934 return StringRep; 935 } 936 937 unsigned getWavefrontSize(const MCSubtargetInfo *STI) { 938 if (STI->getFeatureBits().test(FeatureWavefrontSize16)) 939 return 16; 940 if (STI->getFeatureBits().test(FeatureWavefrontSize32)) 941 return 32; 942 943 return 64; 944 } 945 946 unsigned getLocalMemorySize(const MCSubtargetInfo *STI) { 947 unsigned BytesPerCU = getAddressableLocalMemorySize(STI); 948 949 // "Per CU" really means "per whatever functional block the waves of a 950 // workgroup must share". So the effective local memory size is doubled in 951 // WGP mode on gfx10. 952 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode)) 953 BytesPerCU *= 2; 954 955 return BytesPerCU; 956 } 957 958 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) { 959 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768)) 960 return 32768; 961 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536)) 962 return 65536; 963 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840)) 964 return 163840; 965 return 0; 966 } 967 968 unsigned getEUsPerCU(const MCSubtargetInfo *STI) { 969 // "Per CU" really means "per whatever functional block the waves of a 970 // workgroup must share". For gfx10 in CU mode this is the CU, which contains 971 // two SIMDs. 972 if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode)) 973 return 2; 974 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains 975 // two CUs, so a total of four SIMDs. 976 return 4; 977 } 978 979 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, 980 unsigned FlatWorkGroupSize) { 981 assert(FlatWorkGroupSize != 0); 982 if (STI->getTargetTriple().getArch() != Triple::amdgcn) 983 return 8; 984 unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI); 985 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize); 986 if (N == 1) { 987 // Single-wave workgroups don't consume barrier resources. 988 return MaxWaves; 989 } 990 991 unsigned MaxBarriers = 16; 992 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode)) 993 MaxBarriers = 32; 994 995 return std::min(MaxWaves / N, MaxBarriers); 996 } 997 998 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { 999 return 1; 1000 } 1001 1002 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) { 1003 // FIXME: Need to take scratch memory into account. 1004 if (isGFX90A(*STI)) 1005 return 8; 1006 if (!isGFX10Plus(*STI)) 1007 return 10; 1008 return hasGFX10_3Insts(*STI) ? 16 : 20; 1009 } 1010 1011 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, 1012 unsigned FlatWorkGroupSize) { 1013 return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize), 1014 getEUsPerCU(STI)); 1015 } 1016 1017 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { 1018 return 1; 1019 } 1020 1021 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) { 1022 // Some subtargets allow encoding 2048, but this isn't tested or supported. 1023 return 1024; 1024 } 1025 1026 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, 1027 unsigned FlatWorkGroupSize) { 1028 return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI)); 1029 } 1030 1031 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) { 1032 IsaVersion Version = getIsaVersion(STI->getCPU()); 1033 if (Version.Major >= 10) 1034 return getAddressableNumSGPRs(STI); 1035 if (Version.Major >= 8) 1036 return 16; 1037 return 8; 1038 } 1039 1040 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { 1041 return 8; 1042 } 1043 1044 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) { 1045 IsaVersion Version = getIsaVersion(STI->getCPU()); 1046 if (Version.Major >= 8) 1047 return 800; 1048 return 512; 1049 } 1050 1051 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) { 1052 if (STI->getFeatureBits().test(FeatureSGPRInitBug)) 1053 return FIXED_NUM_SGPRS_FOR_INIT_BUG; 1054 1055 IsaVersion Version = getIsaVersion(STI->getCPU()); 1056 if (Version.Major >= 10) 1057 return 106; 1058 if (Version.Major >= 8) 1059 return 102; 1060 return 104; 1061 } 1062 1063 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { 1064 assert(WavesPerEU != 0); 1065 1066 IsaVersion Version = getIsaVersion(STI->getCPU()); 1067 if (Version.Major >= 10) 1068 return 0; 1069 1070 if (WavesPerEU >= getMaxWavesPerEU(STI)) 1071 return 0; 1072 1073 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1); 1074 if (STI->getFeatureBits().test(FeatureTrapHandler)) 1075 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS); 1076 MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1; 1077 return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI)); 1078 } 1079 1080 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, 1081 bool Addressable) { 1082 assert(WavesPerEU != 0); 1083 1084 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI); 1085 IsaVersion Version = getIsaVersion(STI->getCPU()); 1086 if (Version.Major >= 10) 1087 return Addressable ? AddressableNumSGPRs : 108; 1088 if (Version.Major >= 8 && !Addressable) 1089 AddressableNumSGPRs = 112; 1090 unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU; 1091 if (STI->getFeatureBits().test(FeatureTrapHandler)) 1092 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS); 1093 MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI)); 1094 return std::min(MaxNumSGPRs, AddressableNumSGPRs); 1095 } 1096 1097 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, 1098 bool FlatScrUsed, bool XNACKUsed) { 1099 unsigned ExtraSGPRs = 0; 1100 if (VCCUsed) 1101 ExtraSGPRs = 2; 1102 1103 IsaVersion Version = getIsaVersion(STI->getCPU()); 1104 if (Version.Major >= 10) 1105 return ExtraSGPRs; 1106 1107 if (Version.Major < 8) { 1108 if (FlatScrUsed) 1109 ExtraSGPRs = 4; 1110 } else { 1111 if (XNACKUsed) 1112 ExtraSGPRs = 4; 1113 1114 if (FlatScrUsed || 1115 STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch)) 1116 ExtraSGPRs = 6; 1117 } 1118 1119 return ExtraSGPRs; 1120 } 1121 1122 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, 1123 bool FlatScrUsed) { 1124 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed, 1125 STI->getFeatureBits().test(AMDGPU::FeatureXNACK)); 1126 } 1127 1128 static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, 1129 unsigned Granule) { 1130 return divideCeil(std::max(1u, NumRegs), Granule); 1131 } 1132 1133 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) { 1134 // SGPRBlocks is actual number of SGPR blocks minus 1. 1135 return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) - 1136 1; 1137 } 1138 1139 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, 1140 std::optional<bool> EnableWavefrontSize32) { 1141 if (STI->getFeatureBits().test(FeatureGFX90AInsts)) 1142 return 8; 1143 1144 bool IsWave32 = EnableWavefrontSize32 ? 1145 *EnableWavefrontSize32 : 1146 STI->getFeatureBits().test(FeatureWavefrontSize32); 1147 1148 if (STI->getFeatureBits().test(Feature1_5xVGPRs)) 1149 return IsWave32 ? 24 : 12; 1150 1151 if (hasGFX10_3Insts(*STI)) 1152 return IsWave32 ? 16 : 8; 1153 1154 return IsWave32 ? 8 : 4; 1155 } 1156 1157 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, 1158 std::optional<bool> EnableWavefrontSize32) { 1159 if (STI->getFeatureBits().test(FeatureGFX90AInsts)) 1160 return 8; 1161 1162 bool IsWave32 = EnableWavefrontSize32 ? 1163 *EnableWavefrontSize32 : 1164 STI->getFeatureBits().test(FeatureWavefrontSize32); 1165 1166 return IsWave32 ? 8 : 4; 1167 } 1168 1169 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { 1170 if (STI->getFeatureBits().test(FeatureGFX90AInsts)) 1171 return 512; 1172 if (!isGFX10Plus(*STI)) 1173 return 256; 1174 bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32); 1175 if (STI->getFeatureBits().test(Feature1_5xVGPRs)) 1176 return IsWave32 ? 1536 : 768; 1177 return IsWave32 ? 1024 : 512; 1178 } 1179 1180 unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; } 1181 1182 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { 1183 if (STI->getFeatureBits().test(FeatureGFX90AInsts)) 1184 return 512; 1185 return getAddressableNumArchVGPRs(STI); 1186 } 1187 1188 unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, 1189 unsigned NumVGPRs) { 1190 return getNumWavesPerEUWithNumVGPRs(NumVGPRs, getVGPRAllocGranule(STI), 1191 getMaxWavesPerEU(STI), 1192 getTotalNumVGPRs(STI)); 1193 } 1194 1195 unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule, 1196 unsigned MaxWaves, 1197 unsigned TotalNumVGPRs) { 1198 if (NumVGPRs < Granule) 1199 return MaxWaves; 1200 unsigned RoundedRegs = alignTo(NumVGPRs, Granule); 1201 return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves); 1202 } 1203 1204 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, 1205 AMDGPUSubtarget::Generation Gen) { 1206 if (Gen >= AMDGPUSubtarget::GFX10) 1207 return MaxWaves; 1208 1209 if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 1210 if (SGPRs <= 80) 1211 return 10; 1212 if (SGPRs <= 88) 1213 return 9; 1214 if (SGPRs <= 100) 1215 return 8; 1216 return 7; 1217 } 1218 if (SGPRs <= 48) 1219 return 10; 1220 if (SGPRs <= 56) 1221 return 9; 1222 if (SGPRs <= 64) 1223 return 8; 1224 if (SGPRs <= 72) 1225 return 7; 1226 if (SGPRs <= 80) 1227 return 6; 1228 return 5; 1229 } 1230 1231 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { 1232 assert(WavesPerEU != 0); 1233 1234 unsigned MaxWavesPerEU = getMaxWavesPerEU(STI); 1235 if (WavesPerEU >= MaxWavesPerEU) 1236 return 0; 1237 1238 unsigned TotNumVGPRs = getTotalNumVGPRs(STI); 1239 unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI); 1240 unsigned Granule = getVGPRAllocGranule(STI); 1241 unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule); 1242 1243 if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule)) 1244 return 0; 1245 1246 unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs); 1247 if (WavesPerEU < MinWavesPerEU) 1248 return getMinNumVGPRs(STI, MinWavesPerEU); 1249 1250 unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule); 1251 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext); 1252 return std::min(MinNumVGPRs, AddrsableNumVGPRs); 1253 } 1254 1255 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { 1256 assert(WavesPerEU != 0); 1257 1258 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU, 1259 getVGPRAllocGranule(STI)); 1260 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI); 1261 return std::min(MaxNumVGPRs, AddressableNumVGPRs); 1262 } 1263 1264 unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, 1265 std::optional<bool> EnableWavefrontSize32) { 1266 return getGranulatedNumRegisterBlocks( 1267 NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) - 1268 1; 1269 } 1270 1271 unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, 1272 unsigned NumVGPRs, 1273 std::optional<bool> EnableWavefrontSize32) { 1274 return getGranulatedNumRegisterBlocks( 1275 NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32)); 1276 } 1277 } // end namespace IsaInfo 1278 1279 void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, 1280 const MCSubtargetInfo *STI) { 1281 IsaVersion Version = getIsaVersion(STI->getCPU()); 1282 KernelCode.amd_kernel_code_version_major = 1; 1283 KernelCode.amd_kernel_code_version_minor = 2; 1284 KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU 1285 KernelCode.amd_machine_version_major = Version.Major; 1286 KernelCode.amd_machine_version_minor = Version.Minor; 1287 KernelCode.amd_machine_version_stepping = Version.Stepping; 1288 KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t); 1289 if (STI->getFeatureBits().test(FeatureWavefrontSize32)) { 1290 KernelCode.wavefront_size = 5; 1291 KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; 1292 } else { 1293 KernelCode.wavefront_size = 6; 1294 } 1295 1296 // If the code object does not support indirect functions, then the value must 1297 // be 0xffffffff. 1298 KernelCode.call_convention = -1; 1299 1300 // These alignment values are specified in powers of two, so alignment = 1301 // 2^n. The minimum alignment is 2^4 = 16. 1302 KernelCode.kernarg_segment_alignment = 4; 1303 KernelCode.group_segment_alignment = 4; 1304 KernelCode.private_segment_alignment = 4; 1305 1306 if (Version.Major >= 10) { 1307 KernelCode.compute_pgm_resource_registers |= 1308 S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) | 1309 S_00B848_MEM_ORDERED(1); 1310 } 1311 } 1312 1313 bool isGroupSegment(const GlobalValue *GV) { 1314 return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; 1315 } 1316 1317 bool isGlobalSegment(const GlobalValue *GV) { 1318 return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; 1319 } 1320 1321 bool isReadOnlySegment(const GlobalValue *GV) { 1322 unsigned AS = GV->getAddressSpace(); 1323 return AS == AMDGPUAS::CONSTANT_ADDRESS || 1324 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT; 1325 } 1326 1327 bool shouldEmitConstantsToTextSection(const Triple &TT) { 1328 return TT.getArch() == Triple::r600; 1329 } 1330 1331 std::pair<unsigned, unsigned> 1332 getIntegerPairAttribute(const Function &F, StringRef Name, 1333 std::pair<unsigned, unsigned> Default, 1334 bool OnlyFirstRequired) { 1335 if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired)) 1336 return {Attr->first, Attr->second ? *(Attr->second) : Default.second}; 1337 return Default; 1338 } 1339 1340 std::optional<std::pair<unsigned, std::optional<unsigned>>> 1341 getIntegerPairAttribute(const Function &F, StringRef Name, 1342 bool OnlyFirstRequired) { 1343 Attribute A = F.getFnAttribute(Name); 1344 if (!A.isStringAttribute()) 1345 return std::nullopt; 1346 1347 LLVMContext &Ctx = F.getContext(); 1348 std::pair<unsigned, std::optional<unsigned>> Ints; 1349 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(','); 1350 if (Strs.first.trim().getAsInteger(0, Ints.first)) { 1351 Ctx.emitError("can't parse first integer attribute " + Name); 1352 return std::nullopt; 1353 } 1354 unsigned Second = 0; 1355 if (Strs.second.trim().getAsInteger(0, Second)) { 1356 if (!OnlyFirstRequired || !Strs.second.trim().empty()) { 1357 Ctx.emitError("can't parse second integer attribute " + Name); 1358 return std::nullopt; 1359 } 1360 } else { 1361 Ints.second = Second; 1362 } 1363 1364 return Ints; 1365 } 1366 1367 SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name, 1368 unsigned Size, 1369 unsigned DefaultVal) { 1370 assert(Size > 2); 1371 SmallVector<unsigned> Default(Size, DefaultVal); 1372 1373 Attribute A = F.getFnAttribute(Name); 1374 if (!A.isStringAttribute()) 1375 return Default; 1376 1377 SmallVector<unsigned> Vals(Size, DefaultVal); 1378 1379 LLVMContext &Ctx = F.getContext(); 1380 1381 StringRef S = A.getValueAsString(); 1382 unsigned i = 0; 1383 for (; !S.empty() && i < Size; i++) { 1384 std::pair<StringRef, StringRef> Strs = S.split(','); 1385 unsigned IntVal; 1386 if (Strs.first.trim().getAsInteger(0, IntVal)) { 1387 Ctx.emitError("can't parse integer attribute " + Strs.first + " in " + 1388 Name); 1389 return Default; 1390 } 1391 Vals[i] = IntVal; 1392 S = Strs.second; 1393 } 1394 1395 if (!S.empty() || i < Size) { 1396 Ctx.emitError("attribute " + Name + 1397 " has incorrect number of integers; expected " + 1398 llvm::utostr(Size)); 1399 return Default; 1400 } 1401 return Vals; 1402 } 1403 1404 unsigned getVmcntBitMask(const IsaVersion &Version) { 1405 return (1 << (getVmcntBitWidthLo(Version.Major) + 1406 getVmcntBitWidthHi(Version.Major))) - 1407 1; 1408 } 1409 1410 unsigned getLoadcntBitMask(const IsaVersion &Version) { 1411 return (1 << getLoadcntBitWidth(Version.Major)) - 1; 1412 } 1413 1414 unsigned getSamplecntBitMask(const IsaVersion &Version) { 1415 return (1 << getSamplecntBitWidth(Version.Major)) - 1; 1416 } 1417 1418 unsigned getBvhcntBitMask(const IsaVersion &Version) { 1419 return (1 << getBvhcntBitWidth(Version.Major)) - 1; 1420 } 1421 1422 unsigned getExpcntBitMask(const IsaVersion &Version) { 1423 return (1 << getExpcntBitWidth(Version.Major)) - 1; 1424 } 1425 1426 unsigned getLgkmcntBitMask(const IsaVersion &Version) { 1427 return (1 << getLgkmcntBitWidth(Version.Major)) - 1; 1428 } 1429 1430 unsigned getDscntBitMask(const IsaVersion &Version) { 1431 return (1 << getDscntBitWidth(Version.Major)) - 1; 1432 } 1433 1434 unsigned getKmcntBitMask(const IsaVersion &Version) { 1435 return (1 << getKmcntBitWidth(Version.Major)) - 1; 1436 } 1437 1438 unsigned getStorecntBitMask(const IsaVersion &Version) { 1439 return (1 << getStorecntBitWidth(Version.Major)) - 1; 1440 } 1441 1442 unsigned getWaitcntBitMask(const IsaVersion &Version) { 1443 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major), 1444 getVmcntBitWidthLo(Version.Major)); 1445 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major), 1446 getExpcntBitWidth(Version.Major)); 1447 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major), 1448 getLgkmcntBitWidth(Version.Major)); 1449 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major), 1450 getVmcntBitWidthHi(Version.Major)); 1451 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi; 1452 } 1453 1454 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) { 1455 unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major), 1456 getVmcntBitWidthLo(Version.Major)); 1457 unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major), 1458 getVmcntBitWidthHi(Version.Major)); 1459 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major); 1460 } 1461 1462 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) { 1463 return unpackBits(Waitcnt, getExpcntBitShift(Version.Major), 1464 getExpcntBitWidth(Version.Major)); 1465 } 1466 1467 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) { 1468 return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major), 1469 getLgkmcntBitWidth(Version.Major)); 1470 } 1471 1472 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, 1473 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) { 1474 Vmcnt = decodeVmcnt(Version, Waitcnt); 1475 Expcnt = decodeExpcnt(Version, Waitcnt); 1476 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); 1477 } 1478 1479 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) { 1480 Waitcnt Decoded; 1481 Decoded.LoadCnt = decodeVmcnt(Version, Encoded); 1482 Decoded.ExpCnt = decodeExpcnt(Version, Encoded); 1483 Decoded.DsCnt = decodeLgkmcnt(Version, Encoded); 1484 return Decoded; 1485 } 1486 1487 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, 1488 unsigned Vmcnt) { 1489 Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major), 1490 getVmcntBitWidthLo(Version.Major)); 1491 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt, 1492 getVmcntBitShiftHi(Version.Major), 1493 getVmcntBitWidthHi(Version.Major)); 1494 } 1495 1496 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, 1497 unsigned Expcnt) { 1498 return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major), 1499 getExpcntBitWidth(Version.Major)); 1500 } 1501 1502 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, 1503 unsigned Lgkmcnt) { 1504 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major), 1505 getLgkmcntBitWidth(Version.Major)); 1506 } 1507 1508 unsigned encodeWaitcnt(const IsaVersion &Version, 1509 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { 1510 unsigned Waitcnt = getWaitcntBitMask(Version); 1511 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); 1512 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt); 1513 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt); 1514 return Waitcnt; 1515 } 1516 1517 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) { 1518 return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt); 1519 } 1520 1521 static unsigned getCombinedCountBitMask(const IsaVersion &Version, 1522 bool IsStore) { 1523 unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major), 1524 getDscntBitWidth(Version.Major)); 1525 if (IsStore) { 1526 unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major), 1527 getStorecntBitWidth(Version.Major)); 1528 return Dscnt | Storecnt; 1529 } 1530 unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major), 1531 getLoadcntBitWidth(Version.Major)); 1532 return Dscnt | Loadcnt; 1533 } 1534 1535 Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) { 1536 Waitcnt Decoded; 1537 Decoded.LoadCnt = 1538 unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major), 1539 getLoadcntBitWidth(Version.Major)); 1540 Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major), 1541 getDscntBitWidth(Version.Major)); 1542 return Decoded; 1543 } 1544 1545 Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) { 1546 Waitcnt Decoded; 1547 Decoded.StoreCnt = 1548 unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major), 1549 getStorecntBitWidth(Version.Major)); 1550 Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major), 1551 getDscntBitWidth(Version.Major)); 1552 return Decoded; 1553 } 1554 1555 static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, 1556 unsigned Loadcnt) { 1557 return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major), 1558 getLoadcntBitWidth(Version.Major)); 1559 } 1560 1561 static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, 1562 unsigned Storecnt) { 1563 return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major), 1564 getStorecntBitWidth(Version.Major)); 1565 } 1566 1567 static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, 1568 unsigned Dscnt) { 1569 return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major), 1570 getDscntBitWidth(Version.Major)); 1571 } 1572 1573 static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, 1574 unsigned Dscnt) { 1575 unsigned Waitcnt = getCombinedCountBitMask(Version, false); 1576 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt); 1577 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt); 1578 return Waitcnt; 1579 } 1580 1581 unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) { 1582 return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt); 1583 } 1584 1585 static unsigned encodeStorecntDscnt(const IsaVersion &Version, 1586 unsigned Storecnt, unsigned Dscnt) { 1587 unsigned Waitcnt = getCombinedCountBitMask(Version, true); 1588 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt); 1589 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt); 1590 return Waitcnt; 1591 } 1592 1593 unsigned encodeStorecntDscnt(const IsaVersion &Version, 1594 const Waitcnt &Decoded) { 1595 return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt); 1596 } 1597 1598 //===----------------------------------------------------------------------===// 1599 // Custom Operand Values 1600 //===----------------------------------------------------------------------===// 1601 1602 static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, 1603 int Size, 1604 const MCSubtargetInfo &STI) { 1605 unsigned Enc = 0; 1606 for (int Idx = 0; Idx < Size; ++Idx) { 1607 const auto &Op = Opr[Idx]; 1608 if (Op.isSupported(STI)) 1609 Enc |= Op.encode(Op.Default); 1610 } 1611 return Enc; 1612 } 1613 1614 static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, 1615 int Size, unsigned Code, 1616 bool &HasNonDefaultVal, 1617 const MCSubtargetInfo &STI) { 1618 unsigned UsedOprMask = 0; 1619 HasNonDefaultVal = false; 1620 for (int Idx = 0; Idx < Size; ++Idx) { 1621 const auto &Op = Opr[Idx]; 1622 if (!Op.isSupported(STI)) 1623 continue; 1624 UsedOprMask |= Op.getMask(); 1625 unsigned Val = Op.decode(Code); 1626 if (!Op.isValid(Val)) 1627 return false; 1628 HasNonDefaultVal |= (Val != Op.Default); 1629 } 1630 return (Code & ~UsedOprMask) == 0; 1631 } 1632 1633 static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, 1634 unsigned Code, int &Idx, StringRef &Name, 1635 unsigned &Val, bool &IsDefault, 1636 const MCSubtargetInfo &STI) { 1637 while (Idx < Size) { 1638 const auto &Op = Opr[Idx++]; 1639 if (Op.isSupported(STI)) { 1640 Name = Op.Name; 1641 Val = Op.decode(Code); 1642 IsDefault = (Val == Op.Default); 1643 return true; 1644 } 1645 } 1646 1647 return false; 1648 } 1649 1650 static int encodeCustomOperandVal(const CustomOperandVal &Op, 1651 int64_t InputVal) { 1652 if (InputVal < 0 || InputVal > Op.Max) 1653 return OPR_VAL_INVALID; 1654 return Op.encode(InputVal); 1655 } 1656 1657 static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, 1658 const StringRef Name, int64_t InputVal, 1659 unsigned &UsedOprMask, 1660 const MCSubtargetInfo &STI) { 1661 int InvalidId = OPR_ID_UNKNOWN; 1662 for (int Idx = 0; Idx < Size; ++Idx) { 1663 const auto &Op = Opr[Idx]; 1664 if (Op.Name == Name) { 1665 if (!Op.isSupported(STI)) { 1666 InvalidId = OPR_ID_UNSUPPORTED; 1667 continue; 1668 } 1669 auto OprMask = Op.getMask(); 1670 if (OprMask & UsedOprMask) 1671 return OPR_ID_DUPLICATE; 1672 UsedOprMask |= OprMask; 1673 return encodeCustomOperandVal(Op, InputVal); 1674 } 1675 } 1676 return InvalidId; 1677 } 1678 1679 //===----------------------------------------------------------------------===// 1680 // DepCtr 1681 //===----------------------------------------------------------------------===// 1682 1683 namespace DepCtr { 1684 1685 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) { 1686 static int Default = -1; 1687 if (Default == -1) 1688 Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI); 1689 return Default; 1690 } 1691 1692 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, 1693 const MCSubtargetInfo &STI) { 1694 return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code, 1695 HasNonDefaultVal, STI); 1696 } 1697 1698 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, 1699 bool &IsDefault, const MCSubtargetInfo &STI) { 1700 return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val, 1701 IsDefault, STI); 1702 } 1703 1704 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, 1705 const MCSubtargetInfo &STI) { 1706 return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask, 1707 STI); 1708 } 1709 1710 unsigned decodeFieldVmVsrc(unsigned Encoded) { 1711 return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth()); 1712 } 1713 1714 unsigned decodeFieldVaVdst(unsigned Encoded) { 1715 return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth()); 1716 } 1717 1718 unsigned decodeFieldSaSdst(unsigned Encoded) { 1719 return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth()); 1720 } 1721 1722 unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) { 1723 return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth()); 1724 } 1725 1726 unsigned encodeFieldVmVsrc(unsigned VmVsrc) { 1727 return encodeFieldVmVsrc(0xffff, VmVsrc); 1728 } 1729 1730 unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) { 1731 return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth()); 1732 } 1733 1734 unsigned encodeFieldVaVdst(unsigned VaVdst) { 1735 return encodeFieldVaVdst(0xffff, VaVdst); 1736 } 1737 1738 unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) { 1739 return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth()); 1740 } 1741 1742 unsigned encodeFieldSaSdst(unsigned SaSdst) { 1743 return encodeFieldSaSdst(0xffff, SaSdst); 1744 } 1745 1746 } // namespace DepCtr 1747 1748 //===----------------------------------------------------------------------===// 1749 // exp tgt 1750 //===----------------------------------------------------------------------===// 1751 1752 namespace Exp { 1753 1754 struct ExpTgt { 1755 StringLiteral Name; 1756 unsigned Tgt; 1757 unsigned MaxIndex; 1758 }; 1759 1760 static constexpr ExpTgt ExpTgtInfo[] = { 1761 {{"null"}, ET_NULL, ET_NULL_MAX_IDX}, 1762 {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX}, 1763 {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX}, 1764 {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX}, 1765 {{"pos"}, ET_POS0, ET_POS_MAX_IDX}, 1766 {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX}, 1767 {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX}, 1768 }; 1769 1770 bool getTgtName(unsigned Id, StringRef &Name, int &Index) { 1771 for (const ExpTgt &Val : ExpTgtInfo) { 1772 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) { 1773 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt); 1774 Name = Val.Name; 1775 return true; 1776 } 1777 } 1778 return false; 1779 } 1780 1781 unsigned getTgtId(const StringRef Name) { 1782 1783 for (const ExpTgt &Val : ExpTgtInfo) { 1784 if (Val.MaxIndex == 0 && Name == Val.Name) 1785 return Val.Tgt; 1786 1787 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) { 1788 StringRef Suffix = Name.drop_front(Val.Name.size()); 1789 1790 unsigned Id; 1791 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex) 1792 return ET_INVALID; 1793 1794 // Disable leading zeroes 1795 if (Suffix.size() > 1 && Suffix[0] == '0') 1796 return ET_INVALID; 1797 1798 return Val.Tgt + Id; 1799 } 1800 } 1801 return ET_INVALID; 1802 } 1803 1804 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) { 1805 switch (Id) { 1806 case ET_NULL: 1807 return !isGFX11Plus(STI); 1808 case ET_POS4: 1809 case ET_PRIM: 1810 return isGFX10Plus(STI); 1811 case ET_DUAL_SRC_BLEND0: 1812 case ET_DUAL_SRC_BLEND1: 1813 return isGFX11Plus(STI); 1814 default: 1815 if (Id >= ET_PARAM0 && Id <= ET_PARAM31) 1816 return !isGFX11Plus(STI); 1817 return true; 1818 } 1819 } 1820 1821 } // namespace Exp 1822 1823 //===----------------------------------------------------------------------===// 1824 // MTBUF Format 1825 //===----------------------------------------------------------------------===// 1826 1827 namespace MTBUFFormat { 1828 1829 int64_t getDfmt(const StringRef Name) { 1830 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) { 1831 if (Name == DfmtSymbolic[Id]) 1832 return Id; 1833 } 1834 return DFMT_UNDEF; 1835 } 1836 1837 StringRef getDfmtName(unsigned Id) { 1838 assert(Id <= DFMT_MAX); 1839 return DfmtSymbolic[Id]; 1840 } 1841 1842 static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) { 1843 if (isSI(STI) || isCI(STI)) 1844 return NfmtSymbolicSICI; 1845 if (isVI(STI) || isGFX9(STI)) 1846 return NfmtSymbolicVI; 1847 return NfmtSymbolicGFX10; 1848 } 1849 1850 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) { 1851 const auto *lookupTable = getNfmtLookupTable(STI); 1852 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) { 1853 if (Name == lookupTable[Id]) 1854 return Id; 1855 } 1856 return NFMT_UNDEF; 1857 } 1858 1859 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) { 1860 assert(Id <= NFMT_MAX); 1861 return getNfmtLookupTable(STI)[Id]; 1862 } 1863 1864 bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) { 1865 unsigned Dfmt; 1866 unsigned Nfmt; 1867 decodeDfmtNfmt(Id, Dfmt, Nfmt); 1868 return isValidNfmt(Nfmt, STI); 1869 } 1870 1871 bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) { 1872 return !getNfmtName(Id, STI).empty(); 1873 } 1874 1875 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) { 1876 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT); 1877 } 1878 1879 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) { 1880 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK; 1881 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK; 1882 } 1883 1884 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) { 1885 if (isGFX11Plus(STI)) { 1886 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { 1887 if (Name == UfmtSymbolicGFX11[Id]) 1888 return Id; 1889 } 1890 } else { 1891 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { 1892 if (Name == UfmtSymbolicGFX10[Id]) 1893 return Id; 1894 } 1895 } 1896 return UFMT_UNDEF; 1897 } 1898 1899 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) { 1900 if(isValidUnifiedFormat(Id, STI)) 1901 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id]; 1902 return ""; 1903 } 1904 1905 bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) { 1906 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST; 1907 } 1908 1909 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, 1910 const MCSubtargetInfo &STI) { 1911 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt); 1912 if (isGFX11Plus(STI)) { 1913 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { 1914 if (Fmt == DfmtNfmt2UFmtGFX11[Id]) 1915 return Id; 1916 } 1917 } else { 1918 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { 1919 if (Fmt == DfmtNfmt2UFmtGFX10[Id]) 1920 return Id; 1921 } 1922 } 1923 return UFMT_UNDEF; 1924 } 1925 1926 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) { 1927 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX); 1928 } 1929 1930 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) { 1931 if (isGFX10Plus(STI)) 1932 return UFMT_DEFAULT; 1933 return DFMT_NFMT_DEFAULT; 1934 } 1935 1936 } // namespace MTBUFFormat 1937 1938 //===----------------------------------------------------------------------===// 1939 // SendMsg 1940 //===----------------------------------------------------------------------===// 1941 1942 namespace SendMsg { 1943 1944 static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) { 1945 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_; 1946 } 1947 1948 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) { 1949 return (MsgId & ~(getMsgIdMask(STI))) == 0; 1950 } 1951 1952 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, 1953 bool Strict) { 1954 assert(isValidMsgId(MsgId, STI)); 1955 1956 if (!Strict) 1957 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId); 1958 1959 if (msgRequiresOp(MsgId, STI)) { 1960 if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP) 1961 return false; 1962 1963 return !getMsgOpName(MsgId, OpId, STI).empty(); 1964 } 1965 1966 return OpId == OP_NONE_; 1967 } 1968 1969 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, 1970 const MCSubtargetInfo &STI, bool Strict) { 1971 assert(isValidMsgOp(MsgId, OpId, STI, Strict)); 1972 1973 if (!Strict) 1974 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId); 1975 1976 if (!isGFX11Plus(STI)) { 1977 switch (MsgId) { 1978 case ID_GS_PreGFX11: 1979 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_; 1980 case ID_GS_DONE_PreGFX11: 1981 return (OpId == OP_GS_NOP) ? 1982 (StreamId == STREAM_ID_NONE_) : 1983 (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_); 1984 } 1985 } 1986 return StreamId == STREAM_ID_NONE_; 1987 } 1988 1989 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) { 1990 return MsgId == ID_SYSMSG || 1991 (!isGFX11Plus(STI) && 1992 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11)); 1993 } 1994 1995 bool msgSupportsStream(int64_t MsgId, int64_t OpId, 1996 const MCSubtargetInfo &STI) { 1997 return !isGFX11Plus(STI) && 1998 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) && 1999 OpId != OP_GS_NOP; 2000 } 2001 2002 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, 2003 uint16_t &StreamId, const MCSubtargetInfo &STI) { 2004 MsgId = Val & getMsgIdMask(STI); 2005 if (isGFX11Plus(STI)) { 2006 OpId = 0; 2007 StreamId = 0; 2008 } else { 2009 OpId = (Val & OP_MASK_) >> OP_SHIFT_; 2010 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_; 2011 } 2012 } 2013 2014 uint64_t encodeMsg(uint64_t MsgId, 2015 uint64_t OpId, 2016 uint64_t StreamId) { 2017 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_); 2018 } 2019 2020 } // namespace SendMsg 2021 2022 //===----------------------------------------------------------------------===// 2023 // 2024 //===----------------------------------------------------------------------===// 2025 2026 unsigned getInitialPSInputAddr(const Function &F) { 2027 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0); 2028 } 2029 2030 bool getHasColorExport(const Function &F) { 2031 // As a safe default always respond as if PS has color exports. 2032 return F.getFnAttributeAsParsedInteger( 2033 "amdgpu-color-export", 2034 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0; 2035 } 2036 2037 bool getHasDepthExport(const Function &F) { 2038 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0; 2039 } 2040 2041 bool isShader(CallingConv::ID cc) { 2042 switch(cc) { 2043 case CallingConv::AMDGPU_VS: 2044 case CallingConv::AMDGPU_LS: 2045 case CallingConv::AMDGPU_HS: 2046 case CallingConv::AMDGPU_ES: 2047 case CallingConv::AMDGPU_GS: 2048 case CallingConv::AMDGPU_PS: 2049 case CallingConv::AMDGPU_CS_Chain: 2050 case CallingConv::AMDGPU_CS_ChainPreserve: 2051 case CallingConv::AMDGPU_CS: 2052 return true; 2053 default: 2054 return false; 2055 } 2056 } 2057 2058 bool isGraphics(CallingConv::ID cc) { 2059 return isShader(cc) || cc == CallingConv::AMDGPU_Gfx; 2060 } 2061 2062 bool isCompute(CallingConv::ID cc) { 2063 return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS; 2064 } 2065 2066 bool isEntryFunctionCC(CallingConv::ID CC) { 2067 switch (CC) { 2068 case CallingConv::AMDGPU_KERNEL: 2069 case CallingConv::SPIR_KERNEL: 2070 case CallingConv::AMDGPU_VS: 2071 case CallingConv::AMDGPU_GS: 2072 case CallingConv::AMDGPU_PS: 2073 case CallingConv::AMDGPU_CS: 2074 case CallingConv::AMDGPU_ES: 2075 case CallingConv::AMDGPU_HS: 2076 case CallingConv::AMDGPU_LS: 2077 return true; 2078 default: 2079 return false; 2080 } 2081 } 2082 2083 bool isModuleEntryFunctionCC(CallingConv::ID CC) { 2084 switch (CC) { 2085 case CallingConv::AMDGPU_Gfx: 2086 return true; 2087 default: 2088 return isEntryFunctionCC(CC) || isChainCC(CC); 2089 } 2090 } 2091 2092 bool isChainCC(CallingConv::ID CC) { 2093 switch (CC) { 2094 case CallingConv::AMDGPU_CS_Chain: 2095 case CallingConv::AMDGPU_CS_ChainPreserve: 2096 return true; 2097 default: 2098 return false; 2099 } 2100 } 2101 2102 bool isKernelCC(const Function *Func) { 2103 return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv()); 2104 } 2105 2106 bool hasXNACK(const MCSubtargetInfo &STI) { 2107 return STI.hasFeature(AMDGPU::FeatureXNACK); 2108 } 2109 2110 bool hasSRAMECC(const MCSubtargetInfo &STI) { 2111 return STI.hasFeature(AMDGPU::FeatureSRAMECC); 2112 } 2113 2114 bool hasMIMG_R128(const MCSubtargetInfo &STI) { 2115 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16); 2116 } 2117 2118 bool hasA16(const MCSubtargetInfo &STI) { 2119 return STI.hasFeature(AMDGPU::FeatureA16); 2120 } 2121 2122 bool hasG16(const MCSubtargetInfo &STI) { 2123 return STI.hasFeature(AMDGPU::FeatureG16); 2124 } 2125 2126 bool hasPackedD16(const MCSubtargetInfo &STI) { 2127 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) && 2128 !isSI(STI); 2129 } 2130 2131 bool hasGDS(const MCSubtargetInfo &STI) { 2132 return STI.hasFeature(AMDGPU::FeatureGDS); 2133 } 2134 2135 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) { 2136 auto Version = getIsaVersion(STI.getCPU()); 2137 if (Version.Major == 10) 2138 return Version.Minor >= 3 ? 13 : 5; 2139 if (Version.Major == 11) 2140 return 5; 2141 if (Version.Major >= 12) 2142 return HasSampler ? 4 : 5; 2143 return 0; 2144 } 2145 2146 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; } 2147 2148 bool isSI(const MCSubtargetInfo &STI) { 2149 return STI.hasFeature(AMDGPU::FeatureSouthernIslands); 2150 } 2151 2152 bool isCI(const MCSubtargetInfo &STI) { 2153 return STI.hasFeature(AMDGPU::FeatureSeaIslands); 2154 } 2155 2156 bool isVI(const MCSubtargetInfo &STI) { 2157 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands); 2158 } 2159 2160 bool isGFX9(const MCSubtargetInfo &STI) { 2161 return STI.hasFeature(AMDGPU::FeatureGFX9); 2162 } 2163 2164 bool isGFX9_GFX10(const MCSubtargetInfo &STI) { 2165 return isGFX9(STI) || isGFX10(STI); 2166 } 2167 2168 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) { 2169 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI); 2170 } 2171 2172 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) { 2173 return isVI(STI) || isGFX9(STI) || isGFX10(STI); 2174 } 2175 2176 bool isGFX8Plus(const MCSubtargetInfo &STI) { 2177 return isVI(STI) || isGFX9Plus(STI); 2178 } 2179 2180 bool isGFX9Plus(const MCSubtargetInfo &STI) { 2181 return isGFX9(STI) || isGFX10Plus(STI); 2182 } 2183 2184 bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); } 2185 2186 bool isGFX10(const MCSubtargetInfo &STI) { 2187 return STI.hasFeature(AMDGPU::FeatureGFX10); 2188 } 2189 2190 bool isGFX10_GFX11(const MCSubtargetInfo &STI) { 2191 return isGFX10(STI) || isGFX11(STI); 2192 } 2193 2194 bool isGFX10Plus(const MCSubtargetInfo &STI) { 2195 return isGFX10(STI) || isGFX11Plus(STI); 2196 } 2197 2198 bool isGFX11(const MCSubtargetInfo &STI) { 2199 return STI.hasFeature(AMDGPU::FeatureGFX11); 2200 } 2201 2202 bool isGFX11Plus(const MCSubtargetInfo &STI) { 2203 return isGFX11(STI) || isGFX12Plus(STI); 2204 } 2205 2206 bool isGFX12(const MCSubtargetInfo &STI) { 2207 return STI.getFeatureBits()[AMDGPU::FeatureGFX12]; 2208 } 2209 2210 bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); } 2211 2212 bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); } 2213 2214 bool isNotGFX11Plus(const MCSubtargetInfo &STI) { 2215 return !isGFX11Plus(STI); 2216 } 2217 2218 bool isNotGFX10Plus(const MCSubtargetInfo &STI) { 2219 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI); 2220 } 2221 2222 bool isGFX10Before1030(const MCSubtargetInfo &STI) { 2223 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI); 2224 } 2225 2226 bool isGCN3Encoding(const MCSubtargetInfo &STI) { 2227 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding); 2228 } 2229 2230 bool isGFX10_AEncoding(const MCSubtargetInfo &STI) { 2231 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding); 2232 } 2233 2234 bool isGFX10_BEncoding(const MCSubtargetInfo &STI) { 2235 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding); 2236 } 2237 2238 bool hasGFX10_3Insts(const MCSubtargetInfo &STI) { 2239 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts); 2240 } 2241 2242 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) { 2243 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI); 2244 } 2245 2246 bool isGFX90A(const MCSubtargetInfo &STI) { 2247 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts); 2248 } 2249 2250 bool isGFX940(const MCSubtargetInfo &STI) { 2251 return STI.hasFeature(AMDGPU::FeatureGFX940Insts); 2252 } 2253 2254 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) { 2255 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch); 2256 } 2257 2258 bool hasMAIInsts(const MCSubtargetInfo &STI) { 2259 return STI.hasFeature(AMDGPU::FeatureMAIInsts); 2260 } 2261 2262 bool hasVOPD(const MCSubtargetInfo &STI) { 2263 return STI.hasFeature(AMDGPU::FeatureVOPD); 2264 } 2265 2266 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) { 2267 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR); 2268 } 2269 2270 unsigned hasKernargPreload(const MCSubtargetInfo &STI) { 2271 return STI.hasFeature(AMDGPU::FeatureKernargPreload); 2272 } 2273 2274 int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, 2275 int32_t ArgNumVGPR) { 2276 if (has90AInsts && ArgNumAGPR) 2277 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR; 2278 return std::max(ArgNumVGPR, ArgNumAGPR); 2279 } 2280 2281 bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI) { 2282 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); 2283 const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0); 2284 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) || 2285 Reg == AMDGPU::SCC; 2286 } 2287 2288 bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI) { 2289 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI16; 2290 } 2291 2292 #define MAP_REG2REG \ 2293 using namespace AMDGPU; \ 2294 switch(Reg.id()) { \ 2295 default: return Reg; \ 2296 CASE_CI_VI(FLAT_SCR) \ 2297 CASE_CI_VI(FLAT_SCR_LO) \ 2298 CASE_CI_VI(FLAT_SCR_HI) \ 2299 CASE_VI_GFX9PLUS(TTMP0) \ 2300 CASE_VI_GFX9PLUS(TTMP1) \ 2301 CASE_VI_GFX9PLUS(TTMP2) \ 2302 CASE_VI_GFX9PLUS(TTMP3) \ 2303 CASE_VI_GFX9PLUS(TTMP4) \ 2304 CASE_VI_GFX9PLUS(TTMP5) \ 2305 CASE_VI_GFX9PLUS(TTMP6) \ 2306 CASE_VI_GFX9PLUS(TTMP7) \ 2307 CASE_VI_GFX9PLUS(TTMP8) \ 2308 CASE_VI_GFX9PLUS(TTMP9) \ 2309 CASE_VI_GFX9PLUS(TTMP10) \ 2310 CASE_VI_GFX9PLUS(TTMP11) \ 2311 CASE_VI_GFX9PLUS(TTMP12) \ 2312 CASE_VI_GFX9PLUS(TTMP13) \ 2313 CASE_VI_GFX9PLUS(TTMP14) \ 2314 CASE_VI_GFX9PLUS(TTMP15) \ 2315 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \ 2316 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \ 2317 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \ 2318 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \ 2319 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \ 2320 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \ 2321 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \ 2322 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \ 2323 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \ 2324 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \ 2325 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \ 2326 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \ 2327 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \ 2328 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \ 2329 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ 2330 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ 2331 CASE_GFXPRE11_GFX11PLUS(M0) \ 2332 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \ 2333 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \ 2334 } 2335 2336 #define CASE_CI_VI(node) \ 2337 assert(!isSI(STI)); \ 2338 case node: return isCI(STI) ? node##_ci : node##_vi; 2339 2340 #define CASE_VI_GFX9PLUS(node) \ 2341 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi; 2342 2343 #define CASE_GFXPRE11_GFX11PLUS(node) \ 2344 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11; 2345 2346 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \ 2347 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11; 2348 2349 MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI) { 2350 if (STI.getTargetTriple().getArch() == Triple::r600) 2351 return Reg; 2352 MAP_REG2REG 2353 } 2354 2355 #undef CASE_CI_VI 2356 #undef CASE_VI_GFX9PLUS 2357 #undef CASE_GFXPRE11_GFX11PLUS 2358 #undef CASE_GFXPRE11_GFX11PLUS_TO 2359 2360 #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node; 2361 #define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node; 2362 #define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node; 2363 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) 2364 2365 MCRegister mc2PseudoReg(MCRegister Reg) { MAP_REG2REG } 2366 2367 bool isInlineValue(unsigned Reg) { 2368 switch (Reg) { 2369 case AMDGPU::SRC_SHARED_BASE_LO: 2370 case AMDGPU::SRC_SHARED_BASE: 2371 case AMDGPU::SRC_SHARED_LIMIT_LO: 2372 case AMDGPU::SRC_SHARED_LIMIT: 2373 case AMDGPU::SRC_PRIVATE_BASE_LO: 2374 case AMDGPU::SRC_PRIVATE_BASE: 2375 case AMDGPU::SRC_PRIVATE_LIMIT_LO: 2376 case AMDGPU::SRC_PRIVATE_LIMIT: 2377 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2378 return true; 2379 case AMDGPU::SRC_VCCZ: 2380 case AMDGPU::SRC_EXECZ: 2381 case AMDGPU::SRC_SCC: 2382 return true; 2383 case AMDGPU::SGPR_NULL: 2384 return true; 2385 default: 2386 return false; 2387 } 2388 } 2389 2390 #undef CASE_CI_VI 2391 #undef CASE_VI_GFX9PLUS 2392 #undef CASE_GFXPRE11_GFX11PLUS 2393 #undef CASE_GFXPRE11_GFX11PLUS_TO 2394 #undef MAP_REG2REG 2395 2396 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2397 assert(OpNo < Desc.NumOperands); 2398 unsigned OpType = Desc.operands()[OpNo].OperandType; 2399 return OpType >= AMDGPU::OPERAND_SRC_FIRST && 2400 OpType <= AMDGPU::OPERAND_SRC_LAST; 2401 } 2402 2403 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2404 assert(OpNo < Desc.NumOperands); 2405 unsigned OpType = Desc.operands()[OpNo].OperandType; 2406 return OpType >= AMDGPU::OPERAND_KIMM_FIRST && 2407 OpType <= AMDGPU::OPERAND_KIMM_LAST; 2408 } 2409 2410 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2411 assert(OpNo < Desc.NumOperands); 2412 unsigned OpType = Desc.operands()[OpNo].OperandType; 2413 switch (OpType) { 2414 case AMDGPU::OPERAND_REG_IMM_FP32: 2415 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2416 case AMDGPU::OPERAND_REG_IMM_FP64: 2417 case AMDGPU::OPERAND_REG_IMM_FP16: 2418 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2419 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2420 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2421 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2422 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2423 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2424 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2425 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2426 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2427 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2428 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2429 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2430 return true; 2431 default: 2432 return false; 2433 } 2434 } 2435 2436 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2437 assert(OpNo < Desc.NumOperands); 2438 unsigned OpType = Desc.operands()[OpNo].OperandType; 2439 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST && 2440 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) || 2441 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST && 2442 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST); 2443 } 2444 2445 // Avoid using MCRegisterClass::getSize, since that function will go away 2446 // (move from MC* level to Target* level). Return size in bits. 2447 unsigned getRegBitWidth(unsigned RCID) { 2448 switch (RCID) { 2449 case AMDGPU::SGPR_LO16RegClassID: 2450 case AMDGPU::AGPR_LO16RegClassID: 2451 return 16; 2452 case AMDGPU::SGPR_32RegClassID: 2453 case AMDGPU::VGPR_32RegClassID: 2454 case AMDGPU::VRegOrLds_32RegClassID: 2455 case AMDGPU::AGPR_32RegClassID: 2456 case AMDGPU::VS_32RegClassID: 2457 case AMDGPU::AV_32RegClassID: 2458 case AMDGPU::SReg_32RegClassID: 2459 case AMDGPU::SReg_32_XM0RegClassID: 2460 case AMDGPU::SRegOrLds_32RegClassID: 2461 return 32; 2462 case AMDGPU::SGPR_64RegClassID: 2463 case AMDGPU::VS_64RegClassID: 2464 case AMDGPU::SReg_64RegClassID: 2465 case AMDGPU::VReg_64RegClassID: 2466 case AMDGPU::AReg_64RegClassID: 2467 case AMDGPU::SReg_64_XEXECRegClassID: 2468 case AMDGPU::VReg_64_Align2RegClassID: 2469 case AMDGPU::AReg_64_Align2RegClassID: 2470 case AMDGPU::AV_64RegClassID: 2471 case AMDGPU::AV_64_Align2RegClassID: 2472 return 64; 2473 case AMDGPU::SGPR_96RegClassID: 2474 case AMDGPU::SReg_96RegClassID: 2475 case AMDGPU::VReg_96RegClassID: 2476 case AMDGPU::AReg_96RegClassID: 2477 case AMDGPU::VReg_96_Align2RegClassID: 2478 case AMDGPU::AReg_96_Align2RegClassID: 2479 case AMDGPU::AV_96RegClassID: 2480 case AMDGPU::AV_96_Align2RegClassID: 2481 return 96; 2482 case AMDGPU::SGPR_128RegClassID: 2483 case AMDGPU::SReg_128RegClassID: 2484 case AMDGPU::VReg_128RegClassID: 2485 case AMDGPU::AReg_128RegClassID: 2486 case AMDGPU::VReg_128_Align2RegClassID: 2487 case AMDGPU::AReg_128_Align2RegClassID: 2488 case AMDGPU::AV_128RegClassID: 2489 case AMDGPU::AV_128_Align2RegClassID: 2490 case AMDGPU::SReg_128_XNULLRegClassID: 2491 return 128; 2492 case AMDGPU::SGPR_160RegClassID: 2493 case AMDGPU::SReg_160RegClassID: 2494 case AMDGPU::VReg_160RegClassID: 2495 case AMDGPU::AReg_160RegClassID: 2496 case AMDGPU::VReg_160_Align2RegClassID: 2497 case AMDGPU::AReg_160_Align2RegClassID: 2498 case AMDGPU::AV_160RegClassID: 2499 case AMDGPU::AV_160_Align2RegClassID: 2500 return 160; 2501 case AMDGPU::SGPR_192RegClassID: 2502 case AMDGPU::SReg_192RegClassID: 2503 case AMDGPU::VReg_192RegClassID: 2504 case AMDGPU::AReg_192RegClassID: 2505 case AMDGPU::VReg_192_Align2RegClassID: 2506 case AMDGPU::AReg_192_Align2RegClassID: 2507 case AMDGPU::AV_192RegClassID: 2508 case AMDGPU::AV_192_Align2RegClassID: 2509 return 192; 2510 case AMDGPU::SGPR_224RegClassID: 2511 case AMDGPU::SReg_224RegClassID: 2512 case AMDGPU::VReg_224RegClassID: 2513 case AMDGPU::AReg_224RegClassID: 2514 case AMDGPU::VReg_224_Align2RegClassID: 2515 case AMDGPU::AReg_224_Align2RegClassID: 2516 case AMDGPU::AV_224RegClassID: 2517 case AMDGPU::AV_224_Align2RegClassID: 2518 return 224; 2519 case AMDGPU::SGPR_256RegClassID: 2520 case AMDGPU::SReg_256RegClassID: 2521 case AMDGPU::VReg_256RegClassID: 2522 case AMDGPU::AReg_256RegClassID: 2523 case AMDGPU::VReg_256_Align2RegClassID: 2524 case AMDGPU::AReg_256_Align2RegClassID: 2525 case AMDGPU::AV_256RegClassID: 2526 case AMDGPU::AV_256_Align2RegClassID: 2527 case AMDGPU::SReg_256_XNULLRegClassID: 2528 return 256; 2529 case AMDGPU::SGPR_288RegClassID: 2530 case AMDGPU::SReg_288RegClassID: 2531 case AMDGPU::VReg_288RegClassID: 2532 case AMDGPU::AReg_288RegClassID: 2533 case AMDGPU::VReg_288_Align2RegClassID: 2534 case AMDGPU::AReg_288_Align2RegClassID: 2535 case AMDGPU::AV_288RegClassID: 2536 case AMDGPU::AV_288_Align2RegClassID: 2537 return 288; 2538 case AMDGPU::SGPR_320RegClassID: 2539 case AMDGPU::SReg_320RegClassID: 2540 case AMDGPU::VReg_320RegClassID: 2541 case AMDGPU::AReg_320RegClassID: 2542 case AMDGPU::VReg_320_Align2RegClassID: 2543 case AMDGPU::AReg_320_Align2RegClassID: 2544 case AMDGPU::AV_320RegClassID: 2545 case AMDGPU::AV_320_Align2RegClassID: 2546 return 320; 2547 case AMDGPU::SGPR_352RegClassID: 2548 case AMDGPU::SReg_352RegClassID: 2549 case AMDGPU::VReg_352RegClassID: 2550 case AMDGPU::AReg_352RegClassID: 2551 case AMDGPU::VReg_352_Align2RegClassID: 2552 case AMDGPU::AReg_352_Align2RegClassID: 2553 case AMDGPU::AV_352RegClassID: 2554 case AMDGPU::AV_352_Align2RegClassID: 2555 return 352; 2556 case AMDGPU::SGPR_384RegClassID: 2557 case AMDGPU::SReg_384RegClassID: 2558 case AMDGPU::VReg_384RegClassID: 2559 case AMDGPU::AReg_384RegClassID: 2560 case AMDGPU::VReg_384_Align2RegClassID: 2561 case AMDGPU::AReg_384_Align2RegClassID: 2562 case AMDGPU::AV_384RegClassID: 2563 case AMDGPU::AV_384_Align2RegClassID: 2564 return 384; 2565 case AMDGPU::SGPR_512RegClassID: 2566 case AMDGPU::SReg_512RegClassID: 2567 case AMDGPU::VReg_512RegClassID: 2568 case AMDGPU::AReg_512RegClassID: 2569 case AMDGPU::VReg_512_Align2RegClassID: 2570 case AMDGPU::AReg_512_Align2RegClassID: 2571 case AMDGPU::AV_512RegClassID: 2572 case AMDGPU::AV_512_Align2RegClassID: 2573 return 512; 2574 case AMDGPU::SGPR_1024RegClassID: 2575 case AMDGPU::SReg_1024RegClassID: 2576 case AMDGPU::VReg_1024RegClassID: 2577 case AMDGPU::AReg_1024RegClassID: 2578 case AMDGPU::VReg_1024_Align2RegClassID: 2579 case AMDGPU::AReg_1024_Align2RegClassID: 2580 case AMDGPU::AV_1024RegClassID: 2581 case AMDGPU::AV_1024_Align2RegClassID: 2582 return 1024; 2583 default: 2584 llvm_unreachable("Unexpected register class"); 2585 } 2586 } 2587 2588 unsigned getRegBitWidth(const MCRegisterClass &RC) { 2589 return getRegBitWidth(RC.getID()); 2590 } 2591 2592 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, 2593 unsigned OpNo) { 2594 assert(OpNo < Desc.NumOperands); 2595 unsigned RCID = Desc.operands()[OpNo].RegClass; 2596 return getRegBitWidth(RCID) / 8; 2597 } 2598 2599 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) { 2600 if (isInlinableIntLiteral(Literal)) 2601 return true; 2602 2603 uint64_t Val = static_cast<uint64_t>(Literal); 2604 return (Val == llvm::bit_cast<uint64_t>(0.0)) || 2605 (Val == llvm::bit_cast<uint64_t>(1.0)) || 2606 (Val == llvm::bit_cast<uint64_t>(-1.0)) || 2607 (Val == llvm::bit_cast<uint64_t>(0.5)) || 2608 (Val == llvm::bit_cast<uint64_t>(-0.5)) || 2609 (Val == llvm::bit_cast<uint64_t>(2.0)) || 2610 (Val == llvm::bit_cast<uint64_t>(-2.0)) || 2611 (Val == llvm::bit_cast<uint64_t>(4.0)) || 2612 (Val == llvm::bit_cast<uint64_t>(-4.0)) || 2613 (Val == 0x3fc45f306dc9c882 && HasInv2Pi); 2614 } 2615 2616 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) { 2617 if (isInlinableIntLiteral(Literal)) 2618 return true; 2619 2620 // The actual type of the operand does not seem to matter as long 2621 // as the bits match one of the inline immediate values. For example: 2622 // 2623 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, 2624 // so it is a legal inline immediate. 2625 // 2626 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in 2627 // floating-point, so it is a legal inline immediate. 2628 2629 uint32_t Val = static_cast<uint32_t>(Literal); 2630 return (Val == llvm::bit_cast<uint32_t>(0.0f)) || 2631 (Val == llvm::bit_cast<uint32_t>(1.0f)) || 2632 (Val == llvm::bit_cast<uint32_t>(-1.0f)) || 2633 (Val == llvm::bit_cast<uint32_t>(0.5f)) || 2634 (Val == llvm::bit_cast<uint32_t>(-0.5f)) || 2635 (Val == llvm::bit_cast<uint32_t>(2.0f)) || 2636 (Val == llvm::bit_cast<uint32_t>(-2.0f)) || 2637 (Val == llvm::bit_cast<uint32_t>(4.0f)) || 2638 (Val == llvm::bit_cast<uint32_t>(-4.0f)) || 2639 (Val == 0x3e22f983 && HasInv2Pi); 2640 } 2641 2642 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) { 2643 if (!HasInv2Pi) 2644 return false; 2645 if (isInlinableIntLiteral(Literal)) 2646 return true; 2647 uint16_t Val = static_cast<uint16_t>(Literal); 2648 return Val == 0x3F00 || // 0.5 2649 Val == 0xBF00 || // -0.5 2650 Val == 0x3F80 || // 1.0 2651 Val == 0xBF80 || // -1.0 2652 Val == 0x4000 || // 2.0 2653 Val == 0xC000 || // -2.0 2654 Val == 0x4080 || // 4.0 2655 Val == 0xC080 || // -4.0 2656 Val == 0x3E22; // 1.0 / (2.0 * pi) 2657 } 2658 2659 bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) { 2660 return isInlinableLiteral32(Literal, HasInv2Pi); 2661 } 2662 2663 bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) { 2664 if (!HasInv2Pi) 2665 return false; 2666 if (isInlinableIntLiteral(Literal)) 2667 return true; 2668 uint16_t Val = static_cast<uint16_t>(Literal); 2669 return Val == 0x3C00 || // 1.0 2670 Val == 0xBC00 || // -1.0 2671 Val == 0x3800 || // 0.5 2672 Val == 0xB800 || // -0.5 2673 Val == 0x4000 || // 2.0 2674 Val == 0xC000 || // -2.0 2675 Val == 0x4400 || // 4.0 2676 Val == 0xC400 || // -4.0 2677 Val == 0x3118; // 1/2pi 2678 } 2679 2680 std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) { 2681 // Unfortunately, the Instruction Set Architecture Reference Guide is 2682 // misleading about how the inline operands work for (packed) 16-bit 2683 // instructions. In a nutshell, the actual HW behavior is: 2684 // 2685 // - integer encodings (-16 .. 64) are always produced as sign-extended 2686 // 32-bit values 2687 // - float encodings are produced as: 2688 // - for F16 instructions: corresponding half-precision float values in 2689 // the LSBs, 0 in the MSBs 2690 // - for UI16 instructions: corresponding single-precision float value 2691 int32_t Signed = static_cast<int32_t>(Literal); 2692 if (Signed >= 0 && Signed <= 64) 2693 return 128 + Signed; 2694 2695 if (Signed >= -16 && Signed <= -1) 2696 return 192 + std::abs(Signed); 2697 2698 if (IsFloat) { 2699 // clang-format off 2700 switch (Literal) { 2701 case 0x3800: return 240; // 0.5 2702 case 0xB800: return 241; // -0.5 2703 case 0x3C00: return 242; // 1.0 2704 case 0xBC00: return 243; // -1.0 2705 case 0x4000: return 244; // 2.0 2706 case 0xC000: return 245; // -2.0 2707 case 0x4400: return 246; // 4.0 2708 case 0xC400: return 247; // -4.0 2709 case 0x3118: return 248; // 1.0 / (2.0 * pi) 2710 default: break; 2711 } 2712 // clang-format on 2713 } else { 2714 // clang-format off 2715 switch (Literal) { 2716 case 0x3F000000: return 240; // 0.5 2717 case 0xBF000000: return 241; // -0.5 2718 case 0x3F800000: return 242; // 1.0 2719 case 0xBF800000: return 243; // -1.0 2720 case 0x40000000: return 244; // 2.0 2721 case 0xC0000000: return 245; // -2.0 2722 case 0x40800000: return 246; // 4.0 2723 case 0xC0800000: return 247; // -4.0 2724 case 0x3E22F983: return 248; // 1.0 / (2.0 * pi) 2725 default: break; 2726 } 2727 // clang-format on 2728 } 2729 2730 return {}; 2731 } 2732 2733 // Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction 2734 // or nullopt. 2735 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) { 2736 return getInlineEncodingV216(false, Literal); 2737 } 2738 2739 // Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction 2740 // or nullopt. 2741 std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) { 2742 int32_t Signed = static_cast<int32_t>(Literal); 2743 if (Signed >= 0 && Signed <= 64) 2744 return 128 + Signed; 2745 2746 if (Signed >= -16 && Signed <= -1) 2747 return 192 + std::abs(Signed); 2748 2749 // clang-format off 2750 switch (Literal) { 2751 case 0x3F00: return 240; // 0.5 2752 case 0xBF00: return 241; // -0.5 2753 case 0x3F80: return 242; // 1.0 2754 case 0xBF80: return 243; // -1.0 2755 case 0x4000: return 244; // 2.0 2756 case 0xC000: return 245; // -2.0 2757 case 0x4080: return 246; // 4.0 2758 case 0xC080: return 247; // -4.0 2759 case 0x3E22: return 248; // 1.0 / (2.0 * pi) 2760 default: break; 2761 } 2762 // clang-format on 2763 2764 return std::nullopt; 2765 } 2766 2767 // Encoding of the literal as an inline constant for a V_PK_*_F16 instruction 2768 // or nullopt. 2769 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) { 2770 return getInlineEncodingV216(true, Literal); 2771 } 2772 2773 // Whether the given literal can be inlined for a V_PK_* instruction. 2774 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) { 2775 switch (OpType) { 2776 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2777 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2778 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2779 return getInlineEncodingV216(false, Literal).has_value(); 2780 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2781 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2782 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2783 return getInlineEncodingV216(true, Literal).has_value(); 2784 case AMDGPU::OPERAND_REG_IMM_V2BF16: 2785 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: 2786 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: 2787 return isInlinableLiteralV2BF16(Literal); 2788 default: 2789 llvm_unreachable("bad packed operand type"); 2790 } 2791 } 2792 2793 // Whether the given literal can be inlined for a V_PK_*_IU16 instruction. 2794 bool isInlinableLiteralV2I16(uint32_t Literal) { 2795 return getInlineEncodingV2I16(Literal).has_value(); 2796 } 2797 2798 // Whether the given literal can be inlined for a V_PK_*_BF16 instruction. 2799 bool isInlinableLiteralV2BF16(uint32_t Literal) { 2800 return getInlineEncodingV2BF16(Literal).has_value(); 2801 } 2802 2803 // Whether the given literal can be inlined for a V_PK_*_F16 instruction. 2804 bool isInlinableLiteralV2F16(uint32_t Literal) { 2805 return getInlineEncodingV2F16(Literal).has_value(); 2806 } 2807 2808 bool isValid32BitLiteral(uint64_t Val, bool IsFP64) { 2809 if (IsFP64) 2810 return !(Val & 0xffffffffu); 2811 2812 return isUInt<32>(Val) || isInt<32>(Val); 2813 } 2814 2815 bool isArgPassedInSGPR(const Argument *A) { 2816 const Function *F = A->getParent(); 2817 2818 // Arguments to compute shaders are never a source of divergence. 2819 CallingConv::ID CC = F->getCallingConv(); 2820 switch (CC) { 2821 case CallingConv::AMDGPU_KERNEL: 2822 case CallingConv::SPIR_KERNEL: 2823 return true; 2824 case CallingConv::AMDGPU_VS: 2825 case CallingConv::AMDGPU_LS: 2826 case CallingConv::AMDGPU_HS: 2827 case CallingConv::AMDGPU_ES: 2828 case CallingConv::AMDGPU_GS: 2829 case CallingConv::AMDGPU_PS: 2830 case CallingConv::AMDGPU_CS: 2831 case CallingConv::AMDGPU_Gfx: 2832 case CallingConv::AMDGPU_CS_Chain: 2833 case CallingConv::AMDGPU_CS_ChainPreserve: 2834 // For non-compute shaders, SGPR inputs are marked with either inreg or 2835 // byval. Everything else is in VGPRs. 2836 return A->hasAttribute(Attribute::InReg) || 2837 A->hasAttribute(Attribute::ByVal); 2838 default: 2839 // TODO: treat i1 as divergent? 2840 return A->hasAttribute(Attribute::InReg); 2841 } 2842 } 2843 2844 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) { 2845 // Arguments to compute shaders are never a source of divergence. 2846 CallingConv::ID CC = CB->getCallingConv(); 2847 switch (CC) { 2848 case CallingConv::AMDGPU_KERNEL: 2849 case CallingConv::SPIR_KERNEL: 2850 return true; 2851 case CallingConv::AMDGPU_VS: 2852 case CallingConv::AMDGPU_LS: 2853 case CallingConv::AMDGPU_HS: 2854 case CallingConv::AMDGPU_ES: 2855 case CallingConv::AMDGPU_GS: 2856 case CallingConv::AMDGPU_PS: 2857 case CallingConv::AMDGPU_CS: 2858 case CallingConv::AMDGPU_Gfx: 2859 case CallingConv::AMDGPU_CS_Chain: 2860 case CallingConv::AMDGPU_CS_ChainPreserve: 2861 // For non-compute shaders, SGPR inputs are marked with either inreg or 2862 // byval. Everything else is in VGPRs. 2863 return CB->paramHasAttr(ArgNo, Attribute::InReg) || 2864 CB->paramHasAttr(ArgNo, Attribute::ByVal); 2865 default: 2866 return CB->paramHasAttr(ArgNo, Attribute::InReg); 2867 } 2868 } 2869 2870 static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) { 2871 return isGCN3Encoding(ST) || isGFX10Plus(ST); 2872 } 2873 2874 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, 2875 int64_t EncodedOffset) { 2876 if (isGFX12Plus(ST)) 2877 return isUInt<23>(EncodedOffset); 2878 2879 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset) 2880 : isUInt<8>(EncodedOffset); 2881 } 2882 2883 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, 2884 int64_t EncodedOffset, 2885 bool IsBuffer) { 2886 if (isGFX12Plus(ST)) 2887 return isInt<24>(EncodedOffset); 2888 2889 return !IsBuffer && 2890 hasSMRDSignedImmOffset(ST) && 2891 isInt<21>(EncodedOffset); 2892 } 2893 2894 static bool isDwordAligned(uint64_t ByteOffset) { 2895 return (ByteOffset & 3) == 0; 2896 } 2897 2898 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, 2899 uint64_t ByteOffset) { 2900 if (hasSMEMByteOffset(ST)) 2901 return ByteOffset; 2902 2903 assert(isDwordAligned(ByteOffset)); 2904 return ByteOffset >> 2; 2905 } 2906 2907 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, 2908 int64_t ByteOffset, bool IsBuffer, 2909 bool HasSOffset) { 2910 // For unbuffered smem loads, it is illegal for the Immediate Offset to be 2911 // negative if the resulting (Offset + (M0 or SOffset or zero) is negative. 2912 // Handle case where SOffset is not present. 2913 if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST)) 2914 return std::nullopt; 2915 2916 if (isGFX12Plus(ST)) // 24 bit signed offsets 2917 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset) 2918 : std::nullopt; 2919 2920 // The signed version is always a byte offset. 2921 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) { 2922 assert(hasSMEMByteOffset(ST)); 2923 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset) 2924 : std::nullopt; 2925 } 2926 2927 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST)) 2928 return std::nullopt; 2929 2930 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); 2931 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset) 2932 ? std::optional<int64_t>(EncodedOffset) 2933 : std::nullopt; 2934 } 2935 2936 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, 2937 int64_t ByteOffset) { 2938 if (!isCI(ST) || !isDwordAligned(ByteOffset)) 2939 return std::nullopt; 2940 2941 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); 2942 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset) 2943 : std::nullopt; 2944 } 2945 2946 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) { 2947 if (AMDGPU::isGFX10(ST)) 2948 return 12; 2949 2950 if (AMDGPU::isGFX12(ST)) 2951 return 24; 2952 return 13; 2953 } 2954 2955 namespace { 2956 2957 struct SourceOfDivergence { 2958 unsigned Intr; 2959 }; 2960 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr); 2961 2962 struct AlwaysUniform { 2963 unsigned Intr; 2964 }; 2965 const AlwaysUniform *lookupAlwaysUniform(unsigned Intr); 2966 2967 #define GET_SourcesOfDivergence_IMPL 2968 #define GET_UniformIntrinsics_IMPL 2969 #define GET_Gfx9BufferFormat_IMPL 2970 #define GET_Gfx10BufferFormat_IMPL 2971 #define GET_Gfx11PlusBufferFormat_IMPL 2972 2973 #include "AMDGPUGenSearchableTables.inc" 2974 2975 } // end anonymous namespace 2976 2977 bool isIntrinsicSourceOfDivergence(unsigned IntrID) { 2978 return lookupSourceOfDivergence(IntrID); 2979 } 2980 2981 bool isIntrinsicAlwaysUniform(unsigned IntrID) { 2982 return lookupAlwaysUniform(IntrID); 2983 } 2984 2985 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, 2986 uint8_t NumComponents, 2987 uint8_t NumFormat, 2988 const MCSubtargetInfo &STI) { 2989 return isGFX11Plus(STI) 2990 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents, 2991 NumFormat) 2992 : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp, 2993 NumComponents, NumFormat) 2994 : getGfx9BufferFormatInfo(BitsPerComp, 2995 NumComponents, NumFormat); 2996 } 2997 2998 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, 2999 const MCSubtargetInfo &STI) { 3000 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format) 3001 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format) 3002 : getGfx9BufferFormatInfo(Format); 3003 } 3004 3005 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) { 3006 for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1, 3007 OpName::src2 }) { 3008 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName); 3009 if (Idx == -1) 3010 continue; 3011 3012 if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID || 3013 OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID) 3014 return true; 3015 } 3016 3017 return false; 3018 } 3019 3020 bool isDPALU_DPP(const MCInstrDesc &OpDesc) { 3021 return hasAny64BitVGPROperands(OpDesc); 3022 } 3023 3024 unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) { 3025 // Currently this is 128 for all subtargets 3026 return 128; 3027 } 3028 3029 } // namespace AMDGPU 3030 3031 raw_ostream &operator<<(raw_ostream &OS, 3032 const AMDGPU::IsaInfo::TargetIDSetting S) { 3033 switch (S) { 3034 case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported): 3035 OS << "Unsupported"; 3036 break; 3037 case (AMDGPU::IsaInfo::TargetIDSetting::Any): 3038 OS << "Any"; 3039 break; 3040 case (AMDGPU::IsaInfo::TargetIDSetting::Off): 3041 OS << "Off"; 3042 break; 3043 case (AMDGPU::IsaInfo::TargetIDSetting::On): 3044 OS << "On"; 3045 break; 3046 } 3047 return OS; 3048 } 3049 3050 } // namespace llvm 3051