//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include <cstdint>
#include <string>
#include <utility>

namespace llvm {

class Argument;
class AMDGPUSubtarget;
class FeatureBitset;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCContext;
class MCRegisterClass;
class MCRegisterInfo;
class MCSection;
class MCSubtargetInfo;
class MachineMemOperand;
class raw_ostream;
class Triple;

namespace AMDGPU {

#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);

/// \returns True if given subtarget \p STI supports code object version 3,
/// false otherwise.
bool hasCodeObjectV3(const MCSubtargetInfo *STI);

/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Maximum number of waves per compute unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per compute unit for given subtarget \p
/// STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize);
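// For example, an occupancy query for a kernel whose flat work group size is
// known can be phrased as follows (an illustrative sketch; "STI" and
// "FlatWorkGroupSize" are assumed to be supplied by the caller):
//
//   unsigned WavesPerEU = getMaxWavesPerEU(STI, FlatWorkGroupSize);
//   unsigned WavesPerCU = getMaxWavesPerCU(STI, FlatWorkGroupSize);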

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
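// A typical register-accounting sequence when emitting a kernel descriptor
// combines the two helpers above (an illustrative sketch; "NumSGPRsUsed",
// "UsesVCC" and "UsesFlatScratch" are hypothetical locals, not part of this
// API):
//
//   unsigned NumSGPRs = NumSGPRsUsed;
//   NumSGPRs += getNumExtraSGPRs(STI, UsesVCC, UsesFlatScratch);
//   unsigned SGPRBlocks = getNumSGPRBlocks(STI, NumSGPRs);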

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32 = None);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32 = None);

} // end namespace IsaInfo

LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);

struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
};

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);
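// For example, a pass that rewrites a MIMG instruction to a variant with a
// different address operand width can combine these table queries (an
// illustrative sketch; "Opc" and "NewVAddrDwords" are hypothetical):
//
//   const MIMGInfo *Info = getMIMGInfo(Opc);
//   int NewOpc = getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding,
//                              Info->VDataDwords, NewVAddrDwords);
//   // A negative result means the tables contain no such variant.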

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional if \p OnlyFirstRequired
/// is true).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);
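// For example, parsing the "amdgpu-waves-per-eu"="2,4" function attribute
// yields the pair {2, 4} (an illustrative sketch of how a subtarget query
// might use this helper; the {0, 0} default is a placeholder):
//
//   std::pair<int, int> WavesPerEU =
//       getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0},
//                               /*OnlyFirstRequired=*/true);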

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  static Waitcnt allZero(const IsaVersion &Version) {
    return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
  }
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }

  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt),
                   std::min(VsCnt, Other.VsCnt));
  }
};

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9 only)
///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9+ only)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10 only)
///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
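// Worked example derived from the field layout documented above (a sketch,
// not output from the implementation): on gfx9, encoding Vmcnt = 63,
// Expcnt = 7 and Lgkmcnt = 15 places
//
//   Vmcnt[3:0]  = 0xF  into Waitcnt[3:0]
//   Expcnt      = 0x7  into Waitcnt[6:4]
//   Lgkmcnt     = 0xF  into Waitcnt[11:8]
//   Vmcnt[5:4]  = 0x3  into Waitcnt[15:14]
//
// so the counter fields of the result are 0xCF7F.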

namespace Hwreg {

LLVM_READONLY
int64_t getHwregId(const StringRef Name);

LLVM_READNONE
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg

namespace SendMsg {

LLVM_READONLY
int64_t getMsgId(const StringRef Name);

LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

LLVM_READNONE
StringRef getMsgName(int64_t MsgId);

LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId);

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId);

void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);
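// For example, assembling "s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 1)" boils
// down to an encodeMsg call (an illustrative sketch, assuming the SIMM16
// layout ID[3:0], OP[6:4], STREAM_ID[9:8] used by these targets, with
// ID_GS = 2 and OP_GS_EMIT = 2):
//
//   uint64_t Imm = encodeMsg(/*MsgId=*/2, /*OpId=*/2, /*StreamId=*/1);
//   // Imm == 0x122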

} // namespace SendMsg

unsigned getInitialPSInputAddr(const Function &F);

LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// FIXME: Remove this when calling conventions are cleaned up.
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);

/// \returns true if \p Reg is a scalar register.
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI);

/// \returns true if there is any intersection between registers \p Reg0 and
/// \p Reg1.
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo *TRI);

/// If \p Reg is a pseudo register, return the correct hardware register given
/// \p STI, otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register.
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get size of register operand.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}

/// Is this literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
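// For example (an illustrative note, not an exhaustive list): small integers
// and common floating-point constants such as 1.0f (0x3F800000) have inline
// encodings on all subtargets, while 1/(2*pi) (0x3E22F983) is inlinable only
// when HasInv2Pi is true, i.e. on subtargets providing that inline constant.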

bool isArgPassedInSGPR(const Argument *Arg);

/// \returns The encoding that will be used for \p ByteOffset in the SMRD
/// offset field.
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, uint32_t Align = 4);

/// \returns true if the intrinsic is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating point opcodes that support exception flag gathering quiet and
  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
  /// quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  // TODO: FP mode fields

  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true) {}

  SIModeRegisterDefaults(const Function &F);

  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    SIModeRegisterDefaults Mode;
    Mode.DX10Clamp = true;
    Mode.IEEE = AMDGPU::isCompute(CC);
    return Mode;
  }

  bool operator ==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
  }

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode
  // should be able to override.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    return *this == CalleeMode;
  }
};

} // end namespace AMDGPU
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H