1e8d8bef9SDimitry Andric //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===// 2e8d8bef9SDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //==-----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric // 9e8d8bef9SDimitry Andric /// \file 10e8d8bef9SDimitry Andric /// AMD GCN specific subclass of TargetSubtarget. 11e8d8bef9SDimitry Andric // 12e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 13e8d8bef9SDimitry Andric 14e8d8bef9SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 15e8d8bef9SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 16e8d8bef9SDimitry Andric 17e8d8bef9SDimitry Andric #include "AMDGPUCallLowering.h" 1806c3fb27SDimitry Andric #include "AMDGPURegisterBankInfo.h" 19e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h" 20e8d8bef9SDimitry Andric #include "SIFrameLowering.h" 21e8d8bef9SDimitry Andric #include "SIISelLowering.h" 22e8d8bef9SDimitry Andric #include "SIInstrInfo.h" 2306c3fb27SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 24e8d8bef9SDimitry Andric #include "llvm/CodeGen/SelectionDAGTargetInfo.h" 255f757f3fSDimitry Andric #include "llvm/Support/ErrorHandling.h" 26e8d8bef9SDimitry Andric 27e8d8bef9SDimitry Andric #define GET_SUBTARGETINFO_HEADER 28e8d8bef9SDimitry Andric #include "AMDGPUGenSubtargetInfo.inc" 29e8d8bef9SDimitry Andric 30e8d8bef9SDimitry Andric namespace llvm { 31e8d8bef9SDimitry Andric 32e8d8bef9SDimitry Andric class GCNTargetMachine; 33e8d8bef9SDimitry Andric 34e8d8bef9SDimitry Andric class GCNSubtarget final : public AMDGPUGenSubtargetInfo, 35e8d8bef9SDimitry Andric public 
AMDGPUSubtarget { 36bdd1243dSDimitry Andric public: 37e8d8bef9SDimitry Andric using AMDGPUSubtarget::getMaxWavesPerEU; 38e8d8bef9SDimitry Andric 39fe6060f1SDimitry Andric // Following 2 enums are documented at: 40fe6060f1SDimitry Andric // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi 41fe6060f1SDimitry Andric enum class TrapHandlerAbi { 42fe6060f1SDimitry Andric NONE = 0x00, 43fe6060f1SDimitry Andric AMDHSA = 0x01, 44e8d8bef9SDimitry Andric }; 45e8d8bef9SDimitry Andric 46fe6060f1SDimitry Andric enum class TrapID { 47fe6060f1SDimitry Andric LLVMAMDHSATrap = 0x02, 48fe6060f1SDimitry Andric LLVMAMDHSADebugTrap = 0x03, 49e8d8bef9SDimitry Andric }; 50e8d8bef9SDimitry Andric 51e8d8bef9SDimitry Andric private: 52e8d8bef9SDimitry Andric /// GlobalISel related APIs. 53e8d8bef9SDimitry Andric std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; 54e8d8bef9SDimitry Andric std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo; 55e8d8bef9SDimitry Andric std::unique_ptr<InstructionSelector> InstSelector; 56e8d8bef9SDimitry Andric std::unique_ptr<LegalizerInfo> Legalizer; 5706c3fb27SDimitry Andric std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo; 58e8d8bef9SDimitry Andric 59e8d8bef9SDimitry Andric protected: 60e8d8bef9SDimitry Andric // Basic subtarget description. 61e8d8bef9SDimitry Andric Triple TargetTriple; 62e8d8bef9SDimitry Andric AMDGPU::IsaInfo::AMDGPUTargetID TargetID; 6381ad6265SDimitry Andric unsigned Gen = INVALID; 64e8d8bef9SDimitry Andric InstrItineraryData InstrItins; 6581ad6265SDimitry Andric int LDSBankCount = 0; 6681ad6265SDimitry Andric unsigned MaxPrivateElementSize = 0; 67e8d8bef9SDimitry Andric 68e8d8bef9SDimitry Andric // Possibly statically set by tablegen, but may want to be overridden. 
bool FastDenormalF32 = false;
bool HalfRate64Ops = false;
bool FullRate64Ops = false;

// Dynamically set bits that enable features.
bool FlatForGlobal = false;
bool AutoWaitcntBeforeBarrier = false;
bool BackOffBarrier = false;
bool UnalignedScratchAccess = false;
bool UnalignedAccessMode = false;
bool HasApertureRegs = false;
bool SupportsXNACK = false;
bool KernargPreload = false;

// This should not be used directly. 'TargetID' tracks the dynamic settings
// for XNACK.
bool EnableXNACK = false;

bool EnableTgSplit = false;
bool EnableCuMode = false;
bool TrapHandler = false;
bool EnablePreciseMemory = false;

// Used as options.
bool EnableLoadStoreOpt = false;
bool EnableUnsafeDSOffsetFolding = false;
bool EnableSIScheduler = false;
bool EnableDS128 = false;
bool EnablePRTStrictNull = false;
bool DumpCode = false;

// Static subtarget properties set by tablegen.
bool FP64 = false;
bool FMA = false;
bool MIMG_R128 = false;
bool CIInsts = false;
bool GFX8Insts = false;
bool GFX9Insts = false;
bool GFX90AInsts = false;
bool GFX940Insts = false;
bool GFX10Insts = false;
bool GFX11Insts = false;
bool GFX12Insts = false;
bool GFX10_3Insts = false;
bool GFX7GFX8GFX9Insts = false;
bool SGPRInitBug = false;
bool UserSGPRInit16Bug = false;
bool NegativeScratchOffsetBug = false;
bool NegativeUnalignedScratchOffsetBug = false;
bool HasSMemRealTime = false;
bool HasIntClamp = false;
bool HasFmaMixInsts = false;
bool HasMovrel = false;
bool HasVGPRIndexMode = false;
bool HasScalarDwordx3Loads = false;
bool HasScalarStores = false;
bool HasScalarAtomics = false;
bool HasSDWAOmod = false;
bool HasSDWAScalar = false;
bool HasSDWASdst = false;
bool HasSDWAMac = false;
bool HasSDWAOutModsVOPC = false;
bool HasDPP = false;
bool HasDPP8 = false;
bool HasDPALU_DPP = false;
bool HasDPPSrc1SGPR = false;
bool HasPackedFP32Ops = false;
bool HasImageInsts = false;
bool HasExtendedImageInsts = false;
bool HasR128A16 = false;
bool HasA16 = false;
bool HasG16 = false;
bool HasNSAEncoding = false;
bool HasPartialNSAEncoding = false;
bool GFX10_AEncoding = false;
bool GFX10_BEncoding = false;
bool HasDLInsts = false;
bool HasFmacF64Inst = false;
bool HasDot1Insts = false;
bool HasDot2Insts = false;
bool HasDot3Insts = false;
bool HasDot4Insts = false;
bool HasDot5Insts = false;
bool HasDot6Insts = false;
bool HasDot7Insts = false;
bool HasDot8Insts = false;
bool HasDot9Insts = false;
bool HasDot10Insts = false;
bool HasDot11Insts = false;
bool HasMAIInsts = false;
bool HasFP8Insts = false;
bool HasFP8ConversionInsts = false;
bool HasPkFmacF16Inst = false;
bool HasAtomicFMinFMaxF32GlobalInsts = false;
bool HasAtomicFMinFMaxF64GlobalInsts = false;
bool HasAtomicFMinFMaxF32FlatInsts = false;
bool HasAtomicFMinFMaxF64FlatInsts = false;
bool HasAtomicDsPkAdd16Insts = false;
bool HasAtomicFlatPkAdd16Insts = false;
bool HasAtomicFaddRtnInsts = false;
bool HasAtomicFaddNoRtnInsts = false;
bool HasMemoryAtomicFaddF32DenormalSupport = false;
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
bool HasAtomicBufferGlobalPkAddF16Insts = false;
bool HasAtomicCSubNoRtnInsts = false;
bool HasAtomicGlobalPkAddBF16Inst = false;
bool HasAtomicBufferPkAddBF16Inst = false;
bool HasFlatAtomicFaddF32Inst = false;
bool HasFlatBufferGlobalAtomicFaddF64Inst = false;
bool HasDefaultComponentZero = false;
bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
bool HasDefaultComponentBroadcast = false;
/// The maximum number of instructions that may be placed within an S_CLAUSE,
/// which is one greater than the maximum argument to S_CLAUSE. A value of 0
/// indicates a lack of S_CLAUSE support.
unsigned MaxHardClauseLength = 0;
bool SupportsSRAMECC = false;

// This should not be used directly. 'TargetID' tracks the dynamic settings
// for SRAMECC.
bool EnableSRAMECC = false;

bool HasNoSdstCMPX = false;
bool HasVscnt = false;
bool HasGetWaveIdInst = false;
bool HasSMemTimeInst = false;
bool HasShaderCyclesRegister = false;
bool HasShaderCyclesHiLoRegisters = false;
bool HasVOP3Literal = false;
bool HasNoDataDepHazard = false;
bool FlatAddressSpace = false;
bool FlatInstOffsets = false;
bool FlatGlobalInsts = false;
bool FlatScratchInsts = false;
bool ScalarFlatScratchInsts = false;
bool HasArchitectedFlatScratch = false;
bool EnableFlatScratch = false;
bool HasArchitectedSGPRs = false;
bool HasGDS = false;
bool HasGWS = false;
bool AddNoCarryInsts = false;
bool HasUnpackedD16VMem = false;
bool LDSMisalignedBug = false;
bool HasMFMAInlineLiteralBug = false;
bool UnalignedBufferAccess = false;
bool UnalignedDSAccess = false;
bool HasPackedTID = false;
bool ScalarizeGlobal = false;
bool HasSALUFloatInsts = false;
bool HasVGPRSingleUseHintInsts = false;
bool HasPseudoScalarTrans = false;
bool HasRestrictedSOffset = false;

bool HasVcmpxPermlaneHazard = false;
bool HasVMEMtoScalarWriteHazard = false;
bool HasSMEMtoVectorWriteHazard = false;
bool HasInstFwdPrefetchBug = false;
bool HasVcmpxExecWARHazard = false;
bool HasLdsBranchVmemWARHazard = false;
bool HasNSAtoVMEMBug = false;
bool HasNSAClauseBug = false;
bool HasOffset3fBug = false;
bool HasFlatSegmentOffsetBug = false;
bool HasImageStoreD16Bug = false;
bool HasImageGather4D16Bug = false;
bool HasMSAALoadDstSelBug = false;
bool HasPrivEnabledTrap2NopBug = false;
bool Has1_5xVGPRs = false;
bool HasMADIntraFwdBug = false;
bool HasVOPDInsts = false;
bool HasVALUTransUseHazard = false;
bool HasForceStoreSC0SC1 = false;
bool HasRequiredExportPriority = false;
bool HasVmemWriteVgprInOrder = false;

bool RequiresCOV6 = false;

// Dummy feature to use for assembler in tablegen.
24781ad6265SDimitry Andric bool FeatureDisable = false; 248e8d8bef9SDimitry Andric 249e8d8bef9SDimitry Andric SelectionDAGTargetInfo TSInfo; 250e8d8bef9SDimitry Andric private: 251e8d8bef9SDimitry Andric SIInstrInfo InstrInfo; 252e8d8bef9SDimitry Andric SITargetLowering TLInfo; 253e8d8bef9SDimitry Andric SIFrameLowering FrameLowering; 254e8d8bef9SDimitry Andric 255e8d8bef9SDimitry Andric public: 256e8d8bef9SDimitry Andric GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 257e8d8bef9SDimitry Andric const GCNTargetMachine &TM); 258e8d8bef9SDimitry Andric ~GCNSubtarget() override; 259e8d8bef9SDimitry Andric 260e8d8bef9SDimitry Andric GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, 261e8d8bef9SDimitry Andric StringRef GPU, StringRef FS); 262e8d8bef9SDimitry Andric 2630fca6ea1SDimitry Andric /// Diagnose inconsistent subtarget features before attempting to codegen 2640fca6ea1SDimitry Andric /// function \p F. 2650fca6ea1SDimitry Andric void checkSubtargetFeatures(const Function &F) const; 2660fca6ea1SDimitry Andric 267e8d8bef9SDimitry Andric const SIInstrInfo *getInstrInfo() const override { 268e8d8bef9SDimitry Andric return &InstrInfo; 269e8d8bef9SDimitry Andric } 270e8d8bef9SDimitry Andric 271e8d8bef9SDimitry Andric const SIFrameLowering *getFrameLowering() const override { 272e8d8bef9SDimitry Andric return &FrameLowering; 273e8d8bef9SDimitry Andric } 274e8d8bef9SDimitry Andric 275e8d8bef9SDimitry Andric const SITargetLowering *getTargetLowering() const override { 276e8d8bef9SDimitry Andric return &TLInfo; 277e8d8bef9SDimitry Andric } 278e8d8bef9SDimitry Andric 279e8d8bef9SDimitry Andric const SIRegisterInfo *getRegisterInfo() const override { 280e8d8bef9SDimitry Andric return &InstrInfo.getRegisterInfo(); 281e8d8bef9SDimitry Andric } 282e8d8bef9SDimitry Andric 283e8d8bef9SDimitry Andric const CallLowering *getCallLowering() const override { 284e8d8bef9SDimitry Andric return CallLoweringInfo.get(); 285e8d8bef9SDimitry Andric } 
286e8d8bef9SDimitry Andric 287e8d8bef9SDimitry Andric const InlineAsmLowering *getInlineAsmLowering() const override { 288e8d8bef9SDimitry Andric return InlineAsmLoweringInfo.get(); 289e8d8bef9SDimitry Andric } 290e8d8bef9SDimitry Andric 291e8d8bef9SDimitry Andric InstructionSelector *getInstructionSelector() const override { 292e8d8bef9SDimitry Andric return InstSelector.get(); 293e8d8bef9SDimitry Andric } 294e8d8bef9SDimitry Andric 295e8d8bef9SDimitry Andric const LegalizerInfo *getLegalizerInfo() const override { 296e8d8bef9SDimitry Andric return Legalizer.get(); 297e8d8bef9SDimitry Andric } 298e8d8bef9SDimitry Andric 29906c3fb27SDimitry Andric const AMDGPURegisterBankInfo *getRegBankInfo() const override { 300e8d8bef9SDimitry Andric return RegBankInfo.get(); 301e8d8bef9SDimitry Andric } 302e8d8bef9SDimitry Andric 303fe6060f1SDimitry Andric const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const { 304fe6060f1SDimitry Andric return TargetID; 305fe6060f1SDimitry Andric } 306fe6060f1SDimitry Andric 307e8d8bef9SDimitry Andric // Nothing implemented, just prevent crashes on use. 308e8d8bef9SDimitry Andric const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { 309e8d8bef9SDimitry Andric return &TSInfo; 310e8d8bef9SDimitry Andric } 311e8d8bef9SDimitry Andric 312e8d8bef9SDimitry Andric const InstrItineraryData *getInstrItineraryData() const override { 313e8d8bef9SDimitry Andric return &InstrItins; 314e8d8bef9SDimitry Andric } 315e8d8bef9SDimitry Andric 316e8d8bef9SDimitry Andric void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); 317e8d8bef9SDimitry Andric 318e8d8bef9SDimitry Andric Generation getGeneration() const { 319e8d8bef9SDimitry Andric return (Generation)Gen; 320e8d8bef9SDimitry Andric } 321e8d8bef9SDimitry Andric 32281ad6265SDimitry Andric unsigned getMaxWaveScratchSize() const { 32381ad6265SDimitry Andric // See COMPUTE_TMPRING_SIZE.WAVESIZE. 
3247a6dacacSDimitry Andric if (getGeneration() >= GFX12) { 3257a6dacacSDimitry Andric // 18-bit field in units of 64-dword. 3267a6dacacSDimitry Andric return (64 * 4) * ((1 << 18) - 1); 32781ad6265SDimitry Andric } 3287a6dacacSDimitry Andric if (getGeneration() == GFX11) { 32981ad6265SDimitry Andric // 15-bit field in units of 64-dword. 33081ad6265SDimitry Andric return (64 * 4) * ((1 << 15) - 1); 33181ad6265SDimitry Andric } 3327a6dacacSDimitry Andric // 13-bit field in units of 256-dword. 3337a6dacacSDimitry Andric return (256 * 4) * ((1 << 13) - 1); 3347a6dacacSDimitry Andric } 33581ad6265SDimitry Andric 336349cc55cSDimitry Andric /// Return the number of high bits known to be zero for a frame index. 337e8d8bef9SDimitry Andric unsigned getKnownHighZeroBitsForFrameIndex() const { 33806c3fb27SDimitry Andric return llvm::countl_zero(getMaxWaveScratchSize()) + getWavefrontSizeLog2(); 339e8d8bef9SDimitry Andric } 340e8d8bef9SDimitry Andric 341e8d8bef9SDimitry Andric int getLDSBankCount() const { 342e8d8bef9SDimitry Andric return LDSBankCount; 343e8d8bef9SDimitry Andric } 344e8d8bef9SDimitry Andric 345e8d8bef9SDimitry Andric unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const { 346e8d8bef9SDimitry Andric return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16; 347e8d8bef9SDimitry Andric } 348e8d8bef9SDimitry Andric 349e8d8bef9SDimitry Andric unsigned getConstantBusLimit(unsigned Opcode) const; 350e8d8bef9SDimitry Andric 351fe6060f1SDimitry Andric /// Returns if the result of this instruction with a 16-bit result returned in 352fe6060f1SDimitry Andric /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve 353fe6060f1SDimitry Andric /// the original value. 
354fe6060f1SDimitry Andric bool zeroesHigh16BitsOfDest(unsigned Opcode) const; 355fe6060f1SDimitry Andric 356bdd1243dSDimitry Andric bool supportsWGP() const { return getGeneration() >= GFX10; } 357bdd1243dSDimitry Andric 358e8d8bef9SDimitry Andric bool hasIntClamp() const { 359e8d8bef9SDimitry Andric return HasIntClamp; 360e8d8bef9SDimitry Andric } 361e8d8bef9SDimitry Andric 362e8d8bef9SDimitry Andric bool hasFP64() const { 363e8d8bef9SDimitry Andric return FP64; 364e8d8bef9SDimitry Andric } 365e8d8bef9SDimitry Andric 366e8d8bef9SDimitry Andric bool hasMIMG_R128() const { 367e8d8bef9SDimitry Andric return MIMG_R128; 368e8d8bef9SDimitry Andric } 369e8d8bef9SDimitry Andric 370e8d8bef9SDimitry Andric bool hasHWFP64() const { 371e8d8bef9SDimitry Andric return FP64; 372e8d8bef9SDimitry Andric } 373e8d8bef9SDimitry Andric 374e8d8bef9SDimitry Andric bool hasHalfRate64Ops() const { 375e8d8bef9SDimitry Andric return HalfRate64Ops; 376e8d8bef9SDimitry Andric } 377e8d8bef9SDimitry Andric 378fe6060f1SDimitry Andric bool hasFullRate64Ops() const { 379fe6060f1SDimitry Andric return FullRate64Ops; 380fe6060f1SDimitry Andric } 381fe6060f1SDimitry Andric 382e8d8bef9SDimitry Andric bool hasAddr64() const { 383e8d8bef9SDimitry Andric return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); 384e8d8bef9SDimitry Andric } 385e8d8bef9SDimitry Andric 386e8d8bef9SDimitry Andric bool hasFlat() const { 387e8d8bef9SDimitry Andric return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS); 388e8d8bef9SDimitry Andric } 389e8d8bef9SDimitry Andric 390e8d8bef9SDimitry Andric // Return true if the target only has the reverse operand versions of VALU 391e8d8bef9SDimitry Andric // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32). 
392e8d8bef9SDimitry Andric bool hasOnlyRevVALUShifts() const { 393e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 394e8d8bef9SDimitry Andric } 395e8d8bef9SDimitry Andric 396e8d8bef9SDimitry Andric bool hasFractBug() const { 397e8d8bef9SDimitry Andric return getGeneration() == SOUTHERN_ISLANDS; 398e8d8bef9SDimitry Andric } 399e8d8bef9SDimitry Andric 400e8d8bef9SDimitry Andric bool hasBFE() const { 401e8d8bef9SDimitry Andric return true; 402e8d8bef9SDimitry Andric } 403e8d8bef9SDimitry Andric 404e8d8bef9SDimitry Andric bool hasBFI() const { 405e8d8bef9SDimitry Andric return true; 406e8d8bef9SDimitry Andric } 407e8d8bef9SDimitry Andric 408e8d8bef9SDimitry Andric bool hasBFM() const { 409e8d8bef9SDimitry Andric return hasBFE(); 410e8d8bef9SDimitry Andric } 411e8d8bef9SDimitry Andric 412e8d8bef9SDimitry Andric bool hasBCNT(unsigned Size) const { 413e8d8bef9SDimitry Andric return true; 414e8d8bef9SDimitry Andric } 415e8d8bef9SDimitry Andric 416e8d8bef9SDimitry Andric bool hasFFBL() const { 417e8d8bef9SDimitry Andric return true; 418e8d8bef9SDimitry Andric } 419e8d8bef9SDimitry Andric 420e8d8bef9SDimitry Andric bool hasFFBH() const { 421e8d8bef9SDimitry Andric return true; 422e8d8bef9SDimitry Andric } 423e8d8bef9SDimitry Andric 424e8d8bef9SDimitry Andric bool hasMed3_16() const { 425e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 426e8d8bef9SDimitry Andric } 427e8d8bef9SDimitry Andric 428e8d8bef9SDimitry Andric bool hasMin3Max3_16() const { 429e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 430e8d8bef9SDimitry Andric } 431e8d8bef9SDimitry Andric 432e8d8bef9SDimitry Andric bool hasFmaMixInsts() const { 433e8d8bef9SDimitry Andric return HasFmaMixInsts; 434e8d8bef9SDimitry Andric } 435e8d8bef9SDimitry Andric 436e8d8bef9SDimitry Andric bool hasCARRY() const { 437e8d8bef9SDimitry Andric return true; 438e8d8bef9SDimitry Andric } 439e8d8bef9SDimitry Andric 440e8d8bef9SDimitry Andric bool hasFMA() const { 
441e8d8bef9SDimitry Andric return FMA; 442e8d8bef9SDimitry Andric } 443e8d8bef9SDimitry Andric 444e8d8bef9SDimitry Andric bool hasSwap() const { 445e8d8bef9SDimitry Andric return GFX9Insts; 446e8d8bef9SDimitry Andric } 447e8d8bef9SDimitry Andric 448e8d8bef9SDimitry Andric bool hasScalarPackInsts() const { 449e8d8bef9SDimitry Andric return GFX9Insts; 450e8d8bef9SDimitry Andric } 451e8d8bef9SDimitry Andric 452e8d8bef9SDimitry Andric bool hasScalarMulHiInsts() const { 453e8d8bef9SDimitry Andric return GFX9Insts; 454e8d8bef9SDimitry Andric } 455e8d8bef9SDimitry Andric 4567a6dacacSDimitry Andric bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; } 4577a6dacacSDimitry Andric 458e8d8bef9SDimitry Andric TrapHandlerAbi getTrapHandlerAbi() const { 459fe6060f1SDimitry Andric return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE; 460fe6060f1SDimitry Andric } 461fe6060f1SDimitry Andric 462fe6060f1SDimitry Andric bool supportsGetDoorbellID() const { 463fe6060f1SDimitry Andric // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets. 464fe6060f1SDimitry Andric return getGeneration() >= GFX9; 465e8d8bef9SDimitry Andric } 466e8d8bef9SDimitry Andric 467e8d8bef9SDimitry Andric /// True if the offset field of DS instructions works as expected. On SI, the 468e8d8bef9SDimitry Andric /// offset uses a 16-bit adder and does not always wrap properly. 469e8d8bef9SDimitry Andric bool hasUsableDSOffset() const { 470e8d8bef9SDimitry Andric return getGeneration() >= SEA_ISLANDS; 471e8d8bef9SDimitry Andric } 472e8d8bef9SDimitry Andric 473e8d8bef9SDimitry Andric bool unsafeDSOffsetFoldingEnabled() const { 474e8d8bef9SDimitry Andric return EnableUnsafeDSOffsetFolding; 475e8d8bef9SDimitry Andric } 476e8d8bef9SDimitry Andric 477e8d8bef9SDimitry Andric /// Condition output from div_scale is usable. 
478e8d8bef9SDimitry Andric bool hasUsableDivScaleConditionOutput() const { 479e8d8bef9SDimitry Andric return getGeneration() != SOUTHERN_ISLANDS; 480e8d8bef9SDimitry Andric } 481e8d8bef9SDimitry Andric 482e8d8bef9SDimitry Andric /// Extra wait hazard is needed in some cases before 483e8d8bef9SDimitry Andric /// s_cbranch_vccnz/s_cbranch_vccz. 484e8d8bef9SDimitry Andric bool hasReadVCCZBug() const { 485e8d8bef9SDimitry Andric return getGeneration() <= SEA_ISLANDS; 486e8d8bef9SDimitry Andric } 487e8d8bef9SDimitry Andric 488e8d8bef9SDimitry Andric /// Writes to VCC_LO/VCC_HI update the VCCZ flag. 489e8d8bef9SDimitry Andric bool partialVCCWritesUpdateVCCZ() const { 490e8d8bef9SDimitry Andric return getGeneration() >= GFX10; 491e8d8bef9SDimitry Andric } 492e8d8bef9SDimitry Andric 493e8d8bef9SDimitry Andric /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR 494e8d8bef9SDimitry Andric /// was written by a VALU instruction. 495e8d8bef9SDimitry Andric bool hasSMRDReadVALUDefHazard() const { 496e8d8bef9SDimitry Andric return getGeneration() == SOUTHERN_ISLANDS; 497e8d8bef9SDimitry Andric } 498e8d8bef9SDimitry Andric 499e8d8bef9SDimitry Andric /// A read of an SGPR by a VMEM instruction requires 5 wait states when the 500e8d8bef9SDimitry Andric /// SGPR was written by a VALU Instruction. 501e8d8bef9SDimitry Andric bool hasVMEMReadSGPRVALUDefHazard() const { 502e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 503e8d8bef9SDimitry Andric } 504e8d8bef9SDimitry Andric 505e8d8bef9SDimitry Andric bool hasRFEHazards() const { 506e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 507e8d8bef9SDimitry Andric } 508e8d8bef9SDimitry Andric 509e8d8bef9SDimitry Andric /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32. 510e8d8bef9SDimitry Andric unsigned getSetRegWaitStates() const { 511e8d8bef9SDimitry Andric return getGeneration() <= SEA_ISLANDS ? 
1 : 2; 512e8d8bef9SDimitry Andric } 513e8d8bef9SDimitry Andric 514e8d8bef9SDimitry Andric bool dumpCode() const { 515e8d8bef9SDimitry Andric return DumpCode; 516e8d8bef9SDimitry Andric } 517e8d8bef9SDimitry Andric 518e8d8bef9SDimitry Andric /// Return the amount of LDS that can be used that will not restrict the 519e8d8bef9SDimitry Andric /// occupancy lower than WaveCount. 520e8d8bef9SDimitry Andric unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, 521e8d8bef9SDimitry Andric const Function &) const; 522e8d8bef9SDimitry Andric 523e8d8bef9SDimitry Andric bool supportsMinMaxDenormModes() const { 524e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 525e8d8bef9SDimitry Andric } 526e8d8bef9SDimitry Andric 527e8d8bef9SDimitry Andric /// \returns If target supports S_DENORM_MODE. 528e8d8bef9SDimitry Andric bool hasDenormModeInst() const { 529e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX10; 530e8d8bef9SDimitry Andric } 531e8d8bef9SDimitry Andric 532e8d8bef9SDimitry Andric bool useFlatForGlobal() const { 533e8d8bef9SDimitry Andric return FlatForGlobal; 534e8d8bef9SDimitry Andric } 535e8d8bef9SDimitry Andric 536e8d8bef9SDimitry Andric /// \returns If target supports ds_read/write_b128 and user enables generation 537e8d8bef9SDimitry Andric /// of ds_read/write_b128. 538e8d8bef9SDimitry Andric bool useDS128() const { 539e8d8bef9SDimitry Andric return CIInsts && EnableDS128; 540e8d8bef9SDimitry Andric } 541e8d8bef9SDimitry Andric 542e8d8bef9SDimitry Andric /// \return If target supports ds_read/write_b96/128. 
543e8d8bef9SDimitry Andric bool hasDS96AndDS128() const { 544e8d8bef9SDimitry Andric return CIInsts; 545e8d8bef9SDimitry Andric } 546e8d8bef9SDimitry Andric 547e8d8bef9SDimitry Andric /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64 548e8d8bef9SDimitry Andric bool haveRoundOpsF64() const { 549e8d8bef9SDimitry Andric return CIInsts; 550e8d8bef9SDimitry Andric } 551e8d8bef9SDimitry Andric 552e8d8bef9SDimitry Andric /// \returns If MUBUF instructions always perform range checking, even for 553e8d8bef9SDimitry Andric /// buffer resources used for private memory access. 554e8d8bef9SDimitry Andric bool privateMemoryResourceIsRangeChecked() const { 555e8d8bef9SDimitry Andric return getGeneration() < AMDGPUSubtarget::GFX9; 556e8d8bef9SDimitry Andric } 557e8d8bef9SDimitry Andric 558e8d8bef9SDimitry Andric /// \returns If target requires PRT Struct NULL support (zero result registers 559e8d8bef9SDimitry Andric /// for sparse texture support). 560e8d8bef9SDimitry Andric bool usePRTStrictNull() const { 561e8d8bef9SDimitry Andric return EnablePRTStrictNull; 562e8d8bef9SDimitry Andric } 563e8d8bef9SDimitry Andric 564e8d8bef9SDimitry Andric bool hasAutoWaitcntBeforeBarrier() const { 565e8d8bef9SDimitry Andric return AutoWaitcntBeforeBarrier; 566e8d8bef9SDimitry Andric } 567e8d8bef9SDimitry Andric 568bdd1243dSDimitry Andric /// \returns true if the target supports backing off of s_barrier instructions 569bdd1243dSDimitry Andric /// when an exception is raised. 
570bdd1243dSDimitry Andric bool supportsBackOffBarrier() const { 571bdd1243dSDimitry Andric return BackOffBarrier; 572bdd1243dSDimitry Andric } 573bdd1243dSDimitry Andric 574e8d8bef9SDimitry Andric bool hasUnalignedBufferAccess() const { 575e8d8bef9SDimitry Andric return UnalignedBufferAccess; 576e8d8bef9SDimitry Andric } 577e8d8bef9SDimitry Andric 578e8d8bef9SDimitry Andric bool hasUnalignedBufferAccessEnabled() const { 579e8d8bef9SDimitry Andric return UnalignedBufferAccess && UnalignedAccessMode; 580e8d8bef9SDimitry Andric } 581e8d8bef9SDimitry Andric 582e8d8bef9SDimitry Andric bool hasUnalignedDSAccess() const { 583e8d8bef9SDimitry Andric return UnalignedDSAccess; 584e8d8bef9SDimitry Andric } 585e8d8bef9SDimitry Andric 586e8d8bef9SDimitry Andric bool hasUnalignedDSAccessEnabled() const { 587e8d8bef9SDimitry Andric return UnalignedDSAccess && UnalignedAccessMode; 588e8d8bef9SDimitry Andric } 589e8d8bef9SDimitry Andric 590e8d8bef9SDimitry Andric bool hasUnalignedScratchAccess() const { 591e8d8bef9SDimitry Andric return UnalignedScratchAccess; 592e8d8bef9SDimitry Andric } 593e8d8bef9SDimitry Andric 594e8d8bef9SDimitry Andric bool hasUnalignedAccessMode() const { 595e8d8bef9SDimitry Andric return UnalignedAccessMode; 596e8d8bef9SDimitry Andric } 597e8d8bef9SDimitry Andric 598e8d8bef9SDimitry Andric bool hasApertureRegs() const { 599e8d8bef9SDimitry Andric return HasApertureRegs; 600e8d8bef9SDimitry Andric } 601e8d8bef9SDimitry Andric 602e8d8bef9SDimitry Andric bool isTrapHandlerEnabled() const { 603e8d8bef9SDimitry Andric return TrapHandler; 604e8d8bef9SDimitry Andric } 605e8d8bef9SDimitry Andric 606e8d8bef9SDimitry Andric bool isXNACKEnabled() const { 607e8d8bef9SDimitry Andric return TargetID.isXnackOnOrAny(); 608e8d8bef9SDimitry Andric } 609e8d8bef9SDimitry Andric 610fe6060f1SDimitry Andric bool isTgSplitEnabled() const { 611fe6060f1SDimitry Andric return EnableTgSplit; 612fe6060f1SDimitry Andric } 613fe6060f1SDimitry Andric 614e8d8bef9SDimitry Andric bool 
isCuModeEnabled() const { 615e8d8bef9SDimitry Andric return EnableCuMode; 616e8d8bef9SDimitry Andric } 617e8d8bef9SDimitry Andric 6180fca6ea1SDimitry Andric bool isPreciseMemoryEnabled() const { return EnablePreciseMemory; } 6190fca6ea1SDimitry Andric 620e8d8bef9SDimitry Andric bool hasFlatAddressSpace() const { 621e8d8bef9SDimitry Andric return FlatAddressSpace; 622e8d8bef9SDimitry Andric } 623e8d8bef9SDimitry Andric 624e8d8bef9SDimitry Andric bool hasFlatScrRegister() const { 625e8d8bef9SDimitry Andric return hasFlatAddressSpace(); 626e8d8bef9SDimitry Andric } 627e8d8bef9SDimitry Andric 628e8d8bef9SDimitry Andric bool hasFlatInstOffsets() const { 629e8d8bef9SDimitry Andric return FlatInstOffsets; 630e8d8bef9SDimitry Andric } 631e8d8bef9SDimitry Andric 632e8d8bef9SDimitry Andric bool hasFlatGlobalInsts() const { 633e8d8bef9SDimitry Andric return FlatGlobalInsts; 634e8d8bef9SDimitry Andric } 635e8d8bef9SDimitry Andric 636e8d8bef9SDimitry Andric bool hasFlatScratchInsts() const { 637e8d8bef9SDimitry Andric return FlatScratchInsts; 638e8d8bef9SDimitry Andric } 639e8d8bef9SDimitry Andric 640e8d8bef9SDimitry Andric // Check if target supports ST addressing mode with FLAT scratch instructions. 641e8d8bef9SDimitry Andric // The ST addressing mode means no registers are used, either VGPR or SGPR, 642e8d8bef9SDimitry Andric // but only immediate offset is swizzled and added to the FLAT scratch base. 
643e8d8bef9SDimitry Andric bool hasFlatScratchSTMode() const { 64481ad6265SDimitry Andric return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts()); 645e8d8bef9SDimitry Andric } 646e8d8bef9SDimitry Andric 64781ad6265SDimitry Andric bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; } 64881ad6265SDimitry Andric 649e8d8bef9SDimitry Andric bool hasScalarFlatScratchInsts() const { 650e8d8bef9SDimitry Andric return ScalarFlatScratchInsts; 651e8d8bef9SDimitry Andric } 652e8d8bef9SDimitry Andric 65381ad6265SDimitry Andric bool enableFlatScratch() const { 65481ad6265SDimitry Andric return flatScratchIsArchitected() || 65581ad6265SDimitry Andric (EnableFlatScratch && hasFlatScratchInsts()); 65681ad6265SDimitry Andric } 65781ad6265SDimitry Andric 658e8d8bef9SDimitry Andric bool hasGlobalAddTidInsts() const { 659e8d8bef9SDimitry Andric return GFX10_BEncoding; 660e8d8bef9SDimitry Andric } 661e8d8bef9SDimitry Andric 662e8d8bef9SDimitry Andric bool hasAtomicCSub() const { 663e8d8bef9SDimitry Andric return GFX10_BEncoding; 664e8d8bef9SDimitry Andric } 665e8d8bef9SDimitry Andric 6660fca6ea1SDimitry Andric bool hasExportInsts() const { 6670fca6ea1SDimitry Andric return !hasGFX940Insts(); 6680fca6ea1SDimitry Andric } 6690fca6ea1SDimitry Andric 6700fca6ea1SDimitry Andric bool hasVINTERPEncoding() const { 6710fca6ea1SDimitry Andric return GFX11Insts; 6720fca6ea1SDimitry Andric } 6730fca6ea1SDimitry Andric 6740fca6ea1SDimitry Andric // DS_ADD_F64/DS_ADD_RTN_F64 6750fca6ea1SDimitry Andric bool hasLdsAtomicAddF64() const { return hasGFX90AInsts(); } 6760fca6ea1SDimitry Andric 677e8d8bef9SDimitry Andric bool hasMultiDwordFlatScratchAddressing() const { 678e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 679e8d8bef9SDimitry Andric } 680e8d8bef9SDimitry Andric 681e8d8bef9SDimitry Andric bool hasFlatSegmentOffsetBug() const { 682e8d8bef9SDimitry Andric return HasFlatSegmentOffsetBug; 683e8d8bef9SDimitry Andric } 684e8d8bef9SDimitry Andric 
685e8d8bef9SDimitry Andric bool hasFlatLgkmVMemCountInOrder() const { 686e8d8bef9SDimitry Andric return getGeneration() > GFX9; 687e8d8bef9SDimitry Andric } 688e8d8bef9SDimitry Andric 689e8d8bef9SDimitry Andric bool hasD16LoadStore() const { 690e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 691e8d8bef9SDimitry Andric } 692e8d8bef9SDimitry Andric 693e8d8bef9SDimitry Andric bool d16PreservesUnusedBits() const { 694e8d8bef9SDimitry Andric return hasD16LoadStore() && !TargetID.isSramEccOnOrAny(); 695e8d8bef9SDimitry Andric } 696e8d8bef9SDimitry Andric 697e8d8bef9SDimitry Andric bool hasD16Images() const { 698e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 699e8d8bef9SDimitry Andric } 700e8d8bef9SDimitry Andric 701e8d8bef9SDimitry Andric /// Return if most LDS instructions have an m0 use that require m0 to be 702349cc55cSDimitry Andric /// initialized. 703e8d8bef9SDimitry Andric bool ldsRequiresM0Init() const { 704e8d8bef9SDimitry Andric return getGeneration() < GFX9; 705e8d8bef9SDimitry Andric } 706e8d8bef9SDimitry Andric 707e8d8bef9SDimitry Andric // True if the hardware rewinds and replays GWS operations if a wave is 708e8d8bef9SDimitry Andric // preempted. 709e8d8bef9SDimitry Andric // 710e8d8bef9SDimitry Andric // If this is false, a GWS operation requires testing if a nack set the 711e8d8bef9SDimitry Andric // MEM_VIOL bit, and repeating if so. 712e8d8bef9SDimitry Andric bool hasGWSAutoReplay() const { 713e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 714e8d8bef9SDimitry Andric } 715e8d8bef9SDimitry Andric 716e8d8bef9SDimitry Andric /// \returns if target has ds_gws_sema_release_all instruction. 717e8d8bef9SDimitry Andric bool hasGWSSemaReleaseAll() const { 718e8d8bef9SDimitry Andric return CIInsts; 719e8d8bef9SDimitry Andric } 720e8d8bef9SDimitry Andric 721e8d8bef9SDimitry Andric /// \returns true if the target has integer add/sub instructions that do not 722e8d8bef9SDimitry Andric /// produce a carry-out. 
This includes v_add_[iu]32, v_sub_[iu]32, 723e8d8bef9SDimitry Andric /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier 724e8d8bef9SDimitry Andric /// for saturation. 725e8d8bef9SDimitry Andric bool hasAddNoCarry() const { 726e8d8bef9SDimitry Andric return AddNoCarryInsts; 727e8d8bef9SDimitry Andric } 728e8d8bef9SDimitry Andric 7295f757f3fSDimitry Andric bool hasScalarAddSub64() const { return getGeneration() >= GFX12; } 7305f757f3fSDimitry Andric 7311db9f3b2SDimitry Andric bool hasScalarSMulU64() const { return getGeneration() >= GFX12; } 7321db9f3b2SDimitry Andric 733e8d8bef9SDimitry Andric bool hasUnpackedD16VMem() const { 734e8d8bef9SDimitry Andric return HasUnpackedD16VMem; 735e8d8bef9SDimitry Andric } 736e8d8bef9SDimitry Andric 737e8d8bef9SDimitry Andric // Covers VS/PS/CS graphics shaders 738e8d8bef9SDimitry Andric bool isMesaGfxShader(const Function &F) const { 739e8d8bef9SDimitry Andric return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv()); 740e8d8bef9SDimitry Andric } 741e8d8bef9SDimitry Andric 742e8d8bef9SDimitry Andric bool hasMad64_32() const { 743e8d8bef9SDimitry Andric return getGeneration() >= SEA_ISLANDS; 744e8d8bef9SDimitry Andric } 745e8d8bef9SDimitry Andric 746e8d8bef9SDimitry Andric bool hasSDWAOmod() const { 747e8d8bef9SDimitry Andric return HasSDWAOmod; 748e8d8bef9SDimitry Andric } 749e8d8bef9SDimitry Andric 750e8d8bef9SDimitry Andric bool hasSDWAScalar() const { 751e8d8bef9SDimitry Andric return HasSDWAScalar; 752e8d8bef9SDimitry Andric } 753e8d8bef9SDimitry Andric 754e8d8bef9SDimitry Andric bool hasSDWASdst() const { 755e8d8bef9SDimitry Andric return HasSDWASdst; 756e8d8bef9SDimitry Andric } 757e8d8bef9SDimitry Andric 758e8d8bef9SDimitry Andric bool hasSDWAMac() const { 759e8d8bef9SDimitry Andric return HasSDWAMac; 760e8d8bef9SDimitry Andric } 761e8d8bef9SDimitry Andric 762e8d8bef9SDimitry Andric bool hasSDWAOutModsVOPC() const { 763e8d8bef9SDimitry Andric return HasSDWAOutModsVOPC; 764e8d8bef9SDimitry Andric 
} 765e8d8bef9SDimitry Andric 766e8d8bef9SDimitry Andric bool hasDLInsts() const { 767e8d8bef9SDimitry Andric return HasDLInsts; 768e8d8bef9SDimitry Andric } 769e8d8bef9SDimitry Andric 770bdd1243dSDimitry Andric bool hasFmacF64Inst() const { return HasFmacF64Inst; } 771bdd1243dSDimitry Andric 772e8d8bef9SDimitry Andric bool hasDot1Insts() const { 773e8d8bef9SDimitry Andric return HasDot1Insts; 774e8d8bef9SDimitry Andric } 775e8d8bef9SDimitry Andric 776e8d8bef9SDimitry Andric bool hasDot2Insts() const { 777e8d8bef9SDimitry Andric return HasDot2Insts; 778e8d8bef9SDimitry Andric } 779e8d8bef9SDimitry Andric 780e8d8bef9SDimitry Andric bool hasDot3Insts() const { 781e8d8bef9SDimitry Andric return HasDot3Insts; 782e8d8bef9SDimitry Andric } 783e8d8bef9SDimitry Andric 784e8d8bef9SDimitry Andric bool hasDot4Insts() const { 785e8d8bef9SDimitry Andric return HasDot4Insts; 786e8d8bef9SDimitry Andric } 787e8d8bef9SDimitry Andric 788e8d8bef9SDimitry Andric bool hasDot5Insts() const { 789e8d8bef9SDimitry Andric return HasDot5Insts; 790e8d8bef9SDimitry Andric } 791e8d8bef9SDimitry Andric 792e8d8bef9SDimitry Andric bool hasDot6Insts() const { 793e8d8bef9SDimitry Andric return HasDot6Insts; 794e8d8bef9SDimitry Andric } 795e8d8bef9SDimitry Andric 796fe6060f1SDimitry Andric bool hasDot7Insts() const { 797fe6060f1SDimitry Andric return HasDot7Insts; 798fe6060f1SDimitry Andric } 799fe6060f1SDimitry Andric 80081ad6265SDimitry Andric bool hasDot8Insts() const { 80181ad6265SDimitry Andric return HasDot8Insts; 80281ad6265SDimitry Andric } 80381ad6265SDimitry Andric 804bdd1243dSDimitry Andric bool hasDot9Insts() const { 805bdd1243dSDimitry Andric return HasDot9Insts; 806bdd1243dSDimitry Andric } 807bdd1243dSDimitry Andric 80806c3fb27SDimitry Andric bool hasDot10Insts() const { 80906c3fb27SDimitry Andric return HasDot10Insts; 81006c3fb27SDimitry Andric } 81106c3fb27SDimitry Andric 8120fca6ea1SDimitry Andric bool hasDot11Insts() const { 8130fca6ea1SDimitry Andric return HasDot11Insts; 
8140fca6ea1SDimitry Andric } 8150fca6ea1SDimitry Andric 816e8d8bef9SDimitry Andric bool hasMAIInsts() const { 817e8d8bef9SDimitry Andric return HasMAIInsts; 818e8d8bef9SDimitry Andric } 819e8d8bef9SDimitry Andric 820fcaf7f86SDimitry Andric bool hasFP8Insts() const { 821fcaf7f86SDimitry Andric return HasFP8Insts; 822fcaf7f86SDimitry Andric } 823fcaf7f86SDimitry Andric 8247a6dacacSDimitry Andric bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; } 8257a6dacacSDimitry Andric 826e8d8bef9SDimitry Andric bool hasPkFmacF16Inst() const { 827e8d8bef9SDimitry Andric return HasPkFmacF16Inst; 828e8d8bef9SDimitry Andric } 829e8d8bef9SDimitry Andric 8300fca6ea1SDimitry Andric bool hasAtomicFMinFMaxF32GlobalInsts() const { 8310fca6ea1SDimitry Andric return HasAtomicFMinFMaxF32GlobalInsts; 8320fca6ea1SDimitry Andric } 8330fca6ea1SDimitry Andric 8340fca6ea1SDimitry Andric bool hasAtomicFMinFMaxF64GlobalInsts() const { 8350fca6ea1SDimitry Andric return HasAtomicFMinFMaxF64GlobalInsts; 8360fca6ea1SDimitry Andric } 8370fca6ea1SDimitry Andric 8380fca6ea1SDimitry Andric bool hasAtomicFMinFMaxF32FlatInsts() const { 8390fca6ea1SDimitry Andric return HasAtomicFMinFMaxF32FlatInsts; 8400fca6ea1SDimitry Andric } 8410fca6ea1SDimitry Andric 8420fca6ea1SDimitry Andric bool hasAtomicFMinFMaxF64FlatInsts() const { 8430fca6ea1SDimitry Andric return HasAtomicFMinFMaxF64FlatInsts; 8440fca6ea1SDimitry Andric } 8450fca6ea1SDimitry Andric 84606c3fb27SDimitry Andric bool hasAtomicDsPkAdd16Insts() const { return HasAtomicDsPkAdd16Insts; } 84706c3fb27SDimitry Andric 84806c3fb27SDimitry Andric bool hasAtomicFlatPkAdd16Insts() const { return HasAtomicFlatPkAdd16Insts; } 84906c3fb27SDimitry Andric 850e8d8bef9SDimitry Andric bool hasAtomicFaddInsts() const { 85181ad6265SDimitry Andric return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts; 852e8d8bef9SDimitry Andric } 853e8d8bef9SDimitry Andric 85481ad6265SDimitry Andric bool hasAtomicFaddRtnInsts() const { return HasAtomicFaddRtnInsts; } 
85581ad6265SDimitry Andric 85681ad6265SDimitry Andric bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; } 85781ad6265SDimitry Andric 85806c3fb27SDimitry Andric bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const { 85906c3fb27SDimitry Andric return HasAtomicBufferGlobalPkAddF16NoRtnInsts; 86006c3fb27SDimitry Andric } 86106c3fb27SDimitry Andric 86206c3fb27SDimitry Andric bool hasAtomicBufferGlobalPkAddF16Insts() const { 86306c3fb27SDimitry Andric return HasAtomicBufferGlobalPkAddF16Insts; 86406c3fb27SDimitry Andric } 86506c3fb27SDimitry Andric 86606c3fb27SDimitry Andric bool hasAtomicGlobalPkAddBF16Inst() const { 86706c3fb27SDimitry Andric return HasAtomicGlobalPkAddBF16Inst; 86806c3fb27SDimitry Andric } 86981ad6265SDimitry Andric 8700fca6ea1SDimitry Andric bool hasAtomicBufferPkAddBF16Inst() const { 8710fca6ea1SDimitry Andric return HasAtomicBufferPkAddBF16Inst; 8720fca6ea1SDimitry Andric } 8730fca6ea1SDimitry Andric 874bdd1243dSDimitry Andric bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } 875bdd1243dSDimitry Andric 8760fca6ea1SDimitry Andric /// \return true if the target has flat, global, and buffer atomic fadd for 8770fca6ea1SDimitry Andric /// double. 8780fca6ea1SDimitry Andric bool hasFlatBufferGlobalAtomicFaddF64Inst() const { 8790fca6ea1SDimitry Andric return HasFlatBufferGlobalAtomicFaddF64Inst; 8800fca6ea1SDimitry Andric } 8810fca6ea1SDimitry Andric 8820fca6ea1SDimitry Andric /// \return true if the target's flat, global, and buffer atomic fadd for 8830fca6ea1SDimitry Andric /// float supports denormal handling. 
8840fca6ea1SDimitry Andric bool hasMemoryAtomicFaddF32DenormalSupport() const { 8850fca6ea1SDimitry Andric return HasMemoryAtomicFaddF32DenormalSupport; 8860fca6ea1SDimitry Andric } 8870fca6ea1SDimitry Andric 8880fca6ea1SDimitry Andric /// \return true if atomic operations targeting fine-grained memory work 8890fca6ea1SDimitry Andric /// correctly at device scope, in allocations in host or peer PCIe device 8900fca6ea1SDimitry Andric /// memory. 8910fca6ea1SDimitry Andric bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const { 8920fca6ea1SDimitry Andric return HasAgentScopeFineGrainedRemoteMemoryAtomics; 8930fca6ea1SDimitry Andric } 8940fca6ea1SDimitry Andric 8957a6dacacSDimitry Andric bool hasDefaultComponentZero() const { return HasDefaultComponentZero; } 8967a6dacacSDimitry Andric 8977a6dacacSDimitry Andric bool hasDefaultComponentBroadcast() const { 8987a6dacacSDimitry Andric return HasDefaultComponentBroadcast; 8997a6dacacSDimitry Andric } 9007a6dacacSDimitry Andric 901e8d8bef9SDimitry Andric bool hasNoSdstCMPX() const { 902e8d8bef9SDimitry Andric return HasNoSdstCMPX; 903e8d8bef9SDimitry Andric } 904e8d8bef9SDimitry Andric 905e8d8bef9SDimitry Andric bool hasVscnt() const { 906e8d8bef9SDimitry Andric return HasVscnt; 907e8d8bef9SDimitry Andric } 908e8d8bef9SDimitry Andric 909e8d8bef9SDimitry Andric bool hasGetWaveIdInst() const { 910e8d8bef9SDimitry Andric return HasGetWaveIdInst; 911e8d8bef9SDimitry Andric } 912e8d8bef9SDimitry Andric 913e8d8bef9SDimitry Andric bool hasSMemTimeInst() const { 914e8d8bef9SDimitry Andric return HasSMemTimeInst; 915e8d8bef9SDimitry Andric } 916e8d8bef9SDimitry Andric 917fe6060f1SDimitry Andric bool hasShaderCyclesRegister() const { 918fe6060f1SDimitry Andric return HasShaderCyclesRegister; 919fe6060f1SDimitry Andric } 920fe6060f1SDimitry Andric 9211db9f3b2SDimitry Andric bool hasShaderCyclesHiLoRegisters() const { 9221db9f3b2SDimitry Andric return HasShaderCyclesHiLoRegisters; 9231db9f3b2SDimitry Andric } 
9241db9f3b2SDimitry Andric 925e8d8bef9SDimitry Andric bool hasVOP3Literal() const { 926e8d8bef9SDimitry Andric return HasVOP3Literal; 927e8d8bef9SDimitry Andric } 928e8d8bef9SDimitry Andric 929e8d8bef9SDimitry Andric bool hasNoDataDepHazard() const { 930e8d8bef9SDimitry Andric return HasNoDataDepHazard; 931e8d8bef9SDimitry Andric } 932e8d8bef9SDimitry Andric 933e8d8bef9SDimitry Andric bool vmemWriteNeedsExpWaitcnt() const { 934e8d8bef9SDimitry Andric return getGeneration() < SEA_ISLANDS; 935e8d8bef9SDimitry Andric } 936e8d8bef9SDimitry Andric 9377a6dacacSDimitry Andric bool hasInstPrefetch() const { 9387a6dacacSDimitry Andric return getGeneration() == GFX10 || getGeneration() == GFX11; 9397a6dacacSDimitry Andric } 940bdd1243dSDimitry Andric 9415f757f3fSDimitry Andric bool hasPrefetch() const { return GFX12Insts; } 9425f757f3fSDimitry Andric 9435f757f3fSDimitry Andric // Has s_cmpk_* instructions. 9445f757f3fSDimitry Andric bool hasSCmpK() const { return getGeneration() < GFX12; } 9455f757f3fSDimitry Andric 946e8d8bef9SDimitry Andric // Scratch is allocated in 256 dword per wave blocks for the entire 947349cc55cSDimitry Andric // wavefront. When viewed from the perspective of an arbitrary workitem, this 948e8d8bef9SDimitry Andric // is 4-byte aligned. 949e8d8bef9SDimitry Andric // 950e8d8bef9SDimitry Andric // Only 4-byte alignment is really needed to access anything. Transformations 951e8d8bef9SDimitry Andric // on the pointer value itself may rely on the alignment / known low bits of 952e8d8bef9SDimitry Andric // the pointer. Set this to something above the minimum to avoid needing 953e8d8bef9SDimitry Andric // dynamic realignment in common cases. 
954e8d8bef9SDimitry Andric Align getStackAlignment() const { return Align(16); } 955e8d8bef9SDimitry Andric 956e8d8bef9SDimitry Andric bool enableMachineScheduler() const override { 957e8d8bef9SDimitry Andric return true; 958e8d8bef9SDimitry Andric } 959e8d8bef9SDimitry Andric 960e8d8bef9SDimitry Andric bool useAA() const override; 961e8d8bef9SDimitry Andric 962e8d8bef9SDimitry Andric bool enableSubRegLiveness() const override { 963e8d8bef9SDimitry Andric return true; 964e8d8bef9SDimitry Andric } 965e8d8bef9SDimitry Andric 966e8d8bef9SDimitry Andric void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } 967e8d8bef9SDimitry Andric bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } 968e8d8bef9SDimitry Andric 969e8d8bef9SDimitry Andric // static wrappers 970e8d8bef9SDimitry Andric static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); 971e8d8bef9SDimitry Andric 972e8d8bef9SDimitry Andric // XXX - Why is this here if it isn't in the default pass set? 973e8d8bef9SDimitry Andric bool enableEarlyIfConversion() const override { 974e8d8bef9SDimitry Andric return true; 975e8d8bef9SDimitry Andric } 976e8d8bef9SDimitry Andric 977e8d8bef9SDimitry Andric void overrideSchedPolicy(MachineSchedPolicy &Policy, 978e8d8bef9SDimitry Andric unsigned NumRegionInstrs) const override; 979e8d8bef9SDimitry Andric 9800fca6ea1SDimitry Andric void mirFileLoaded(MachineFunction &MF) const override; 9810fca6ea1SDimitry Andric 982e8d8bef9SDimitry Andric unsigned getMaxNumUserSGPRs() const { 9835f757f3fSDimitry Andric return AMDGPU::getMaxNumUserSGPRs(*this); 984e8d8bef9SDimitry Andric } 985e8d8bef9SDimitry Andric 986e8d8bef9SDimitry Andric bool hasSMemRealTime() const { 987e8d8bef9SDimitry Andric return HasSMemRealTime; 988e8d8bef9SDimitry Andric } 989e8d8bef9SDimitry Andric 990e8d8bef9SDimitry Andric bool hasMovrel() const { 991e8d8bef9SDimitry Andric return HasMovrel; 992e8d8bef9SDimitry Andric } 993e8d8bef9SDimitry Andric 994e8d8bef9SDimitry Andric bool 
hasVGPRIndexMode() const { 995e8d8bef9SDimitry Andric return HasVGPRIndexMode; 996e8d8bef9SDimitry Andric } 997e8d8bef9SDimitry Andric 998e8d8bef9SDimitry Andric bool useVGPRIndexMode() const; 999e8d8bef9SDimitry Andric 1000e8d8bef9SDimitry Andric bool hasScalarCompareEq64() const { 1001e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 1002e8d8bef9SDimitry Andric } 1003e8d8bef9SDimitry Andric 10045f757f3fSDimitry Andric bool hasScalarDwordx3Loads() const { return HasScalarDwordx3Loads; } 10055f757f3fSDimitry Andric 1006e8d8bef9SDimitry Andric bool hasScalarStores() const { 1007e8d8bef9SDimitry Andric return HasScalarStores; 1008e8d8bef9SDimitry Andric } 1009e8d8bef9SDimitry Andric 1010e8d8bef9SDimitry Andric bool hasScalarAtomics() const { 1011e8d8bef9SDimitry Andric return HasScalarAtomics; 1012e8d8bef9SDimitry Andric } 1013e8d8bef9SDimitry Andric 10140fca6ea1SDimitry Andric bool hasLDSFPAtomicAddF32() const { return GFX8Insts; } 10150fca6ea1SDimitry Andric bool hasLDSFPAtomicAddF64() const { return GFX90AInsts; } 1016e8d8bef9SDimitry Andric 1017fe6060f1SDimitry Andric /// \returns true if the subtarget has the v_permlanex16_b32 instruction. 1018fe6060f1SDimitry Andric bool hasPermLaneX16() const { return getGeneration() >= GFX10; } 1019fe6060f1SDimitry Andric 102081ad6265SDimitry Andric /// \returns true if the subtarget has the v_permlane64_b32 instruction. 
102181ad6265SDimitry Andric bool hasPermLane64() const { return getGeneration() >= GFX11; } 102281ad6265SDimitry Andric 1023e8d8bef9SDimitry Andric bool hasDPP() const { 1024e8d8bef9SDimitry Andric return HasDPP; 1025e8d8bef9SDimitry Andric } 1026e8d8bef9SDimitry Andric 1027e8d8bef9SDimitry Andric bool hasDPPBroadcasts() const { 1028e8d8bef9SDimitry Andric return HasDPP && getGeneration() < GFX10; 1029e8d8bef9SDimitry Andric } 1030e8d8bef9SDimitry Andric 1031e8d8bef9SDimitry Andric bool hasDPPWavefrontShifts() const { 1032e8d8bef9SDimitry Andric return HasDPP && getGeneration() < GFX10; 1033e8d8bef9SDimitry Andric } 1034e8d8bef9SDimitry Andric 1035e8d8bef9SDimitry Andric bool hasDPP8() const { 1036e8d8bef9SDimitry Andric return HasDPP8; 1037e8d8bef9SDimitry Andric } 1038e8d8bef9SDimitry Andric 10395f757f3fSDimitry Andric bool hasDPALU_DPP() const { 10405f757f3fSDimitry Andric return HasDPALU_DPP; 1041fe6060f1SDimitry Andric } 1042fe6060f1SDimitry Andric 10435f757f3fSDimitry Andric bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; } 10445f757f3fSDimitry Andric 1045fe6060f1SDimitry Andric bool hasPackedFP32Ops() const { 1046fe6060f1SDimitry Andric return HasPackedFP32Ops; 1047fe6060f1SDimitry Andric } 1048fe6060f1SDimitry Andric 10495f757f3fSDimitry Andric // Has V_PK_MOV_B32 opcode 10505f757f3fSDimitry Andric bool hasPkMovB32() const { 10515f757f3fSDimitry Andric return GFX90AInsts; 10525f757f3fSDimitry Andric } 10535f757f3fSDimitry Andric 1054fe6060f1SDimitry Andric bool hasFmaakFmamkF32Insts() const { 105581ad6265SDimitry Andric return getGeneration() >= GFX10 || hasGFX940Insts(); 105681ad6265SDimitry Andric } 105781ad6265SDimitry Andric 105881ad6265SDimitry Andric bool hasImageInsts() const { 105981ad6265SDimitry Andric return HasImageInsts; 1060fe6060f1SDimitry Andric } 1061fe6060f1SDimitry Andric 1062fe6060f1SDimitry Andric bool hasExtendedImageInsts() const { 1063fe6060f1SDimitry Andric return HasExtendedImageInsts; 1064fe6060f1SDimitry Andric } 
1065fe6060f1SDimitry Andric 1066e8d8bef9SDimitry Andric bool hasR128A16() const { 1067e8d8bef9SDimitry Andric return HasR128A16; 1068e8d8bef9SDimitry Andric } 1069e8d8bef9SDimitry Andric 1070bdd1243dSDimitry Andric bool hasA16() const { return HasA16; } 1071e8d8bef9SDimitry Andric 1072e8d8bef9SDimitry Andric bool hasG16() const { return HasG16; } 1073e8d8bef9SDimitry Andric 1074e8d8bef9SDimitry Andric bool hasOffset3fBug() const { 1075e8d8bef9SDimitry Andric return HasOffset3fBug; 1076e8d8bef9SDimitry Andric } 1077e8d8bef9SDimitry Andric 1078e8d8bef9SDimitry Andric bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; } 1079e8d8bef9SDimitry Andric 1080e8d8bef9SDimitry Andric bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; } 1081e8d8bef9SDimitry Andric 1082bdd1243dSDimitry Andric bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; } 1083bdd1243dSDimitry Andric 10845f757f3fSDimitry Andric bool hasMSAALoadDstSelBug() const { return HasMSAALoadDstSelBug; } 10855f757f3fSDimitry Andric 10860fca6ea1SDimitry Andric bool hasPrivEnabledTrap2NopBug() const { return HasPrivEnabledTrap2NopBug; } 10870fca6ea1SDimitry Andric 1088e8d8bef9SDimitry Andric bool hasNSAEncoding() const { return HasNSAEncoding; } 1089e8d8bef9SDimitry Andric 10907a6dacacSDimitry Andric bool hasNonNSAEncoding() const { return getGeneration() < GFX12; } 10917a6dacacSDimitry Andric 109206c3fb27SDimitry Andric bool hasPartialNSAEncoding() const { return HasPartialNSAEncoding; } 109306c3fb27SDimitry Andric 10945f757f3fSDimitry Andric unsigned getNSAMaxSize(bool HasSampler = false) const { 10955f757f3fSDimitry Andric return AMDGPU::getNSAMaxSize(*this, HasSampler); 10965f757f3fSDimitry Andric } 1097fe6060f1SDimitry Andric 1098fe6060f1SDimitry Andric bool hasGFX10_AEncoding() const { 1099fe6060f1SDimitry Andric return GFX10_AEncoding; 1100fe6060f1SDimitry Andric } 1101fe6060f1SDimitry Andric 1102e8d8bef9SDimitry Andric bool hasGFX10_BEncoding() const { 1103e8d8bef9SDimitry 
Andric return GFX10_BEncoding; 1104e8d8bef9SDimitry Andric } 1105e8d8bef9SDimitry Andric 1106e8d8bef9SDimitry Andric bool hasGFX10_3Insts() const { 1107e8d8bef9SDimitry Andric return GFX10_3Insts; 1108e8d8bef9SDimitry Andric } 1109e8d8bef9SDimitry Andric 1110e8d8bef9SDimitry Andric bool hasMadF16() const; 1111e8d8bef9SDimitry Andric 111281ad6265SDimitry Andric bool hasMovB64() const { return GFX940Insts; } 111381ad6265SDimitry Andric 111481ad6265SDimitry Andric bool hasLshlAddB64() const { return GFX940Insts; } 111581ad6265SDimitry Andric 1116e8d8bef9SDimitry Andric bool enableSIScheduler() const { 1117e8d8bef9SDimitry Andric return EnableSIScheduler; 1118e8d8bef9SDimitry Andric } 1119e8d8bef9SDimitry Andric 1120e8d8bef9SDimitry Andric bool loadStoreOptEnabled() const { 1121e8d8bef9SDimitry Andric return EnableLoadStoreOpt; 1122e8d8bef9SDimitry Andric } 1123e8d8bef9SDimitry Andric 1124e8d8bef9SDimitry Andric bool hasSGPRInitBug() const { 1125e8d8bef9SDimitry Andric return SGPRInitBug; 1126e8d8bef9SDimitry Andric } 1127e8d8bef9SDimitry Andric 112881ad6265SDimitry Andric bool hasUserSGPRInit16Bug() const { 1129fcaf7f86SDimitry Andric return UserSGPRInit16Bug && isWave32(); 113081ad6265SDimitry Andric } 113181ad6265SDimitry Andric 1132fe6060f1SDimitry Andric bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; } 1133fe6060f1SDimitry Andric 1134fe6060f1SDimitry Andric bool hasNegativeUnalignedScratchOffsetBug() const { 1135fe6060f1SDimitry Andric return NegativeUnalignedScratchOffsetBug; 1136fe6060f1SDimitry Andric } 1137fe6060f1SDimitry Andric 1138e8d8bef9SDimitry Andric bool hasMFMAInlineLiteralBug() const { 1139e8d8bef9SDimitry Andric return HasMFMAInlineLiteralBug; 1140e8d8bef9SDimitry Andric } 1141e8d8bef9SDimitry Andric 1142e8d8bef9SDimitry Andric bool has12DWordStoreHazard() const { 1143e8d8bef9SDimitry Andric return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; 1144e8d8bef9SDimitry Andric } 1145e8d8bef9SDimitry Andric 
1146e8d8bef9SDimitry Andric // \returns true if the subtarget supports DWORDX3 load/store instructions. 1147e8d8bef9SDimitry Andric bool hasDwordx3LoadStores() const { 1148e8d8bef9SDimitry Andric return CIInsts; 1149e8d8bef9SDimitry Andric } 1150e8d8bef9SDimitry Andric 1151e8d8bef9SDimitry Andric bool hasReadM0MovRelInterpHazard() const { 1152e8d8bef9SDimitry Andric return getGeneration() == AMDGPUSubtarget::GFX9; 1153e8d8bef9SDimitry Andric } 1154e8d8bef9SDimitry Andric 1155e8d8bef9SDimitry Andric bool hasReadM0SendMsgHazard() const { 1156e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && 1157e8d8bef9SDimitry Andric getGeneration() <= AMDGPUSubtarget::GFX9; 1158e8d8bef9SDimitry Andric } 1159e8d8bef9SDimitry Andric 116081ad6265SDimitry Andric bool hasReadM0LdsDmaHazard() const { 116181ad6265SDimitry Andric return getGeneration() == AMDGPUSubtarget::GFX9; 116281ad6265SDimitry Andric } 116381ad6265SDimitry Andric 116481ad6265SDimitry Andric bool hasReadM0LdsDirectHazard() const { 116581ad6265SDimitry Andric return getGeneration() == AMDGPUSubtarget::GFX9; 116681ad6265SDimitry Andric } 116781ad6265SDimitry Andric 1168e8d8bef9SDimitry Andric bool hasVcmpxPermlaneHazard() const { 1169e8d8bef9SDimitry Andric return HasVcmpxPermlaneHazard; 1170e8d8bef9SDimitry Andric } 1171e8d8bef9SDimitry Andric 1172e8d8bef9SDimitry Andric bool hasVMEMtoScalarWriteHazard() const { 1173e8d8bef9SDimitry Andric return HasVMEMtoScalarWriteHazard; 1174e8d8bef9SDimitry Andric } 1175e8d8bef9SDimitry Andric 1176e8d8bef9SDimitry Andric bool hasSMEMtoVectorWriteHazard() const { 1177e8d8bef9SDimitry Andric return HasSMEMtoVectorWriteHazard; 1178e8d8bef9SDimitry Andric } 1179e8d8bef9SDimitry Andric 1180e8d8bef9SDimitry Andric bool hasLDSMisalignedBug() const { 1181e8d8bef9SDimitry Andric return LDSMisalignedBug && !EnableCuMode; 1182e8d8bef9SDimitry Andric } 1183e8d8bef9SDimitry Andric 1184e8d8bef9SDimitry Andric bool hasInstFwdPrefetchBug() const { 
1185e8d8bef9SDimitry Andric return HasInstFwdPrefetchBug; 1186e8d8bef9SDimitry Andric } 1187e8d8bef9SDimitry Andric 1188e8d8bef9SDimitry Andric bool hasVcmpxExecWARHazard() const { 1189e8d8bef9SDimitry Andric return HasVcmpxExecWARHazard; 1190e8d8bef9SDimitry Andric } 1191e8d8bef9SDimitry Andric 1192e8d8bef9SDimitry Andric bool hasLdsBranchVmemWARHazard() const { 1193e8d8bef9SDimitry Andric return HasLdsBranchVmemWARHazard; 1194e8d8bef9SDimitry Andric } 1195e8d8bef9SDimitry Andric 1196bdd1243dSDimitry Andric // Shift amount of a 64 bit shift cannot be a highest allocated register 1197bdd1243dSDimitry Andric // if also at the end of the allocation block. 1198bdd1243dSDimitry Andric bool hasShift64HighRegBug() const { 1199bdd1243dSDimitry Andric return GFX90AInsts && !GFX940Insts; 1200bdd1243dSDimitry Andric } 1201bdd1243dSDimitry Andric 120281ad6265SDimitry Andric // Has one cycle hazard on transcendental instruction feeding a 120381ad6265SDimitry Andric // non transcendental VALU. 120481ad6265SDimitry Andric bool hasTransForwardingHazard() const { return GFX940Insts; } 120581ad6265SDimitry Andric 120681ad6265SDimitry Andric // Has one cycle hazard on a VALU instruction partially writing dst with 120781ad6265SDimitry Andric // a shift of result bits feeding another VALU instruction. 120881ad6265SDimitry Andric bool hasDstSelForwardingHazard() const { return GFX940Insts; } 120981ad6265SDimitry Andric 121081ad6265SDimitry Andric // Cannot use op_sel with v_dot instructions. 12111db9f3b2SDimitry Andric bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; } 121281ad6265SDimitry Andric 121381ad6265SDimitry Andric // Does not have HW interlocs for VALU writing and then reading SGPRs. 
121481ad6265SDimitry Andric bool hasVDecCoExecHazard() const { 121581ad6265SDimitry Andric return GFX940Insts; 121681ad6265SDimitry Andric } 121781ad6265SDimitry Andric 1218e8d8bef9SDimitry Andric bool hasNSAtoVMEMBug() const { 1219e8d8bef9SDimitry Andric return HasNSAtoVMEMBug; 1220e8d8bef9SDimitry Andric } 1221e8d8bef9SDimitry Andric 1222fe6060f1SDimitry Andric bool hasNSAClauseBug() const { return HasNSAClauseBug; } 1223fe6060f1SDimitry Andric 12240fca6ea1SDimitry Andric bool hasHardClauses() const { return MaxHardClauseLength > 0; } 1225e8d8bef9SDimitry Andric 1226fe6060f1SDimitry Andric bool hasGFX90AInsts() const { return GFX90AInsts; } 1227fe6060f1SDimitry Andric 1228bdd1243dSDimitry Andric bool hasFPAtomicToDenormModeHazard() const { 1229bdd1243dSDimitry Andric return getGeneration() == GFX10; 1230bdd1243dSDimitry Andric } 1231bdd1243dSDimitry Andric 123281ad6265SDimitry Andric bool hasVOP3DPP() const { return getGeneration() >= GFX11; } 123381ad6265SDimitry Andric 123481ad6265SDimitry Andric bool hasLdsDirect() const { return getGeneration() >= GFX11; } 123581ad6265SDimitry Andric 1236297eecfbSDimitry Andric bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; } 1237297eecfbSDimitry Andric 123881ad6265SDimitry Andric bool hasVALUPartialForwardingHazard() const { 12397a6dacacSDimitry Andric return getGeneration() == GFX11; 124081ad6265SDimitry Andric } 124181ad6265SDimitry Andric 1242bdd1243dSDimitry Andric bool hasVALUTransUseHazard() const { return HasVALUTransUseHazard; } 1243bdd1243dSDimitry Andric 124406c3fb27SDimitry Andric bool hasForceStoreSC0SC1() const { return HasForceStoreSC0SC1; } 124506c3fb27SDimitry Andric 12460fca6ea1SDimitry Andric bool requiresCodeObjectV6() const { return RequiresCOV6; } 12470fca6ea1SDimitry Andric 12487a6dacacSDimitry Andric bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; } 124981ad6265SDimitry Andric 1250fe6060f1SDimitry Andric /// Return if operations acting on VGPR tuples require even 
alignment. 1251fe6060f1SDimitry Andric bool needsAlignedVGPRs() const { return GFX90AInsts; } 1252fe6060f1SDimitry Andric 125381ad6265SDimitry Andric /// Return true if the target has the S_PACK_HL_B32_B16 instruction. 125481ad6265SDimitry Andric bool hasSPackHL() const { return GFX11Insts; } 125581ad6265SDimitry Andric 125681ad6265SDimitry Andric /// Return true if the target's EXP instruction has the COMPR flag, which 125781ad6265SDimitry Andric /// affects the meaning of the EN (enable) bits. 125881ad6265SDimitry Andric bool hasCompressedExport() const { return !GFX11Insts; } 125981ad6265SDimitry Andric 126081ad6265SDimitry Andric /// Return true if the target's EXP instruction supports the NULL export 126181ad6265SDimitry Andric /// target. 126281ad6265SDimitry Andric bool hasNullExportTarget() const { return !GFX11Insts; } 126381ad6265SDimitry Andric 12640fca6ea1SDimitry Andric bool has1_5xVGPRs() const { return Has1_5xVGPRs; } 1265bdd1243dSDimitry Andric 126681ad6265SDimitry Andric bool hasVOPDInsts() const { return HasVOPDInsts; } 126781ad6265SDimitry Andric 126881ad6265SDimitry Andric bool hasFlatScratchSVSSwizzleBug() const { return getGeneration() == GFX11; } 126981ad6265SDimitry Andric 127081ad6265SDimitry Andric /// Return true if the target has the S_DELAY_ALU instruction. 127181ad6265SDimitry Andric bool hasDelayAlu() const { return GFX11Insts; } 127281ad6265SDimitry Andric 1273fe6060f1SDimitry Andric bool hasPackedTID() const { return HasPackedTID; } 1274fe6060f1SDimitry Andric 127581ad6265SDimitry Andric // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that 127681ad6265SDimitry Andric // hasGFX90AInsts is also true. 
127781ad6265SDimitry Andric bool hasGFX940Insts() const { return GFX940Insts; } 127881ad6265SDimitry Andric 12795f757f3fSDimitry Andric bool hasSALUFloatInsts() const { return HasSALUFloatInsts; } 12805f757f3fSDimitry Andric 12815f757f3fSDimitry Andric bool hasVGPRSingleUseHintInsts() const { return HasVGPRSingleUseHintInsts; } 12825f757f3fSDimitry Andric 12835f757f3fSDimitry Andric bool hasPseudoScalarTrans() const { return HasPseudoScalarTrans; } 12845f757f3fSDimitry Andric 12855f757f3fSDimitry Andric bool hasRestrictedSOffset() const { return HasRestrictedSOffset; } 12865f757f3fSDimitry Andric 12870fca6ea1SDimitry Andric bool hasRequiredExportPriority() const { return HasRequiredExportPriority; } 12880fca6ea1SDimitry Andric 1289*6c4b055cSDimitry Andric bool hasVmemWriteVgprInOrder() const { return HasVmemWriteVgprInOrder; } 1290*6c4b055cSDimitry Andric 12917a6dacacSDimitry Andric /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt 12927a6dacacSDimitry Andric /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively. 12937a6dacacSDimitry Andric bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; } 12947a6dacacSDimitry Andric 1295*6c4b055cSDimitry Andric /// \returns true if inline constants are not supported for F16 pseudo 1296*6c4b055cSDimitry Andric /// scalar transcendentals. 1297*6c4b055cSDimitry Andric bool hasNoF16PseudoScalarTransInlineConstants() const { 1298*6c4b055cSDimitry Andric return getGeneration() == GFX12; 1299*6c4b055cSDimitry Andric } 1300*6c4b055cSDimitry Andric 13010fca6ea1SDimitry Andric /// \returns The maximum number of instructions that can be enclosed in an 13020fca6ea1SDimitry Andric /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that 13030fca6ea1SDimitry Andric /// instruction. 
13040fca6ea1SDimitry Andric unsigned maxHardClauseLength() const { return MaxHardClauseLength; } 13050fca6ea1SDimitry Andric 1306e8d8bef9SDimitry Andric /// Return the maximum number of waves per SIMD for kernels using \p SGPRs 1307e8d8bef9SDimitry Andric /// SGPRs 1308e8d8bef9SDimitry Andric unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; 1309e8d8bef9SDimitry Andric 1310e8d8bef9SDimitry Andric /// Return the maximum number of waves per SIMD for kernels using \p VGPRs 1311e8d8bef9SDimitry Andric /// VGPRs 1312e8d8bef9SDimitry Andric unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; 1313e8d8bef9SDimitry Andric 1314e8d8bef9SDimitry Andric /// Return occupancy for the given function. Used LDS and a number of 1315e8d8bef9SDimitry Andric /// registers if provided. 1316e8d8bef9SDimitry Andric /// Note, occupancy can be affected by the scratch allocation as well, but 1317e8d8bef9SDimitry Andric /// we do not have enough information to compute it. 1318e8d8bef9SDimitry Andric unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0, 1319e8d8bef9SDimitry Andric unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; 1320e8d8bef9SDimitry Andric 1321e8d8bef9SDimitry Andric /// \returns true if the flat_scratch register should be initialized with the 1322e8d8bef9SDimitry Andric /// pointer to the wave's scratch memory rather than a size and offset. 1323e8d8bef9SDimitry Andric bool flatScratchIsPointer() const { 1324e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 1325e8d8bef9SDimitry Andric } 1326e8d8bef9SDimitry Andric 1327fe6060f1SDimitry Andric /// \returns true if the flat_scratch register is initialized by the HW. 1328fe6060f1SDimitry Andric /// In this case it is readonly. 1329fe6060f1SDimitry Andric bool flatScratchIsArchitected() const { return HasArchitectedFlatScratch; } 1330fe6060f1SDimitry Andric 133106c3fb27SDimitry Andric /// \returns true if the architected SGPRs are enabled. 
133206c3fb27SDimitry Andric bool hasArchitectedSGPRs() const { return HasArchitectedSGPRs; } 133306c3fb27SDimitry Andric 13345f757f3fSDimitry Andric /// \returns true if Global Data Share is supported. 13355f757f3fSDimitry Andric bool hasGDS() const { return HasGDS; } 13365f757f3fSDimitry Andric 13375f757f3fSDimitry Andric /// \returns true if Global Wave Sync is supported. 13385f757f3fSDimitry Andric bool hasGWS() const { return HasGWS; } 13395f757f3fSDimitry Andric 1340e8d8bef9SDimitry Andric /// \returns true if the machine has merged shaders in which s0-s7 are 1341e8d8bef9SDimitry Andric /// reserved by the hardware and user SGPRs start at s8 1342e8d8bef9SDimitry Andric bool hasMergedShaders() const { 1343e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 1344e8d8bef9SDimitry Andric } 1345e8d8bef9SDimitry Andric 134681ad6265SDimitry Andric // \returns true if the target supports the pre-NGG legacy geometry path. 134781ad6265SDimitry Andric bool hasLegacyGeometry() const { return getGeneration() < GFX11; } 134881ad6265SDimitry Andric 13495f757f3fSDimitry Andric // \returns true if preloading kernel arguments is supported. 13505f757f3fSDimitry Andric bool hasKernargPreload() const { return KernargPreload; } 13515f757f3fSDimitry Andric 13525f757f3fSDimitry Andric // \returns true if the target has split barriers feature 13535f757f3fSDimitry Andric bool hasSplitBarriers() const { return getGeneration() >= GFX12; } 13545f757f3fSDimitry Andric 13555f757f3fSDimitry Andric // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable. 13565f757f3fSDimitry Andric bool hasCvtFP8VOP1Bug() const { return true; } 13575f757f3fSDimitry Andric 13585f757f3fSDimitry Andric // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a 13595f757f3fSDimitry Andric // no-return form. 
13605f757f3fSDimitry Andric bool hasAtomicCSubNoRtnInsts() const { return HasAtomicCSubNoRtnInsts; } 13615f757f3fSDimitry Andric 13625f757f3fSDimitry Andric // \returns true if the target has DX10_CLAMP kernel descriptor mode bit 13635f757f3fSDimitry Andric bool hasDX10ClampMode() const { return getGeneration() < GFX12; } 13645f757f3fSDimitry Andric 13655f757f3fSDimitry Andric // \returns true if the target has IEEE kernel descriptor mode bit 13665f757f3fSDimitry Andric bool hasIEEEMode() const { return getGeneration() < GFX12; } 13675f757f3fSDimitry Andric 13685f757f3fSDimitry Andric // \returns true if the target has IEEE fminimum/fmaximum instructions 13695f757f3fSDimitry Andric bool hasIEEEMinMax() const { return getGeneration() >= GFX12; } 13705f757f3fSDimitry Andric 13710fca6ea1SDimitry Andric // \returns true if the target has IEEE fminimum3/fmaximum3 instructions 13720fca6ea1SDimitry Andric bool hasIEEEMinMax3() const { return hasIEEEMinMax(); } 13730fca6ea1SDimitry Andric 13745f757f3fSDimitry Andric // \returns true if the target has WG_RR_MODE kernel descriptor mode bit 13755f757f3fSDimitry Andric bool hasRrWGMode() const { return getGeneration() >= GFX12; } 13765f757f3fSDimitry Andric 13777a6dacacSDimitry Andric /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative 13787a6dacacSDimitry Andric /// values. 13797a6dacacSDimitry Andric bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; } 13807a6dacacSDimitry Andric 13817a6dacacSDimitry Andric // \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead 13827a6dacacSDimitry Andric // of sign-extending. 13837a6dacacSDimitry Andric bool hasGetPCZeroExtension() const { return GFX12Insts; } 13847a6dacacSDimitry Andric 1385e8d8bef9SDimitry Andric /// \returns SGPR allocation granularity supported by the subtarget. 
1386e8d8bef9SDimitry Andric unsigned getSGPRAllocGranule() const { 1387e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getSGPRAllocGranule(this); 1388e8d8bef9SDimitry Andric } 1389e8d8bef9SDimitry Andric 1390e8d8bef9SDimitry Andric /// \returns SGPR encoding granularity supported by the subtarget. 1391e8d8bef9SDimitry Andric unsigned getSGPREncodingGranule() const { 1392e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getSGPREncodingGranule(this); 1393e8d8bef9SDimitry Andric } 1394e8d8bef9SDimitry Andric 1395e8d8bef9SDimitry Andric /// \returns Total number of SGPRs supported by the subtarget. 1396e8d8bef9SDimitry Andric unsigned getTotalNumSGPRs() const { 1397e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getTotalNumSGPRs(this); 1398e8d8bef9SDimitry Andric } 1399e8d8bef9SDimitry Andric 1400e8d8bef9SDimitry Andric /// \returns Addressable number of SGPRs supported by the subtarget. 1401e8d8bef9SDimitry Andric unsigned getAddressableNumSGPRs() const { 1402e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getAddressableNumSGPRs(this); 1403e8d8bef9SDimitry Andric } 1404e8d8bef9SDimitry Andric 1405e8d8bef9SDimitry Andric /// \returns Minimum number of SGPRs that meets the given number of waves per 1406e8d8bef9SDimitry Andric /// execution unit requirement supported by the subtarget. 1407e8d8bef9SDimitry Andric unsigned getMinNumSGPRs(unsigned WavesPerEU) const { 1408e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU); 1409e8d8bef9SDimitry Andric } 1410e8d8bef9SDimitry Andric 1411e8d8bef9SDimitry Andric /// \returns Maximum number of SGPRs that meets the given number of waves per 1412e8d8bef9SDimitry Andric /// execution unit requirement supported by the subtarget. 
1413e8d8bef9SDimitry Andric unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { 1414e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable); 1415e8d8bef9SDimitry Andric } 1416e8d8bef9SDimitry Andric 1417fe6060f1SDimitry Andric /// \returns Reserved number of SGPRs. This is common 1418fe6060f1SDimitry Andric /// utility function called by MachineFunction and 1419fe6060f1SDimitry Andric /// Function variants of getReservedNumSGPRs. 142004eeddc0SDimitry Andric unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const; 1421fe6060f1SDimitry Andric /// \returns Reserved number of SGPRs for given machine function \p MF. 1422e8d8bef9SDimitry Andric unsigned getReservedNumSGPRs(const MachineFunction &MF) const; 1423e8d8bef9SDimitry Andric 1424fe6060f1SDimitry Andric /// \returns Reserved number of SGPRs for given function \p F. 1425fe6060f1SDimitry Andric unsigned getReservedNumSGPRs(const Function &F) const; 1426fe6060f1SDimitry Andric 1427fe6060f1SDimitry Andric /// \returns max num SGPRs. This is the common utility 1428fe6060f1SDimitry Andric /// function called by MachineFunction and Function 1429fe6060f1SDimitry Andric /// variants of getMaxNumSGPRs. 1430fe6060f1SDimitry Andric unsigned getBaseMaxNumSGPRs(const Function &F, 1431fe6060f1SDimitry Andric std::pair<unsigned, unsigned> WavesPerEU, 1432fe6060f1SDimitry Andric unsigned PreloadedSGPRs, 1433fe6060f1SDimitry Andric unsigned ReservedNumSGPRs) const; 1434fe6060f1SDimitry Andric 1435e8d8bef9SDimitry Andric /// \returns Maximum number of SGPRs that meets number of waves per execution 1436e8d8bef9SDimitry Andric /// unit requirement for function \p MF, or number of SGPRs explicitly 1437e8d8bef9SDimitry Andric /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF. 
1438e8d8bef9SDimitry Andric /// 1439e8d8bef9SDimitry Andric /// \returns Value that meets number of waves per execution unit requirement 1440e8d8bef9SDimitry Andric /// if explicitly requested value cannot be converted to integer, violates 1441e8d8bef9SDimitry Andric /// subtarget's specifications, or does not meet number of waves per execution 1442e8d8bef9SDimitry Andric /// unit requirement. 1443e8d8bef9SDimitry Andric unsigned getMaxNumSGPRs(const MachineFunction &MF) const; 1444e8d8bef9SDimitry Andric 1445fe6060f1SDimitry Andric /// \returns Maximum number of SGPRs that meets number of waves per execution 1446fe6060f1SDimitry Andric /// unit requirement for function \p F, or number of SGPRs explicitly 1447fe6060f1SDimitry Andric /// requested using "amdgpu-num-sgpr" attribute attached to function \p F. 1448fe6060f1SDimitry Andric /// 1449fe6060f1SDimitry Andric /// \returns Value that meets number of waves per execution unit requirement 1450fe6060f1SDimitry Andric /// if explicitly requested value cannot be converted to integer, violates 1451fe6060f1SDimitry Andric /// subtarget's specifications, or does not meet number of waves per execution 1452fe6060f1SDimitry Andric /// unit requirement. 1453fe6060f1SDimitry Andric unsigned getMaxNumSGPRs(const Function &F) const; 1454fe6060f1SDimitry Andric 1455e8d8bef9SDimitry Andric /// \returns VGPR allocation granularity supported by the subtarget. 1456e8d8bef9SDimitry Andric unsigned getVGPRAllocGranule() const { 1457e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getVGPRAllocGranule(this); 1458e8d8bef9SDimitry Andric } 1459e8d8bef9SDimitry Andric 1460e8d8bef9SDimitry Andric /// \returns VGPR encoding granularity supported by the subtarget. 
1461e8d8bef9SDimitry Andric unsigned getVGPREncodingGranule() const { 1462e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getVGPREncodingGranule(this); 1463e8d8bef9SDimitry Andric } 1464e8d8bef9SDimitry Andric 1465e8d8bef9SDimitry Andric /// \returns Total number of VGPRs supported by the subtarget. 1466e8d8bef9SDimitry Andric unsigned getTotalNumVGPRs() const { 1467e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getTotalNumVGPRs(this); 1468e8d8bef9SDimitry Andric } 1469e8d8bef9SDimitry Andric 14700fca6ea1SDimitry Andric /// \returns Addressable number of architectural VGPRs supported by the 14710fca6ea1SDimitry Andric /// subtarget. 14720fca6ea1SDimitry Andric unsigned getAddressableNumArchVGPRs() const { 14730fca6ea1SDimitry Andric return AMDGPU::IsaInfo::getAddressableNumArchVGPRs(this); 14740fca6ea1SDimitry Andric } 14750fca6ea1SDimitry Andric 1476e8d8bef9SDimitry Andric /// \returns Addressable number of VGPRs supported by the subtarget. 1477e8d8bef9SDimitry Andric unsigned getAddressableNumVGPRs() const { 1478e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getAddressableNumVGPRs(this); 1479e8d8bef9SDimitry Andric } 1480e8d8bef9SDimitry Andric 1481bdd1243dSDimitry Andric /// \returns the minimum number of VGPRs that will prevent achieving more than 1482bdd1243dSDimitry Andric /// the specified number of waves \p WavesPerEU. 1483e8d8bef9SDimitry Andric unsigned getMinNumVGPRs(unsigned WavesPerEU) const { 1484e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); 1485e8d8bef9SDimitry Andric } 1486e8d8bef9SDimitry Andric 1487bdd1243dSDimitry Andric /// \returns the maximum number of VGPRs that can be used and still achieved 1488bdd1243dSDimitry Andric /// at least the specified number of waves \p WavesPerEU. 
1489e8d8bef9SDimitry Andric unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { 1490e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); 1491e8d8bef9SDimitry Andric } 1492e8d8bef9SDimitry Andric 1493fe6060f1SDimitry Andric /// \returns max num VGPRs. This is the common utility function 1494fe6060f1SDimitry Andric /// called by MachineFunction and Function variants of getMaxNumVGPRs. 1495fe6060f1SDimitry Andric unsigned getBaseMaxNumVGPRs(const Function &F, 1496fe6060f1SDimitry Andric std::pair<unsigned, unsigned> WavesPerEU) const; 1497fe6060f1SDimitry Andric /// \returns Maximum number of VGPRs that meets number of waves per execution 1498fe6060f1SDimitry Andric /// unit requirement for function \p F, or number of VGPRs explicitly 1499fe6060f1SDimitry Andric /// requested using "amdgpu-num-vgpr" attribute attached to function \p F. 1500fe6060f1SDimitry Andric /// 1501fe6060f1SDimitry Andric /// \returns Value that meets number of waves per execution unit requirement 1502fe6060f1SDimitry Andric /// if explicitly requested value cannot be converted to integer, violates 1503fe6060f1SDimitry Andric /// subtarget's specifications, or does not meet number of waves per execution 1504fe6060f1SDimitry Andric /// unit requirement. 1505fe6060f1SDimitry Andric unsigned getMaxNumVGPRs(const Function &F) const; 1506fe6060f1SDimitry Andric 150781ad6265SDimitry Andric unsigned getMaxNumAGPRs(const Function &F) const { 150881ad6265SDimitry Andric return getMaxNumVGPRs(F); 150981ad6265SDimitry Andric } 151081ad6265SDimitry Andric 1511e8d8bef9SDimitry Andric /// \returns Maximum number of VGPRs that meets number of waves per execution 1512e8d8bef9SDimitry Andric /// unit requirement for function \p MF, or number of VGPRs explicitly 1513e8d8bef9SDimitry Andric /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. 
1514e8d8bef9SDimitry Andric /// 1515e8d8bef9SDimitry Andric /// \returns Value that meets number of waves per execution unit requirement 1516e8d8bef9SDimitry Andric /// if explicitly requested value cannot be converted to integer, violates 1517e8d8bef9SDimitry Andric /// subtarget's specifications, or does not meet number of waves per execution 1518e8d8bef9SDimitry Andric /// unit requirement. 1519e8d8bef9SDimitry Andric unsigned getMaxNumVGPRs(const MachineFunction &MF) const; 1520e8d8bef9SDimitry Andric 1521e8d8bef9SDimitry Andric void getPostRAMutations( 1522e8d8bef9SDimitry Andric std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) 1523e8d8bef9SDimitry Andric const override; 1524e8d8bef9SDimitry Andric 1525349cc55cSDimitry Andric std::unique_ptr<ScheduleDAGMutation> 1526349cc55cSDimitry Andric createFillMFMAShadowMutation(const TargetInstrInfo *TII) const; 1527349cc55cSDimitry Andric 1528e8d8bef9SDimitry Andric bool isWave32() const { 1529e8d8bef9SDimitry Andric return getWavefrontSize() == 32; 1530e8d8bef9SDimitry Andric } 1531e8d8bef9SDimitry Andric 1532e8d8bef9SDimitry Andric bool isWave64() const { 1533e8d8bef9SDimitry Andric return getWavefrontSize() == 64; 1534e8d8bef9SDimitry Andric } 1535e8d8bef9SDimitry Andric 1536e8d8bef9SDimitry Andric const TargetRegisterClass *getBoolRC() const { 1537e8d8bef9SDimitry Andric return getRegisterInfo()->getBoolRC(); 1538e8d8bef9SDimitry Andric } 1539e8d8bef9SDimitry Andric 1540e8d8bef9SDimitry Andric /// \returns Maximum number of work groups per compute unit supported by the 1541e8d8bef9SDimitry Andric /// subtarget and limited by given \p FlatWorkGroupSize. 
1542e8d8bef9SDimitry Andric unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { 1543e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); 1544e8d8bef9SDimitry Andric } 1545e8d8bef9SDimitry Andric 1546e8d8bef9SDimitry Andric /// \returns Minimum flat work group size supported by the subtarget. 1547e8d8bef9SDimitry Andric unsigned getMinFlatWorkGroupSize() const override { 1548e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); 1549e8d8bef9SDimitry Andric } 1550e8d8bef9SDimitry Andric 1551e8d8bef9SDimitry Andric /// \returns Maximum flat work group size supported by the subtarget. 1552e8d8bef9SDimitry Andric unsigned getMaxFlatWorkGroupSize() const override { 1553e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); 1554e8d8bef9SDimitry Andric } 1555e8d8bef9SDimitry Andric 1556e8d8bef9SDimitry Andric /// \returns Number of waves per execution unit required to support the given 1557e8d8bef9SDimitry Andric /// \p FlatWorkGroupSize. 1558e8d8bef9SDimitry Andric unsigned 1559e8d8bef9SDimitry Andric getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { 1560e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); 1561e8d8bef9SDimitry Andric } 1562e8d8bef9SDimitry Andric 1563e8d8bef9SDimitry Andric /// \returns Minimum number of waves per execution unit supported by the 1564e8d8bef9SDimitry Andric /// subtarget. 
1565e8d8bef9SDimitry Andric unsigned getMinWavesPerEU() const override { 1566e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinWavesPerEU(this); 1567e8d8bef9SDimitry Andric } 1568e8d8bef9SDimitry Andric 1569e8d8bef9SDimitry Andric void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, 15700fca6ea1SDimitry Andric SDep &Dep, 15710fca6ea1SDimitry Andric const TargetSchedModel *SchedModel) const override; 157281ad6265SDimitry Andric 157381ad6265SDimitry Andric // \returns true if it's beneficial on this subtarget for the scheduler to 157481ad6265SDimitry Andric // cluster stores as well as loads. 157581ad6265SDimitry Andric bool shouldClusterStores() const { return getGeneration() >= GFX11; } 1576bdd1243dSDimitry Andric 1577bdd1243dSDimitry Andric // \returns the number of address arguments from which to enable MIMG NSA 1578bdd1243dSDimitry Andric // on supported architectures. 1579bdd1243dSDimitry Andric unsigned getNSAThreshold(const MachineFunction &MF) const; 158006c3fb27SDimitry Andric 158106c3fb27SDimitry Andric // \returns true if the subtarget has a hazard requiring an "s_nop 0" 158206c3fb27SDimitry Andric // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)". 158306c3fb27SDimitry Andric bool requiresNopBeforeDeallocVGPRs() const { 158406c3fb27SDimitry Andric // Currently all targets that support the dealloc VGPRs message also require 158506c3fb27SDimitry Andric // the nop. 
158606c3fb27SDimitry Andric return true; 158706c3fb27SDimitry Andric } 1588e8d8bef9SDimitry Andric }; 1589e8d8bef9SDimitry Andric 15905f757f3fSDimitry Andric class GCNUserSGPRUsageInfo { 15915f757f3fSDimitry Andric public: 15925f757f3fSDimitry Andric bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; } 15935f757f3fSDimitry Andric 15945f757f3fSDimitry Andric bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; } 15955f757f3fSDimitry Andric 15965f757f3fSDimitry Andric bool hasDispatchPtr() const { return DispatchPtr; } 15975f757f3fSDimitry Andric 15985f757f3fSDimitry Andric bool hasQueuePtr() const { return QueuePtr; } 15995f757f3fSDimitry Andric 16005f757f3fSDimitry Andric bool hasKernargSegmentPtr() const { return KernargSegmentPtr; } 16015f757f3fSDimitry Andric 16025f757f3fSDimitry Andric bool hasDispatchID() const { return DispatchID; } 16035f757f3fSDimitry Andric 16045f757f3fSDimitry Andric bool hasFlatScratchInit() const { return FlatScratchInit; } 16055f757f3fSDimitry Andric 16060fca6ea1SDimitry Andric bool hasPrivateSegmentSize() const { return PrivateSegmentSize; } 16070fca6ea1SDimitry Andric 16085f757f3fSDimitry Andric unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; } 16095f757f3fSDimitry Andric 16105f757f3fSDimitry Andric unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; } 16115f757f3fSDimitry Andric 16125f757f3fSDimitry Andric unsigned getNumFreeUserSGPRs(); 16135f757f3fSDimitry Andric 16145f757f3fSDimitry Andric void allocKernargPreloadSGPRs(unsigned NumSGPRs); 16155f757f3fSDimitry Andric 16165f757f3fSDimitry Andric enum UserSGPRID : unsigned { 16175f757f3fSDimitry Andric ImplicitBufferPtrID = 0, 16185f757f3fSDimitry Andric PrivateSegmentBufferID = 1, 16195f757f3fSDimitry Andric DispatchPtrID = 2, 16205f757f3fSDimitry Andric QueuePtrID = 3, 16215f757f3fSDimitry Andric KernargSegmentPtrID = 4, 16225f757f3fSDimitry Andric DispatchIdID = 5, 16235f757f3fSDimitry Andric FlatScratchInitID = 6, 
16245f757f3fSDimitry Andric PrivateSegmentSizeID = 7 16255f757f3fSDimitry Andric }; 16265f757f3fSDimitry Andric 16275f757f3fSDimitry Andric // Returns the size in number of SGPRs for preload user SGPR field. 16285f757f3fSDimitry Andric static unsigned getNumUserSGPRForField(UserSGPRID ID) { 16295f757f3fSDimitry Andric switch (ID) { 16305f757f3fSDimitry Andric case ImplicitBufferPtrID: 16315f757f3fSDimitry Andric return 2; 16325f757f3fSDimitry Andric case PrivateSegmentBufferID: 16335f757f3fSDimitry Andric return 4; 16345f757f3fSDimitry Andric case DispatchPtrID: 16355f757f3fSDimitry Andric return 2; 16365f757f3fSDimitry Andric case QueuePtrID: 16375f757f3fSDimitry Andric return 2; 16385f757f3fSDimitry Andric case KernargSegmentPtrID: 16395f757f3fSDimitry Andric return 2; 16405f757f3fSDimitry Andric case DispatchIdID: 16415f757f3fSDimitry Andric return 2; 16425f757f3fSDimitry Andric case FlatScratchInitID: 16435f757f3fSDimitry Andric return 2; 16445f757f3fSDimitry Andric case PrivateSegmentSizeID: 16455f757f3fSDimitry Andric return 1; 16465f757f3fSDimitry Andric } 16475f757f3fSDimitry Andric llvm_unreachable("Unknown UserSGPRID."); 16485f757f3fSDimitry Andric } 16495f757f3fSDimitry Andric 16505f757f3fSDimitry Andric GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST); 16515f757f3fSDimitry Andric 16525f757f3fSDimitry Andric private: 16535f757f3fSDimitry Andric const GCNSubtarget &ST; 16545f757f3fSDimitry Andric 16555f757f3fSDimitry Andric // Private memory buffer 16565f757f3fSDimitry Andric // Compute directly in sgpr[0:1] 16575f757f3fSDimitry Andric // Other shaders indirect 64-bits at sgpr[0:1] 16585f757f3fSDimitry Andric bool ImplicitBufferPtr = false; 16595f757f3fSDimitry Andric 16605f757f3fSDimitry Andric bool PrivateSegmentBuffer = false; 16615f757f3fSDimitry Andric 16625f757f3fSDimitry Andric bool DispatchPtr = false; 16635f757f3fSDimitry Andric 16645f757f3fSDimitry Andric bool QueuePtr = false; 16655f757f3fSDimitry Andric 16665f757f3fSDimitry 
Andric bool KernargSegmentPtr = false; 16675f757f3fSDimitry Andric 16685f757f3fSDimitry Andric bool DispatchID = false; 16695f757f3fSDimitry Andric 16705f757f3fSDimitry Andric bool FlatScratchInit = false; 16715f757f3fSDimitry Andric 16720fca6ea1SDimitry Andric bool PrivateSegmentSize = false; 16730fca6ea1SDimitry Andric 16745f757f3fSDimitry Andric unsigned NumKernargPreloadSGPRs = 0; 16755f757f3fSDimitry Andric 16765f757f3fSDimitry Andric unsigned NumUsedUserSGPRs = 0; 16775f757f3fSDimitry Andric }; 16785f757f3fSDimitry Andric 1679e8d8bef9SDimitry Andric } // end namespace llvm 1680e8d8bef9SDimitry Andric 1681e8d8bef9SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 1682