1e8d8bef9SDimitry Andric //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===// 2e8d8bef9SDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //==-----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric // 9e8d8bef9SDimitry Andric /// \file 10e8d8bef9SDimitry Andric /// AMD GCN specific subclass of TargetSubtarget. 11e8d8bef9SDimitry Andric // 12e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 13e8d8bef9SDimitry Andric 14e8d8bef9SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 15e8d8bef9SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 16e8d8bef9SDimitry Andric 17e8d8bef9SDimitry Andric #include "AMDGPUCallLowering.h" 18e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h" 19e8d8bef9SDimitry Andric #include "SIFrameLowering.h" 20e8d8bef9SDimitry Andric #include "SIISelLowering.h" 21e8d8bef9SDimitry Andric #include "SIInstrInfo.h" 22e8d8bef9SDimitry Andric #include "llvm/CodeGen/SelectionDAGTargetInfo.h" 23e8d8bef9SDimitry Andric 24e8d8bef9SDimitry Andric #define GET_SUBTARGETINFO_HEADER 25e8d8bef9SDimitry Andric #include "AMDGPUGenSubtargetInfo.inc" 26e8d8bef9SDimitry Andric 27e8d8bef9SDimitry Andric namespace llvm { 28e8d8bef9SDimitry Andric 29e8d8bef9SDimitry Andric class GCNTargetMachine; 30e8d8bef9SDimitry Andric 31e8d8bef9SDimitry Andric class GCNSubtarget final : public AMDGPUGenSubtargetInfo, 32e8d8bef9SDimitry Andric public AMDGPUSubtarget { 33*bdd1243dSDimitry Andric public: 34e8d8bef9SDimitry Andric using AMDGPUSubtarget::getMaxWavesPerEU; 35e8d8bef9SDimitry Andric 36fe6060f1SDimitry Andric // Following 2 enums are 
documented at: 37fe6060f1SDimitry Andric // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi 38fe6060f1SDimitry Andric enum class TrapHandlerAbi { 39fe6060f1SDimitry Andric NONE = 0x00, 40fe6060f1SDimitry Andric AMDHSA = 0x01, 41e8d8bef9SDimitry Andric }; 42e8d8bef9SDimitry Andric 43fe6060f1SDimitry Andric enum class TrapID { 44fe6060f1SDimitry Andric LLVMAMDHSATrap = 0x02, 45fe6060f1SDimitry Andric LLVMAMDHSADebugTrap = 0x03, 46e8d8bef9SDimitry Andric }; 47e8d8bef9SDimitry Andric 48e8d8bef9SDimitry Andric private: 49e8d8bef9SDimitry Andric /// GlobalISel related APIs. 50e8d8bef9SDimitry Andric std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; 51e8d8bef9SDimitry Andric std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo; 52e8d8bef9SDimitry Andric std::unique_ptr<InstructionSelector> InstSelector; 53e8d8bef9SDimitry Andric std::unique_ptr<LegalizerInfo> Legalizer; 54e8d8bef9SDimitry Andric std::unique_ptr<RegisterBankInfo> RegBankInfo; 55e8d8bef9SDimitry Andric 56e8d8bef9SDimitry Andric protected: 57e8d8bef9SDimitry Andric // Basic subtarget description. 58e8d8bef9SDimitry Andric Triple TargetTriple; 59e8d8bef9SDimitry Andric AMDGPU::IsaInfo::AMDGPUTargetID TargetID; 6081ad6265SDimitry Andric unsigned Gen = INVALID; 61e8d8bef9SDimitry Andric InstrItineraryData InstrItins; 6281ad6265SDimitry Andric int LDSBankCount = 0; 6381ad6265SDimitry Andric unsigned MaxPrivateElementSize = 0; 64e8d8bef9SDimitry Andric 65e8d8bef9SDimitry Andric // Possibly statically set by tablegen, but may want to be overridden. 6681ad6265SDimitry Andric bool FastFMAF32 = false; 6781ad6265SDimitry Andric bool FastDenormalF32 = false; 6881ad6265SDimitry Andric bool HalfRate64Ops = false; 6981ad6265SDimitry Andric bool FullRate64Ops = false; 70e8d8bef9SDimitry Andric 71e8d8bef9SDimitry Andric // Dynamically set bits that enable features. 
bool FlatForGlobal = false;
bool AutoWaitcntBeforeBarrier = false;
bool BackOffBarrier = false;
bool UnalignedScratchAccess = false;
bool UnalignedAccessMode = false;
bool HasApertureRegs = false;
bool SupportsXNACK = false;

// This should not be used directly. 'TargetID' tracks the dynamic settings
// for XNACK.
bool EnableXNACK = false;

bool EnableTgSplit = false;
bool EnableCuMode = false;
bool TrapHandler = false;

// Used as options.
bool EnableLoadStoreOpt = false;
bool EnableUnsafeDSOffsetFolding = false;
bool EnableSIScheduler = false;
bool EnableDS128 = false;
bool EnablePRTStrictNull = false;
bool DumpCode = false;

// Subtarget properties statically set by tablegen.
bool FP64 = false;
bool FMA = false;
bool MIMG_R128 = false;
bool CIInsts = false;
bool GFX8Insts = false;
bool GFX9Insts = false;
bool GFX90AInsts = false;
bool GFX940Insts = false;
bool GFX10Insts = false;
bool GFX11Insts = false;
bool GFX10_3Insts = false;
bool GFX7GFX8GFX9Insts = false;
bool SGPRInitBug = false;
// Further feature, instruction-availability and hardware-bug flags.
// All default to false (NSAMaxSize to 0).
bool UserSGPRInit16Bug = false;
bool NegativeScratchOffsetBug = false;
bool NegativeUnalignedScratchOffsetBug = false;
bool HasSMemRealTime = false;
bool HasIntClamp = false;
bool HasFmaMixInsts = false;
bool HasMovrel = false;
bool HasVGPRIndexMode = false;
bool HasScalarStores = false;
bool HasScalarAtomics = false;
bool HasSDWAOmod = false;
bool HasSDWAScalar = false;
bool HasSDWASdst = false;
bool HasSDWAMac = false;
bool HasSDWAOutModsVOPC = false;
bool HasDPP = false;
bool HasDPP8 = false;
bool Has64BitDPP = false;
bool HasPackedFP32Ops = false;
bool HasImageInsts = false;
bool HasExtendedImageInsts = false;
bool HasR128A16 = false;
bool HasA16 = false;
bool HasG16 = false;
bool HasNSAEncoding = false;
unsigned NSAMaxSize = 0;
bool GFX10_AEncoding = false;
bool GFX10_BEncoding = false;
bool HasDLInsts = false;
bool HasFmacF64Inst = false;
bool HasDot1Insts = false;
bool HasDot2Insts = false;
bool HasDot3Insts = false;
bool HasDot4Insts = false;
bool HasDot5Insts = false;
bool HasDot6Insts = false;
bool HasDot7Insts = false;
bool HasDot8Insts = false;
bool HasDot9Insts = false;
bool HasMAIInsts = false;
bool HasFP8Insts = false;
bool HasPkFmacF16Inst = false;
bool HasAtomicFaddRtnInsts = false;
bool HasAtomicFaddNoRtnInsts = false;
bool HasAtomicPkFaddNoRtnInsts = false;
bool HasFlatAtomicFaddF32Inst = false;
bool SupportsSRAMECC = false;

// This should not be used directly. 'TargetID' tracks the dynamic settings
// for SRAMECC.
bool EnableSRAMECC = false;

bool HasNoSdstCMPX = false;
bool HasVscnt = false;
bool HasGetWaveIdInst = false;
bool HasSMemTimeInst = false;
bool HasShaderCyclesRegister = false;
bool HasVOP3Literal = false;
bool HasNoDataDepHazard = false;
bool FlatAddressSpace = false;
bool FlatInstOffsets = false;
bool FlatGlobalInsts = false;
bool FlatScratchInsts = false;
bool ScalarFlatScratchInsts = false;
bool HasArchitectedFlatScratch = false;
bool EnableFlatScratch = false;
bool AddNoCarryInsts = false;
bool HasUnpackedD16VMem = false;
bool LDSMisalignedBug = false;
bool HasMFMAInlineLiteralBug = false;
bool UnalignedBufferAccess = false;
bool UnalignedDSAccess = false;
bool HasPackedTID = false;
bool ScalarizeGlobal = false;

// Hazard and hardware-bug flags, plus a few later feature additions.
bool HasVcmpxPermlaneHazard = false;
bool HasVMEMtoScalarWriteHazard = false;
bool HasSMEMtoVectorWriteHazard = false;
bool HasInstFwdPrefetchBug = false;
bool HasVcmpxExecWARHazard = false;
bool HasLdsBranchVmemWARHazard = false;
bool HasNSAtoVMEMBug = false;
bool HasNSAClauseBug = false;
bool HasOffset3fBug = false;
bool HasFlatSegmentOffsetBug = false;
bool HasImageStoreD16Bug = false;
bool HasImageGather4D16Bug = false;
bool HasGFX11FullVGPRs = false;
bool HasMADIntraFwdBug = false;
bool HasVOPDInsts = false;
bool HasVALUTransUseHazard = false;

// Dummy feature to use for assembler in tablegen.
20381ad6265SDimitry Andric bool FeatureDisable = false; 204e8d8bef9SDimitry Andric 205e8d8bef9SDimitry Andric SelectionDAGTargetInfo TSInfo; 206e8d8bef9SDimitry Andric private: 207e8d8bef9SDimitry Andric SIInstrInfo InstrInfo; 208e8d8bef9SDimitry Andric SITargetLowering TLInfo; 209e8d8bef9SDimitry Andric SIFrameLowering FrameLowering; 210e8d8bef9SDimitry Andric 211e8d8bef9SDimitry Andric public: 212e8d8bef9SDimitry Andric GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 213e8d8bef9SDimitry Andric const GCNTargetMachine &TM); 214e8d8bef9SDimitry Andric ~GCNSubtarget() override; 215e8d8bef9SDimitry Andric 216e8d8bef9SDimitry Andric GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, 217e8d8bef9SDimitry Andric StringRef GPU, StringRef FS); 218e8d8bef9SDimitry Andric 219e8d8bef9SDimitry Andric const SIInstrInfo *getInstrInfo() const override { 220e8d8bef9SDimitry Andric return &InstrInfo; 221e8d8bef9SDimitry Andric } 222e8d8bef9SDimitry Andric 223e8d8bef9SDimitry Andric const SIFrameLowering *getFrameLowering() const override { 224e8d8bef9SDimitry Andric return &FrameLowering; 225e8d8bef9SDimitry Andric } 226e8d8bef9SDimitry Andric 227e8d8bef9SDimitry Andric const SITargetLowering *getTargetLowering() const override { 228e8d8bef9SDimitry Andric return &TLInfo; 229e8d8bef9SDimitry Andric } 230e8d8bef9SDimitry Andric 231e8d8bef9SDimitry Andric const SIRegisterInfo *getRegisterInfo() const override { 232e8d8bef9SDimitry Andric return &InstrInfo.getRegisterInfo(); 233e8d8bef9SDimitry Andric } 234e8d8bef9SDimitry Andric 235e8d8bef9SDimitry Andric const CallLowering *getCallLowering() const override { 236e8d8bef9SDimitry Andric return CallLoweringInfo.get(); 237e8d8bef9SDimitry Andric } 238e8d8bef9SDimitry Andric 239e8d8bef9SDimitry Andric const InlineAsmLowering *getInlineAsmLowering() const override { 240e8d8bef9SDimitry Andric return InlineAsmLoweringInfo.get(); 241e8d8bef9SDimitry Andric } 242e8d8bef9SDimitry Andric 243e8d8bef9SDimitry Andric 
InstructionSelector *getInstructionSelector() const override { 244e8d8bef9SDimitry Andric return InstSelector.get(); 245e8d8bef9SDimitry Andric } 246e8d8bef9SDimitry Andric 247e8d8bef9SDimitry Andric const LegalizerInfo *getLegalizerInfo() const override { 248e8d8bef9SDimitry Andric return Legalizer.get(); 249e8d8bef9SDimitry Andric } 250e8d8bef9SDimitry Andric 251e8d8bef9SDimitry Andric const RegisterBankInfo *getRegBankInfo() const override { 252e8d8bef9SDimitry Andric return RegBankInfo.get(); 253e8d8bef9SDimitry Andric } 254e8d8bef9SDimitry Andric 255fe6060f1SDimitry Andric const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const { 256fe6060f1SDimitry Andric return TargetID; 257fe6060f1SDimitry Andric } 258fe6060f1SDimitry Andric 259e8d8bef9SDimitry Andric // Nothing implemented, just prevent crashes on use. 260e8d8bef9SDimitry Andric const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { 261e8d8bef9SDimitry Andric return &TSInfo; 262e8d8bef9SDimitry Andric } 263e8d8bef9SDimitry Andric 264e8d8bef9SDimitry Andric const InstrItineraryData *getInstrItineraryData() const override { 265e8d8bef9SDimitry Andric return &InstrItins; 266e8d8bef9SDimitry Andric } 267e8d8bef9SDimitry Andric 268e8d8bef9SDimitry Andric void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); 269e8d8bef9SDimitry Andric 270e8d8bef9SDimitry Andric Generation getGeneration() const { 271e8d8bef9SDimitry Andric return (Generation)Gen; 272e8d8bef9SDimitry Andric } 273e8d8bef9SDimitry Andric 27481ad6265SDimitry Andric unsigned getMaxWaveScratchSize() const { 27581ad6265SDimitry Andric // See COMPUTE_TMPRING_SIZE.WAVESIZE. 27681ad6265SDimitry Andric if (getGeneration() < GFX11) { 27781ad6265SDimitry Andric // 13-bit field in units of 256-dword. 27881ad6265SDimitry Andric return (256 * 4) * ((1 << 13) - 1); 27981ad6265SDimitry Andric } 28081ad6265SDimitry Andric // 15-bit field in units of 64-dword. 
28181ad6265SDimitry Andric return (64 * 4) * ((1 << 15) - 1); 28281ad6265SDimitry Andric } 28381ad6265SDimitry Andric 284349cc55cSDimitry Andric /// Return the number of high bits known to be zero for a frame index. 285e8d8bef9SDimitry Andric unsigned getKnownHighZeroBitsForFrameIndex() const { 28681ad6265SDimitry Andric return countLeadingZeros(getMaxWaveScratchSize()) + getWavefrontSizeLog2(); 287e8d8bef9SDimitry Andric } 288e8d8bef9SDimitry Andric 289e8d8bef9SDimitry Andric int getLDSBankCount() const { 290e8d8bef9SDimitry Andric return LDSBankCount; 291e8d8bef9SDimitry Andric } 292e8d8bef9SDimitry Andric 293e8d8bef9SDimitry Andric unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const { 294e8d8bef9SDimitry Andric return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16; 295e8d8bef9SDimitry Andric } 296e8d8bef9SDimitry Andric 297e8d8bef9SDimitry Andric unsigned getConstantBusLimit(unsigned Opcode) const; 298e8d8bef9SDimitry Andric 299fe6060f1SDimitry Andric /// Returns if the result of this instruction with a 16-bit result returned in 300fe6060f1SDimitry Andric /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve 301fe6060f1SDimitry Andric /// the original value. 
302fe6060f1SDimitry Andric bool zeroesHigh16BitsOfDest(unsigned Opcode) const; 303fe6060f1SDimitry Andric 304*bdd1243dSDimitry Andric bool supportsWGP() const { return getGeneration() >= GFX10; } 305*bdd1243dSDimitry Andric 306e8d8bef9SDimitry Andric bool hasIntClamp() const { 307e8d8bef9SDimitry Andric return HasIntClamp; 308e8d8bef9SDimitry Andric } 309e8d8bef9SDimitry Andric 310e8d8bef9SDimitry Andric bool hasFP64() const { 311e8d8bef9SDimitry Andric return FP64; 312e8d8bef9SDimitry Andric } 313e8d8bef9SDimitry Andric 314e8d8bef9SDimitry Andric bool hasMIMG_R128() const { 315e8d8bef9SDimitry Andric return MIMG_R128; 316e8d8bef9SDimitry Andric } 317e8d8bef9SDimitry Andric 318e8d8bef9SDimitry Andric bool hasHWFP64() const { 319e8d8bef9SDimitry Andric return FP64; 320e8d8bef9SDimitry Andric } 321e8d8bef9SDimitry Andric 322e8d8bef9SDimitry Andric bool hasFastFMAF32() const { 323e8d8bef9SDimitry Andric return FastFMAF32; 324e8d8bef9SDimitry Andric } 325e8d8bef9SDimitry Andric 326e8d8bef9SDimitry Andric bool hasHalfRate64Ops() const { 327e8d8bef9SDimitry Andric return HalfRate64Ops; 328e8d8bef9SDimitry Andric } 329e8d8bef9SDimitry Andric 330fe6060f1SDimitry Andric bool hasFullRate64Ops() const { 331fe6060f1SDimitry Andric return FullRate64Ops; 332fe6060f1SDimitry Andric } 333fe6060f1SDimitry Andric 334e8d8bef9SDimitry Andric bool hasAddr64() const { 335e8d8bef9SDimitry Andric return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); 336e8d8bef9SDimitry Andric } 337e8d8bef9SDimitry Andric 338e8d8bef9SDimitry Andric bool hasFlat() const { 339e8d8bef9SDimitry Andric return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS); 340e8d8bef9SDimitry Andric } 341e8d8bef9SDimitry Andric 342e8d8bef9SDimitry Andric // Return true if the target only has the reverse operand versions of VALU 343e8d8bef9SDimitry Andric // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32). 
344e8d8bef9SDimitry Andric bool hasOnlyRevVALUShifts() const { 345e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 346e8d8bef9SDimitry Andric } 347e8d8bef9SDimitry Andric 348e8d8bef9SDimitry Andric bool hasFractBug() const { 349e8d8bef9SDimitry Andric return getGeneration() == SOUTHERN_ISLANDS; 350e8d8bef9SDimitry Andric } 351e8d8bef9SDimitry Andric 352e8d8bef9SDimitry Andric bool hasBFE() const { 353e8d8bef9SDimitry Andric return true; 354e8d8bef9SDimitry Andric } 355e8d8bef9SDimitry Andric 356e8d8bef9SDimitry Andric bool hasBFI() const { 357e8d8bef9SDimitry Andric return true; 358e8d8bef9SDimitry Andric } 359e8d8bef9SDimitry Andric 360e8d8bef9SDimitry Andric bool hasBFM() const { 361e8d8bef9SDimitry Andric return hasBFE(); 362e8d8bef9SDimitry Andric } 363e8d8bef9SDimitry Andric 364e8d8bef9SDimitry Andric bool hasBCNT(unsigned Size) const { 365e8d8bef9SDimitry Andric return true; 366e8d8bef9SDimitry Andric } 367e8d8bef9SDimitry Andric 368e8d8bef9SDimitry Andric bool hasFFBL() const { 369e8d8bef9SDimitry Andric return true; 370e8d8bef9SDimitry Andric } 371e8d8bef9SDimitry Andric 372e8d8bef9SDimitry Andric bool hasFFBH() const { 373e8d8bef9SDimitry Andric return true; 374e8d8bef9SDimitry Andric } 375e8d8bef9SDimitry Andric 376e8d8bef9SDimitry Andric bool hasMed3_16() const { 377e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 378e8d8bef9SDimitry Andric } 379e8d8bef9SDimitry Andric 380e8d8bef9SDimitry Andric bool hasMin3Max3_16() const { 381e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 382e8d8bef9SDimitry Andric } 383e8d8bef9SDimitry Andric 384e8d8bef9SDimitry Andric bool hasFmaMixInsts() const { 385e8d8bef9SDimitry Andric return HasFmaMixInsts; 386e8d8bef9SDimitry Andric } 387e8d8bef9SDimitry Andric 388e8d8bef9SDimitry Andric bool hasCARRY() const { 389e8d8bef9SDimitry Andric return true; 390e8d8bef9SDimitry Andric } 391e8d8bef9SDimitry Andric 392e8d8bef9SDimitry Andric bool hasFMA() const { 
393e8d8bef9SDimitry Andric return FMA; 394e8d8bef9SDimitry Andric } 395e8d8bef9SDimitry Andric 396e8d8bef9SDimitry Andric bool hasSwap() const { 397e8d8bef9SDimitry Andric return GFX9Insts; 398e8d8bef9SDimitry Andric } 399e8d8bef9SDimitry Andric 400e8d8bef9SDimitry Andric bool hasScalarPackInsts() const { 401e8d8bef9SDimitry Andric return GFX9Insts; 402e8d8bef9SDimitry Andric } 403e8d8bef9SDimitry Andric 404e8d8bef9SDimitry Andric bool hasScalarMulHiInsts() const { 405e8d8bef9SDimitry Andric return GFX9Insts; 406e8d8bef9SDimitry Andric } 407e8d8bef9SDimitry Andric 408e8d8bef9SDimitry Andric TrapHandlerAbi getTrapHandlerAbi() const { 409fe6060f1SDimitry Andric return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE; 410fe6060f1SDimitry Andric } 411fe6060f1SDimitry Andric 412fe6060f1SDimitry Andric bool supportsGetDoorbellID() const { 413fe6060f1SDimitry Andric // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets. 414fe6060f1SDimitry Andric return getGeneration() >= GFX9; 415e8d8bef9SDimitry Andric } 416e8d8bef9SDimitry Andric 417e8d8bef9SDimitry Andric /// True if the offset field of DS instructions works as expected. On SI, the 418e8d8bef9SDimitry Andric /// offset uses a 16-bit adder and does not always wrap properly. 419e8d8bef9SDimitry Andric bool hasUsableDSOffset() const { 420e8d8bef9SDimitry Andric return getGeneration() >= SEA_ISLANDS; 421e8d8bef9SDimitry Andric } 422e8d8bef9SDimitry Andric 423e8d8bef9SDimitry Andric bool unsafeDSOffsetFoldingEnabled() const { 424e8d8bef9SDimitry Andric return EnableUnsafeDSOffsetFolding; 425e8d8bef9SDimitry Andric } 426e8d8bef9SDimitry Andric 427e8d8bef9SDimitry Andric /// Condition output from div_scale is usable. 
428e8d8bef9SDimitry Andric bool hasUsableDivScaleConditionOutput() const { 429e8d8bef9SDimitry Andric return getGeneration() != SOUTHERN_ISLANDS; 430e8d8bef9SDimitry Andric } 431e8d8bef9SDimitry Andric 432e8d8bef9SDimitry Andric /// Extra wait hazard is needed in some cases before 433e8d8bef9SDimitry Andric /// s_cbranch_vccnz/s_cbranch_vccz. 434e8d8bef9SDimitry Andric bool hasReadVCCZBug() const { 435e8d8bef9SDimitry Andric return getGeneration() <= SEA_ISLANDS; 436e8d8bef9SDimitry Andric } 437e8d8bef9SDimitry Andric 438e8d8bef9SDimitry Andric /// Writes to VCC_LO/VCC_HI update the VCCZ flag. 439e8d8bef9SDimitry Andric bool partialVCCWritesUpdateVCCZ() const { 440e8d8bef9SDimitry Andric return getGeneration() >= GFX10; 441e8d8bef9SDimitry Andric } 442e8d8bef9SDimitry Andric 443e8d8bef9SDimitry Andric /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR 444e8d8bef9SDimitry Andric /// was written by a VALU instruction. 445e8d8bef9SDimitry Andric bool hasSMRDReadVALUDefHazard() const { 446e8d8bef9SDimitry Andric return getGeneration() == SOUTHERN_ISLANDS; 447e8d8bef9SDimitry Andric } 448e8d8bef9SDimitry Andric 449e8d8bef9SDimitry Andric /// A read of an SGPR by a VMEM instruction requires 5 wait states when the 450e8d8bef9SDimitry Andric /// SGPR was written by a VALU Instruction. 451e8d8bef9SDimitry Andric bool hasVMEMReadSGPRVALUDefHazard() const { 452e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 453e8d8bef9SDimitry Andric } 454e8d8bef9SDimitry Andric 455e8d8bef9SDimitry Andric bool hasRFEHazards() const { 456e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 457e8d8bef9SDimitry Andric } 458e8d8bef9SDimitry Andric 459e8d8bef9SDimitry Andric /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32. 460e8d8bef9SDimitry Andric unsigned getSetRegWaitStates() const { 461e8d8bef9SDimitry Andric return getGeneration() <= SEA_ISLANDS ? 
1 : 2; 462e8d8bef9SDimitry Andric } 463e8d8bef9SDimitry Andric 464e8d8bef9SDimitry Andric bool dumpCode() const { 465e8d8bef9SDimitry Andric return DumpCode; 466e8d8bef9SDimitry Andric } 467e8d8bef9SDimitry Andric 468e8d8bef9SDimitry Andric /// Return the amount of LDS that can be used that will not restrict the 469e8d8bef9SDimitry Andric /// occupancy lower than WaveCount. 470e8d8bef9SDimitry Andric unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, 471e8d8bef9SDimitry Andric const Function &) const; 472e8d8bef9SDimitry Andric 473e8d8bef9SDimitry Andric bool supportsMinMaxDenormModes() const { 474e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 475e8d8bef9SDimitry Andric } 476e8d8bef9SDimitry Andric 477e8d8bef9SDimitry Andric /// \returns If target supports S_DENORM_MODE. 478e8d8bef9SDimitry Andric bool hasDenormModeInst() const { 479e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX10; 480e8d8bef9SDimitry Andric } 481e8d8bef9SDimitry Andric 482e8d8bef9SDimitry Andric bool useFlatForGlobal() const { 483e8d8bef9SDimitry Andric return FlatForGlobal; 484e8d8bef9SDimitry Andric } 485e8d8bef9SDimitry Andric 486e8d8bef9SDimitry Andric /// \returns If target supports ds_read/write_b128 and user enables generation 487e8d8bef9SDimitry Andric /// of ds_read/write_b128. 488e8d8bef9SDimitry Andric bool useDS128() const { 489e8d8bef9SDimitry Andric return CIInsts && EnableDS128; 490e8d8bef9SDimitry Andric } 491e8d8bef9SDimitry Andric 492e8d8bef9SDimitry Andric /// \return If target supports ds_read/write_b96/128. 
493e8d8bef9SDimitry Andric bool hasDS96AndDS128() const { 494e8d8bef9SDimitry Andric return CIInsts; 495e8d8bef9SDimitry Andric } 496e8d8bef9SDimitry Andric 497e8d8bef9SDimitry Andric /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64 498e8d8bef9SDimitry Andric bool haveRoundOpsF64() const { 499e8d8bef9SDimitry Andric return CIInsts; 500e8d8bef9SDimitry Andric } 501e8d8bef9SDimitry Andric 502e8d8bef9SDimitry Andric /// \returns If MUBUF instructions always perform range checking, even for 503e8d8bef9SDimitry Andric /// buffer resources used for private memory access. 504e8d8bef9SDimitry Andric bool privateMemoryResourceIsRangeChecked() const { 505e8d8bef9SDimitry Andric return getGeneration() < AMDGPUSubtarget::GFX9; 506e8d8bef9SDimitry Andric } 507e8d8bef9SDimitry Andric 508e8d8bef9SDimitry Andric /// \returns If target requires PRT Struct NULL support (zero result registers 509e8d8bef9SDimitry Andric /// for sparse texture support). 510e8d8bef9SDimitry Andric bool usePRTStrictNull() const { 511e8d8bef9SDimitry Andric return EnablePRTStrictNull; 512e8d8bef9SDimitry Andric } 513e8d8bef9SDimitry Andric 514e8d8bef9SDimitry Andric bool hasAutoWaitcntBeforeBarrier() const { 515e8d8bef9SDimitry Andric return AutoWaitcntBeforeBarrier; 516e8d8bef9SDimitry Andric } 517e8d8bef9SDimitry Andric 518*bdd1243dSDimitry Andric /// \returns true if the target supports backing off of s_barrier instructions 519*bdd1243dSDimitry Andric /// when an exception is raised. 
520*bdd1243dSDimitry Andric bool supportsBackOffBarrier() const { 521*bdd1243dSDimitry Andric return BackOffBarrier; 522*bdd1243dSDimitry Andric } 523*bdd1243dSDimitry Andric 524e8d8bef9SDimitry Andric bool hasUnalignedBufferAccess() const { 525e8d8bef9SDimitry Andric return UnalignedBufferAccess; 526e8d8bef9SDimitry Andric } 527e8d8bef9SDimitry Andric 528e8d8bef9SDimitry Andric bool hasUnalignedBufferAccessEnabled() const { 529e8d8bef9SDimitry Andric return UnalignedBufferAccess && UnalignedAccessMode; 530e8d8bef9SDimitry Andric } 531e8d8bef9SDimitry Andric 532e8d8bef9SDimitry Andric bool hasUnalignedDSAccess() const { 533e8d8bef9SDimitry Andric return UnalignedDSAccess; 534e8d8bef9SDimitry Andric } 535e8d8bef9SDimitry Andric 536e8d8bef9SDimitry Andric bool hasUnalignedDSAccessEnabled() const { 537e8d8bef9SDimitry Andric return UnalignedDSAccess && UnalignedAccessMode; 538e8d8bef9SDimitry Andric } 539e8d8bef9SDimitry Andric 540e8d8bef9SDimitry Andric bool hasUnalignedScratchAccess() const { 541e8d8bef9SDimitry Andric return UnalignedScratchAccess; 542e8d8bef9SDimitry Andric } 543e8d8bef9SDimitry Andric 544e8d8bef9SDimitry Andric bool hasUnalignedAccessMode() const { 545e8d8bef9SDimitry Andric return UnalignedAccessMode; 546e8d8bef9SDimitry Andric } 547e8d8bef9SDimitry Andric 548e8d8bef9SDimitry Andric bool hasApertureRegs() const { 549e8d8bef9SDimitry Andric return HasApertureRegs; 550e8d8bef9SDimitry Andric } 551e8d8bef9SDimitry Andric 552e8d8bef9SDimitry Andric bool isTrapHandlerEnabled() const { 553e8d8bef9SDimitry Andric return TrapHandler; 554e8d8bef9SDimitry Andric } 555e8d8bef9SDimitry Andric 556e8d8bef9SDimitry Andric bool isXNACKEnabled() const { 557e8d8bef9SDimitry Andric return TargetID.isXnackOnOrAny(); 558e8d8bef9SDimitry Andric } 559e8d8bef9SDimitry Andric 560fe6060f1SDimitry Andric bool isTgSplitEnabled() const { 561fe6060f1SDimitry Andric return EnableTgSplit; 562fe6060f1SDimitry Andric } 563fe6060f1SDimitry Andric 564e8d8bef9SDimitry Andric bool 
isCuModeEnabled() const { 565e8d8bef9SDimitry Andric return EnableCuMode; 566e8d8bef9SDimitry Andric } 567e8d8bef9SDimitry Andric 568e8d8bef9SDimitry Andric bool hasFlatAddressSpace() const { 569e8d8bef9SDimitry Andric return FlatAddressSpace; 570e8d8bef9SDimitry Andric } 571e8d8bef9SDimitry Andric 572e8d8bef9SDimitry Andric bool hasFlatScrRegister() const { 573e8d8bef9SDimitry Andric return hasFlatAddressSpace(); 574e8d8bef9SDimitry Andric } 575e8d8bef9SDimitry Andric 576e8d8bef9SDimitry Andric bool hasFlatInstOffsets() const { 577e8d8bef9SDimitry Andric return FlatInstOffsets; 578e8d8bef9SDimitry Andric } 579e8d8bef9SDimitry Andric 580e8d8bef9SDimitry Andric bool hasFlatGlobalInsts() const { 581e8d8bef9SDimitry Andric return FlatGlobalInsts; 582e8d8bef9SDimitry Andric } 583e8d8bef9SDimitry Andric 584e8d8bef9SDimitry Andric bool hasFlatScratchInsts() const { 585e8d8bef9SDimitry Andric return FlatScratchInsts; 586e8d8bef9SDimitry Andric } 587e8d8bef9SDimitry Andric 588e8d8bef9SDimitry Andric // Check if target supports ST addressing mode with FLAT scratch instructions. 589e8d8bef9SDimitry Andric // The ST addressing mode means no registers are used, either VGPR or SGPR, 590e8d8bef9SDimitry Andric // but only immediate offset is swizzled and added to the FLAT scratch base. 
591e8d8bef9SDimitry Andric bool hasFlatScratchSTMode() const { 59281ad6265SDimitry Andric return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts()); 593e8d8bef9SDimitry Andric } 594e8d8bef9SDimitry Andric 59581ad6265SDimitry Andric bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; } 59681ad6265SDimitry Andric 597e8d8bef9SDimitry Andric bool hasScalarFlatScratchInsts() const { 598e8d8bef9SDimitry Andric return ScalarFlatScratchInsts; 599e8d8bef9SDimitry Andric } 600e8d8bef9SDimitry Andric 60181ad6265SDimitry Andric bool enableFlatScratch() const { 60281ad6265SDimitry Andric return flatScratchIsArchitected() || 60381ad6265SDimitry Andric (EnableFlatScratch && hasFlatScratchInsts()); 60481ad6265SDimitry Andric } 60581ad6265SDimitry Andric 606e8d8bef9SDimitry Andric bool hasGlobalAddTidInsts() const { 607e8d8bef9SDimitry Andric return GFX10_BEncoding; 608e8d8bef9SDimitry Andric } 609e8d8bef9SDimitry Andric 610e8d8bef9SDimitry Andric bool hasAtomicCSub() const { 611e8d8bef9SDimitry Andric return GFX10_BEncoding; 612e8d8bef9SDimitry Andric } 613e8d8bef9SDimitry Andric 614e8d8bef9SDimitry Andric bool hasMultiDwordFlatScratchAddressing() const { 615e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 616e8d8bef9SDimitry Andric } 617e8d8bef9SDimitry Andric 618e8d8bef9SDimitry Andric bool hasFlatSegmentOffsetBug() const { 619e8d8bef9SDimitry Andric return HasFlatSegmentOffsetBug; 620e8d8bef9SDimitry Andric } 621e8d8bef9SDimitry Andric 622e8d8bef9SDimitry Andric bool hasFlatLgkmVMemCountInOrder() const { 623e8d8bef9SDimitry Andric return getGeneration() > GFX9; 624e8d8bef9SDimitry Andric } 625e8d8bef9SDimitry Andric 626e8d8bef9SDimitry Andric bool hasD16LoadStore() const { 627e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 628e8d8bef9SDimitry Andric } 629e8d8bef9SDimitry Andric 630e8d8bef9SDimitry Andric bool d16PreservesUnusedBits() const { 631e8d8bef9SDimitry Andric return hasD16LoadStore() && !TargetID.isSramEccOnOrAny(); 
632e8d8bef9SDimitry Andric } 633e8d8bef9SDimitry Andric 634e8d8bef9SDimitry Andric bool hasD16Images() const { 635e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 636e8d8bef9SDimitry Andric } 637e8d8bef9SDimitry Andric 638e8d8bef9SDimitry Andric /// Return if most LDS instructions have an m0 use that require m0 to be 639349cc55cSDimitry Andric /// initialized. 640e8d8bef9SDimitry Andric bool ldsRequiresM0Init() const { 641e8d8bef9SDimitry Andric return getGeneration() < GFX9; 642e8d8bef9SDimitry Andric } 643e8d8bef9SDimitry Andric 644e8d8bef9SDimitry Andric // True if the hardware rewinds and replays GWS operations if a wave is 645e8d8bef9SDimitry Andric // preempted. 646e8d8bef9SDimitry Andric // 647e8d8bef9SDimitry Andric // If this is false, a GWS operation requires testing if a nack set the 648e8d8bef9SDimitry Andric // MEM_VIOL bit, and repeating if so. 649e8d8bef9SDimitry Andric bool hasGWSAutoReplay() const { 650e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 651e8d8bef9SDimitry Andric } 652e8d8bef9SDimitry Andric 653e8d8bef9SDimitry Andric /// \returns if target has ds_gws_sema_release_all instruction. 654e8d8bef9SDimitry Andric bool hasGWSSemaReleaseAll() const { 655e8d8bef9SDimitry Andric return CIInsts; 656e8d8bef9SDimitry Andric } 657e8d8bef9SDimitry Andric 658e8d8bef9SDimitry Andric /// \returns true if the target has integer add/sub instructions that do not 659e8d8bef9SDimitry Andric /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32, 660e8d8bef9SDimitry Andric /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier 661e8d8bef9SDimitry Andric /// for saturation. 
662e8d8bef9SDimitry Andric bool hasAddNoCarry() const { 663e8d8bef9SDimitry Andric return AddNoCarryInsts; 664e8d8bef9SDimitry Andric } 665e8d8bef9SDimitry Andric 666e8d8bef9SDimitry Andric bool hasUnpackedD16VMem() const { 667e8d8bef9SDimitry Andric return HasUnpackedD16VMem; 668e8d8bef9SDimitry Andric } 669e8d8bef9SDimitry Andric 670e8d8bef9SDimitry Andric // Covers VS/PS/CS graphics shaders 671e8d8bef9SDimitry Andric bool isMesaGfxShader(const Function &F) const { 672e8d8bef9SDimitry Andric return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv()); 673e8d8bef9SDimitry Andric } 674e8d8bef9SDimitry Andric 675e8d8bef9SDimitry Andric bool hasMad64_32() const { 676e8d8bef9SDimitry Andric return getGeneration() >= SEA_ISLANDS; 677e8d8bef9SDimitry Andric } 678e8d8bef9SDimitry Andric 679e8d8bef9SDimitry Andric bool hasSDWAOmod() const { 680e8d8bef9SDimitry Andric return HasSDWAOmod; 681e8d8bef9SDimitry Andric } 682e8d8bef9SDimitry Andric 683e8d8bef9SDimitry Andric bool hasSDWAScalar() const { 684e8d8bef9SDimitry Andric return HasSDWAScalar; 685e8d8bef9SDimitry Andric } 686e8d8bef9SDimitry Andric 687e8d8bef9SDimitry Andric bool hasSDWASdst() const { 688e8d8bef9SDimitry Andric return HasSDWASdst; 689e8d8bef9SDimitry Andric } 690e8d8bef9SDimitry Andric 691e8d8bef9SDimitry Andric bool hasSDWAMac() const { 692e8d8bef9SDimitry Andric return HasSDWAMac; 693e8d8bef9SDimitry Andric } 694e8d8bef9SDimitry Andric 695e8d8bef9SDimitry Andric bool hasSDWAOutModsVOPC() const { 696e8d8bef9SDimitry Andric return HasSDWAOutModsVOPC; 697e8d8bef9SDimitry Andric } 698e8d8bef9SDimitry Andric 699e8d8bef9SDimitry Andric bool hasDLInsts() const { 700e8d8bef9SDimitry Andric return HasDLInsts; 701e8d8bef9SDimitry Andric } 702e8d8bef9SDimitry Andric 703*bdd1243dSDimitry Andric bool hasFmacF64Inst() const { return HasFmacF64Inst; } 704*bdd1243dSDimitry Andric 705e8d8bef9SDimitry Andric bool hasDot1Insts() const { 706e8d8bef9SDimitry Andric return HasDot1Insts; 707e8d8bef9SDimitry Andric } 
708e8d8bef9SDimitry Andric 709e8d8bef9SDimitry Andric bool hasDot2Insts() const { 710e8d8bef9SDimitry Andric return HasDot2Insts; 711e8d8bef9SDimitry Andric } 712e8d8bef9SDimitry Andric 713e8d8bef9SDimitry Andric bool hasDot3Insts() const { 714e8d8bef9SDimitry Andric return HasDot3Insts; 715e8d8bef9SDimitry Andric } 716e8d8bef9SDimitry Andric 717e8d8bef9SDimitry Andric bool hasDot4Insts() const { 718e8d8bef9SDimitry Andric return HasDot4Insts; 719e8d8bef9SDimitry Andric } 720e8d8bef9SDimitry Andric 721e8d8bef9SDimitry Andric bool hasDot5Insts() const { 722e8d8bef9SDimitry Andric return HasDot5Insts; 723e8d8bef9SDimitry Andric } 724e8d8bef9SDimitry Andric 725e8d8bef9SDimitry Andric bool hasDot6Insts() const { 726e8d8bef9SDimitry Andric return HasDot6Insts; 727e8d8bef9SDimitry Andric } 728e8d8bef9SDimitry Andric 729fe6060f1SDimitry Andric bool hasDot7Insts() const { 730fe6060f1SDimitry Andric return HasDot7Insts; 731fe6060f1SDimitry Andric } 732fe6060f1SDimitry Andric 73381ad6265SDimitry Andric bool hasDot8Insts() const { 73481ad6265SDimitry Andric return HasDot8Insts; 73581ad6265SDimitry Andric } 73681ad6265SDimitry Andric 737*bdd1243dSDimitry Andric bool hasDot9Insts() const { 738*bdd1243dSDimitry Andric return HasDot9Insts; 739*bdd1243dSDimitry Andric } 740*bdd1243dSDimitry Andric 741e8d8bef9SDimitry Andric bool hasMAIInsts() const { 742e8d8bef9SDimitry Andric return HasMAIInsts; 743e8d8bef9SDimitry Andric } 744e8d8bef9SDimitry Andric 745fcaf7f86SDimitry Andric bool hasFP8Insts() const { 746fcaf7f86SDimitry Andric return HasFP8Insts; 747fcaf7f86SDimitry Andric } 748fcaf7f86SDimitry Andric 749e8d8bef9SDimitry Andric bool hasPkFmacF16Inst() const { 750e8d8bef9SDimitry Andric return HasPkFmacF16Inst; 751e8d8bef9SDimitry Andric } 752e8d8bef9SDimitry Andric 753e8d8bef9SDimitry Andric bool hasAtomicFaddInsts() const { 75481ad6265SDimitry Andric return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts; 755e8d8bef9SDimitry Andric } 756e8d8bef9SDimitry Andric 
75781ad6265SDimitry Andric bool hasAtomicFaddRtnInsts() const { return HasAtomicFaddRtnInsts; } 75881ad6265SDimitry Andric 75981ad6265SDimitry Andric bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; } 76081ad6265SDimitry Andric 76181ad6265SDimitry Andric bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; } 76281ad6265SDimitry Andric 763*bdd1243dSDimitry Andric bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } 764*bdd1243dSDimitry Andric 765e8d8bef9SDimitry Andric bool hasNoSdstCMPX() const { 766e8d8bef9SDimitry Andric return HasNoSdstCMPX; 767e8d8bef9SDimitry Andric } 768e8d8bef9SDimitry Andric 769e8d8bef9SDimitry Andric bool hasVscnt() const { 770e8d8bef9SDimitry Andric return HasVscnt; 771e8d8bef9SDimitry Andric } 772e8d8bef9SDimitry Andric 773e8d8bef9SDimitry Andric bool hasGetWaveIdInst() const { 774e8d8bef9SDimitry Andric return HasGetWaveIdInst; 775e8d8bef9SDimitry Andric } 776e8d8bef9SDimitry Andric 777e8d8bef9SDimitry Andric bool hasSMemTimeInst() const { 778e8d8bef9SDimitry Andric return HasSMemTimeInst; 779e8d8bef9SDimitry Andric } 780e8d8bef9SDimitry Andric 781fe6060f1SDimitry Andric bool hasShaderCyclesRegister() const { 782fe6060f1SDimitry Andric return HasShaderCyclesRegister; 783fe6060f1SDimitry Andric } 784fe6060f1SDimitry Andric 785e8d8bef9SDimitry Andric bool hasVOP3Literal() const { 786e8d8bef9SDimitry Andric return HasVOP3Literal; 787e8d8bef9SDimitry Andric } 788e8d8bef9SDimitry Andric 789e8d8bef9SDimitry Andric bool hasNoDataDepHazard() const { 790e8d8bef9SDimitry Andric return HasNoDataDepHazard; 791e8d8bef9SDimitry Andric } 792e8d8bef9SDimitry Andric 793e8d8bef9SDimitry Andric bool vmemWriteNeedsExpWaitcnt() const { 794e8d8bef9SDimitry Andric return getGeneration() < SEA_ISLANDS; 795e8d8bef9SDimitry Andric } 796e8d8bef9SDimitry Andric 797*bdd1243dSDimitry Andric bool hasInstPrefetch() const { return getGeneration() >= GFX10; } 798*bdd1243dSDimitry Andric 
799e8d8bef9SDimitry Andric // Scratch is allocated in 256 dword per wave blocks for the entire 800349cc55cSDimitry Andric // wavefront. When viewed from the perspective of an arbitrary workitem, this 801e8d8bef9SDimitry Andric // is 4-byte aligned. 802e8d8bef9SDimitry Andric // 803e8d8bef9SDimitry Andric // Only 4-byte alignment is really needed to access anything. Transformations 804e8d8bef9SDimitry Andric // on the pointer value itself may rely on the alignment / known low bits of 805e8d8bef9SDimitry Andric // the pointer. Set this to something above the minimum to avoid needing 806e8d8bef9SDimitry Andric // dynamic realignment in common cases. 807e8d8bef9SDimitry Andric Align getStackAlignment() const { return Align(16); } 808e8d8bef9SDimitry Andric 809e8d8bef9SDimitry Andric bool enableMachineScheduler() const override { 810e8d8bef9SDimitry Andric return true; 811e8d8bef9SDimitry Andric } 812e8d8bef9SDimitry Andric 813e8d8bef9SDimitry Andric bool useAA() const override; 814e8d8bef9SDimitry Andric 815e8d8bef9SDimitry Andric bool enableSubRegLiveness() const override { 816e8d8bef9SDimitry Andric return true; 817e8d8bef9SDimitry Andric } 818e8d8bef9SDimitry Andric 819e8d8bef9SDimitry Andric void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } 820e8d8bef9SDimitry Andric bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } 821e8d8bef9SDimitry Andric 822e8d8bef9SDimitry Andric // static wrappers 823e8d8bef9SDimitry Andric static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); 824e8d8bef9SDimitry Andric 825e8d8bef9SDimitry Andric // XXX - Why is this here if it isn't in the default pass set? 
826e8d8bef9SDimitry Andric bool enableEarlyIfConversion() const override { 827e8d8bef9SDimitry Andric return true; 828e8d8bef9SDimitry Andric } 829e8d8bef9SDimitry Andric 830e8d8bef9SDimitry Andric void overrideSchedPolicy(MachineSchedPolicy &Policy, 831e8d8bef9SDimitry Andric unsigned NumRegionInstrs) const override; 832e8d8bef9SDimitry Andric 833e8d8bef9SDimitry Andric unsigned getMaxNumUserSGPRs() const { 834e8d8bef9SDimitry Andric return 16; 835e8d8bef9SDimitry Andric } 836e8d8bef9SDimitry Andric 837e8d8bef9SDimitry Andric bool hasSMemRealTime() const { 838e8d8bef9SDimitry Andric return HasSMemRealTime; 839e8d8bef9SDimitry Andric } 840e8d8bef9SDimitry Andric 841e8d8bef9SDimitry Andric bool hasMovrel() const { 842e8d8bef9SDimitry Andric return HasMovrel; 843e8d8bef9SDimitry Andric } 844e8d8bef9SDimitry Andric 845e8d8bef9SDimitry Andric bool hasVGPRIndexMode() const { 846e8d8bef9SDimitry Andric return HasVGPRIndexMode; 847e8d8bef9SDimitry Andric } 848e8d8bef9SDimitry Andric 849e8d8bef9SDimitry Andric bool useVGPRIndexMode() const; 850e8d8bef9SDimitry Andric 851e8d8bef9SDimitry Andric bool hasScalarCompareEq64() const { 852e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 853e8d8bef9SDimitry Andric } 854e8d8bef9SDimitry Andric 855e8d8bef9SDimitry Andric bool hasScalarStores() const { 856e8d8bef9SDimitry Andric return HasScalarStores; 857e8d8bef9SDimitry Andric } 858e8d8bef9SDimitry Andric 859e8d8bef9SDimitry Andric bool hasScalarAtomics() const { 860e8d8bef9SDimitry Andric return HasScalarAtomics; 861e8d8bef9SDimitry Andric } 862e8d8bef9SDimitry Andric 863349cc55cSDimitry Andric bool hasLDSFPAtomicAdd() const { return GFX8Insts; } 864e8d8bef9SDimitry Andric 865fe6060f1SDimitry Andric /// \returns true if the subtarget has the v_permlanex16_b32 instruction. 
866fe6060f1SDimitry Andric bool hasPermLaneX16() const { return getGeneration() >= GFX10; } 867fe6060f1SDimitry Andric 86881ad6265SDimitry Andric /// \returns true if the subtarget has the v_permlane64_b32 instruction. 86981ad6265SDimitry Andric bool hasPermLane64() const { return getGeneration() >= GFX11; } 87081ad6265SDimitry Andric 871e8d8bef9SDimitry Andric bool hasDPP() const { 872e8d8bef9SDimitry Andric return HasDPP; 873e8d8bef9SDimitry Andric } 874e8d8bef9SDimitry Andric 875e8d8bef9SDimitry Andric bool hasDPPBroadcasts() const { 876e8d8bef9SDimitry Andric return HasDPP && getGeneration() < GFX10; 877e8d8bef9SDimitry Andric } 878e8d8bef9SDimitry Andric 879e8d8bef9SDimitry Andric bool hasDPPWavefrontShifts() const { 880e8d8bef9SDimitry Andric return HasDPP && getGeneration() < GFX10; 881e8d8bef9SDimitry Andric } 882e8d8bef9SDimitry Andric 883e8d8bef9SDimitry Andric bool hasDPP8() const { 884e8d8bef9SDimitry Andric return HasDPP8; 885e8d8bef9SDimitry Andric } 886e8d8bef9SDimitry Andric 887fe6060f1SDimitry Andric bool has64BitDPP() const { 888fe6060f1SDimitry Andric return Has64BitDPP; 889fe6060f1SDimitry Andric } 890fe6060f1SDimitry Andric 891fe6060f1SDimitry Andric bool hasPackedFP32Ops() const { 892fe6060f1SDimitry Andric return HasPackedFP32Ops; 893fe6060f1SDimitry Andric } 894fe6060f1SDimitry Andric 895fe6060f1SDimitry Andric bool hasFmaakFmamkF32Insts() const { 89681ad6265SDimitry Andric return getGeneration() >= GFX10 || hasGFX940Insts(); 89781ad6265SDimitry Andric } 89881ad6265SDimitry Andric 89981ad6265SDimitry Andric bool hasImageInsts() const { 90081ad6265SDimitry Andric return HasImageInsts; 901fe6060f1SDimitry Andric } 902fe6060f1SDimitry Andric 903fe6060f1SDimitry Andric bool hasExtendedImageInsts() const { 904fe6060f1SDimitry Andric return HasExtendedImageInsts; 905fe6060f1SDimitry Andric } 906fe6060f1SDimitry Andric 907e8d8bef9SDimitry Andric bool hasR128A16() const { 908e8d8bef9SDimitry Andric return HasR128A16; 909e8d8bef9SDimitry Andric } 
910e8d8bef9SDimitry Andric 911*bdd1243dSDimitry Andric bool hasA16() const { return HasA16; } 912e8d8bef9SDimitry Andric 913e8d8bef9SDimitry Andric bool hasG16() const { return HasG16; } 914e8d8bef9SDimitry Andric 915e8d8bef9SDimitry Andric bool hasOffset3fBug() const { 916e8d8bef9SDimitry Andric return HasOffset3fBug; 917e8d8bef9SDimitry Andric } 918e8d8bef9SDimitry Andric 919e8d8bef9SDimitry Andric bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; } 920e8d8bef9SDimitry Andric 921e8d8bef9SDimitry Andric bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; } 922e8d8bef9SDimitry Andric 923*bdd1243dSDimitry Andric bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; } 924*bdd1243dSDimitry Andric 925e8d8bef9SDimitry Andric bool hasNSAEncoding() const { return HasNSAEncoding; } 926e8d8bef9SDimitry Andric 927fe6060f1SDimitry Andric unsigned getNSAMaxSize() const { return NSAMaxSize; } 928fe6060f1SDimitry Andric 929fe6060f1SDimitry Andric bool hasGFX10_AEncoding() const { 930fe6060f1SDimitry Andric return GFX10_AEncoding; 931fe6060f1SDimitry Andric } 932fe6060f1SDimitry Andric 933e8d8bef9SDimitry Andric bool hasGFX10_BEncoding() const { 934e8d8bef9SDimitry Andric return GFX10_BEncoding; 935e8d8bef9SDimitry Andric } 936e8d8bef9SDimitry Andric 937e8d8bef9SDimitry Andric bool hasGFX10_3Insts() const { 938e8d8bef9SDimitry Andric return GFX10_3Insts; 939e8d8bef9SDimitry Andric } 940e8d8bef9SDimitry Andric 941e8d8bef9SDimitry Andric bool hasMadF16() const; 942e8d8bef9SDimitry Andric 94381ad6265SDimitry Andric bool hasMovB64() const { return GFX940Insts; } 94481ad6265SDimitry Andric 94581ad6265SDimitry Andric bool hasLshlAddB64() const { return GFX940Insts; } 94681ad6265SDimitry Andric 947e8d8bef9SDimitry Andric bool enableSIScheduler() const { 948e8d8bef9SDimitry Andric return EnableSIScheduler; 949e8d8bef9SDimitry Andric } 950e8d8bef9SDimitry Andric 951e8d8bef9SDimitry Andric bool loadStoreOptEnabled() const { 952e8d8bef9SDimitry Andric return 
EnableLoadStoreOpt; 953e8d8bef9SDimitry Andric } 954e8d8bef9SDimitry Andric 955e8d8bef9SDimitry Andric bool hasSGPRInitBug() const { 956e8d8bef9SDimitry Andric return SGPRInitBug; 957e8d8bef9SDimitry Andric } 958e8d8bef9SDimitry Andric 95981ad6265SDimitry Andric bool hasUserSGPRInit16Bug() const { 960fcaf7f86SDimitry Andric return UserSGPRInit16Bug && isWave32(); 96181ad6265SDimitry Andric } 96281ad6265SDimitry Andric 963fe6060f1SDimitry Andric bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; } 964fe6060f1SDimitry Andric 965fe6060f1SDimitry Andric bool hasNegativeUnalignedScratchOffsetBug() const { 966fe6060f1SDimitry Andric return NegativeUnalignedScratchOffsetBug; 967fe6060f1SDimitry Andric } 968fe6060f1SDimitry Andric 969e8d8bef9SDimitry Andric bool hasMFMAInlineLiteralBug() const { 970e8d8bef9SDimitry Andric return HasMFMAInlineLiteralBug; 971e8d8bef9SDimitry Andric } 972e8d8bef9SDimitry Andric 973e8d8bef9SDimitry Andric bool has12DWordStoreHazard() const { 974e8d8bef9SDimitry Andric return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; 975e8d8bef9SDimitry Andric } 976e8d8bef9SDimitry Andric 977e8d8bef9SDimitry Andric // \returns true if the subtarget supports DWORDX3 load/store instructions. 
978e8d8bef9SDimitry Andric bool hasDwordx3LoadStores() const { 979e8d8bef9SDimitry Andric return CIInsts; 980e8d8bef9SDimitry Andric } 981e8d8bef9SDimitry Andric 982e8d8bef9SDimitry Andric bool hasReadM0MovRelInterpHazard() const { 983e8d8bef9SDimitry Andric return getGeneration() == AMDGPUSubtarget::GFX9; 984e8d8bef9SDimitry Andric } 985e8d8bef9SDimitry Andric 986e8d8bef9SDimitry Andric bool hasReadM0SendMsgHazard() const { 987e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && 988e8d8bef9SDimitry Andric getGeneration() <= AMDGPUSubtarget::GFX9; 989e8d8bef9SDimitry Andric } 990e8d8bef9SDimitry Andric 99181ad6265SDimitry Andric bool hasReadM0LdsDmaHazard() const { 99281ad6265SDimitry Andric return getGeneration() == AMDGPUSubtarget::GFX9; 99381ad6265SDimitry Andric } 99481ad6265SDimitry Andric 99581ad6265SDimitry Andric bool hasReadM0LdsDirectHazard() const { 99681ad6265SDimitry Andric return getGeneration() == AMDGPUSubtarget::GFX9; 99781ad6265SDimitry Andric } 99881ad6265SDimitry Andric 999e8d8bef9SDimitry Andric bool hasVcmpxPermlaneHazard() const { 1000e8d8bef9SDimitry Andric return HasVcmpxPermlaneHazard; 1001e8d8bef9SDimitry Andric } 1002e8d8bef9SDimitry Andric 1003e8d8bef9SDimitry Andric bool hasVMEMtoScalarWriteHazard() const { 1004e8d8bef9SDimitry Andric return HasVMEMtoScalarWriteHazard; 1005e8d8bef9SDimitry Andric } 1006e8d8bef9SDimitry Andric 1007e8d8bef9SDimitry Andric bool hasSMEMtoVectorWriteHazard() const { 1008e8d8bef9SDimitry Andric return HasSMEMtoVectorWriteHazard; 1009e8d8bef9SDimitry Andric } 1010e8d8bef9SDimitry Andric 1011e8d8bef9SDimitry Andric bool hasLDSMisalignedBug() const { 1012e8d8bef9SDimitry Andric return LDSMisalignedBug && !EnableCuMode; 1013e8d8bef9SDimitry Andric } 1014e8d8bef9SDimitry Andric 1015e8d8bef9SDimitry Andric bool hasInstFwdPrefetchBug() const { 1016e8d8bef9SDimitry Andric return HasInstFwdPrefetchBug; 1017e8d8bef9SDimitry Andric } 1018e8d8bef9SDimitry Andric 1019e8d8bef9SDimitry 
Andric bool hasVcmpxExecWARHazard() const { 1020e8d8bef9SDimitry Andric return HasVcmpxExecWARHazard; 1021e8d8bef9SDimitry Andric } 1022e8d8bef9SDimitry Andric 1023e8d8bef9SDimitry Andric bool hasLdsBranchVmemWARHazard() const { 1024e8d8bef9SDimitry Andric return HasLdsBranchVmemWARHazard; 1025e8d8bef9SDimitry Andric } 1026e8d8bef9SDimitry Andric 1027*bdd1243dSDimitry Andric // Shift amount of a 64 bit shift cannot be a highest allocated register 1028*bdd1243dSDimitry Andric // if also at the end of the allocation block. 1029*bdd1243dSDimitry Andric bool hasShift64HighRegBug() const { 1030*bdd1243dSDimitry Andric return GFX90AInsts && !GFX940Insts; 1031*bdd1243dSDimitry Andric } 1032*bdd1243dSDimitry Andric 103381ad6265SDimitry Andric // Has one cycle hazard on transcendental instruction feeding a 103481ad6265SDimitry Andric // non transcendental VALU. 103581ad6265SDimitry Andric bool hasTransForwardingHazard() const { return GFX940Insts; } 103681ad6265SDimitry Andric 103781ad6265SDimitry Andric // Has one cycle hazard on a VALU instruction partially writing dst with 103881ad6265SDimitry Andric // a shift of result bits feeding another VALU instruction. 103981ad6265SDimitry Andric bool hasDstSelForwardingHazard() const { return GFX940Insts; } 104081ad6265SDimitry Andric 104181ad6265SDimitry Andric // Cannot use op_sel with v_dot instructions. 104281ad6265SDimitry Andric bool hasDOTOpSelHazard() const { return GFX940Insts; } 104381ad6265SDimitry Andric 104481ad6265SDimitry Andric // Does not have HW interlocs for VALU writing and then reading SGPRs. 
104581ad6265SDimitry Andric bool hasVDecCoExecHazard() const { 104681ad6265SDimitry Andric return GFX940Insts; 104781ad6265SDimitry Andric } 104881ad6265SDimitry Andric 1049e8d8bef9SDimitry Andric bool hasNSAtoVMEMBug() const { 1050e8d8bef9SDimitry Andric return HasNSAtoVMEMBug; 1051e8d8bef9SDimitry Andric } 1052e8d8bef9SDimitry Andric 1053fe6060f1SDimitry Andric bool hasNSAClauseBug() const { return HasNSAClauseBug; } 1054fe6060f1SDimitry Andric 1055e8d8bef9SDimitry Andric bool hasHardClauses() const { return getGeneration() >= GFX10; } 1056e8d8bef9SDimitry Andric 1057fe6060f1SDimitry Andric bool hasGFX90AInsts() const { return GFX90AInsts; } 1058fe6060f1SDimitry Andric 1059*bdd1243dSDimitry Andric bool hasFPAtomicToDenormModeHazard() const { 1060*bdd1243dSDimitry Andric return getGeneration() == GFX10; 1061*bdd1243dSDimitry Andric } 1062*bdd1243dSDimitry Andric 106381ad6265SDimitry Andric bool hasVOP3DPP() const { return getGeneration() >= GFX11; } 106481ad6265SDimitry Andric 106581ad6265SDimitry Andric bool hasLdsDirect() const { return getGeneration() >= GFX11; } 106681ad6265SDimitry Andric 106781ad6265SDimitry Andric bool hasVALUPartialForwardingHazard() const { 106881ad6265SDimitry Andric return getGeneration() >= GFX11; 106981ad6265SDimitry Andric } 107081ad6265SDimitry Andric 1071*bdd1243dSDimitry Andric bool hasVALUTransUseHazard() const { return HasVALUTransUseHazard; } 1072*bdd1243dSDimitry Andric 1073*bdd1243dSDimitry Andric bool hasVALUMaskWriteHazard() const { return getGeneration() >= GFX11; } 107481ad6265SDimitry Andric 1075fe6060f1SDimitry Andric /// Return if operations acting on VGPR tuples require even alignment. 1076fe6060f1SDimitry Andric bool needsAlignedVGPRs() const { return GFX90AInsts; } 1077fe6060f1SDimitry Andric 107881ad6265SDimitry Andric /// Return true if the target has the S_PACK_HL_B32_B16 instruction. 
107981ad6265SDimitry Andric bool hasSPackHL() const { return GFX11Insts; } 108081ad6265SDimitry Andric 108181ad6265SDimitry Andric /// Return true if the target's EXP instruction has the COMPR flag, which 108281ad6265SDimitry Andric /// affects the meaning of the EN (enable) bits. 108381ad6265SDimitry Andric bool hasCompressedExport() const { return !GFX11Insts; } 108481ad6265SDimitry Andric 108581ad6265SDimitry Andric /// Return true if the target's EXP instruction supports the NULL export 108681ad6265SDimitry Andric /// target. 108781ad6265SDimitry Andric bool hasNullExportTarget() const { return !GFX11Insts; } 108881ad6265SDimitry Andric 1089*bdd1243dSDimitry Andric bool hasGFX11FullVGPRs() const { return HasGFX11FullVGPRs; } 1090*bdd1243dSDimitry Andric 109181ad6265SDimitry Andric bool hasVOPDInsts() const { return HasVOPDInsts; } 109281ad6265SDimitry Andric 109381ad6265SDimitry Andric bool hasFlatScratchSVSSwizzleBug() const { return getGeneration() == GFX11; } 109481ad6265SDimitry Andric 109581ad6265SDimitry Andric /// Return true if the target has the S_DELAY_ALU instruction. 109681ad6265SDimitry Andric bool hasDelayAlu() const { return GFX11Insts; } 109781ad6265SDimitry Andric 1098fe6060f1SDimitry Andric bool hasPackedTID() const { return HasPackedTID; } 1099fe6060f1SDimitry Andric 110081ad6265SDimitry Andric // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that 110181ad6265SDimitry Andric // hasGFX90AInsts is also true. 
110281ad6265SDimitry Andric bool hasGFX940Insts() const { return GFX940Insts; } 110381ad6265SDimitry Andric 1104e8d8bef9SDimitry Andric /// Return the maximum number of waves per SIMD for kernels using \p SGPRs 1105e8d8bef9SDimitry Andric /// SGPRs 1106e8d8bef9SDimitry Andric unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; 1107e8d8bef9SDimitry Andric 1108e8d8bef9SDimitry Andric /// Return the maximum number of waves per SIMD for kernels using \p VGPRs 1109e8d8bef9SDimitry Andric /// VGPRs 1110e8d8bef9SDimitry Andric unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; 1111e8d8bef9SDimitry Andric 1112e8d8bef9SDimitry Andric /// Return occupancy for the given function. Used LDS and a number of 1113e8d8bef9SDimitry Andric /// registers if provided. 1114e8d8bef9SDimitry Andric /// Note, occupancy can be affected by the scratch allocation as well, but 1115e8d8bef9SDimitry Andric /// we do not have enough information to compute it. 1116e8d8bef9SDimitry Andric unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0, 1117e8d8bef9SDimitry Andric unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; 1118e8d8bef9SDimitry Andric 1119e8d8bef9SDimitry Andric /// \returns true if the flat_scratch register should be initialized with the 1120e8d8bef9SDimitry Andric /// pointer to the wave's scratch memory rather than a size and offset. 1121e8d8bef9SDimitry Andric bool flatScratchIsPointer() const { 1122e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 1123e8d8bef9SDimitry Andric } 1124e8d8bef9SDimitry Andric 1125fe6060f1SDimitry Andric /// \returns true if the flat_scratch register is initialized by the HW. 1126fe6060f1SDimitry Andric /// In this case it is readonly. 
1127fe6060f1SDimitry Andric bool flatScratchIsArchitected() const { return HasArchitectedFlatScratch; } 1128fe6060f1SDimitry Andric 1129e8d8bef9SDimitry Andric /// \returns true if the machine has merged shaders in which s0-s7 are 1130e8d8bef9SDimitry Andric /// reserved by the hardware and user SGPRs start at s8 1131e8d8bef9SDimitry Andric bool hasMergedShaders() const { 1132e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 1133e8d8bef9SDimitry Andric } 1134e8d8bef9SDimitry Andric 113581ad6265SDimitry Andric // \returns true if the target supports the pre-NGG legacy geometry path. 113681ad6265SDimitry Andric bool hasLegacyGeometry() const { return getGeneration() < GFX11; } 113781ad6265SDimitry Andric 1138e8d8bef9SDimitry Andric /// \returns SGPR allocation granularity supported by the subtarget. 1139e8d8bef9SDimitry Andric unsigned getSGPRAllocGranule() const { 1140e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getSGPRAllocGranule(this); 1141e8d8bef9SDimitry Andric } 1142e8d8bef9SDimitry Andric 1143e8d8bef9SDimitry Andric /// \returns SGPR encoding granularity supported by the subtarget. 1144e8d8bef9SDimitry Andric unsigned getSGPREncodingGranule() const { 1145e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getSGPREncodingGranule(this); 1146e8d8bef9SDimitry Andric } 1147e8d8bef9SDimitry Andric 1148e8d8bef9SDimitry Andric /// \returns Total number of SGPRs supported by the subtarget. 1149e8d8bef9SDimitry Andric unsigned getTotalNumSGPRs() const { 1150e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getTotalNumSGPRs(this); 1151e8d8bef9SDimitry Andric } 1152e8d8bef9SDimitry Andric 1153e8d8bef9SDimitry Andric /// \returns Addressable number of SGPRs supported by the subtarget. 
1154e8d8bef9SDimitry Andric unsigned getAddressableNumSGPRs() const { 1155e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getAddressableNumSGPRs(this); 1156e8d8bef9SDimitry Andric } 1157e8d8bef9SDimitry Andric 1158e8d8bef9SDimitry Andric /// \returns Minimum number of SGPRs that meets the given number of waves per 1159e8d8bef9SDimitry Andric /// execution unit requirement supported by the subtarget. 1160e8d8bef9SDimitry Andric unsigned getMinNumSGPRs(unsigned WavesPerEU) const { 1161e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU); 1162e8d8bef9SDimitry Andric } 1163e8d8bef9SDimitry Andric 1164e8d8bef9SDimitry Andric /// \returns Maximum number of SGPRs that meets the given number of waves per 1165e8d8bef9SDimitry Andric /// execution unit requirement supported by the subtarget. 1166e8d8bef9SDimitry Andric unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { 1167e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable); 1168e8d8bef9SDimitry Andric } 1169e8d8bef9SDimitry Andric 1170fe6060f1SDimitry Andric /// \returns Reserved number of SGPRs. This is common 1171fe6060f1SDimitry Andric /// utility function called by MachineFunction and 1172fe6060f1SDimitry Andric /// Function variants of getReservedNumSGPRs. 117304eeddc0SDimitry Andric unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const; 1174fe6060f1SDimitry Andric /// \returns Reserved number of SGPRs for given machine function \p MF. 1175e8d8bef9SDimitry Andric unsigned getReservedNumSGPRs(const MachineFunction &MF) const; 1176e8d8bef9SDimitry Andric 1177fe6060f1SDimitry Andric /// \returns Reserved number of SGPRs for given function \p F. 1178fe6060f1SDimitry Andric unsigned getReservedNumSGPRs(const Function &F) const; 1179fe6060f1SDimitry Andric 1180fe6060f1SDimitry Andric /// \returns max num SGPRs. 
This is the common utility 1181fe6060f1SDimitry Andric /// function called by MachineFunction and Function 1182fe6060f1SDimitry Andric /// variants of getMaxNumSGPRs. 1183fe6060f1SDimitry Andric unsigned getBaseMaxNumSGPRs(const Function &F, 1184fe6060f1SDimitry Andric std::pair<unsigned, unsigned> WavesPerEU, 1185fe6060f1SDimitry Andric unsigned PreloadedSGPRs, 1186fe6060f1SDimitry Andric unsigned ReservedNumSGPRs) const; 1187fe6060f1SDimitry Andric 1188e8d8bef9SDimitry Andric /// \returns Maximum number of SGPRs that meets number of waves per execution 1189e8d8bef9SDimitry Andric /// unit requirement for function \p MF, or number of SGPRs explicitly 1190e8d8bef9SDimitry Andric /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF. 1191e8d8bef9SDimitry Andric /// 1192e8d8bef9SDimitry Andric /// \returns Value that meets number of waves per execution unit requirement 1193e8d8bef9SDimitry Andric /// if explicitly requested value cannot be converted to integer, violates 1194e8d8bef9SDimitry Andric /// subtarget's specifications, or does not meet number of waves per execution 1195e8d8bef9SDimitry Andric /// unit requirement. 1196e8d8bef9SDimitry Andric unsigned getMaxNumSGPRs(const MachineFunction &MF) const; 1197e8d8bef9SDimitry Andric 1198fe6060f1SDimitry Andric /// \returns Maximum number of SGPRs that meets number of waves per execution 1199fe6060f1SDimitry Andric /// unit requirement for function \p F, or number of SGPRs explicitly 1200fe6060f1SDimitry Andric /// requested using "amdgpu-num-sgpr" attribute attached to function \p F. 1201fe6060f1SDimitry Andric /// 1202fe6060f1SDimitry Andric /// \returns Value that meets number of waves per execution unit requirement 1203fe6060f1SDimitry Andric /// if explicitly requested value cannot be converted to integer, violates 1204fe6060f1SDimitry Andric /// subtarget's specifications, or does not meet number of waves per execution 1205fe6060f1SDimitry Andric /// unit requirement. 
1206fe6060f1SDimitry Andric unsigned getMaxNumSGPRs(const Function &F) const; 1207fe6060f1SDimitry Andric 1208e8d8bef9SDimitry Andric /// \returns VGPR allocation granularity supported by the subtarget. 1209e8d8bef9SDimitry Andric unsigned getVGPRAllocGranule() const { 1210e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getVGPRAllocGranule(this); 1211e8d8bef9SDimitry Andric } 1212e8d8bef9SDimitry Andric 1213e8d8bef9SDimitry Andric /// \returns VGPR encoding granularity supported by the subtarget. 1214e8d8bef9SDimitry Andric unsigned getVGPREncodingGranule() const { 1215e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getVGPREncodingGranule(this); 1216e8d8bef9SDimitry Andric } 1217e8d8bef9SDimitry Andric 1218e8d8bef9SDimitry Andric /// \returns Total number of VGPRs supported by the subtarget. 1219e8d8bef9SDimitry Andric unsigned getTotalNumVGPRs() const { 1220e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getTotalNumVGPRs(this); 1221e8d8bef9SDimitry Andric } 1222e8d8bef9SDimitry Andric 1223e8d8bef9SDimitry Andric /// \returns Addressable number of VGPRs supported by the subtarget. 1224e8d8bef9SDimitry Andric unsigned getAddressableNumVGPRs() const { 1225e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getAddressableNumVGPRs(this); 1226e8d8bef9SDimitry Andric } 1227e8d8bef9SDimitry Andric 1228*bdd1243dSDimitry Andric /// \returns the minimum number of VGPRs that will prevent achieving more than 1229*bdd1243dSDimitry Andric /// the specified number of waves \p WavesPerEU. 1230e8d8bef9SDimitry Andric unsigned getMinNumVGPRs(unsigned WavesPerEU) const { 1231e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); 1232e8d8bef9SDimitry Andric } 1233e8d8bef9SDimitry Andric 1234*bdd1243dSDimitry Andric /// \returns the maximum number of VGPRs that can be used and still achieved 1235*bdd1243dSDimitry Andric /// at least the specified number of waves \p WavesPerEU. 
1236e8d8bef9SDimitry Andric unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { 1237e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); 1238e8d8bef9SDimitry Andric } 1239e8d8bef9SDimitry Andric 1240fe6060f1SDimitry Andric /// \returns max num VGPRs. This is the common utility function 1241fe6060f1SDimitry Andric /// called by MachineFunction and Function variants of getMaxNumVGPRs. 1242fe6060f1SDimitry Andric unsigned getBaseMaxNumVGPRs(const Function &F, 1243fe6060f1SDimitry Andric std::pair<unsigned, unsigned> WavesPerEU) const; 1244fe6060f1SDimitry Andric /// \returns Maximum number of VGPRs that meets number of waves per execution 1245fe6060f1SDimitry Andric /// unit requirement for function \p F, or number of VGPRs explicitly 1246fe6060f1SDimitry Andric /// requested using "amdgpu-num-vgpr" attribute attached to function \p F. 1247fe6060f1SDimitry Andric /// 1248fe6060f1SDimitry Andric /// \returns Value that meets number of waves per execution unit requirement 1249fe6060f1SDimitry Andric /// if explicitly requested value cannot be converted to integer, violates 1250fe6060f1SDimitry Andric /// subtarget's specifications, or does not meet number of waves per execution 1251fe6060f1SDimitry Andric /// unit requirement. 1252fe6060f1SDimitry Andric unsigned getMaxNumVGPRs(const Function &F) const; 1253fe6060f1SDimitry Andric 125481ad6265SDimitry Andric unsigned getMaxNumAGPRs(const Function &F) const { 125581ad6265SDimitry Andric return getMaxNumVGPRs(F); 125681ad6265SDimitry Andric } 125781ad6265SDimitry Andric 1258e8d8bef9SDimitry Andric /// \returns Maximum number of VGPRs that meets number of waves per execution 1259e8d8bef9SDimitry Andric /// unit requirement for function \p MF, or number of VGPRs explicitly 1260e8d8bef9SDimitry Andric /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. 
1261e8d8bef9SDimitry Andric /// 1262e8d8bef9SDimitry Andric /// \returns Value that meets number of waves per execution unit requirement 1263e8d8bef9SDimitry Andric /// if explicitly requested value cannot be converted to integer, violates 1264e8d8bef9SDimitry Andric /// subtarget's specifications, or does not meet number of waves per execution 1265e8d8bef9SDimitry Andric /// unit requirement. 1266e8d8bef9SDimitry Andric unsigned getMaxNumVGPRs(const MachineFunction &MF) const; 1267e8d8bef9SDimitry Andric 1268e8d8bef9SDimitry Andric void getPostRAMutations( 1269e8d8bef9SDimitry Andric std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) 1270e8d8bef9SDimitry Andric const override; 1271e8d8bef9SDimitry Andric 1272349cc55cSDimitry Andric std::unique_ptr<ScheduleDAGMutation> 1273349cc55cSDimitry Andric createFillMFMAShadowMutation(const TargetInstrInfo *TII) const; 1274349cc55cSDimitry Andric 1275e8d8bef9SDimitry Andric bool isWave32() const { 1276e8d8bef9SDimitry Andric return getWavefrontSize() == 32; 1277e8d8bef9SDimitry Andric } 1278e8d8bef9SDimitry Andric 1279e8d8bef9SDimitry Andric bool isWave64() const { 1280e8d8bef9SDimitry Andric return getWavefrontSize() == 64; 1281e8d8bef9SDimitry Andric } 1282e8d8bef9SDimitry Andric 1283e8d8bef9SDimitry Andric const TargetRegisterClass *getBoolRC() const { 1284e8d8bef9SDimitry Andric return getRegisterInfo()->getBoolRC(); 1285e8d8bef9SDimitry Andric } 1286e8d8bef9SDimitry Andric 1287e8d8bef9SDimitry Andric /// \returns Maximum number of work groups per compute unit supported by the 1288e8d8bef9SDimitry Andric /// subtarget and limited by given \p FlatWorkGroupSize. 
1289e8d8bef9SDimitry Andric unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { 1290e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); 1291e8d8bef9SDimitry Andric } 1292e8d8bef9SDimitry Andric 1293e8d8bef9SDimitry Andric /// \returns Minimum flat work group size supported by the subtarget. 1294e8d8bef9SDimitry Andric unsigned getMinFlatWorkGroupSize() const override { 1295e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); 1296e8d8bef9SDimitry Andric } 1297e8d8bef9SDimitry Andric 1298e8d8bef9SDimitry Andric /// \returns Maximum flat work group size supported by the subtarget. 1299e8d8bef9SDimitry Andric unsigned getMaxFlatWorkGroupSize() const override { 1300e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); 1301e8d8bef9SDimitry Andric } 1302e8d8bef9SDimitry Andric 1303e8d8bef9SDimitry Andric /// \returns Number of waves per execution unit required to support the given 1304e8d8bef9SDimitry Andric /// \p FlatWorkGroupSize. 1305e8d8bef9SDimitry Andric unsigned 1306e8d8bef9SDimitry Andric getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { 1307e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); 1308e8d8bef9SDimitry Andric } 1309e8d8bef9SDimitry Andric 1310e8d8bef9SDimitry Andric /// \returns Minimum number of waves per execution unit supported by the 1311e8d8bef9SDimitry Andric /// subtarget. 
1312e8d8bef9SDimitry Andric unsigned getMinWavesPerEU() const override { 1313e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinWavesPerEU(this); 1314e8d8bef9SDimitry Andric } 1315e8d8bef9SDimitry Andric 1316e8d8bef9SDimitry Andric void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, 1317e8d8bef9SDimitry Andric SDep &Dep) const override; 131881ad6265SDimitry Andric 131981ad6265SDimitry Andric // \returns true if it's beneficial on this subtarget for the scheduler to 132081ad6265SDimitry Andric // cluster stores as well as loads. 132181ad6265SDimitry Andric bool shouldClusterStores() const { return getGeneration() >= GFX11; } 1322*bdd1243dSDimitry Andric 1323*bdd1243dSDimitry Andric // \returns the number of address arguments from which to enable MIMG NSA 1324*bdd1243dSDimitry Andric // on supported architectures. 1325*bdd1243dSDimitry Andric unsigned getNSAThreshold(const MachineFunction &MF) const; 1326e8d8bef9SDimitry Andric }; 1327e8d8bef9SDimitry Andric 1328e8d8bef9SDimitry Andric } // end namespace llvm 1329e8d8bef9SDimitry Andric 1330e8d8bef9SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 1331