109467b48Spatrick //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==// 209467b48Spatrick // 309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information. 509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 609467b48Spatrick // 709467b48Spatrick //===----------------------------------------------------------------------===// 809467b48Spatrick // 909467b48Spatrick /// \file 1009467b48Spatrick // 1109467b48Spatrick //===----------------------------------------------------------------------===// 1209467b48Spatrick 1309467b48Spatrick #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 1409467b48Spatrick #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 1509467b48Spatrick 1609467b48Spatrick #include "AMDGPUArgumentUsageInfo.h" 1709467b48Spatrick #include "AMDGPUMachineFunction.h" 18*d415bd75Srobert #include "AMDGPUTargetMachine.h" 1909467b48Spatrick #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 2009467b48Spatrick #include "SIInstrInfo.h" 21*d415bd75Srobert #include "llvm/ADT/SetVector.h" 2209467b48Spatrick #include "llvm/CodeGen/MIRYamlMapping.h" 2309467b48Spatrick #include "llvm/CodeGen/PseudoSourceValue.h" 2473471bf0Spatrick #include "llvm/Support/raw_ostream.h" 25*d415bd75Srobert #include <optional> 2609467b48Spatrick 2709467b48Spatrick namespace llvm { 2809467b48Spatrick 2909467b48Spatrick class MachineFrameInfo; 3009467b48Spatrick class MachineFunction; 3173471bf0Spatrick class SIMachineFunctionInfo; 3273471bf0Spatrick class SIRegisterInfo; 33*d415bd75Srobert class TargetRegisterClass; 3409467b48Spatrick 3509467b48Spatrick class AMDGPUPseudoSourceValue : public PseudoSourceValue { 3609467b48Spatrick public: 3709467b48Spatrick enum AMDGPUPSVKind : unsigned { 38*d415bd75Srobert PSVImage = PseudoSourceValue::TargetCustom, 3909467b48Spatrick GWSResource 4009467b48Spatrick }; 4109467b48Spatrick 4209467b48Spatrick protected: AMDGPUPseudoSourceValue(unsigned Kind,const AMDGPUTargetMachine & TM)43*d415bd75Srobert AMDGPUPseudoSourceValue(unsigned Kind, const AMDGPUTargetMachine &TM) 44*d415bd75Srobert : PseudoSourceValue(Kind, TM) {} 4509467b48Spatrick 4609467b48Spatrick public: isConstant(const MachineFrameInfo *)4709467b48Spatrick bool isConstant(const MachineFrameInfo *) const override { 4809467b48Spatrick // This should probably be true for most images, but we will start by being 4909467b48Spatrick // conservative. 5009467b48Spatrick return false; 5109467b48Spatrick } 5209467b48Spatrick isAliased(const MachineFrameInfo *)5309467b48Spatrick bool isAliased(const MachineFrameInfo *) const override { 5409467b48Spatrick return true; 5509467b48Spatrick } 5609467b48Spatrick mayAlias(const MachineFrameInfo *)5709467b48Spatrick bool mayAlias(const MachineFrameInfo *) const override { 5809467b48Spatrick return true; 5909467b48Spatrick } 6009467b48Spatrick }; 6109467b48Spatrick 6209467b48Spatrick class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue { 6309467b48Spatrick public: AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine & TM)64*d415bd75Srobert explicit AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine &TM) 65*d415bd75Srobert : AMDGPUPseudoSourceValue(GWSResource, TM) {} 6609467b48Spatrick classof(const PseudoSourceValue * V)6709467b48Spatrick static bool classof(const PseudoSourceValue *V) { 6809467b48Spatrick return V->kind() == GWSResource; 6909467b48Spatrick } 7009467b48Spatrick 7109467b48Spatrick // These are inaccessible memory from IR. isAliased(const MachineFrameInfo *)7209467b48Spatrick bool isAliased(const MachineFrameInfo *) const override { 7309467b48Spatrick return false; 7409467b48Spatrick } 7509467b48Spatrick 7609467b48Spatrick // These are inaccessible memory from IR. mayAlias(const MachineFrameInfo *)7709467b48Spatrick bool mayAlias(const MachineFrameInfo *) const override { 7809467b48Spatrick return false; 7909467b48Spatrick } 8009467b48Spatrick printCustom(raw_ostream & OS)8109467b48Spatrick void printCustom(raw_ostream &OS) const override { 8209467b48Spatrick OS << "GWSResource"; 8309467b48Spatrick } 8409467b48Spatrick }; 8509467b48Spatrick 8609467b48Spatrick namespace yaml { 8709467b48Spatrick 8809467b48Spatrick struct SIArgument { 8909467b48Spatrick bool IsRegister; 9009467b48Spatrick union { 9109467b48Spatrick StringValue RegisterName; 9209467b48Spatrick unsigned StackOffset; 9309467b48Spatrick }; 94*d415bd75Srobert std::optional<unsigned> Mask; 9509467b48Spatrick 9609467b48Spatrick // Default constructor, which creates a stack argument. SIArgumentSIArgument9709467b48Spatrick SIArgument() : IsRegister(false), StackOffset(0) {} SIArgumentSIArgument9809467b48Spatrick SIArgument(const SIArgument &Other) { 9909467b48Spatrick IsRegister = Other.IsRegister; 10009467b48Spatrick if (IsRegister) { 10109467b48Spatrick ::new ((void *)std::addressof(RegisterName)) 10209467b48Spatrick StringValue(Other.RegisterName); 10309467b48Spatrick } else 10409467b48Spatrick StackOffset = Other.StackOffset; 10509467b48Spatrick Mask = Other.Mask; 10609467b48Spatrick } 10709467b48Spatrick SIArgument &operator=(const SIArgument &Other) { 10809467b48Spatrick IsRegister = Other.IsRegister; 10909467b48Spatrick if (IsRegister) { 11009467b48Spatrick ::new ((void *)std::addressof(RegisterName)) 11109467b48Spatrick StringValue(Other.RegisterName); 11209467b48Spatrick } else 11309467b48Spatrick StackOffset = Other.StackOffset; 11409467b48Spatrick Mask = Other.Mask; 11509467b48Spatrick return *this; 11609467b48Spatrick } ~SIArgumentSIArgument11709467b48Spatrick ~SIArgument() { 11809467b48Spatrick if (IsRegister) 11909467b48Spatrick RegisterName.~StringValue(); 12009467b48Spatrick } 12109467b48Spatrick 12209467b48Spatrick // Helper to create a register or stack argument. createArgumentSIArgument12309467b48Spatrick static inline SIArgument createArgument(bool IsReg) { 12409467b48Spatrick if (IsReg) 12509467b48Spatrick return SIArgument(IsReg); 12609467b48Spatrick return SIArgument(); 12709467b48Spatrick } 12809467b48Spatrick 12909467b48Spatrick private: 13009467b48Spatrick // Construct a register argument. SIArgumentSIArgument13109467b48Spatrick SIArgument(bool) : IsRegister(true), RegisterName() {} 13209467b48Spatrick }; 13309467b48Spatrick 13409467b48Spatrick template <> struct MappingTraits<SIArgument> { 13509467b48Spatrick static void mapping(IO &YamlIO, SIArgument &A) { 13609467b48Spatrick if (YamlIO.outputting()) { 13709467b48Spatrick if (A.IsRegister) 13809467b48Spatrick YamlIO.mapRequired("reg", A.RegisterName); 13909467b48Spatrick else 14009467b48Spatrick YamlIO.mapRequired("offset", A.StackOffset); 14109467b48Spatrick } else { 14209467b48Spatrick auto Keys = YamlIO.keys(); 14309467b48Spatrick if (is_contained(Keys, "reg")) { 14409467b48Spatrick A = SIArgument::createArgument(true); 14509467b48Spatrick YamlIO.mapRequired("reg", A.RegisterName); 14609467b48Spatrick } else if (is_contained(Keys, "offset")) 14709467b48Spatrick YamlIO.mapRequired("offset", A.StackOffset); 14809467b48Spatrick else 14909467b48Spatrick YamlIO.setError("missing required key 'reg' or 'offset'"); 15009467b48Spatrick } 15109467b48Spatrick YamlIO.mapOptional("mask", A.Mask); 15209467b48Spatrick } 15309467b48Spatrick static const bool flow = true; 15409467b48Spatrick }; 15509467b48Spatrick 15609467b48Spatrick struct SIArgumentInfo { 157*d415bd75Srobert std::optional<SIArgument> PrivateSegmentBuffer; 158*d415bd75Srobert std::optional<SIArgument> DispatchPtr; 159*d415bd75Srobert std::optional<SIArgument> QueuePtr; 160*d415bd75Srobert std::optional<SIArgument> KernargSegmentPtr; 161*d415bd75Srobert std::optional<SIArgument> DispatchID; 162*d415bd75Srobert std::optional<SIArgument> FlatScratchInit; 163*d415bd75Srobert std::optional<SIArgument> PrivateSegmentSize; 16409467b48Spatrick 165*d415bd75Srobert std::optional<SIArgument> WorkGroupIDX; 166*d415bd75Srobert std::optional<SIArgument> WorkGroupIDY; 167*d415bd75Srobert std::optional<SIArgument> WorkGroupIDZ; 168*d415bd75Srobert std::optional<SIArgument> WorkGroupInfo; 169*d415bd75Srobert std::optional<SIArgument> LDSKernelId; 170*d415bd75Srobert std::optional<SIArgument> PrivateSegmentWaveByteOffset; 17109467b48Spatrick 172*d415bd75Srobert std::optional<SIArgument> ImplicitArgPtr; 173*d415bd75Srobert std::optional<SIArgument> ImplicitBufferPtr; 17409467b48Spatrick 175*d415bd75Srobert std::optional<SIArgument> WorkItemIDX; 176*d415bd75Srobert std::optional<SIArgument> WorkItemIDY; 177*d415bd75Srobert std::optional<SIArgument> WorkItemIDZ; 17809467b48Spatrick }; 17909467b48Spatrick 18009467b48Spatrick template <> struct MappingTraits<SIArgumentInfo> { 18109467b48Spatrick static void mapping(IO &YamlIO, SIArgumentInfo &AI) { 18209467b48Spatrick YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer); 18309467b48Spatrick YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr); 18409467b48Spatrick YamlIO.mapOptional("queuePtr", AI.QueuePtr); 18509467b48Spatrick YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr); 18609467b48Spatrick YamlIO.mapOptional("dispatchID", AI.DispatchID); 18709467b48Spatrick YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit); 18809467b48Spatrick YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize); 18909467b48Spatrick 19009467b48Spatrick YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX); 19109467b48Spatrick YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY); 19209467b48Spatrick YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ); 19309467b48Spatrick YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo); 194*d415bd75Srobert YamlIO.mapOptional("LDSKernelId", AI.LDSKernelId); 19509467b48Spatrick YamlIO.mapOptional("privateSegmentWaveByteOffset", 19609467b48Spatrick AI.PrivateSegmentWaveByteOffset); 19709467b48Spatrick 19809467b48Spatrick YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr); 19909467b48Spatrick YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr); 20009467b48Spatrick 20109467b48Spatrick YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX); 20209467b48Spatrick YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY); 20309467b48Spatrick YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ); 20409467b48Spatrick } 20509467b48Spatrick }; 20609467b48Spatrick 20709467b48Spatrick // Default to default mode for default calling convention. 20809467b48Spatrick struct SIMode { 20909467b48Spatrick bool IEEE = true; 21009467b48Spatrick bool DX10Clamp = true; 211097a140dSpatrick bool FP32InputDenormals = true; 212097a140dSpatrick bool FP32OutputDenormals = true; 213097a140dSpatrick bool FP64FP16InputDenormals = true; 214097a140dSpatrick bool FP64FP16OutputDenormals = true; 21509467b48Spatrick 21609467b48Spatrick SIMode() = default; 21709467b48Spatrick 21809467b48Spatrick SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) { 21909467b48Spatrick IEEE = Mode.IEEE; 22009467b48Spatrick DX10Clamp = Mode.DX10Clamp; 221*d415bd75Srobert FP32InputDenormals = Mode.FP32Denormals.Input != DenormalMode::PreserveSign; 222*d415bd75Srobert FP32OutputDenormals = 223*d415bd75Srobert Mode.FP32Denormals.Output != DenormalMode::PreserveSign; 224*d415bd75Srobert FP64FP16InputDenormals = 225*d415bd75Srobert Mode.FP64FP16Denormals.Input != DenormalMode::PreserveSign; 226*d415bd75Srobert FP64FP16OutputDenormals = 227*d415bd75Srobert Mode.FP64FP16Denormals.Output != DenormalMode::PreserveSign; 22809467b48Spatrick } 22909467b48Spatrick 23009467b48Spatrick bool operator ==(const SIMode Other) const { 23109467b48Spatrick return IEEE == Other.IEEE && 23209467b48Spatrick DX10Clamp == Other.DX10Clamp && 233097a140dSpatrick FP32InputDenormals == Other.FP32InputDenormals && 234097a140dSpatrick FP32OutputDenormals == Other.FP32OutputDenormals && 235097a140dSpatrick FP64FP16InputDenormals == Other.FP64FP16InputDenormals && 236097a140dSpatrick FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals; 23709467b48Spatrick } 23809467b48Spatrick }; 23909467b48Spatrick 24009467b48Spatrick template <> struct MappingTraits<SIMode> { 24109467b48Spatrick static void mapping(IO &YamlIO, SIMode &Mode) { 24209467b48Spatrick YamlIO.mapOptional("ieee", Mode.IEEE, true); 24309467b48Spatrick YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true); 244097a140dSpatrick YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true); 245097a140dSpatrick YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true); 246097a140dSpatrick YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true); 247097a140dSpatrick YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true); 24809467b48Spatrick } 24909467b48Spatrick }; 25009467b48Spatrick 25109467b48Spatrick struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { 25209467b48Spatrick uint64_t ExplicitKernArgSize = 0; 253*d415bd75Srobert Align MaxKernArgAlign; 254*d415bd75Srobert uint32_t LDSSize = 0; 255*d415bd75Srobert uint32_t GDSSize = 0; 25673471bf0Spatrick Align DynLDSAlign; 25709467b48Spatrick bool IsEntryFunction = false; 25809467b48Spatrick bool NoSignedZerosFPMath = false; 25909467b48Spatrick bool MemoryBound = false; 26009467b48Spatrick bool WaveLimiter = false; 26173471bf0Spatrick bool HasSpilledSGPRs = false; 26273471bf0Spatrick bool HasSpilledVGPRs = false; 26309467b48Spatrick uint32_t HighBitsOf32BitAddress = 0; 26409467b48Spatrick 26573471bf0Spatrick // TODO: 10 may be a better default since it's the maximum. 26673471bf0Spatrick unsigned Occupancy = 0; 26773471bf0Spatrick 268*d415bd75Srobert SmallVector<StringValue> WWMReservedRegs; 269*d415bd75Srobert 27009467b48Spatrick StringValue ScratchRSrcReg = "$private_rsrc_reg"; 27109467b48Spatrick StringValue FrameOffsetReg = "$fp_reg"; 27209467b48Spatrick StringValue StackPtrOffsetReg = "$sp_reg"; 27309467b48Spatrick 274*d415bd75Srobert unsigned BytesInStackArgArea = 0; 275*d415bd75Srobert bool ReturnsVoid = true; 276*d415bd75Srobert 277*d415bd75Srobert std::optional<SIArgumentInfo> ArgInfo; 27809467b48Spatrick SIMode Mode; 279*d415bd75Srobert std::optional<FrameIndex> ScavengeFI; 280*d415bd75Srobert StringValue VGPRForAGPRCopy; 28109467b48Spatrick 28209467b48Spatrick SIMachineFunctionInfo() = default; 28309467b48Spatrick SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &, 28473471bf0Spatrick const TargetRegisterInfo &TRI, 28573471bf0Spatrick const llvm::MachineFunction &MF); 28609467b48Spatrick 28709467b48Spatrick void mappingImpl(yaml::IO &YamlIO) override; 28809467b48Spatrick ~SIMachineFunctionInfo() = default; 28909467b48Spatrick }; 29009467b48Spatrick 29109467b48Spatrick template <> struct MappingTraits<SIMachineFunctionInfo> { 29209467b48Spatrick static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) { 29309467b48Spatrick YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize, 29409467b48Spatrick UINT64_C(0)); 295*d415bd75Srobert YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign); 29609467b48Spatrick YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u); 297*d415bd75Srobert YamlIO.mapOptional("gdsSize", MFI.GDSSize, 0u); 29873471bf0Spatrick YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align()); 29909467b48Spatrick YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false); 30009467b48Spatrick YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false); 30109467b48Spatrick YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false); 30209467b48Spatrick YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false); 30373471bf0Spatrick YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false); 30473471bf0Spatrick YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false); 30509467b48Spatrick YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg, 30609467b48Spatrick StringValue("$private_rsrc_reg")); 30709467b48Spatrick YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg, 30809467b48Spatrick StringValue("$fp_reg")); 30909467b48Spatrick YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg, 31009467b48Spatrick StringValue("$sp_reg")); 311*d415bd75Srobert YamlIO.mapOptional("bytesInStackArgArea", MFI.BytesInStackArgArea, 0u); 312*d415bd75Srobert YamlIO.mapOptional("returnsVoid", MFI.ReturnsVoid, true); 31309467b48Spatrick YamlIO.mapOptional("argumentInfo", MFI.ArgInfo); 31409467b48Spatrick YamlIO.mapOptional("mode", MFI.Mode, SIMode()); 31509467b48Spatrick YamlIO.mapOptional("highBitsOf32BitAddress", 31609467b48Spatrick MFI.HighBitsOf32BitAddress, 0u); 31773471bf0Spatrick YamlIO.mapOptional("occupancy", MFI.Occupancy, 0); 318*d415bd75Srobert YamlIO.mapOptional("wwmReservedRegs", MFI.WWMReservedRegs); 31973471bf0Spatrick YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI); 320*d415bd75Srobert YamlIO.mapOptional("vgprForAGPRCopy", MFI.VGPRForAGPRCopy, 321*d415bd75Srobert StringValue()); // Don't print out when it's empty. 32209467b48Spatrick } 32309467b48Spatrick }; 32409467b48Spatrick 32509467b48Spatrick } // end namespace yaml 32609467b48Spatrick 327*d415bd75Srobert // A CSR SGPR value can be preserved inside a callee using one of the following 328*d415bd75Srobert // methods. 329*d415bd75Srobert // 1. Copy to an unused scratch SGPR. 330*d415bd75Srobert // 2. Spill to a VGPR lane. 331*d415bd75Srobert // 3. Spill to memory via. a scratch VGPR. 332*d415bd75Srobert // class PrologEpilogSGPRSaveRestoreInfo represents the save/restore method used 333*d415bd75Srobert // for an SGPR at function prolog/epilog. 334*d415bd75Srobert enum class SGPRSaveKind : uint8_t { 335*d415bd75Srobert COPY_TO_SCRATCH_SGPR, 336*d415bd75Srobert SPILL_TO_VGPR_LANE, 337*d415bd75Srobert SPILL_TO_MEM 338*d415bd75Srobert }; 339*d415bd75Srobert 340*d415bd75Srobert class PrologEpilogSGPRSaveRestoreInfo { 341*d415bd75Srobert SGPRSaveKind Kind; 342*d415bd75Srobert union { 343*d415bd75Srobert int Index; 344*d415bd75Srobert Register Reg; 345*d415bd75Srobert }; 346*d415bd75Srobert 347*d415bd75Srobert public: 348*d415bd75Srobert PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, int I) : Kind(K), Index(I) {} 349*d415bd75Srobert PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, Register R) 350*d415bd75Srobert : Kind(K), Reg(R) {} 351*d415bd75Srobert Register getReg() const { return Reg; } 352*d415bd75Srobert int getIndex() const { return Index; } 353*d415bd75Srobert SGPRSaveKind getKind() const { return Kind; } 354*d415bd75Srobert }; 355*d415bd75Srobert 35609467b48Spatrick /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which 35709467b48Spatrick /// tells the hardware which interpolation parameters to load. 35809467b48Spatrick class SIMachineFunctionInfo final : public AMDGPUMachineFunction { 35909467b48Spatrick friend class GCNTargetMachine; 36009467b48Spatrick 361*d415bd75Srobert // State of MODE register, assumed FP mode. 362*d415bd75Srobert AMDGPU::SIModeRegisterDefaults Mode; 36309467b48Spatrick 36409467b48Spatrick // Registers that may be reserved for spilling purposes. These may be the same 36509467b48Spatrick // as the input registers. 366097a140dSpatrick Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; 36709467b48Spatrick 368*d415bd75Srobert // This is the unswizzled offset from the current dispatch's scratch wave 369097a140dSpatrick // base to the beginning of the current function's frame. 370097a140dSpatrick Register FrameOffsetReg = AMDGPU::FP_REG; 37109467b48Spatrick 372097a140dSpatrick // This is an ABI register used in the non-entry calling convention to 373097a140dSpatrick // communicate the unswizzled offset from the current dispatch's scratch wave 374097a140dSpatrick // base to the beginning of the new function's frame. 375097a140dSpatrick Register StackPtrOffsetReg = AMDGPU::SP_REG; 37609467b48Spatrick 37709467b48Spatrick AMDGPUFunctionArgInfo ArgInfo; 37809467b48Spatrick 37909467b48Spatrick // Graphics info. 38009467b48Spatrick unsigned PSInputAddr = 0; 38109467b48Spatrick unsigned PSInputEnable = 0; 38209467b48Spatrick 38309467b48Spatrick /// Number of bytes of arguments this function has on the stack. If the callee 38409467b48Spatrick /// is expected to restore the argument stack this should be a multiple of 16, 38509467b48Spatrick /// all usable during a tail call. 38609467b48Spatrick /// 38709467b48Spatrick /// The alternative would forbid tail call optimisation in some cases: if we 38809467b48Spatrick /// want to transfer control from a function with 8-bytes of stack-argument 38909467b48Spatrick /// space to a function with 16-bytes then misalignment of this value would 39009467b48Spatrick /// make a stack adjustment necessary, which could not be undone by the 39109467b48Spatrick /// callee. 39209467b48Spatrick unsigned BytesInStackArgArea = 0; 39309467b48Spatrick 39409467b48Spatrick bool ReturnsVoid = true; 39509467b48Spatrick 39609467b48Spatrick // A pair of default/requested minimum/maximum flat work group sizes. 39709467b48Spatrick // Minimum - first, maximum - second. 39809467b48Spatrick std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0}; 39909467b48Spatrick 40009467b48Spatrick // A pair of default/requested minimum/maximum number of waves per execution 40109467b48Spatrick // unit. Minimum - first, maximum - second. 40209467b48Spatrick std::pair<unsigned, unsigned> WavesPerEU = {0, 0}; 40309467b48Spatrick 404*d415bd75Srobert const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV; 40509467b48Spatrick 40609467b48Spatrick private: 40709467b48Spatrick unsigned NumUserSGPRs = 0; 40809467b48Spatrick unsigned NumSystemSGPRs = 0; 40909467b48Spatrick 41009467b48Spatrick bool HasSpilledSGPRs = false; 41109467b48Spatrick bool HasSpilledVGPRs = false; 41209467b48Spatrick bool HasNonSpillStackObjects = false; 41309467b48Spatrick bool IsStackRealigned = false; 41409467b48Spatrick 41509467b48Spatrick unsigned NumSpilledSGPRs = 0; 41609467b48Spatrick unsigned NumSpilledVGPRs = 0; 41709467b48Spatrick 41809467b48Spatrick // Feature bits required for inputs passed in user SGPRs. 41909467b48Spatrick bool PrivateSegmentBuffer : 1; 42009467b48Spatrick bool DispatchPtr : 1; 42109467b48Spatrick bool QueuePtr : 1; 42209467b48Spatrick bool KernargSegmentPtr : 1; 42309467b48Spatrick bool DispatchID : 1; 42409467b48Spatrick bool FlatScratchInit : 1; 42509467b48Spatrick 42609467b48Spatrick // Feature bits required for inputs passed in system SGPRs. 42709467b48Spatrick bool WorkGroupIDX : 1; // Always initialized. 42809467b48Spatrick bool WorkGroupIDY : 1; 42909467b48Spatrick bool WorkGroupIDZ : 1; 43009467b48Spatrick bool WorkGroupInfo : 1; 431*d415bd75Srobert bool LDSKernelId : 1; 43209467b48Spatrick bool PrivateSegmentWaveByteOffset : 1; 43309467b48Spatrick 43409467b48Spatrick bool WorkItemIDX : 1; // Always initialized. 43509467b48Spatrick bool WorkItemIDY : 1; 43609467b48Spatrick bool WorkItemIDZ : 1; 43709467b48Spatrick 43809467b48Spatrick // Private memory buffer 43909467b48Spatrick // Compute directly in sgpr[0:1] 44009467b48Spatrick // Other shaders indirect 64-bits at sgpr[0:1] 44109467b48Spatrick bool ImplicitBufferPtr : 1; 44209467b48Spatrick 44309467b48Spatrick // Pointer to where the ABI inserts special kernel arguments separate from the 44409467b48Spatrick // user arguments. This is an offset from the KernargSegmentPtr. 44509467b48Spatrick bool ImplicitArgPtr : 1; 44609467b48Spatrick 447*d415bd75Srobert bool MayNeedAGPRs : 1; 448*d415bd75Srobert 44909467b48Spatrick // The hard-wired high half of the address of the global information table 45009467b48Spatrick // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since 45109467b48Spatrick // current hardware only allows a 16 bit value. 45209467b48Spatrick unsigned GITPtrHigh; 45309467b48Spatrick 45409467b48Spatrick unsigned HighBitsOf32BitAddress; 45509467b48Spatrick 45609467b48Spatrick // Current recorded maximum possible occupancy. 45709467b48Spatrick unsigned Occupancy; 45809467b48Spatrick 459*d415bd75Srobert mutable std::optional<bool> UsesAGPRs; 460*d415bd75Srobert 46109467b48Spatrick MCPhysReg getNextUserSGPR() const; 46209467b48Spatrick 46309467b48Spatrick MCPhysReg getNextSystemSGPR() const; 46409467b48Spatrick 46509467b48Spatrick public: 46609467b48Spatrick struct VGPRSpillToAGPR { 46709467b48Spatrick SmallVector<MCPhysReg, 32> Lanes; 46809467b48Spatrick bool FullyAllocated = false; 469*d415bd75Srobert bool IsDead = false; 47009467b48Spatrick }; 47109467b48Spatrick 47209467b48Spatrick private: 473*d415bd75Srobert // To track VGPR + lane index for each subregister of the SGPR spilled to 474*d415bd75Srobert // frameindex key during SILowerSGPRSpills pass. 475*d415bd75Srobert DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>> SGPRSpillToVGPRLanes; 476*d415bd75Srobert // To track VGPR + lane index for spilling special SGPRs like Frame Pointer 477*d415bd75Srobert // identified during PrologEpilogInserter. 478*d415bd75Srobert DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>> 479*d415bd75Srobert PrologEpilogSGPRSpillToVGPRLanes; 48009467b48Spatrick unsigned NumVGPRSpillLanes = 0; 481*d415bd75Srobert unsigned NumVGPRPrologEpilogSpillLanes = 0; 482*d415bd75Srobert SmallVector<Register, 2> SpillVGPRs; 483*d415bd75Srobert using WWMSpillsMap = MapVector<Register, int>; 484*d415bd75Srobert // To track the registers used in instructions that can potentially modify the 485*d415bd75Srobert // inactive lanes. The WWM instructions and the writelane instructions for 486*d415bd75Srobert // spilling SGPRs to VGPRs fall under such category of operations. The VGPRs 487*d415bd75Srobert // modified by them should be spilled/restored at function prolog/epilog to 488*d415bd75Srobert // avoid any undesired outcome. Each entry in this map holds a pair of values, 489*d415bd75Srobert // the VGPR and its stack slot index. 490*d415bd75Srobert WWMSpillsMap WWMSpills; 491*d415bd75Srobert 492*d415bd75Srobert using ReservedRegSet = SmallSetVector<Register, 8>; 493*d415bd75Srobert // To track the VGPRs reserved for WWM instructions. They get stack slots 494*d415bd75Srobert // later during PrologEpilogInserter and get added into the superset WWMSpills 495*d415bd75Srobert // for actual spilling. A separate set makes the register reserved part and 496*d415bd75Srobert // the serialization easier. 497*d415bd75Srobert ReservedRegSet WWMReservedRegs; 498*d415bd75Srobert 499*d415bd75Srobert using PrologEpilogSGPRSpillsMap = 500*d415bd75Srobert DenseMap<Register, PrologEpilogSGPRSaveRestoreInfo>; 501*d415bd75Srobert // To track the SGPR spill method used for a CSR SGPR register during 502*d415bd75Srobert // frame lowering. Even though the SGPR spills are handled during 503*d415bd75Srobert // SILowerSGPRSpills pass, some special handling needed later during the 504*d415bd75Srobert // PrologEpilogInserter. 505*d415bd75Srobert PrologEpilogSGPRSpillsMap PrologEpilogSGPRSpills; 50609467b48Spatrick 50709467b48Spatrick DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills; 50809467b48Spatrick 50909467b48Spatrick // AGPRs used for VGPR spills. 51009467b48Spatrick SmallVector<MCPhysReg, 32> SpillAGPR; 51109467b48Spatrick 51209467b48Spatrick // VGPRs used for AGPR spills. 51309467b48Spatrick SmallVector<MCPhysReg, 32> SpillVGPR; 51409467b48Spatrick 51573471bf0Spatrick // Emergency stack slot. Sometimes, we create this before finalizing the stack 51673471bf0Spatrick // frame, so save it here and add it to the RegScavenger later. 517*d415bd75Srobert std::optional<int> ScavengeFI; 51873471bf0Spatrick 519*d415bd75Srobert private: 520*d415bd75Srobert Register VGPRForAGPRCopy; 52109467b48Spatrick 522*d415bd75Srobert bool allocateVGPRForSGPRSpills(MachineFunction &MF, int FI, 523*d415bd75Srobert unsigned LaneIndex); 524*d415bd75Srobert bool allocateVGPRForPrologEpilogSGPRSpills(MachineFunction &MF, int FI, 525*d415bd75Srobert unsigned LaneIndex); 526097a140dSpatrick 52709467b48Spatrick public: 528*d415bd75Srobert Register getVGPRForAGPRCopy() const { 529*d415bd75Srobert return VGPRForAGPRCopy; 530*d415bd75Srobert } 531*d415bd75Srobert 532*d415bd75Srobert void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy) { 533*d415bd75Srobert VGPRForAGPRCopy = NewVGPRForAGPRCopy; 534*d415bd75Srobert } 535*d415bd75Srobert 536*d415bd75Srobert bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const; 537*d415bd75Srobert 538*d415bd75Srobert public: 539*d415bd75Srobert SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI) = default; 540*d415bd75Srobert SIMachineFunctionInfo(const Function &F, const GCNSubtarget *STI); 541*d415bd75Srobert 542*d415bd75Srobert MachineFunctionInfo * 543*d415bd75Srobert clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, 544*d415bd75Srobert const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB) 545*d415bd75Srobert const override; 54609467b48Spatrick 54773471bf0Spatrick bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, 54873471bf0Spatrick const MachineFunction &MF, 54973471bf0Spatrick PerFunctionMIParsingState &PFS, 55073471bf0Spatrick SMDiagnostic &Error, SMRange &SourceRange); 55173471bf0Spatrick 552*d415bd75Srobert void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); } 553*d415bd75Srobert 554*d415bd75Srobert AMDGPU::SIModeRegisterDefaults getMode() const { 555*d415bd75Srobert return Mode; 55673471bf0Spatrick } 55709467b48Spatrick 558*d415bd75Srobert ArrayRef<SIRegisterInfo::SpilledReg> 559*d415bd75Srobert getSGPRSpillToVGPRLanes(int FrameIndex) const { 560*d415bd75Srobert auto I = SGPRSpillToVGPRLanes.find(FrameIndex); 561*d415bd75Srobert return (I == SGPRSpillToVGPRLanes.end()) 562*d415bd75Srobert ? ArrayRef<SIRegisterInfo::SpilledReg>() 563*d415bd75Srobert : ArrayRef(I->second); 56409467b48Spatrick } 56509467b48Spatrick 566*d415bd75Srobert ArrayRef<Register> getSGPRSpillVGPRs() const { return SpillVGPRs; } 567*d415bd75Srobert const WWMSpillsMap &getWWMSpills() const { return WWMSpills; } 568*d415bd75Srobert const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; } 56909467b48Spatrick 570*d415bd75Srobert const PrologEpilogSGPRSpillsMap &getPrologEpilogSGPRSpills() const { 571*d415bd75Srobert return PrologEpilogSGPRSpills; 572097a140dSpatrick } 573097a140dSpatrick 574*d415bd75Srobert void addToPrologEpilogSGPRSpills(Register Reg, 575*d415bd75Srobert PrologEpilogSGPRSaveRestoreInfo SI) { 576*d415bd75Srobert PrologEpilogSGPRSpills.insert(std::make_pair(Reg, SI)); 577*d415bd75Srobert } 578*d415bd75Srobert 579*d415bd75Srobert // Check if an entry created for \p Reg in PrologEpilogSGPRSpills. Return true 580*d415bd75Srobert // on success and false otherwise. 581*d415bd75Srobert bool hasPrologEpilogSGPRSpillEntry(Register Reg) const { 582*d415bd75Srobert return PrologEpilogSGPRSpills.find(Reg) != PrologEpilogSGPRSpills.end(); 583*d415bd75Srobert } 584*d415bd75Srobert 585*d415bd75Srobert // Get the scratch SGPR if allocated to save/restore \p Reg. 586*d415bd75Srobert Register getScratchSGPRCopyDstReg(Register Reg) const { 587*d415bd75Srobert auto I = PrologEpilogSGPRSpills.find(Reg); 588*d415bd75Srobert if (I != PrologEpilogSGPRSpills.end() && 589*d415bd75Srobert I->second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR) 590*d415bd75Srobert return I->second.getReg(); 591*d415bd75Srobert 592*d415bd75Srobert return AMDGPU::NoRegister; 593*d415bd75Srobert } 594*d415bd75Srobert 595*d415bd75Srobert // Get all scratch SGPRs allocated to copy/restore the SGPR spills. 596*d415bd75Srobert void getAllScratchSGPRCopyDstRegs(SmallVectorImpl<Register> &Regs) const { 597*d415bd75Srobert for (const auto &SI : PrologEpilogSGPRSpills) { 598*d415bd75Srobert if (SI.second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR) 599*d415bd75Srobert Regs.push_back(SI.second.getReg()); 600*d415bd75Srobert } 601*d415bd75Srobert } 602*d415bd75Srobert 603*d415bd75Srobert // Check if \p FI is allocated for any SGPR spill to a VGPR lane during PEI. 604*d415bd75Srobert bool checkIndexInPrologEpilogSGPRSpills(int FI) const { 605*d415bd75Srobert return find_if(PrologEpilogSGPRSpills, 606*d415bd75Srobert [FI](const std::pair<Register, 607*d415bd75Srobert PrologEpilogSGPRSaveRestoreInfo> &SI) { 608*d415bd75Srobert return SI.second.getKind() == 609*d415bd75Srobert SGPRSaveKind::SPILL_TO_VGPR_LANE && 610*d415bd75Srobert SI.second.getIndex() == FI; 611*d415bd75Srobert }) != PrologEpilogSGPRSpills.end(); 612*d415bd75Srobert } 613*d415bd75Srobert 614*d415bd75Srobert const PrologEpilogSGPRSaveRestoreInfo & 615*d415bd75Srobert getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const { 616*d415bd75Srobert auto I = PrologEpilogSGPRSpills.find(Reg); 617*d415bd75Srobert assert(I != PrologEpilogSGPRSpills.end()); 618*d415bd75Srobert 619*d415bd75Srobert return I->second; 620*d415bd75Srobert } 621*d415bd75Srobert 622*d415bd75Srobert ArrayRef<SIRegisterInfo::SpilledReg> 623*d415bd75Srobert getPrologEpilogSGPRSpillToVGPRLanes(int FrameIndex) const { 624*d415bd75Srobert auto I = PrologEpilogSGPRSpillToVGPRLanes.find(FrameIndex); 625*d415bd75Srobert return (I == PrologEpilogSGPRSpillToVGPRLanes.end()) 626*d415bd75Srobert ? ArrayRef<SIRegisterInfo::SpilledReg>() 627*d415bd75Srobert : ArrayRef(I->second); 628*d415bd75Srobert } 629*d415bd75Srobert 630*d415bd75Srobert void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size = 4, 631*d415bd75Srobert Align Alignment = Align(4)); 632*d415bd75Srobert 633*d415bd75Srobert void splitWWMSpillRegisters( 634*d415bd75Srobert MachineFunction &MF, 635*d415bd75Srobert SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs, 636*d415bd75Srobert SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const; 637097a140dSpatrick 63809467b48Spatrick ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const { 63909467b48Spatrick return SpillAGPR; 64009467b48Spatrick } 64109467b48Spatrick 64209467b48Spatrick ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const { 64309467b48Spatrick return SpillVGPR; 64409467b48Spatrick } 64509467b48Spatrick 64609467b48Spatrick MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const { 64709467b48Spatrick auto I = VGPRToAGPRSpills.find(FrameIndex); 64809467b48Spatrick return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister 64909467b48Spatrick : I->second.Lanes[Lane]; 65009467b48Spatrick } 65109467b48Spatrick 652*d415bd75Srobert void setVGPRToAGPRSpillDead(int FrameIndex) { 653*d415bd75Srobert auto I = VGPRToAGPRSpills.find(FrameIndex); 654*d415bd75Srobert if (I != VGPRToAGPRSpills.end()) 655*d415bd75Srobert I->second.IsDead = true; 656*d415bd75Srobert } 657*d415bd75Srobert 658*d415bd75Srobert bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, 659*d415bd75Srobert bool IsPrologEpilog = false); 66009467b48Spatrick bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR); 661*d415bd75Srobert 662*d415bd75Srobert /// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill 663*d415bd75Srobert /// to the default stack. 664*d415bd75Srobert bool removeDeadFrameIndices(MachineFrameInfo &MFI, 665*d415bd75Srobert bool ResetSGPRSpillStackIDs); 66609467b48Spatrick 66773471bf0Spatrick int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI); 668*d415bd75Srobert std::optional<int> getOptionalScavengeFI() const { return ScavengeFI; } 66909467b48Spatrick 67009467b48Spatrick unsigned getBytesInStackArgArea() const { 67109467b48Spatrick return BytesInStackArgArea; 67209467b48Spatrick } 67309467b48Spatrick 67409467b48Spatrick void setBytesInStackArgArea(unsigned Bytes) { 67509467b48Spatrick BytesInStackArgArea = Bytes; 67609467b48Spatrick } 67709467b48Spatrick 67809467b48Spatrick // Add user SGPRs. 679097a140dSpatrick Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI); 680097a140dSpatrick Register addDispatchPtr(const SIRegisterInfo &TRI); 681097a140dSpatrick Register addQueuePtr(const SIRegisterInfo &TRI); 682097a140dSpatrick Register addKernargSegmentPtr(const SIRegisterInfo &TRI); 683097a140dSpatrick Register addDispatchID(const SIRegisterInfo &TRI); 684097a140dSpatrick Register addFlatScratchInit(const SIRegisterInfo &TRI); 685097a140dSpatrick Register addImplicitBufferPtr(const SIRegisterInfo &TRI); 686*d415bd75Srobert Register addLDSKernelId(); 687*d415bd75Srobert 688*d415bd75Srobert /// Increment user SGPRs used for padding the argument list only. 689*d415bd75Srobert Register addReservedUserSGPR() { 690*d415bd75Srobert Register Next = getNextUserSGPR(); 691*d415bd75Srobert ++NumUserSGPRs; 692*d415bd75Srobert return Next; 693*d415bd75Srobert } 69409467b48Spatrick 69509467b48Spatrick // Add system SGPRs. 696097a140dSpatrick Register addWorkGroupIDX() { 69709467b48Spatrick ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR()); 69809467b48Spatrick NumSystemSGPRs += 1; 69909467b48Spatrick return ArgInfo.WorkGroupIDX.getRegister(); 70009467b48Spatrick } 70109467b48Spatrick 702097a140dSpatrick Register addWorkGroupIDY() { 70309467b48Spatrick ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR()); 70409467b48Spatrick NumSystemSGPRs += 1; 70509467b48Spatrick return ArgInfo.WorkGroupIDY.getRegister(); 70609467b48Spatrick } 70709467b48Spatrick 708097a140dSpatrick Register addWorkGroupIDZ() { 70909467b48Spatrick ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR()); 71009467b48Spatrick NumSystemSGPRs += 1; 71109467b48Spatrick return ArgInfo.WorkGroupIDZ.getRegister(); 71209467b48Spatrick } 71309467b48Spatrick 714097a140dSpatrick Register addWorkGroupInfo() { 71509467b48Spatrick ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR()); 71609467b48Spatrick NumSystemSGPRs += 1; 71709467b48Spatrick return ArgInfo.WorkGroupInfo.getRegister(); 71809467b48Spatrick } 71909467b48Spatrick 72009467b48Spatrick // Add special VGPR inputs 72109467b48Spatrick void setWorkItemIDX(ArgDescriptor Arg) { 72209467b48Spatrick ArgInfo.WorkItemIDX = Arg; 72309467b48Spatrick } 72409467b48Spatrick 72509467b48Spatrick void setWorkItemIDY(ArgDescriptor Arg) { 72609467b48Spatrick ArgInfo.WorkItemIDY = Arg; 72709467b48Spatrick } 72809467b48Spatrick 72909467b48Spatrick void setWorkItemIDZ(ArgDescriptor Arg) { 73009467b48Spatrick ArgInfo.WorkItemIDZ = Arg; 73109467b48Spatrick } 73209467b48Spatrick 733097a140dSpatrick Register addPrivateSegmentWaveByteOffset() { 73409467b48Spatrick ArgInfo.PrivateSegmentWaveByteOffset 73509467b48Spatrick = ArgDescriptor::createRegister(getNextSystemSGPR()); 73609467b48Spatrick NumSystemSGPRs += 1; 73709467b48Spatrick return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); 73809467b48Spatrick } 73909467b48Spatrick 740097a140dSpatrick void setPrivateSegmentWaveByteOffset(Register Reg) { 74109467b48Spatrick ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg); 74209467b48Spatrick } 74309467b48Spatrick 74409467b48Spatrick bool hasPrivateSegmentBuffer() const { 74509467b48Spatrick return PrivateSegmentBuffer; 74609467b48Spatrick } 74709467b48Spatrick 74809467b48Spatrick bool hasDispatchPtr() const { 74909467b48Spatrick return DispatchPtr; 75009467b48Spatrick } 75109467b48Spatrick 75209467b48Spatrick bool hasQueuePtr() const { 75309467b48Spatrick return QueuePtr; 75409467b48Spatrick } 75509467b48Spatrick 75609467b48Spatrick bool hasKernargSegmentPtr() const { 75709467b48Spatrick return KernargSegmentPtr; 75809467b48Spatrick } 75909467b48Spatrick 76009467b48Spatrick bool hasDispatchID() const { 76109467b48Spatrick return DispatchID; 76209467b48Spatrick } 76309467b48Spatrick 76409467b48Spatrick bool hasFlatScratchInit() const { 76509467b48Spatrick return FlatScratchInit; 76609467b48Spatrick } 76709467b48Spatrick 76809467b48Spatrick bool hasWorkGroupIDX() const { 76909467b48Spatrick return WorkGroupIDX; 77009467b48Spatrick } 77109467b48Spatrick 77209467b48Spatrick bool hasWorkGroupIDY() const { 77309467b48Spatrick return WorkGroupIDY; 77409467b48Spatrick } 77509467b48Spatrick 77609467b48Spatrick bool hasWorkGroupIDZ() const { 77709467b48Spatrick return WorkGroupIDZ; 77809467b48Spatrick } 77909467b48Spatrick 78009467b48Spatrick bool hasWorkGroupInfo() const { 78109467b48Spatrick return WorkGroupInfo; 78209467b48Spatrick } 78309467b48Spatrick 784*d415bd75Srobert bool hasLDSKernelId() const { return LDSKernelId; } 785*d415bd75Srobert 78609467b48Spatrick bool hasPrivateSegmentWaveByteOffset() const { 78709467b48Spatrick return PrivateSegmentWaveByteOffset; 78809467b48Spatrick } 78909467b48Spatrick 79009467b48Spatrick bool hasWorkItemIDX() const { 79109467b48Spatrick return WorkItemIDX; 79209467b48Spatrick } 79309467b48Spatrick 79409467b48Spatrick bool hasWorkItemIDY() const { 79509467b48Spatrick return WorkItemIDY; 79609467b48Spatrick } 79709467b48Spatrick 79809467b48Spatrick bool hasWorkItemIDZ() const { 79909467b48Spatrick return WorkItemIDZ; 80009467b48Spatrick } 80109467b48Spatrick 80209467b48Spatrick bool hasImplicitArgPtr() const { 80309467b48Spatrick return ImplicitArgPtr; 80409467b48Spatrick } 80509467b48Spatrick 80609467b48Spatrick bool hasImplicitBufferPtr() const { 80709467b48Spatrick return ImplicitBufferPtr; 80809467b48Spatrick } 80909467b48Spatrick 81009467b48Spatrick AMDGPUFunctionArgInfo &getArgInfo() { 81109467b48Spatrick return ArgInfo; 81209467b48Spatrick } 81309467b48Spatrick 81409467b48Spatrick const AMDGPUFunctionArgInfo &getArgInfo() const { 81509467b48Spatrick return ArgInfo; 81609467b48Spatrick } 81709467b48Spatrick 818097a140dSpatrick std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT> 81909467b48Spatrick getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const { 82009467b48Spatrick return ArgInfo.getPreloadedValue(Value); 82109467b48Spatrick } 82209467b48Spatrick 82373471bf0Spatrick MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const { 824097a140dSpatrick auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value)); 82573471bf0Spatrick return Arg ? Arg->getRegister() : MCRegister(); 82609467b48Spatrick } 82709467b48Spatrick 82809467b48Spatrick unsigned getGITPtrHigh() const { 82909467b48Spatrick return GITPtrHigh; 83009467b48Spatrick } 83109467b48Spatrick 832097a140dSpatrick Register getGITPtrLoReg(const MachineFunction &MF) const; 833097a140dSpatrick 83409467b48Spatrick uint32_t get32BitAddressHighBits() const { 83509467b48Spatrick return HighBitsOf32BitAddress; 83609467b48Spatrick } 83709467b48Spatrick 83809467b48Spatrick unsigned getNumUserSGPRs() const { 83909467b48Spatrick return NumUserSGPRs; 84009467b48Spatrick } 84109467b48Spatrick 84209467b48Spatrick unsigned getNumPreloadedSGPRs() const { 84309467b48Spatrick return NumUserSGPRs + NumSystemSGPRs; 84409467b48Spatrick } 84509467b48Spatrick 846097a140dSpatrick Register getPrivateSegmentWaveByteOffsetSystemSGPR() const { 84709467b48Spatrick return ArgInfo.PrivateSegmentWaveByteOffset.getRegister(); 84809467b48Spatrick } 84909467b48Spatrick 85009467b48Spatrick /// Returns the physical register reserved for use as the resource 85109467b48Spatrick /// descriptor for scratch accesses. 852097a140dSpatrick Register getScratchRSrcReg() const { 85309467b48Spatrick return ScratchRSrcReg; 85409467b48Spatrick } 85509467b48Spatrick 856097a140dSpatrick void setScratchRSrcReg(Register Reg) { 85709467b48Spatrick assert(Reg != 0 && "Should never be unset"); 85809467b48Spatrick ScratchRSrcReg = Reg; 85909467b48Spatrick } 86009467b48Spatrick 861097a140dSpatrick Register getFrameOffsetReg() const { 86209467b48Spatrick return FrameOffsetReg; 86309467b48Spatrick } 86409467b48Spatrick 865097a140dSpatrick void setFrameOffsetReg(Register Reg) { 86609467b48Spatrick assert(Reg != 0 && "Should never be unset"); 86709467b48Spatrick FrameOffsetReg = Reg; 86809467b48Spatrick } 86909467b48Spatrick 870097a140dSpatrick void setStackPtrOffsetReg(Register Reg) { 87109467b48Spatrick assert(Reg != 0 && "Should never be unset"); 87209467b48Spatrick StackPtrOffsetReg = Reg; 87309467b48Spatrick } 87409467b48Spatrick 87509467b48Spatrick // Note the unset value for this is AMDGPU::SP_REG rather than 87609467b48Spatrick // NoRegister. This is mostly a workaround for MIR tests where state that 87709467b48Spatrick // can't be directly computed from the function is not preserved in serialized 87809467b48Spatrick // MIR. 879097a140dSpatrick Register getStackPtrOffsetReg() const { 88009467b48Spatrick return StackPtrOffsetReg; 88109467b48Spatrick } 88209467b48Spatrick 883097a140dSpatrick Register getQueuePtrUserSGPR() const { 88409467b48Spatrick return ArgInfo.QueuePtr.getRegister(); 88509467b48Spatrick } 88609467b48Spatrick 887097a140dSpatrick Register getImplicitBufferPtrUserSGPR() const { 88809467b48Spatrick return ArgInfo.ImplicitBufferPtr.getRegister(); 88909467b48Spatrick } 89009467b48Spatrick 89109467b48Spatrick bool hasSpilledSGPRs() const { 89209467b48Spatrick return HasSpilledSGPRs; 89309467b48Spatrick } 89409467b48Spatrick 89509467b48Spatrick void setHasSpilledSGPRs(bool Spill = true) { 89609467b48Spatrick HasSpilledSGPRs = Spill; 89709467b48Spatrick } 89809467b48Spatrick 89909467b48Spatrick bool hasSpilledVGPRs() const { 90009467b48Spatrick return HasSpilledVGPRs; 90109467b48Spatrick } 90209467b48Spatrick 90309467b48Spatrick void setHasSpilledVGPRs(bool Spill = true) { 90409467b48Spatrick HasSpilledVGPRs = Spill; 90509467b48Spatrick } 90609467b48Spatrick 90709467b48Spatrick bool hasNonSpillStackObjects() const { 90809467b48Spatrick return HasNonSpillStackObjects; 90909467b48Spatrick } 91009467b48Spatrick 91109467b48Spatrick void setHasNonSpillStackObjects(bool StackObject = true) { 91209467b48Spatrick HasNonSpillStackObjects = StackObject; 91309467b48Spatrick } 91409467b48Spatrick 91509467b48Spatrick bool isStackRealigned() const { 91609467b48Spatrick return IsStackRealigned; 91709467b48Spatrick } 91809467b48Spatrick 91909467b48Spatrick void setIsStackRealigned(bool Realigned = true) { 92009467b48Spatrick IsStackRealigned = Realigned; 92109467b48Spatrick } 92209467b48Spatrick 92309467b48Spatrick unsigned getNumSpilledSGPRs() const { 92409467b48Spatrick return NumSpilledSGPRs; 92509467b48Spatrick } 92609467b48Spatrick 92709467b48Spatrick unsigned getNumSpilledVGPRs() const { 92809467b48Spatrick return NumSpilledVGPRs; 92909467b48Spatrick } 93009467b48Spatrick 93109467b48Spatrick void addToSpilledSGPRs(unsigned num) { 93209467b48Spatrick NumSpilledSGPRs += num; 93309467b48Spatrick } 93409467b48Spatrick 93509467b48Spatrick void addToSpilledVGPRs(unsigned num) { 93609467b48Spatrick NumSpilledVGPRs += num; 93709467b48Spatrick } 93809467b48Spatrick 93909467b48Spatrick unsigned getPSInputAddr() const { 94009467b48Spatrick return PSInputAddr; 94109467b48Spatrick } 94209467b48Spatrick 94309467b48Spatrick unsigned getPSInputEnable() const { 94409467b48Spatrick return PSInputEnable; 94509467b48Spatrick } 94609467b48Spatrick 94709467b48Spatrick bool isPSInputAllocated(unsigned Index) const { 94809467b48Spatrick return PSInputAddr & (1 << Index); 94909467b48Spatrick } 95009467b48Spatrick 95109467b48Spatrick void markPSInputAllocated(unsigned Index) { 95209467b48Spatrick PSInputAddr |= 1 << Index; 95309467b48Spatrick } 95409467b48Spatrick 95509467b48Spatrick void markPSInputEnabled(unsigned Index) { 95609467b48Spatrick PSInputEnable |= 1 << Index; 95709467b48Spatrick } 95809467b48Spatrick 95909467b48Spatrick bool returnsVoid() const { 96009467b48Spatrick return ReturnsVoid; 96109467b48Spatrick } 96209467b48Spatrick 96309467b48Spatrick void setIfReturnsVoid(bool Value) { 96409467b48Spatrick ReturnsVoid = Value; 96509467b48Spatrick } 96609467b48Spatrick 96709467b48Spatrick /// \returns A pair of default/requested minimum/maximum flat work group sizes 96809467b48Spatrick /// for this function. 96909467b48Spatrick std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const { 97009467b48Spatrick return FlatWorkGroupSizes; 97109467b48Spatrick } 97209467b48Spatrick 97309467b48Spatrick /// \returns Default/requested minimum flat work group size for this function. 97409467b48Spatrick unsigned getMinFlatWorkGroupSize() const { 97509467b48Spatrick return FlatWorkGroupSizes.first; 97609467b48Spatrick } 97709467b48Spatrick 97809467b48Spatrick /// \returns Default/requested maximum flat work group size for this function. 97909467b48Spatrick unsigned getMaxFlatWorkGroupSize() const { 98009467b48Spatrick return FlatWorkGroupSizes.second; 98109467b48Spatrick } 98209467b48Spatrick 98309467b48Spatrick /// \returns A pair of default/requested minimum/maximum number of waves per 98409467b48Spatrick /// execution unit. 98509467b48Spatrick std::pair<unsigned, unsigned> getWavesPerEU() const { 98609467b48Spatrick return WavesPerEU; 98709467b48Spatrick } 98809467b48Spatrick 98909467b48Spatrick /// \returns Default/requested minimum number of waves per execution unit. 99009467b48Spatrick unsigned getMinWavesPerEU() const { 99109467b48Spatrick return WavesPerEU.first; 99209467b48Spatrick } 99309467b48Spatrick 99409467b48Spatrick /// \returns Default/requested maximum number of waves per execution unit. 99509467b48Spatrick unsigned getMaxWavesPerEU() const { 99609467b48Spatrick return WavesPerEU.second; 99709467b48Spatrick } 99809467b48Spatrick 99909467b48Spatrick /// \returns SGPR used for \p Dim's work group ID. 1000097a140dSpatrick Register getWorkGroupIDSGPR(unsigned Dim) const { 100109467b48Spatrick switch (Dim) { 100209467b48Spatrick case 0: 100309467b48Spatrick assert(hasWorkGroupIDX()); 100409467b48Spatrick return ArgInfo.WorkGroupIDX.getRegister(); 100509467b48Spatrick case 1: 100609467b48Spatrick assert(hasWorkGroupIDY()); 100709467b48Spatrick return ArgInfo.WorkGroupIDY.getRegister(); 100809467b48Spatrick case 2: 100909467b48Spatrick assert(hasWorkGroupIDZ()); 101009467b48Spatrick return ArgInfo.WorkGroupIDZ.getRegister(); 101109467b48Spatrick } 101209467b48Spatrick llvm_unreachable("unexpected dimension"); 101309467b48Spatrick } 101409467b48Spatrick 1015*d415bd75Srobert const AMDGPUGWSResourcePseudoSourceValue * 1016*d415bd75Srobert getGWSPSV(const AMDGPUTargetMachine &TM) { 1017*d415bd75Srobert return &GWSResourcePSV; 101809467b48Spatrick } 101909467b48Spatrick 102009467b48Spatrick unsigned getOccupancy() const { 102109467b48Spatrick return Occupancy; 102209467b48Spatrick } 102309467b48Spatrick 102409467b48Spatrick unsigned getMinAllowedOccupancy() const { 102509467b48Spatrick if (!isMemoryBound() && !needsWaveLimiter()) 102609467b48Spatrick return Occupancy; 102709467b48Spatrick return (Occupancy < 4) ? Occupancy : 4; 102809467b48Spatrick } 102909467b48Spatrick 103009467b48Spatrick void limitOccupancy(const MachineFunction &MF); 103109467b48Spatrick 103209467b48Spatrick void limitOccupancy(unsigned Limit) { 103309467b48Spatrick if (Occupancy > Limit) 103409467b48Spatrick Occupancy = Limit; 103509467b48Spatrick } 103609467b48Spatrick 103709467b48Spatrick void increaseOccupancy(const MachineFunction &MF, unsigned Limit) { 103809467b48Spatrick if (Occupancy < Limit) 103909467b48Spatrick Occupancy = Limit; 104009467b48Spatrick limitOccupancy(MF); 104109467b48Spatrick } 1042*d415bd75Srobert 1043*d415bd75Srobert bool mayNeedAGPRs() const { 1044*d415bd75Srobert return MayNeedAGPRs; 1045*d415bd75Srobert } 1046*d415bd75Srobert 1047*d415bd75Srobert // \returns true if a function has a use of AGPRs via inline asm or 1048*d415bd75Srobert // has a call which may use it. 1049*d415bd75Srobert bool mayUseAGPRs(const Function &F) const; 1050*d415bd75Srobert 1051*d415bd75Srobert // \returns true if a function needs or may need AGPRs. 1052*d415bd75Srobert bool usesAGPRs(const MachineFunction &MF) const; 105309467b48Spatrick }; 105409467b48Spatrick 105509467b48Spatrick } // end namespace llvm 105609467b48Spatrick 105709467b48Spatrick #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 1058