xref: /llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp (revision be187369a03bf2df8bdbc76ecd381377b3bb6074)
//===- AMDGPUResourceUsageAnalysis.cpp ---- analysis of resources ---------===//
22b08f6afSSebastian Neubauer //
32b08f6afSSebastian Neubauer // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42b08f6afSSebastian Neubauer // See https://llvm.org/LICENSE.txt for license information.
52b08f6afSSebastian Neubauer // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
62b08f6afSSebastian Neubauer //
72b08f6afSSebastian Neubauer //===----------------------------------------------------------------------===//
82b08f6afSSebastian Neubauer //
92b08f6afSSebastian Neubauer /// \file
102b08f6afSSebastian Neubauer /// \brief Analyzes how many registers and other resources are used by
112b08f6afSSebastian Neubauer /// functions.
122b08f6afSSebastian Neubauer ///
132b08f6afSSebastian Neubauer /// The results of this analysis are used to fill the register usage, flat
142b08f6afSSebastian Neubauer /// usage, etc. into hardware registers.
152b08f6afSSebastian Neubauer ///
162b08f6afSSebastian Neubauer //===----------------------------------------------------------------------===//
172b08f6afSSebastian Neubauer 
182b08f6afSSebastian Neubauer #include "AMDGPUResourceUsageAnalysis.h"
192b08f6afSSebastian Neubauer #include "AMDGPU.h"
202b08f6afSSebastian Neubauer #include "GCNSubtarget.h"
212b08f6afSSebastian Neubauer #include "SIMachineFunctionInfo.h"
22989f1c72Sserge-sans-paille #include "llvm/CodeGen/MachineFrameInfo.h"
23c897c13dSJanek van Oirschot #include "llvm/CodeGen/MachineModuleInfo.h"
242b08f6afSSebastian Neubauer #include "llvm/CodeGen/TargetPassConfig.h"
250567f033SAnshil Gandhi #include "llvm/IR/GlobalValue.h"
262b08f6afSSebastian Neubauer #include "llvm/Target/TargetMachine.h"
272b08f6afSSebastian Neubauer 
282b08f6afSSebastian Neubauer using namespace llvm;
292b08f6afSSebastian Neubauer using namespace llvm::AMDGPU;
302b08f6afSSebastian Neubauer 
312b08f6afSSebastian Neubauer #define DEBUG_TYPE "amdgpu-resource-usage"
322b08f6afSSebastian Neubauer 
// Pass identification: the address of ID serves as the unique pass token for
// the legacy pass manager.
char llvm::AMDGPUResourceUsageAnalysis::ID = 0;
// Externally visible handle other code uses to request/refer to this analysis.
char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;
352b08f6afSSebastian Neubauer 
// In code object v4 and older, we need to tell the runtime some amount ahead of
// time if we don't know the true stack size. Assume a smaller number if this is
// only due to dynamic / non-entry block allocas.
// Pessimistic per-call stack assumption (bytes) charged for any call whose
// callee's stack usage is unknown (external or indirect).
static cl::opt<uint32_t> clAssumedStackSizeForExternalCall(
    "amdgpu-assume-external-call-stack-size",
    cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
    cl::init(16384));

// Extra stack (bytes) assumed when the frame contains variable-sized objects,
// whose true size cannot be determined at compile time.
static cl::opt<uint32_t> clAssumedStackSizeForDynamicSizeObjects(
    "amdgpu-assume-dynamic-stack-object-size",
    cl::desc("Assumed extra stack use if there are any "
             "variable sized objects (in bytes)"),
    cl::Hidden, cl::init(4096));
492b08f6afSSebastian Neubauer 
// Register this pass with the legacy pass manager under DEBUG_TYPE
// ("amdgpu-resource-usage").
INITIALIZE_PASS(AMDGPUResourceUsageAnalysis, DEBUG_TYPE,
                "Function register usage analysis", true, true)
522b08f6afSSebastian Neubauer 
532b08f6afSSebastian Neubauer static const Function *getCalleeFunction(const MachineOperand &Op) {
542b08f6afSSebastian Neubauer   if (Op.isImm()) {
552b08f6afSSebastian Neubauer     assert(Op.getImm() == 0);
562b08f6afSSebastian Neubauer     return nullptr;
572b08f6afSSebastian Neubauer   }
58351a4b27SJoseph Huber   return cast<Function>(Op.getGlobal()->stripPointerCastsAndAliases());
592b08f6afSSebastian Neubauer }
602b08f6afSSebastian Neubauer 
612b08f6afSSebastian Neubauer static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
622b08f6afSSebastian Neubauer                                   const SIInstrInfo &TII, unsigned Reg) {
632b08f6afSSebastian Neubauer   for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
642b08f6afSSebastian Neubauer     if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
652b08f6afSSebastian Neubauer       return true;
662b08f6afSSebastian Neubauer   }
672b08f6afSSebastian Neubauer 
682b08f6afSSebastian Neubauer   return false;
692b08f6afSSebastian Neubauer }
702b08f6afSSebastian Neubauer 
71c897c13dSJanek van Oirschot bool AMDGPUResourceUsageAnalysis::runOnMachineFunction(MachineFunction &MF) {
722b08f6afSSebastian Neubauer   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
732b08f6afSSebastian Neubauer   if (!TPC)
742b08f6afSSebastian Neubauer     return false;
752b08f6afSSebastian Neubauer 
764622afa9SMatt Arsenault   const TargetMachine &TM = TPC->getTM<TargetMachine>();
773604fdf1SBaptiste   const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();
78814a0abcSJacob Weightman 
793759398bSAbinav Puthan Purayil   // By default, for code object v5 and later, track only the minimum scratch
803759398bSAbinav Puthan Purayil   // size
81df5e431eSMaheshRavishankar   uint32_t AssumedStackSizeForDynamicSizeObjects =
82df5e431eSMaheshRavishankar       clAssumedStackSizeForDynamicSizeObjects;
83df5e431eSMaheshRavishankar   uint32_t AssumedStackSizeForExternalCall = clAssumedStackSizeForExternalCall;
84c897c13dSJanek van Oirschot   if (AMDGPU::getAMDHSACodeObjectVersion(*MF.getFunction().getParent()) >=
85c897c13dSJanek van Oirschot           AMDGPU::AMDHSA_COV5 ||
863604fdf1SBaptiste       STI.getTargetTriple().getOS() == Triple::AMDPAL) {
87c897c13dSJanek van Oirschot     if (!clAssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
883759398bSAbinav Puthan Purayil       AssumedStackSizeForDynamicSizeObjects = 0;
89c897c13dSJanek van Oirschot     if (!clAssumedStackSizeForExternalCall.getNumOccurrences())
903759398bSAbinav Puthan Purayil       AssumedStackSizeForExternalCall = 0;
913759398bSAbinav Puthan Purayil   }
923759398bSAbinav Puthan Purayil 
93c897c13dSJanek van Oirschot   ResourceInfo = analyzeResourceUsage(MF, AssumedStackSizeForDynamicSizeObjects,
94df5e431eSMaheshRavishankar                                       AssumedStackSizeForExternalCall);
954622afa9SMatt Arsenault 
962b08f6afSSebastian Neubauer   return false;
972b08f6afSSebastian Neubauer }
982b08f6afSSebastian Neubauer 
// Compute register counts, private-segment (scratch) size, and flag-style
// resource usage (VCC, flat scratch, recursion, indirect calls) for \p MF.
//
// \p AssumedStackSizeForDynamicSizeObjects and
// \p AssumedStackSizeForExternalCall are the pessimistic byte amounts charged
// when the true stack requirement cannot be known at compile time
// (variable-sized objects, calls with unknown callees).
AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
    const MachineFunction &MF, uint32_t AssumedStackSizeForDynamicSizeObjects,
    uint32_t AssumedStackSizeForExternalCall) const {
  SIFunctionResourceInfo Info;

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  // Flat scratch counts as used if either half of FLAT_SCR is referenced, or
  // if the preloaded flat-scratch-init register is live into the function.
  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
                         MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
                         MRI.isLiveIn(MFI->getPreloadedReg(
                             AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT));

  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
  // instructions aren't used to access the scratch buffer. Inline assembly may
  // need it though.
  //
  // If we only have implicit uses of flat_scr on flat instructions, it is not
  // really needed.
  if (Info.UsesFlatScratch && !MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
    Info.UsesFlatScratch = false;
  }

  Info.PrivateSegmentSize = FrameInfo.getStackSize();

  // Assume a big number if there are any unknown sized objects.
  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
  if (Info.HasDynamicallySizedStack)
    Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;

  // Realignment may consume up to an extra MaxAlign bytes of scratch.
  if (MFI->isStackRealigned())
    Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();

  Info.UsesVCC =
      MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);

  // If there are no calls, MachineRegisterInfo can tell us the used register
  // count easily.
  // A tail call isn't considered a call for MachineFrameInfo's purposes.
  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
    Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass);
    Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass);
    if (ST.hasMAIInsts())
      Info.NumAGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass);
    return Info;
  }

  // Slow path: walk every operand of every instruction and track the highest
  // hardware register index touched in each of the three register files
  // (SGPR, VGPR, AGPR). -1 means "none used yet".
  int32_t MaxVGPR = -1;
  int32_t MaxAGPR = -1;
  int32_t MaxSGPR = -1;
  Info.CalleeSegmentSize = 0;

  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      // TODO: Check regmasks? Do they occur anywhere except calls?
      for (const MachineOperand &MO : MI.operands()) {
        // Width is the operand's size in units of 32-bit registers.
        unsigned Width = 0;
        bool IsSGPR = false;
        bool IsAGPR = false;

        if (!MO.isReg())
          continue;

        Register Reg = MO.getReg();
        // Filter out (or diagnose) special registers that do not occupy
        // allocatable SGPR/VGPR/AGPR slots before the register-class ladder.
        switch (Reg) {
        case AMDGPU::EXEC:
        case AMDGPU::EXEC_LO:
        case AMDGPU::EXEC_HI:
        case AMDGPU::SCC:
        case AMDGPU::M0:
        case AMDGPU::M0_LO16:
        case AMDGPU::M0_HI16:
        case AMDGPU::SRC_SHARED_BASE_LO:
        case AMDGPU::SRC_SHARED_BASE:
        case AMDGPU::SRC_SHARED_LIMIT_LO:
        case AMDGPU::SRC_SHARED_LIMIT:
        case AMDGPU::SRC_PRIVATE_BASE_LO:
        case AMDGPU::SRC_PRIVATE_BASE:
        case AMDGPU::SRC_PRIVATE_LIMIT_LO:
        case AMDGPU::SRC_PRIVATE_LIMIT:
        case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
        case AMDGPU::SGPR_NULL:
        case AMDGPU::SGPR_NULL64:
        case AMDGPU::MODE:
          // Hardware-special registers: never counted toward usage.
          continue;

        case AMDGPU::NoRegister:
          assert(MI.isDebugInstr() &&
                 "Instruction uses invalid noreg register");
          continue;

        case AMDGPU::VCC:
        case AMDGPU::VCC_LO:
        case AMDGPU::VCC_HI:
        case AMDGPU::VCC_LO_LO16:
        case AMDGPU::VCC_LO_HI16:
        case AMDGPU::VCC_HI_LO16:
        case AMDGPU::VCC_HI_HI16:
          // VCC is tracked as a separate flag, not as an SGPR index.
          Info.UsesVCC = true;
          continue;

        case AMDGPU::FLAT_SCR:
        case AMDGPU::FLAT_SCR_LO:
        case AMDGPU::FLAT_SCR_HI:
          // Flat-scratch usage was already determined above.
          continue;

        case AMDGPU::XNACK_MASK:
        case AMDGPU::XNACK_MASK_LO:
        case AMDGPU::XNACK_MASK_HI:
          llvm_unreachable("xnack_mask registers should not be used");

        case AMDGPU::LDS_DIRECT:
          llvm_unreachable("lds_direct register should not be used");

        case AMDGPU::TBA:
        case AMDGPU::TBA_LO:
        case AMDGPU::TBA_HI:
        case AMDGPU::TMA:
        case AMDGPU::TMA_LO:
        case AMDGPU::TMA_HI:
          llvm_unreachable("trap handler registers should not be used");

        case AMDGPU::SRC_VCCZ:
          llvm_unreachable("src_vccz register should not be used");

        case AMDGPU::SRC_EXECZ:
          llvm_unreachable("src_execz register should not be used");

        case AMDGPU::SRC_SCC:
          llvm_unreachable("src_scc register should not be used");

        default:
          break;
        }

        // Classify the register by file (SGPR / VGPR / AGPR) and derive its
        // width in 32-bit units from the containing register class.
        if (AMDGPU::SGPR_32RegClass.contains(Reg) ||
            AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
            AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 1;
        } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
                   AMDGPU::VGPR_16RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 1;
        } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
                   AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 1;
        } else if (AMDGPU::SGPR_64RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 2;
        } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 3;
        } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 3;
        } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 3;
        } else if (AMDGPU::SGPR_128RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 4;
        } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 5;
        } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 5;
        } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 5;
        } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 6;
        } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 6;
        } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 6;
        } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 7;
        } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 7;
        } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 7;
        } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 8;
        } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 8;
        } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 8;
        } else if (AMDGPU::VReg_288RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 9;
        } else if (AMDGPU::SReg_288RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 9;
        } else if (AMDGPU::AReg_288RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 9;
        } else if (AMDGPU::VReg_320RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 10;
        } else if (AMDGPU::SReg_320RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 10;
        } else if (AMDGPU::AReg_320RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 10;
        } else if (AMDGPU::VReg_352RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 11;
        } else if (AMDGPU::SReg_352RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 11;
        } else if (AMDGPU::AReg_352RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 11;
        } else if (AMDGPU::VReg_384RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 12;
        } else if (AMDGPU::SReg_384RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 12;
        } else if (AMDGPU::AReg_384RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 12;
        } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 16;
        } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 16;
        } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 16;
        } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 32;
        } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 32;
        } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 32;
        } else {
          // We only expect TTMP registers or registers that do not belong to
          // any RC.
          assert((AMDGPU::TTMP_32RegClass.contains(Reg) ||
                  AMDGPU::TTMP_64RegClass.contains(Reg) ||
                  AMDGPU::TTMP_128RegClass.contains(Reg) ||
                  AMDGPU::TTMP_256RegClass.contains(Reg) ||
                  AMDGPU::TTMP_512RegClass.contains(Reg) ||
                  !TRI.getPhysRegBaseClass(Reg)) &&
                 "Unknown register class");
        }
        // Highest 32-bit lane index covered by this operand; extend the
        // per-file maximum accordingly.
        unsigned HWReg = TRI.getHWRegIndex(Reg);
        int MaxUsed = HWReg + Width - 1;
        if (IsSGPR) {
          MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
        } else if (IsAGPR) {
          MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
        } else {
          MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
        }
      }

      if (MI.isCall()) {
        // Pseudo used just to encode the underlying global. Is there a better
        // way to track this?

        const MachineOperand *CalleeOp =
            TII->getNamedOperand(MI, AMDGPU::OpName::callee);

        const Function *Callee = getCalleeFunction(*CalleeOp);

        // Avoid crashing on undefined behavior with an illegal call to a
        // kernel. If a callsite's calling convention doesn't match the
        // function's, it's undefined behavior. If the callsite calling
        // convention does match, that would have errored earlier.
        if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
          report_fatal_error("invalid call to entry function");

        // Record direct callees (self-calls excluded) for later propagation
        // of their resource usage.
        auto isSameFunction = [](const MachineFunction &MF, const Function *F) {
          return F == &MF.getFunction();
        };

        if (Callee && !isSameFunction(MF, Callee))
          Info.Callees.push_back(Callee);

        // Declarations (no body available) are treated like indirect calls.
        bool IsIndirect = !Callee || Callee->isDeclaration();

        // FIXME: Call site could have norecurse on it
        if (!Callee || !Callee->doesNotRecurse()) {
          Info.HasRecursion = true;

          // TODO: If we happen to know there is no stack usage in the
          // callgraph, we don't need to assume an infinitely growing stack.
          if (!MI.isReturn()) {
            // We don't need to assume an unknown stack size for tail calls.

            // FIXME: This only benefits in the case where the kernel does not
            // directly call the tail called function. If a kernel directly
            // calls a tail recursive function, we'll assume maximum stack size
            // based on the regular call instruction.
            Info.CalleeSegmentSize = std::max(
                Info.CalleeSegmentSize,
                static_cast<uint64_t>(AssumedStackSizeForExternalCall));
          }
        }

        if (IsIndirect) {
          Info.CalleeSegmentSize =
              std::max(Info.CalleeSegmentSize,
                       static_cast<uint64_t>(AssumedStackSizeForExternalCall));

          // Register usage of indirect calls gets handled later
          Info.UsesVCC = true;
          Info.UsesFlatScratch = ST.hasFlatAddressSpace();
          Info.HasDynamicallySizedStack = true;
          Info.HasIndirectCall = true;
        }
      }
    }
  }

  // Convert highest-used indices (-1 if none) into counts.
  Info.NumExplicitSGPR = MaxSGPR + 1;
  Info.NumVGPR = MaxVGPR + 1;
  Info.NumAGPR = MaxAGPR + 1;

  return Info;
}
473