//===- AMDGPUResourceUsageAnalysis.cpp ---- analysis of resources ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Analyzes how many registers and other resources are used by
/// functions.
///
/// The results of this analysis are used to fill the register usage, flat
/// usage, etc. into hardware registers.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUResourceUsageAnalysis.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "amdgpu-resource-usage"

char llvm::AMDGPUResourceUsageAnalysis::ID = 0;
char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;

// In code object v4 and older, we need to tell the runtime some amount of
// stack ahead of time if we don't know the true stack size. Assume a smaller
// number if this is only due to dynamic / non-entry block allocas.
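//
// For illustration (not part of this pass's logic): both assumptions are
// exposed as command-line flags below, so they can be overridden, e.g.:
//
//   llc -mtriple=amdgcn-amd-amdhsa \
//       -amdgpu-assume-external-call-stack-size=8192 \
//       -amdgpu-assume-dynamic-stack-object-size=2048 kernel.ll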
static cl::opt<uint32_t> clAssumedStackSizeForExternalCall(
    "amdgpu-assume-external-call-stack-size",
    cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
    cl::init(16384));

static cl::opt<uint32_t> clAssumedStackSizeForDynamicSizeObjects(
    "amdgpu-assume-dynamic-stack-object-size",
    cl::desc("Assumed extra stack use if there are any "
             "variable sized objects (in bytes)"),
    cl::Hidden, cl::init(4096));

INITIALIZE_PASS(AMDGPUResourceUsageAnalysis, DEBUG_TYPE,
                "Function register usage analysis", true, true)

static const Function *getCalleeFunction(const MachineOperand &Op) {
  if (Op.isImm()) {
    assert(Op.getImm() == 0);
    return nullptr;
  }
  return cast<Function>(Op.getGlobal()->stripPointerCastsAndAliases());
}

static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
                                  const SIInstrInfo &TII, unsigned Reg) {
  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
    if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
      return true;
  }

  return false;
}

bool AMDGPUResourceUsageAnalysis::runOnMachineFunction(MachineFunction &MF) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  const TargetMachine &TM = TPC->getTM<TargetMachine>();
  const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();

  // By default, for code object v5 and later (and for AMDPAL), track only the
  // minimum scratch size.
  uint32_t AssumedStackSizeForDynamicSizeObjects =
      clAssumedStackSizeForDynamicSizeObjects;
  uint32_t AssumedStackSizeForExternalCall = clAssumedStackSizeForExternalCall;
  if (AMDGPU::getAMDHSACodeObjectVersion(*MF.getFunction().getParent()) >=
          AMDGPU::AMDHSA_COV5 ||
      STI.getTargetTriple().getOS() == Triple::AMDPAL) {
    if (!clAssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
      AssumedStackSizeForDynamicSizeObjects = 0;
    if (!clAssumedStackSizeForExternalCall.getNumOccurrences())
      AssumedStackSizeForExternalCall = 0;
  }
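
  // Net effect (illustrative): on COV5+ or AMDPAL, with no explicit flag
  // overrides, both assumed sizes become 0, so only stack usage that is
  // actually known is reported.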

  ResourceInfo =
      analyzeResourceUsage(MF, AssumedStackSizeForDynamicSizeObjects,
                           AssumedStackSizeForExternalCall);

  return false;
}

AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
    const MachineFunction &MF, uint32_t AssumedStackSizeForDynamicSizeObjects,
    uint32_t AssumedStackSizeForExternalCall) const {
  SIFunctionResourceInfo Info;

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
                         MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
                         MRI.isLiveIn(MFI->getPreloadedReg(
                             AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT));

  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
  // instructions aren't used to access the scratch buffer. Inline assembly
  // may need it, though.
  //
  // If we only have implicit uses of flat_scr on flat instructions, it is not
  // really needed.
  if (Info.UsesFlatScratch && !MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
    Info.UsesFlatScratch = false;
  }

  Info.PrivateSegmentSize = FrameInfo.getStackSize();

  // Assume a big number if there are any unknown sized objects.
  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
  if (Info.HasDynamicallySizedStack)
    Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;

  if (MFI->isStackRealigned())
    Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
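
  // Worked example (illustrative): a function with a 64-byte static frame
  // that also contains a dynamic alloca reports
  //   64 + AssumedStackSizeForDynamicSizeObjects
  // bytes here, plus the maximum alignment if the stack had to be realigned.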

  Info.UsesVCC =
      MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);

  // If there are no calls, MachineRegisterInfo can tell us the used register
  // count easily.
  // A tail call isn't considered a call for MachineFrameInfo's purposes.
  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
    Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass);
    Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass);
    if (ST.hasMAIInsts())
      Info.NumAGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass);
    return Info;
  }

  int32_t MaxVGPR = -1;
  int32_t MaxAGPR = -1;
  int32_t MaxSGPR = -1;
  Info.CalleeSegmentSize = 0;

  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      // TODO: Check regmasks? Do they occur anywhere except calls?
      for (const MachineOperand &MO : MI.operands()) {
        unsigned Width = 0;
        bool IsSGPR = false;
        bool IsAGPR = false;

        if (!MO.isReg())
          continue;

        Register Reg = MO.getReg();
        switch (Reg) {
        case AMDGPU::EXEC:
        case AMDGPU::EXEC_LO:
        case AMDGPU::EXEC_HI:
        case AMDGPU::SCC:
        case AMDGPU::M0:
        case AMDGPU::M0_LO16:
        case AMDGPU::M0_HI16:
        case AMDGPU::SRC_SHARED_BASE_LO:
        case AMDGPU::SRC_SHARED_BASE:
        case AMDGPU::SRC_SHARED_LIMIT_LO:
        case AMDGPU::SRC_SHARED_LIMIT:
        case AMDGPU::SRC_PRIVATE_BASE_LO:
        case AMDGPU::SRC_PRIVATE_BASE:
        case AMDGPU::SRC_PRIVATE_LIMIT_LO:
        case AMDGPU::SRC_PRIVATE_LIMIT:
        case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
        case AMDGPU::SGPR_NULL:
        case AMDGPU::SGPR_NULL64:
        case AMDGPU::MODE:
          continue;

        case AMDGPU::NoRegister:
          assert(MI.isDebugInstr() &&
                 "Instruction uses invalid noreg register");
          continue;

        case AMDGPU::VCC:
        case AMDGPU::VCC_LO:
        case AMDGPU::VCC_HI:
        case AMDGPU::VCC_LO_LO16:
        case AMDGPU::VCC_LO_HI16:
        case AMDGPU::VCC_HI_LO16:
        case AMDGPU::VCC_HI_HI16:
          Info.UsesVCC = true;
          continue;
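
        // flat_scr usage is tracked separately via Info.UsesFlatScratch
        // (computed above), so it is skipped in the register counts here.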
        case AMDGPU::FLAT_SCR:
        case AMDGPU::FLAT_SCR_LO:
        case AMDGPU::FLAT_SCR_HI:
          continue;

        case AMDGPU::XNACK_MASK:
        case AMDGPU::XNACK_MASK_LO:
        case AMDGPU::XNACK_MASK_HI:
          llvm_unreachable("xnack_mask registers should not be used");

        case AMDGPU::LDS_DIRECT:
          llvm_unreachable("lds_direct register should not be used");

        case AMDGPU::TBA:
        case AMDGPU::TBA_LO:
        case AMDGPU::TBA_HI:
        case AMDGPU::TMA:
        case AMDGPU::TMA_LO:
        case AMDGPU::TMA_HI:
          llvm_unreachable("trap handler registers should not be used");

        case AMDGPU::SRC_VCCZ:
          llvm_unreachable("src_vccz register should not be used");

        case AMDGPU::SRC_EXECZ:
          llvm_unreachable("src_execz register should not be used");

        case AMDGPU::SRC_SCC:
          llvm_unreachable("src_scc register should not be used");

        default:
          break;
        }
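
        // Classify everything else by register class: is the register an
        // SGPR, VGPR or AGPR, and how many consecutive 32-bit hardware
        // registers does it occupy? E.g. (illustrative) a use of v[4:5] is
        // in VReg_64, so Width = 2 and hardware registers 4 and 5 count as
        // used.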
        if (AMDGPU::SGPR_32RegClass.contains(Reg) ||
            AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
            AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 1;
        } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
                   AMDGPU::VGPR_16RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 1;
        } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
                   AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 1;
        } else if (AMDGPU::SGPR_64RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 2;
        } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 2;
        } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 3;
        } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 3;
        } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 3;
        } else if (AMDGPU::SGPR_128RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 4;
        } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 4;
        } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 5;
        } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 5;
        } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 5;
        } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 6;
        } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 6;
        } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 6;
        } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 7;
        } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 7;
        } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 7;
        } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 8;
        } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 8;
        } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 8;
        } else if (AMDGPU::VReg_288RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 9;
        } else if (AMDGPU::SReg_288RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 9;
        } else if (AMDGPU::AReg_288RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 9;
        } else if (AMDGPU::VReg_320RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 10;
        } else if (AMDGPU::SReg_320RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 10;
        } else if (AMDGPU::AReg_320RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 10;
        } else if (AMDGPU::VReg_352RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 11;
        } else if (AMDGPU::SReg_352RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 11;
        } else if (AMDGPU::AReg_352RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 11;
        } else if (AMDGPU::VReg_384RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 12;
        } else if (AMDGPU::SReg_384RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 12;
        } else if (AMDGPU::AReg_384RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 12;
        } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 16;
        } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 16;
        } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 16;
        } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
          IsSGPR = true;
          Width = 32;
        } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          Width = 32;
        } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
          IsSGPR = false;
          IsAGPR = true;
          Width = 32;
        } else {
          // We only expect TTMP registers or registers that do not belong to
          // any RC.
          assert((AMDGPU::TTMP_32RegClass.contains(Reg) ||
                  AMDGPU::TTMP_64RegClass.contains(Reg) ||
                  AMDGPU::TTMP_128RegClass.contains(Reg) ||
                  AMDGPU::TTMP_256RegClass.contains(Reg) ||
                  AMDGPU::TTMP_512RegClass.contains(Reg) ||
                  !TRI.getPhysRegBaseClass(Reg)) &&
                 "Unknown register class");
        }
        unsigned HWReg = TRI.getHWRegIndex(Reg);
        int MaxUsed = HWReg + Width - 1;
        if (IsSGPR) {
          MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
        } else if (IsAGPR) {
          MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
        } else {
          MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
        }
      }
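
      // Call sites need extra bookkeeping: known callees are recorded so
      // their usage can be folded in later, while unknown or indirect
      // callees force conservative assumptions.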
      if (MI.isCall()) {
        // Pseudo used just to encode the underlying global. Is there a
        // better way to track this?
        const MachineOperand *CalleeOp =
            TII->getNamedOperand(MI, AMDGPU::OpName::callee);

        const Function *Callee = getCalleeFunction(*CalleeOp);

        // Avoid crashing on undefined behavior with an illegal call to a
        // kernel. If a callsite's calling convention doesn't match the
        // function's, it's undefined behavior. If the callsite calling
        // convention does match, that would have errored earlier.
        if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
          report_fatal_error("invalid call to entry function");

        auto isSameFunction = [](const MachineFunction &MF, const Function *F) {
          return F == &MF.getFunction();
        };

        if (Callee && !isSameFunction(MF, Callee))
          Info.Callees.push_back(Callee);

        bool IsIndirect = !Callee || Callee->isDeclaration();

        // FIXME: The call site could have a norecurse attribute on it.
        if (!Callee || !Callee->doesNotRecurse()) {
          Info.HasRecursion = true;

          // TODO: If we happen to know there is no stack usage in the
          // callgraph, we don't need to assume an infinitely growing stack.
          if (!MI.isReturn()) {
            // We don't need to assume an unknown stack size for tail calls.

            // FIXME: This only helps when the kernel does not directly call
            // the tail-called function. If a kernel directly calls a
            // tail-recursive function, we'll still assume the maximum stack
            // size based on the regular call instruction.
            Info.CalleeSegmentSize = std::max(
                Info.CalleeSegmentSize,
                static_cast<uint64_t>(AssumedStackSizeForExternalCall));
          }
        }

        if (IsIndirect) {
          Info.CalleeSegmentSize =
              std::max(Info.CalleeSegmentSize,
                       static_cast<uint64_t>(AssumedStackSizeForExternalCall));

          // Register usage of indirect calls gets handled later.
          Info.UsesVCC = true;
          Info.UsesFlatScratch = ST.hasFlatAddressSpace();
          Info.HasDynamicallySizedStack = true;
          Info.HasIndirectCall = true;
        }
      }
    }
  }

  Info.NumExplicitSGPR = MaxSGPR + 1;
  Info.NumVGPR = MaxVGPR + 1;
  Info.NumAGPR = MaxAGPR + 1;

  return Info;
}
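
// Illustrative usage sketch (not code from this file; the in-tree consumer is
// the AMDGPU AsmPrinter). A legacy-PM machine pass would request the analysis
// roughly like this; the accessor on the result object is an assumption here:
//
//   void MyConsumerPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<AMDGPUResourceUsageAnalysis>();
//     AU.setPreservesAll();
//   }
//
//   bool MyConsumerPass::runOnMachineFunction(MachineFunction &MF) {
//     auto &RUA = getAnalysis<AMDGPUResourceUsageAnalysis>();
//     // Hypothetical accessor: read the SIFunctionResourceInfo computed by
//     // analyzeResourceUsage() above, e.g. RUA.getResourceInfo().
//     return false;
//   }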