xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===- AMDGPUResourceUsageAnalysis.cpp --- analysis of resources ----------===//
2fe6060f1SDimitry Andric //
3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe6060f1SDimitry Andric //
7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8fe6060f1SDimitry Andric //
9fe6060f1SDimitry Andric /// \file
10fe6060f1SDimitry Andric /// \brief Analyzes how many registers and other resources are used by
11fe6060f1SDimitry Andric /// functions.
12fe6060f1SDimitry Andric ///
13fe6060f1SDimitry Andric /// The results of this analysis are used to fill the register usage, flat
14fe6060f1SDimitry Andric /// usage, etc. into hardware registers.
15fe6060f1SDimitry Andric ///
16fe6060f1SDimitry Andric /// The analysis takes callees into account. E.g. if a function A that needs 10
17fe6060f1SDimitry Andric /// VGPRs calls a function B that needs 20 VGPRs, querying the VGPR usage of A
18fe6060f1SDimitry Andric /// will return 20.
19fe6060f1SDimitry Andric /// It is assumed that an indirect call can go into any function except
20fe6060f1SDimitry Andric /// hardware-entrypoints. Therefore the register usage of functions with
21fe6060f1SDimitry Andric /// indirect calls is estimated as the maximum of all non-entrypoint functions
22fe6060f1SDimitry Andric /// in the module.
23fe6060f1SDimitry Andric ///
24fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
25fe6060f1SDimitry Andric 
26fe6060f1SDimitry Andric #include "AMDGPUResourceUsageAnalysis.h"
27fe6060f1SDimitry Andric #include "AMDGPU.h"
28fe6060f1SDimitry Andric #include "GCNSubtarget.h"
29fe6060f1SDimitry Andric #include "SIMachineFunctionInfo.h"
3081ad6265SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
31fe6060f1SDimitry Andric #include "llvm/Analysis/CallGraph.h"
3281ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
33fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
34349cc55cSDimitry Andric #include "llvm/IR/GlobalAlias.h"
35349cc55cSDimitry Andric #include "llvm/IR/GlobalValue.h"
36fe6060f1SDimitry Andric #include "llvm/Target/TargetMachine.h"
37fe6060f1SDimitry Andric 
38fe6060f1SDimitry Andric using namespace llvm;
39fe6060f1SDimitry Andric using namespace llvm::AMDGPU;
40fe6060f1SDimitry Andric 
41fe6060f1SDimitry Andric #define DEBUG_TYPE "amdgpu-resource-usage"
42fe6060f1SDimitry Andric 
// Unique address used by the legacy pass manager to identify this pass.
char llvm::AMDGPUResourceUsageAnalysis::ID = 0;
char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;

// In code object v4 and older, we need to tell the runtime some amount ahead of
// time if we don't know the true stack size. Assume a smaller number if this is
// only due to dynamic / non-entry block allocas.
static cl::opt<uint32_t> clAssumedStackSizeForExternalCall(
    "amdgpu-assume-external-call-stack-size",
    cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
    cl::init(16384));

// Extra scratch assumed when a frame contains variable-sized objects, since
// their true size is unknown at compile time.
static cl::opt<uint32_t> clAssumedStackSizeForDynamicSizeObjects(
    "amdgpu-assume-dynamic-stack-object-size",
    cl::desc("Assumed extra stack use if there are any "
             "variable sized objects (in bytes)"),
    cl::Hidden, cl::init(4096));

// Register the pass with the legacy pass manager (CFG-only = true,
// is-analysis = true).
INITIALIZE_PASS(AMDGPUResourceUsageAnalysis, DEBUG_TYPE,
                "Function register usage analysis", true, true)
62fe6060f1SDimitry Andric 
63fe6060f1SDimitry Andric static const Function *getCalleeFunction(const MachineOperand &Op) {
64fe6060f1SDimitry Andric   if (Op.isImm()) {
65fe6060f1SDimitry Andric     assert(Op.getImm() == 0);
66fe6060f1SDimitry Andric     return nullptr;
67fe6060f1SDimitry Andric   }
68*0fca6ea1SDimitry Andric   return cast<Function>(Op.getGlobal()->stripPointerCastsAndAliases());
69fe6060f1SDimitry Andric }
70fe6060f1SDimitry Andric 
71fe6060f1SDimitry Andric static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
72fe6060f1SDimitry Andric                                   const SIInstrInfo &TII, unsigned Reg) {
73fe6060f1SDimitry Andric   for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
74fe6060f1SDimitry Andric     if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
75fe6060f1SDimitry Andric       return true;
76fe6060f1SDimitry Andric   }
77fe6060f1SDimitry Andric 
78fe6060f1SDimitry Andric   return false;
79fe6060f1SDimitry Andric }
80fe6060f1SDimitry Andric 
81fe6060f1SDimitry Andric int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs(
82fe6060f1SDimitry Andric     const GCNSubtarget &ST) const {
83fe6060f1SDimitry Andric   return NumExplicitSGPR +
84fe6060f1SDimitry Andric          IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch,
85fe6060f1SDimitry Andric                                    ST.getTargetID().isXnackOnOrAny());
86fe6060f1SDimitry Andric }
87fe6060f1SDimitry Andric 
88fe6060f1SDimitry Andric int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
89349cc55cSDimitry Andric     const GCNSubtarget &ST, int32_t ArgNumAGPR, int32_t ArgNumVGPR) const {
9081ad6265SDimitry Andric   return AMDGPU::getTotalNumVGPRs(ST.hasGFX90AInsts(), ArgNumAGPR, ArgNumVGPR);
91349cc55cSDimitry Andric }
92349cc55cSDimitry Andric 
93349cc55cSDimitry Andric int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
94fe6060f1SDimitry Andric     const GCNSubtarget &ST) const {
95349cc55cSDimitry Andric   return getTotalNumVGPRs(ST, NumAGPR, NumVGPR);
96fe6060f1SDimitry Andric }
97fe6060f1SDimitry Andric 
bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
  // Without a TargetPassConfig we cannot reach the TargetMachine; nothing to
  // analyze in that case.
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  const TargetMachine &TM = TPC->getTM<TargetMachine>();
  const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();
  bool HasIndirectCall = false;

  CallGraph CG = CallGraph(M);
  auto End = po_end(&CG);

  // By default, for code object v5 and later, track only the minimum scratch
  // size
  // Command-line values are used as-is; they are only zeroed below when the
  // user did not pass the corresponding flag explicitly.
  uint32_t AssumedStackSizeForDynamicSizeObjects =
      clAssumedStackSizeForDynamicSizeObjects;
  uint32_t AssumedStackSizeForExternalCall = clAssumedStackSizeForExternalCall;
  if (AMDGPU::getAMDHSACodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 ||
      STI.getTargetTriple().getOS() == Triple::AMDPAL) {
    if (clAssumedStackSizeForDynamicSizeObjects.getNumOccurrences() == 0)
      AssumedStackSizeForDynamicSizeObjects = 0;
    if (clAssumedStackSizeForExternalCall.getNumOccurrences() == 0)
      AssumedStackSizeForExternalCall = 0;
  }

  // Post-order traversal of the call graph: callees are visited before their
  // callers, so a caller's analysis can rely on callee entries already being
  // present in CallGraphResourceInfo.
  for (auto IT = po_begin(&CG); IT != End; ++IT) {
    Function *F = IT->getFunction();
    if (!F || F->isDeclaration())
      continue;

    MachineFunction *MF = MMI.getMachineFunction(*F);
    assert(MF && "function must have been generated already");

    // Insert a default-constructed entry first, then fill it in; the insert
    // result doubles as a "visited exactly once" check.
    auto CI =
        CallGraphResourceInfo.insert(std::pair(F, SIFunctionResourceInfo()));
    SIFunctionResourceInfo &Info = CI.first->second;
    assert(CI.second && "should only be called once per function");
    Info = analyzeResourceUsage(*MF, TM, AssumedStackSizeForDynamicSizeObjects,
                                AssumedStackSizeForExternalCall);
    HasIndirectCall |= Info.HasIndirectCall;
  }

  // It's possible we have unreachable functions in the module which weren't
  // visited by the PO traversal. Make sure we have some resource counts to
  // report.
  for (const auto &IT : CG) {
    const Function *F = IT.first;
    if (!F || F->isDeclaration())
      continue;

    auto CI =
        CallGraphResourceInfo.insert(std::pair(F, SIFunctionResourceInfo()));
    if (!CI.second) // Skip already visited functions
      continue;

    SIFunctionResourceInfo &Info = CI.first->second;
    MachineFunction *MF = MMI.getMachineFunction(*F);
    assert(MF && "function must have been generated already");
    Info = analyzeResourceUsage(*MF, TM, AssumedStackSizeForDynamicSizeObjects,
                                AssumedStackSizeForExternalCall);
    HasIndirectCall |= Info.HasIndirectCall;
  }

  // If any analyzed function contained an indirect call, widen register usage
  // estimates accordingly (see file header: indirect calls may reach any
  // non-entry function).
  if (HasIndirectCall)
    propagateIndirectCallRegisterUsage();

  // This is an analysis; the module IR itself is never modified.
  return false;
}
167fe6060f1SDimitry Andric 
168fe6060f1SDimitry Andric AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
169fe6060f1SDimitry Andric AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
170*0fca6ea1SDimitry Andric     const MachineFunction &MF, const TargetMachine &TM,
171*0fca6ea1SDimitry Andric     uint32_t AssumedStackSizeForDynamicSizeObjects,
172*0fca6ea1SDimitry Andric     uint32_t AssumedStackSizeForExternalCall) const {
173fe6060f1SDimitry Andric   SIFunctionResourceInfo Info;
174fe6060f1SDimitry Andric 
175fe6060f1SDimitry Andric   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
176fe6060f1SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
177fe6060f1SDimitry Andric   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
178fe6060f1SDimitry Andric   const MachineRegisterInfo &MRI = MF.getRegInfo();
179fe6060f1SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
180fe6060f1SDimitry Andric   const SIRegisterInfo &TRI = TII->getRegisterInfo();
181fe6060f1SDimitry Andric 
182fe6060f1SDimitry Andric   Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
183fe6060f1SDimitry Andric                          MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
184fe6060f1SDimitry Andric                          MRI.isLiveIn(MFI->getPreloadedReg(
185fe6060f1SDimitry Andric                              AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT));
186fe6060f1SDimitry Andric 
187fe6060f1SDimitry Andric   // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
188fe6060f1SDimitry Andric   // instructions aren't used to access the scratch buffer. Inline assembly may
189fe6060f1SDimitry Andric   // need it though.
190fe6060f1SDimitry Andric   //
191fe6060f1SDimitry Andric   // If we only have implicit uses of flat_scr on flat instructions, it is not
192fe6060f1SDimitry Andric   // really needed.
1935f757f3fSDimitry Andric   if (Info.UsesFlatScratch && !MFI->getUserSGPRInfo().hasFlatScratchInit() &&
194fe6060f1SDimitry Andric       (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
195fe6060f1SDimitry Andric        !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
196fe6060f1SDimitry Andric        !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
197fe6060f1SDimitry Andric     Info.UsesFlatScratch = false;
198fe6060f1SDimitry Andric   }
199fe6060f1SDimitry Andric 
200fe6060f1SDimitry Andric   Info.PrivateSegmentSize = FrameInfo.getStackSize();
201fe6060f1SDimitry Andric 
202fe6060f1SDimitry Andric   // Assume a big number if there are any unknown sized objects.
203fe6060f1SDimitry Andric   Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
204fe6060f1SDimitry Andric   if (Info.HasDynamicallySizedStack)
205fe6060f1SDimitry Andric     Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;
206fe6060f1SDimitry Andric 
207fe6060f1SDimitry Andric   if (MFI->isStackRealigned())
208fe6060f1SDimitry Andric     Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
209fe6060f1SDimitry Andric 
210fe6060f1SDimitry Andric   Info.UsesVCC =
211fe6060f1SDimitry Andric       MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);
212fe6060f1SDimitry Andric 
213fe6060f1SDimitry Andric   // If there are no calls, MachineRegisterInfo can tell us the used register
214fe6060f1SDimitry Andric   // count easily.
215fe6060f1SDimitry Andric   // A tail call isn't considered a call for MachineFrameInfo's purposes.
216fe6060f1SDimitry Andric   if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
217fe6060f1SDimitry Andric     MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
218fe6060f1SDimitry Andric     for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
219fe6060f1SDimitry Andric       if (MRI.isPhysRegUsed(Reg)) {
220fe6060f1SDimitry Andric         HighestVGPRReg = Reg;
221fe6060f1SDimitry Andric         break;
222fe6060f1SDimitry Andric       }
223fe6060f1SDimitry Andric     }
224fe6060f1SDimitry Andric 
225fe6060f1SDimitry Andric     if (ST.hasMAIInsts()) {
226fe6060f1SDimitry Andric       MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
227fe6060f1SDimitry Andric       for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
228fe6060f1SDimitry Andric         if (MRI.isPhysRegUsed(Reg)) {
229fe6060f1SDimitry Andric           HighestAGPRReg = Reg;
230fe6060f1SDimitry Andric           break;
231fe6060f1SDimitry Andric         }
232fe6060f1SDimitry Andric       }
233fe6060f1SDimitry Andric       Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister
234fe6060f1SDimitry Andric                          ? 0
235fe6060f1SDimitry Andric                          : TRI.getHWRegIndex(HighestAGPRReg) + 1;
236fe6060f1SDimitry Andric     }
237fe6060f1SDimitry Andric 
238fe6060f1SDimitry Andric     MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
239fe6060f1SDimitry Andric     for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
240fe6060f1SDimitry Andric       if (MRI.isPhysRegUsed(Reg)) {
241fe6060f1SDimitry Andric         HighestSGPRReg = Reg;
242fe6060f1SDimitry Andric         break;
243fe6060f1SDimitry Andric       }
244fe6060f1SDimitry Andric     }
245fe6060f1SDimitry Andric 
246fe6060f1SDimitry Andric     // We found the maximum register index. They start at 0, so add one to get
247fe6060f1SDimitry Andric     // the number of registers.
248fe6060f1SDimitry Andric     Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister
249fe6060f1SDimitry Andric                        ? 0
250fe6060f1SDimitry Andric                        : TRI.getHWRegIndex(HighestVGPRReg) + 1;
251fe6060f1SDimitry Andric     Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister
252fe6060f1SDimitry Andric                                ? 0
253fe6060f1SDimitry Andric                                : TRI.getHWRegIndex(HighestSGPRReg) + 1;
254fe6060f1SDimitry Andric 
255fe6060f1SDimitry Andric     return Info;
256fe6060f1SDimitry Andric   }
257fe6060f1SDimitry Andric 
258fe6060f1SDimitry Andric   int32_t MaxVGPR = -1;
259fe6060f1SDimitry Andric   int32_t MaxAGPR = -1;
260fe6060f1SDimitry Andric   int32_t MaxSGPR = -1;
261fe6060f1SDimitry Andric   uint64_t CalleeFrameSize = 0;
262fe6060f1SDimitry Andric 
263fe6060f1SDimitry Andric   for (const MachineBasicBlock &MBB : MF) {
264fe6060f1SDimitry Andric     for (const MachineInstr &MI : MBB) {
265fe6060f1SDimitry Andric       // TODO: Check regmasks? Do they occur anywhere except calls?
266fe6060f1SDimitry Andric       for (const MachineOperand &MO : MI.operands()) {
267fe6060f1SDimitry Andric         unsigned Width = 0;
268fe6060f1SDimitry Andric         bool IsSGPR = false;
269fe6060f1SDimitry Andric         bool IsAGPR = false;
270fe6060f1SDimitry Andric 
271fe6060f1SDimitry Andric         if (!MO.isReg())
272fe6060f1SDimitry Andric           continue;
273fe6060f1SDimitry Andric 
274fe6060f1SDimitry Andric         Register Reg = MO.getReg();
275fe6060f1SDimitry Andric         switch (Reg) {
276fe6060f1SDimitry Andric         case AMDGPU::EXEC:
277fe6060f1SDimitry Andric         case AMDGPU::EXEC_LO:
278fe6060f1SDimitry Andric         case AMDGPU::EXEC_HI:
279fe6060f1SDimitry Andric         case AMDGPU::SCC:
280fe6060f1SDimitry Andric         case AMDGPU::M0:
281fe6060f1SDimitry Andric         case AMDGPU::M0_LO16:
282fe6060f1SDimitry Andric         case AMDGPU::M0_HI16:
283bdd1243dSDimitry Andric         case AMDGPU::SRC_SHARED_BASE_LO:
284fe6060f1SDimitry Andric         case AMDGPU::SRC_SHARED_BASE:
285bdd1243dSDimitry Andric         case AMDGPU::SRC_SHARED_LIMIT_LO:
286fe6060f1SDimitry Andric         case AMDGPU::SRC_SHARED_LIMIT:
287bdd1243dSDimitry Andric         case AMDGPU::SRC_PRIVATE_BASE_LO:
288fe6060f1SDimitry Andric         case AMDGPU::SRC_PRIVATE_BASE:
289bdd1243dSDimitry Andric         case AMDGPU::SRC_PRIVATE_LIMIT_LO:
290fe6060f1SDimitry Andric         case AMDGPU::SRC_PRIVATE_LIMIT:
291*0fca6ea1SDimitry Andric         case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
292fe6060f1SDimitry Andric         case AMDGPU::SGPR_NULL:
29381ad6265SDimitry Andric         case AMDGPU::SGPR_NULL64:
294fe6060f1SDimitry Andric         case AMDGPU::MODE:
295fe6060f1SDimitry Andric           continue;
296fe6060f1SDimitry Andric 
297fe6060f1SDimitry Andric         case AMDGPU::NoRegister:
298fe6060f1SDimitry Andric           assert(MI.isDebugInstr() &&
299fe6060f1SDimitry Andric                  "Instruction uses invalid noreg register");
300fe6060f1SDimitry Andric           continue;
301fe6060f1SDimitry Andric 
302fe6060f1SDimitry Andric         case AMDGPU::VCC:
303fe6060f1SDimitry Andric         case AMDGPU::VCC_LO:
304fe6060f1SDimitry Andric         case AMDGPU::VCC_HI:
305fe6060f1SDimitry Andric         case AMDGPU::VCC_LO_LO16:
306fe6060f1SDimitry Andric         case AMDGPU::VCC_LO_HI16:
307fe6060f1SDimitry Andric         case AMDGPU::VCC_HI_LO16:
308fe6060f1SDimitry Andric         case AMDGPU::VCC_HI_HI16:
309fe6060f1SDimitry Andric           Info.UsesVCC = true;
310fe6060f1SDimitry Andric           continue;
311fe6060f1SDimitry Andric 
312fe6060f1SDimitry Andric         case AMDGPU::FLAT_SCR:
313fe6060f1SDimitry Andric         case AMDGPU::FLAT_SCR_LO:
314fe6060f1SDimitry Andric         case AMDGPU::FLAT_SCR_HI:
315fe6060f1SDimitry Andric           continue;
316fe6060f1SDimitry Andric 
317fe6060f1SDimitry Andric         case AMDGPU::XNACK_MASK:
318fe6060f1SDimitry Andric         case AMDGPU::XNACK_MASK_LO:
319fe6060f1SDimitry Andric         case AMDGPU::XNACK_MASK_HI:
320fe6060f1SDimitry Andric           llvm_unreachable("xnack_mask registers should not be used");
321fe6060f1SDimitry Andric 
322fe6060f1SDimitry Andric         case AMDGPU::LDS_DIRECT:
323fe6060f1SDimitry Andric           llvm_unreachable("lds_direct register should not be used");
324fe6060f1SDimitry Andric 
325fe6060f1SDimitry Andric         case AMDGPU::TBA:
326fe6060f1SDimitry Andric         case AMDGPU::TBA_LO:
327fe6060f1SDimitry Andric         case AMDGPU::TBA_HI:
328fe6060f1SDimitry Andric         case AMDGPU::TMA:
329fe6060f1SDimitry Andric         case AMDGPU::TMA_LO:
330fe6060f1SDimitry Andric         case AMDGPU::TMA_HI:
331fe6060f1SDimitry Andric           llvm_unreachable("trap handler registers should not be used");
332fe6060f1SDimitry Andric 
333fe6060f1SDimitry Andric         case AMDGPU::SRC_VCCZ:
334fe6060f1SDimitry Andric           llvm_unreachable("src_vccz register should not be used");
335fe6060f1SDimitry Andric 
336fe6060f1SDimitry Andric         case AMDGPU::SRC_EXECZ:
337fe6060f1SDimitry Andric           llvm_unreachable("src_execz register should not be used");
338fe6060f1SDimitry Andric 
339fe6060f1SDimitry Andric         case AMDGPU::SRC_SCC:
340fe6060f1SDimitry Andric           llvm_unreachable("src_scc register should not be used");
341fe6060f1SDimitry Andric 
342fe6060f1SDimitry Andric         default:
343fe6060f1SDimitry Andric           break;
344fe6060f1SDimitry Andric         }
345fe6060f1SDimitry Andric 
34606c3fb27SDimitry Andric         if (AMDGPU::SGPR_32RegClass.contains(Reg) ||
34706c3fb27SDimitry Andric             AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
348fe6060f1SDimitry Andric             AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
349fe6060f1SDimitry Andric           IsSGPR = true;
350fe6060f1SDimitry Andric           Width = 1;
351fe6060f1SDimitry Andric         } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
352647cbc5dSDimitry Andric                    AMDGPU::VGPR_16RegClass.contains(Reg)) {
353fe6060f1SDimitry Andric           IsSGPR = false;
354fe6060f1SDimitry Andric           Width = 1;
355fe6060f1SDimitry Andric         } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
356fe6060f1SDimitry Andric                    AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
357fe6060f1SDimitry Andric           IsSGPR = false;
358fe6060f1SDimitry Andric           IsAGPR = true;
359fe6060f1SDimitry Andric           Width = 1;
36006c3fb27SDimitry Andric         } else if (AMDGPU::SGPR_64RegClass.contains(Reg)) {
361fe6060f1SDimitry Andric           IsSGPR = true;
362fe6060f1SDimitry Andric           Width = 2;
363fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
364fe6060f1SDimitry Andric           IsSGPR = false;
365fe6060f1SDimitry Andric           Width = 2;
366fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
367fe6060f1SDimitry Andric           IsSGPR = false;
368fe6060f1SDimitry Andric           IsAGPR = true;
369fe6060f1SDimitry Andric           Width = 2;
370fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
371fe6060f1SDimitry Andric           IsSGPR = false;
372fe6060f1SDimitry Andric           Width = 3;
373fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
374fe6060f1SDimitry Andric           IsSGPR = true;
375fe6060f1SDimitry Andric           Width = 3;
376fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
377fe6060f1SDimitry Andric           IsSGPR = false;
378fe6060f1SDimitry Andric           IsAGPR = true;
379fe6060f1SDimitry Andric           Width = 3;
38006c3fb27SDimitry Andric         } else if (AMDGPU::SGPR_128RegClass.contains(Reg)) {
381fe6060f1SDimitry Andric           IsSGPR = true;
382fe6060f1SDimitry Andric           Width = 4;
383fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
384fe6060f1SDimitry Andric           IsSGPR = false;
385fe6060f1SDimitry Andric           Width = 4;
386fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
387fe6060f1SDimitry Andric           IsSGPR = false;
388fe6060f1SDimitry Andric           IsAGPR = true;
389fe6060f1SDimitry Andric           Width = 4;
390fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
391fe6060f1SDimitry Andric           IsSGPR = false;
392fe6060f1SDimitry Andric           Width = 5;
393fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
394fe6060f1SDimitry Andric           IsSGPR = true;
395fe6060f1SDimitry Andric           Width = 5;
396fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
397fe6060f1SDimitry Andric           IsSGPR = false;
398fe6060f1SDimitry Andric           IsAGPR = true;
399fe6060f1SDimitry Andric           Width = 5;
400fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
401fe6060f1SDimitry Andric           IsSGPR = false;
402fe6060f1SDimitry Andric           Width = 6;
403fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
404fe6060f1SDimitry Andric           IsSGPR = true;
405fe6060f1SDimitry Andric           Width = 6;
406fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
407fe6060f1SDimitry Andric           IsSGPR = false;
408fe6060f1SDimitry Andric           IsAGPR = true;
409fe6060f1SDimitry Andric           Width = 6;
410fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
411fe6060f1SDimitry Andric           IsSGPR = false;
412fe6060f1SDimitry Andric           Width = 7;
413fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
414fe6060f1SDimitry Andric           IsSGPR = true;
415fe6060f1SDimitry Andric           Width = 7;
416fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
417fe6060f1SDimitry Andric           IsSGPR = false;
418fe6060f1SDimitry Andric           IsAGPR = true;
419fe6060f1SDimitry Andric           Width = 7;
420fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
421fe6060f1SDimitry Andric           IsSGPR = true;
422fe6060f1SDimitry Andric           Width = 8;
423fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
424fe6060f1SDimitry Andric           IsSGPR = false;
425fe6060f1SDimitry Andric           Width = 8;
426fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
427fe6060f1SDimitry Andric           IsSGPR = false;
428fe6060f1SDimitry Andric           IsAGPR = true;
429fe6060f1SDimitry Andric           Width = 8;
430bdd1243dSDimitry Andric         } else if (AMDGPU::VReg_288RegClass.contains(Reg)) {
431bdd1243dSDimitry Andric           IsSGPR = false;
432bdd1243dSDimitry Andric           Width = 9;
433bdd1243dSDimitry Andric         } else if (AMDGPU::SReg_288RegClass.contains(Reg)) {
434bdd1243dSDimitry Andric           IsSGPR = true;
435bdd1243dSDimitry Andric           Width = 9;
436bdd1243dSDimitry Andric         } else if (AMDGPU::AReg_288RegClass.contains(Reg)) {
437bdd1243dSDimitry Andric           IsSGPR = false;
438bdd1243dSDimitry Andric           IsAGPR = true;
439bdd1243dSDimitry Andric           Width = 9;
440bdd1243dSDimitry Andric         } else if (AMDGPU::VReg_320RegClass.contains(Reg)) {
441bdd1243dSDimitry Andric           IsSGPR = false;
442bdd1243dSDimitry Andric           Width = 10;
443bdd1243dSDimitry Andric         } else if (AMDGPU::SReg_320RegClass.contains(Reg)) {
444bdd1243dSDimitry Andric           IsSGPR = true;
445bdd1243dSDimitry Andric           Width = 10;
446bdd1243dSDimitry Andric         } else if (AMDGPU::AReg_320RegClass.contains(Reg)) {
447bdd1243dSDimitry Andric           IsSGPR = false;
448bdd1243dSDimitry Andric           IsAGPR = true;
449bdd1243dSDimitry Andric           Width = 10;
450bdd1243dSDimitry Andric         } else if (AMDGPU::VReg_352RegClass.contains(Reg)) {
451bdd1243dSDimitry Andric           IsSGPR = false;
452bdd1243dSDimitry Andric           Width = 11;
453bdd1243dSDimitry Andric         } else if (AMDGPU::SReg_352RegClass.contains(Reg)) {
454bdd1243dSDimitry Andric           IsSGPR = true;
455bdd1243dSDimitry Andric           Width = 11;
456bdd1243dSDimitry Andric         } else if (AMDGPU::AReg_352RegClass.contains(Reg)) {
457bdd1243dSDimitry Andric           IsSGPR = false;
458bdd1243dSDimitry Andric           IsAGPR = true;
459bdd1243dSDimitry Andric           Width = 11;
460bdd1243dSDimitry Andric         } else if (AMDGPU::VReg_384RegClass.contains(Reg)) {
461bdd1243dSDimitry Andric           IsSGPR = false;
462bdd1243dSDimitry Andric           Width = 12;
463bdd1243dSDimitry Andric         } else if (AMDGPU::SReg_384RegClass.contains(Reg)) {
464bdd1243dSDimitry Andric           IsSGPR = true;
465bdd1243dSDimitry Andric           Width = 12;
466bdd1243dSDimitry Andric         } else if (AMDGPU::AReg_384RegClass.contains(Reg)) {
467bdd1243dSDimitry Andric           IsSGPR = false;
468bdd1243dSDimitry Andric           IsAGPR = true;
469bdd1243dSDimitry Andric           Width = 12;
470fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
471fe6060f1SDimitry Andric           IsSGPR = true;
472fe6060f1SDimitry Andric           Width = 16;
473fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
474fe6060f1SDimitry Andric           IsSGPR = false;
475fe6060f1SDimitry Andric           Width = 16;
476fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
477fe6060f1SDimitry Andric           IsSGPR = false;
478fe6060f1SDimitry Andric           IsAGPR = true;
479fe6060f1SDimitry Andric           Width = 16;
480fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
481fe6060f1SDimitry Andric           IsSGPR = true;
482fe6060f1SDimitry Andric           Width = 32;
483fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
484fe6060f1SDimitry Andric           IsSGPR = false;
485fe6060f1SDimitry Andric           Width = 32;
486fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
487fe6060f1SDimitry Andric           IsSGPR = false;
488fe6060f1SDimitry Andric           IsAGPR = true;
489fe6060f1SDimitry Andric           Width = 32;
490fe6060f1SDimitry Andric         } else {
49106c3fb27SDimitry Andric           // We only expect TTMP registers or registers that do not belong to
49206c3fb27SDimitry Andric           // any RC.
49306c3fb27SDimitry Andric           assert((AMDGPU::TTMP_32RegClass.contains(Reg) ||
49406c3fb27SDimitry Andric                   AMDGPU::TTMP_64RegClass.contains(Reg) ||
49506c3fb27SDimitry Andric                   AMDGPU::TTMP_128RegClass.contains(Reg) ||
49606c3fb27SDimitry Andric                   AMDGPU::TTMP_256RegClass.contains(Reg) ||
49706c3fb27SDimitry Andric                   AMDGPU::TTMP_512RegClass.contains(Reg) ||
49806c3fb27SDimitry Andric                   !TRI.getPhysRegBaseClass(Reg)) &&
49906c3fb27SDimitry Andric                  "Unknown register class");
500fe6060f1SDimitry Andric         }
501fe6060f1SDimitry Andric         unsigned HWReg = TRI.getHWRegIndex(Reg);
502fe6060f1SDimitry Andric         int MaxUsed = HWReg + Width - 1;
503fe6060f1SDimitry Andric         if (IsSGPR) {
504fe6060f1SDimitry Andric           MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
505fe6060f1SDimitry Andric         } else if (IsAGPR) {
506fe6060f1SDimitry Andric           MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
507fe6060f1SDimitry Andric         } else {
508fe6060f1SDimitry Andric           MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
509fe6060f1SDimitry Andric         }
510fe6060f1SDimitry Andric       }
511fe6060f1SDimitry Andric 
512fe6060f1SDimitry Andric       if (MI.isCall()) {
513fe6060f1SDimitry Andric         // Pseudo used just to encode the underlying global. Is there a better
514fe6060f1SDimitry Andric         // way to track this?
515fe6060f1SDimitry Andric 
516fe6060f1SDimitry Andric         const MachineOperand *CalleeOp =
517fe6060f1SDimitry Andric             TII->getNamedOperand(MI, AMDGPU::OpName::callee);
518fe6060f1SDimitry Andric 
519fe6060f1SDimitry Andric         const Function *Callee = getCalleeFunction(*CalleeOp);
520fe6060f1SDimitry Andric         DenseMap<const Function *, SIFunctionResourceInfo>::const_iterator I =
521fe6060f1SDimitry Andric             CallGraphResourceInfo.end();
522fe6060f1SDimitry Andric 
523fe6060f1SDimitry Andric         // Avoid crashing on undefined behavior with an illegal call to a
524fe6060f1SDimitry Andric         // kernel. If a callsite's calling convention doesn't match the
525fe6060f1SDimitry Andric         // function's, it's undefined behavior. If the callsite calling
526fe6060f1SDimitry Andric         // convention does match, that would have errored earlier.
527fe6060f1SDimitry Andric         if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
528fe6060f1SDimitry Andric           report_fatal_error("invalid call to entry function");
529fe6060f1SDimitry Andric 
530fe6060f1SDimitry Andric         bool IsIndirect = !Callee || Callee->isDeclaration();
531fe6060f1SDimitry Andric         if (!IsIndirect)
532fe6060f1SDimitry Andric           I = CallGraphResourceInfo.find(Callee);
533fe6060f1SDimitry Andric 
534349cc55cSDimitry Andric         // FIXME: Call site could have norecurse on it
535349cc55cSDimitry Andric         if (!Callee || !Callee->doesNotRecurse()) {
536349cc55cSDimitry Andric           Info.HasRecursion = true;
537349cc55cSDimitry Andric 
538349cc55cSDimitry Andric           // TODO: If we happen to know there is no stack usage in the
539349cc55cSDimitry Andric           // callgraph, we don't need to assume an infinitely growing stack.
540349cc55cSDimitry Andric           if (!MI.isReturn()) {
541349cc55cSDimitry Andric             // We don't need to assume an unknown stack size for tail calls.
542349cc55cSDimitry Andric 
543349cc55cSDimitry Andric             // FIXME: This only benefits in the case where the kernel does not
544349cc55cSDimitry Andric             // directly call the tail called function. If a kernel directly
545349cc55cSDimitry Andric             // calls a tail recursive function, we'll assume maximum stack size
546349cc55cSDimitry Andric             // based on the regular call instruction.
547*0fca6ea1SDimitry Andric             CalleeFrameSize = std::max(
548*0fca6ea1SDimitry Andric                 CalleeFrameSize,
549349cc55cSDimitry Andric                 static_cast<uint64_t>(AssumedStackSizeForExternalCall));
550349cc55cSDimitry Andric           }
551349cc55cSDimitry Andric         }
552349cc55cSDimitry Andric 
553fe6060f1SDimitry Andric         if (IsIndirect || I == CallGraphResourceInfo.end()) {
554fe6060f1SDimitry Andric           CalleeFrameSize =
555fe6060f1SDimitry Andric               std::max(CalleeFrameSize,
556fe6060f1SDimitry Andric                        static_cast<uint64_t>(AssumedStackSizeForExternalCall));
557fe6060f1SDimitry Andric 
558fe6060f1SDimitry Andric           // Register usage of indirect calls gets handled later
559fe6060f1SDimitry Andric           Info.UsesVCC = true;
560fe6060f1SDimitry Andric           Info.UsesFlatScratch = ST.hasFlatAddressSpace();
561fe6060f1SDimitry Andric           Info.HasDynamicallySizedStack = true;
562fe6060f1SDimitry Andric           Info.HasIndirectCall = true;
563fe6060f1SDimitry Andric         } else {
564fe6060f1SDimitry Andric           // We force CodeGen to run in SCC order, so the callee's register
565fe6060f1SDimitry Andric           // usage etc. should be the cumulative usage of all callees.
566fe6060f1SDimitry Andric           MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
567fe6060f1SDimitry Andric           MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
568fe6060f1SDimitry Andric           MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
569fe6060f1SDimitry Andric           CalleeFrameSize =
570fe6060f1SDimitry Andric               std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
571fe6060f1SDimitry Andric           Info.UsesVCC |= I->second.UsesVCC;
572fe6060f1SDimitry Andric           Info.UsesFlatScratch |= I->second.UsesFlatScratch;
573fe6060f1SDimitry Andric           Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
574fe6060f1SDimitry Andric           Info.HasRecursion |= I->second.HasRecursion;
575fe6060f1SDimitry Andric           Info.HasIndirectCall |= I->second.HasIndirectCall;
576fe6060f1SDimitry Andric         }
577fe6060f1SDimitry Andric       }
578fe6060f1SDimitry Andric     }
579fe6060f1SDimitry Andric   }
580fe6060f1SDimitry Andric 
581fe6060f1SDimitry Andric   Info.NumExplicitSGPR = MaxSGPR + 1;
582fe6060f1SDimitry Andric   Info.NumVGPR = MaxVGPR + 1;
583fe6060f1SDimitry Andric   Info.NumAGPR = MaxAGPR + 1;
584fe6060f1SDimitry Andric   Info.PrivateSegmentSize += CalleeFrameSize;
585fe6060f1SDimitry Andric 
586fe6060f1SDimitry Andric   return Info;
587fe6060f1SDimitry Andric }
588fe6060f1SDimitry Andric 
589fe6060f1SDimitry Andric void AMDGPUResourceUsageAnalysis::propagateIndirectCallRegisterUsage() {
590fe6060f1SDimitry Andric   // Collect the maximum number of registers from non-hardware-entrypoints.
591fe6060f1SDimitry Andric   // All these functions are potential targets for indirect calls.
592fe6060f1SDimitry Andric   int32_t NonKernelMaxSGPRs = 0;
593fe6060f1SDimitry Andric   int32_t NonKernelMaxVGPRs = 0;
594fe6060f1SDimitry Andric   int32_t NonKernelMaxAGPRs = 0;
595fe6060f1SDimitry Andric 
596fe6060f1SDimitry Andric   for (const auto &I : CallGraphResourceInfo) {
597fe6060f1SDimitry Andric     if (!AMDGPU::isEntryFunctionCC(I.getFirst()->getCallingConv())) {
598fe6060f1SDimitry Andric       auto &Info = I.getSecond();
599fe6060f1SDimitry Andric       NonKernelMaxSGPRs = std::max(NonKernelMaxSGPRs, Info.NumExplicitSGPR);
600fe6060f1SDimitry Andric       NonKernelMaxVGPRs = std::max(NonKernelMaxVGPRs, Info.NumVGPR);
601fe6060f1SDimitry Andric       NonKernelMaxAGPRs = std::max(NonKernelMaxAGPRs, Info.NumAGPR);
602fe6060f1SDimitry Andric     }
603fe6060f1SDimitry Andric   }
604fe6060f1SDimitry Andric 
605fe6060f1SDimitry Andric   // Add register usage for functions with indirect calls.
606fe6060f1SDimitry Andric   // For calls to unknown functions, we assume the maximum register usage of
607fe6060f1SDimitry Andric   // all non-hardware-entrypoints in the current module.
608fe6060f1SDimitry Andric   for (auto &I : CallGraphResourceInfo) {
609fe6060f1SDimitry Andric     auto &Info = I.getSecond();
610fe6060f1SDimitry Andric     if (Info.HasIndirectCall) {
611fe6060f1SDimitry Andric       Info.NumExplicitSGPR = std::max(Info.NumExplicitSGPR, NonKernelMaxSGPRs);
612fe6060f1SDimitry Andric       Info.NumVGPR = std::max(Info.NumVGPR, NonKernelMaxVGPRs);
613fe6060f1SDimitry Andric       Info.NumAGPR = std::max(Info.NumAGPR, NonKernelMaxAGPRs);
614fe6060f1SDimitry Andric     }
615fe6060f1SDimitry Andric   }
616fe6060f1SDimitry Andric }
617