xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp (revision 647cbc5de815c5651677bf8582797f716ec7b48d)
//===- AMDGPUResourceUsageAnalysis.cpp ---- analysis of resources ---------===//
2fe6060f1SDimitry Andric //
3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe6060f1SDimitry Andric //
7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8fe6060f1SDimitry Andric //
9fe6060f1SDimitry Andric /// \file
10fe6060f1SDimitry Andric /// \brief Analyzes how many registers and other resources are used by
11fe6060f1SDimitry Andric /// functions.
12fe6060f1SDimitry Andric ///
13fe6060f1SDimitry Andric /// The results of this analysis are used to fill the register usage, flat
14fe6060f1SDimitry Andric /// usage, etc. into hardware registers.
15fe6060f1SDimitry Andric ///
16fe6060f1SDimitry Andric /// The analysis takes callees into account. E.g. if a function A that needs 10
17fe6060f1SDimitry Andric /// VGPRs calls a function B that needs 20 VGPRs, querying the VGPR usage of A
18fe6060f1SDimitry Andric /// will return 20.
19fe6060f1SDimitry Andric /// It is assumed that an indirect call can go into any function except
20fe6060f1SDimitry Andric /// hardware-entrypoints. Therefore the register usage of functions with
21fe6060f1SDimitry Andric /// indirect calls is estimated as the maximum of all non-entrypoint functions
22fe6060f1SDimitry Andric /// in the module.
23fe6060f1SDimitry Andric ///
24fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
25fe6060f1SDimitry Andric 
26fe6060f1SDimitry Andric #include "AMDGPUResourceUsageAnalysis.h"
27fe6060f1SDimitry Andric #include "AMDGPU.h"
28fe6060f1SDimitry Andric #include "GCNSubtarget.h"
29fe6060f1SDimitry Andric #include "SIMachineFunctionInfo.h"
3081ad6265SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
31fe6060f1SDimitry Andric #include "llvm/Analysis/CallGraph.h"
3281ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
33fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
34349cc55cSDimitry Andric #include "llvm/IR/GlobalAlias.h"
35349cc55cSDimitry Andric #include "llvm/IR/GlobalValue.h"
36fe6060f1SDimitry Andric #include "llvm/Target/TargetMachine.h"
37fe6060f1SDimitry Andric 
38fe6060f1SDimitry Andric using namespace llvm;
39fe6060f1SDimitry Andric using namespace llvm::AMDGPU;
40fe6060f1SDimitry Andric 
41fe6060f1SDimitry Andric #define DEBUG_TYPE "amdgpu-resource-usage"
42fe6060f1SDimitry Andric 
// Pass identification: the address of ID is the unique key used by the LLVM
// pass registry; the exported reference lets other code name this pass.
char llvm::AMDGPUResourceUsageAnalysis::ID = 0;
char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;

// In code object v4 and older, we need to tell the runtime some amount ahead of
// time if we don't know the true stack size. Assume a smaller number if this is
// only due to dynamic / non-entry block allocas.
static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
    "amdgpu-assume-external-call-stack-size",
    cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
    cl::init(16384));

// Extra scratch charged to a function that has variable-sized stack objects,
// whose true size cannot be determined statically.
static cl::opt<uint32_t> AssumedStackSizeForDynamicSizeObjects(
    "amdgpu-assume-dynamic-stack-object-size",
    cl::desc("Assumed extra stack use if there are any "
             "variable sized objects (in bytes)"),
    cl::Hidden, cl::init(4096));

// Register the pass; both trailing flags mark it as a CFG-only/analysis pass.
INITIALIZE_PASS(AMDGPUResourceUsageAnalysis, DEBUG_TYPE,
                "Function register usage analysis", true, true)
62fe6060f1SDimitry Andric 
63fe6060f1SDimitry Andric static const Function *getCalleeFunction(const MachineOperand &Op) {
64fe6060f1SDimitry Andric   if (Op.isImm()) {
65fe6060f1SDimitry Andric     assert(Op.getImm() == 0);
66fe6060f1SDimitry Andric     return nullptr;
67fe6060f1SDimitry Andric   }
68349cc55cSDimitry Andric   if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
69349cc55cSDimitry Andric     return cast<Function>(GA->getOperand(0));
70fe6060f1SDimitry Andric   return cast<Function>(Op.getGlobal());
71fe6060f1SDimitry Andric }
72fe6060f1SDimitry Andric 
73fe6060f1SDimitry Andric static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
74fe6060f1SDimitry Andric                                   const SIInstrInfo &TII, unsigned Reg) {
75fe6060f1SDimitry Andric   for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
76fe6060f1SDimitry Andric     if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
77fe6060f1SDimitry Andric       return true;
78fe6060f1SDimitry Andric   }
79fe6060f1SDimitry Andric 
80fe6060f1SDimitry Andric   return false;
81fe6060f1SDimitry Andric }
82fe6060f1SDimitry Andric 
83fe6060f1SDimitry Andric int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs(
84fe6060f1SDimitry Andric     const GCNSubtarget &ST) const {
85fe6060f1SDimitry Andric   return NumExplicitSGPR +
86fe6060f1SDimitry Andric          IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch,
87fe6060f1SDimitry Andric                                    ST.getTargetID().isXnackOnOrAny());
88fe6060f1SDimitry Andric }
89fe6060f1SDimitry Andric 
90fe6060f1SDimitry Andric int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
91349cc55cSDimitry Andric     const GCNSubtarget &ST, int32_t ArgNumAGPR, int32_t ArgNumVGPR) const {
9281ad6265SDimitry Andric   return AMDGPU::getTotalNumVGPRs(ST.hasGFX90AInsts(), ArgNumAGPR, ArgNumVGPR);
93349cc55cSDimitry Andric }
94349cc55cSDimitry Andric 
95349cc55cSDimitry Andric int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
96fe6060f1SDimitry Andric     const GCNSubtarget &ST) const {
97349cc55cSDimitry Andric   return getTotalNumVGPRs(ST, NumAGPR, NumVGPR);
98fe6060f1SDimitry Andric }
99fe6060f1SDimitry Andric 
// Analyze resource usage for every generated function in the module.
//
// Functions are visited in post order over the call graph so that (per the
// file-level comment) callee information is available when a caller is
// analyzed. Always returns false: this pass only records analysis results
// and does not modify the module.
bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  const TargetMachine &TM = TPC->getTM<TargetMachine>();
  const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();
  bool HasIndirectCall = false;

  CallGraph CG = CallGraph(M);
  auto End = po_end(&CG);

  // By default, for code object v5 and later, track only the minimum scratch
  // size
  // (both assumed-stack-size knobs drop to 0 unless the user set them
  // explicitly on the command line; also applies on AMDPAL).
  if (AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 ||
      STI.getTargetTriple().getOS() == Triple::AMDPAL) {
    if (!AssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
      AssumedStackSizeForDynamicSizeObjects = 0;
    if (!AssumedStackSizeForExternalCall.getNumOccurrences())
      AssumedStackSizeForExternalCall = 0;
  }

  // Main traversal: post order over the call graph.
  for (auto IT = po_begin(&CG); IT != End; ++IT) {
    Function *F = IT->getFunction();
    // Skip the external node (null Function) and declarations, which have no
    // MachineFunction body to analyze.
    if (!F || F->isDeclaration())
      continue;

    MachineFunction *MF = MMI.getMachineFunction(*F);
    assert(MF && "function must have been generated already");

    auto CI =
        CallGraphResourceInfo.insert(std::pair(F, SIFunctionResourceInfo()));
    SIFunctionResourceInfo &Info = CI.first->second;
    assert(CI.second && "should only be called once per function");
    Info = analyzeResourceUsage(*MF, TM);
    HasIndirectCall |= Info.HasIndirectCall;
  }

  // It's possible we have unreachable functions in the module which weren't
  // visited by the PO traversal. Make sure we have some resource counts to
  // report.
  for (const auto &IT : CG) {
    const Function *F = IT.first;
    if (!F || F->isDeclaration())
      continue;

    auto CI =
        CallGraphResourceInfo.insert(std::pair(F, SIFunctionResourceInfo()));
    if (!CI.second) // Skip already visited functions
      continue;

    SIFunctionResourceInfo &Info = CI.first->second;
    MachineFunction *MF = MMI.getMachineFunction(*F);
    assert(MF && "function must have been generated already");
    Info = analyzeResourceUsage(*MF, TM);
    HasIndirectCall |= Info.HasIndirectCall;
  }

  // If any analyzed function makes an indirect call, fold the module-wide
  // worst-case register usage into those functions' results.
  if (HasIndirectCall)
    propagateIndirectCallRegisterUsage();

  return false;
}
164fe6060f1SDimitry Andric 
165fe6060f1SDimitry Andric AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
166fe6060f1SDimitry Andric AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
167fe6060f1SDimitry Andric     const MachineFunction &MF, const TargetMachine &TM) const {
168fe6060f1SDimitry Andric   SIFunctionResourceInfo Info;
169fe6060f1SDimitry Andric 
170fe6060f1SDimitry Andric   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
171fe6060f1SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
172fe6060f1SDimitry Andric   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
173fe6060f1SDimitry Andric   const MachineRegisterInfo &MRI = MF.getRegInfo();
174fe6060f1SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
175fe6060f1SDimitry Andric   const SIRegisterInfo &TRI = TII->getRegisterInfo();
176fe6060f1SDimitry Andric 
177fe6060f1SDimitry Andric   Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
178fe6060f1SDimitry Andric                          MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
179fe6060f1SDimitry Andric                          MRI.isLiveIn(MFI->getPreloadedReg(
180fe6060f1SDimitry Andric                              AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT));
181fe6060f1SDimitry Andric 
182fe6060f1SDimitry Andric   // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
183fe6060f1SDimitry Andric   // instructions aren't used to access the scratch buffer. Inline assembly may
184fe6060f1SDimitry Andric   // need it though.
185fe6060f1SDimitry Andric   //
186fe6060f1SDimitry Andric   // If we only have implicit uses of flat_scr on flat instructions, it is not
187fe6060f1SDimitry Andric   // really needed.
1885f757f3fSDimitry Andric   if (Info.UsesFlatScratch && !MFI->getUserSGPRInfo().hasFlatScratchInit() &&
189fe6060f1SDimitry Andric       (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
190fe6060f1SDimitry Andric        !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
191fe6060f1SDimitry Andric        !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
192fe6060f1SDimitry Andric     Info.UsesFlatScratch = false;
193fe6060f1SDimitry Andric   }
194fe6060f1SDimitry Andric 
195fe6060f1SDimitry Andric   Info.PrivateSegmentSize = FrameInfo.getStackSize();
196fe6060f1SDimitry Andric 
197fe6060f1SDimitry Andric   // Assume a big number if there are any unknown sized objects.
198fe6060f1SDimitry Andric   Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
199fe6060f1SDimitry Andric   if (Info.HasDynamicallySizedStack)
200fe6060f1SDimitry Andric     Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;
201fe6060f1SDimitry Andric 
202fe6060f1SDimitry Andric   if (MFI->isStackRealigned())
203fe6060f1SDimitry Andric     Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
204fe6060f1SDimitry Andric 
205fe6060f1SDimitry Andric   Info.UsesVCC =
206fe6060f1SDimitry Andric       MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);
207fe6060f1SDimitry Andric 
208fe6060f1SDimitry Andric   // If there are no calls, MachineRegisterInfo can tell us the used register
209fe6060f1SDimitry Andric   // count easily.
210fe6060f1SDimitry Andric   // A tail call isn't considered a call for MachineFrameInfo's purposes.
211fe6060f1SDimitry Andric   if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
212fe6060f1SDimitry Andric     MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
213fe6060f1SDimitry Andric     for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
214fe6060f1SDimitry Andric       if (MRI.isPhysRegUsed(Reg)) {
215fe6060f1SDimitry Andric         HighestVGPRReg = Reg;
216fe6060f1SDimitry Andric         break;
217fe6060f1SDimitry Andric       }
218fe6060f1SDimitry Andric     }
219fe6060f1SDimitry Andric 
220fe6060f1SDimitry Andric     if (ST.hasMAIInsts()) {
221fe6060f1SDimitry Andric       MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
222fe6060f1SDimitry Andric       for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
223fe6060f1SDimitry Andric         if (MRI.isPhysRegUsed(Reg)) {
224fe6060f1SDimitry Andric           HighestAGPRReg = Reg;
225fe6060f1SDimitry Andric           break;
226fe6060f1SDimitry Andric         }
227fe6060f1SDimitry Andric       }
228fe6060f1SDimitry Andric       Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister
229fe6060f1SDimitry Andric                          ? 0
230fe6060f1SDimitry Andric                          : TRI.getHWRegIndex(HighestAGPRReg) + 1;
231fe6060f1SDimitry Andric     }
232fe6060f1SDimitry Andric 
233fe6060f1SDimitry Andric     MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
234fe6060f1SDimitry Andric     for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
235fe6060f1SDimitry Andric       if (MRI.isPhysRegUsed(Reg)) {
236fe6060f1SDimitry Andric         HighestSGPRReg = Reg;
237fe6060f1SDimitry Andric         break;
238fe6060f1SDimitry Andric       }
239fe6060f1SDimitry Andric     }
240fe6060f1SDimitry Andric 
241fe6060f1SDimitry Andric     // We found the maximum register index. They start at 0, so add one to get
242fe6060f1SDimitry Andric     // the number of registers.
243fe6060f1SDimitry Andric     Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister
244fe6060f1SDimitry Andric                        ? 0
245fe6060f1SDimitry Andric                        : TRI.getHWRegIndex(HighestVGPRReg) + 1;
246fe6060f1SDimitry Andric     Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister
247fe6060f1SDimitry Andric                                ? 0
248fe6060f1SDimitry Andric                                : TRI.getHWRegIndex(HighestSGPRReg) + 1;
249fe6060f1SDimitry Andric 
250fe6060f1SDimitry Andric     return Info;
251fe6060f1SDimitry Andric   }
252fe6060f1SDimitry Andric 
253fe6060f1SDimitry Andric   int32_t MaxVGPR = -1;
254fe6060f1SDimitry Andric   int32_t MaxAGPR = -1;
255fe6060f1SDimitry Andric   int32_t MaxSGPR = -1;
256fe6060f1SDimitry Andric   uint64_t CalleeFrameSize = 0;
257fe6060f1SDimitry Andric 
258fe6060f1SDimitry Andric   for (const MachineBasicBlock &MBB : MF) {
259fe6060f1SDimitry Andric     for (const MachineInstr &MI : MBB) {
260fe6060f1SDimitry Andric       // TODO: Check regmasks? Do they occur anywhere except calls?
261fe6060f1SDimitry Andric       for (const MachineOperand &MO : MI.operands()) {
262fe6060f1SDimitry Andric         unsigned Width = 0;
263fe6060f1SDimitry Andric         bool IsSGPR = false;
264fe6060f1SDimitry Andric         bool IsAGPR = false;
265fe6060f1SDimitry Andric 
266fe6060f1SDimitry Andric         if (!MO.isReg())
267fe6060f1SDimitry Andric           continue;
268fe6060f1SDimitry Andric 
269fe6060f1SDimitry Andric         Register Reg = MO.getReg();
270fe6060f1SDimitry Andric         switch (Reg) {
271fe6060f1SDimitry Andric         case AMDGPU::EXEC:
272fe6060f1SDimitry Andric         case AMDGPU::EXEC_LO:
273fe6060f1SDimitry Andric         case AMDGPU::EXEC_HI:
274fe6060f1SDimitry Andric         case AMDGPU::SCC:
275fe6060f1SDimitry Andric         case AMDGPU::M0:
276fe6060f1SDimitry Andric         case AMDGPU::M0_LO16:
277fe6060f1SDimitry Andric         case AMDGPU::M0_HI16:
278bdd1243dSDimitry Andric         case AMDGPU::SRC_SHARED_BASE_LO:
279fe6060f1SDimitry Andric         case AMDGPU::SRC_SHARED_BASE:
280bdd1243dSDimitry Andric         case AMDGPU::SRC_SHARED_LIMIT_LO:
281fe6060f1SDimitry Andric         case AMDGPU::SRC_SHARED_LIMIT:
282bdd1243dSDimitry Andric         case AMDGPU::SRC_PRIVATE_BASE_LO:
283fe6060f1SDimitry Andric         case AMDGPU::SRC_PRIVATE_BASE:
284bdd1243dSDimitry Andric         case AMDGPU::SRC_PRIVATE_LIMIT_LO:
285fe6060f1SDimitry Andric         case AMDGPU::SRC_PRIVATE_LIMIT:
286fe6060f1SDimitry Andric         case AMDGPU::SGPR_NULL:
28781ad6265SDimitry Andric         case AMDGPU::SGPR_NULL64:
288fe6060f1SDimitry Andric         case AMDGPU::MODE:
289fe6060f1SDimitry Andric           continue;
290fe6060f1SDimitry Andric 
291fe6060f1SDimitry Andric         case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
292fe6060f1SDimitry Andric           llvm_unreachable("src_pops_exiting_wave_id should not be used");
293fe6060f1SDimitry Andric 
294fe6060f1SDimitry Andric         case AMDGPU::NoRegister:
295fe6060f1SDimitry Andric           assert(MI.isDebugInstr() &&
296fe6060f1SDimitry Andric                  "Instruction uses invalid noreg register");
297fe6060f1SDimitry Andric           continue;
298fe6060f1SDimitry Andric 
299fe6060f1SDimitry Andric         case AMDGPU::VCC:
300fe6060f1SDimitry Andric         case AMDGPU::VCC_LO:
301fe6060f1SDimitry Andric         case AMDGPU::VCC_HI:
302fe6060f1SDimitry Andric         case AMDGPU::VCC_LO_LO16:
303fe6060f1SDimitry Andric         case AMDGPU::VCC_LO_HI16:
304fe6060f1SDimitry Andric         case AMDGPU::VCC_HI_LO16:
305fe6060f1SDimitry Andric         case AMDGPU::VCC_HI_HI16:
306fe6060f1SDimitry Andric           Info.UsesVCC = true;
307fe6060f1SDimitry Andric           continue;
308fe6060f1SDimitry Andric 
309fe6060f1SDimitry Andric         case AMDGPU::FLAT_SCR:
310fe6060f1SDimitry Andric         case AMDGPU::FLAT_SCR_LO:
311fe6060f1SDimitry Andric         case AMDGPU::FLAT_SCR_HI:
312fe6060f1SDimitry Andric           continue;
313fe6060f1SDimitry Andric 
314fe6060f1SDimitry Andric         case AMDGPU::XNACK_MASK:
315fe6060f1SDimitry Andric         case AMDGPU::XNACK_MASK_LO:
316fe6060f1SDimitry Andric         case AMDGPU::XNACK_MASK_HI:
317fe6060f1SDimitry Andric           llvm_unreachable("xnack_mask registers should not be used");
318fe6060f1SDimitry Andric 
319fe6060f1SDimitry Andric         case AMDGPU::LDS_DIRECT:
320fe6060f1SDimitry Andric           llvm_unreachable("lds_direct register should not be used");
321fe6060f1SDimitry Andric 
322fe6060f1SDimitry Andric         case AMDGPU::TBA:
323fe6060f1SDimitry Andric         case AMDGPU::TBA_LO:
324fe6060f1SDimitry Andric         case AMDGPU::TBA_HI:
325fe6060f1SDimitry Andric         case AMDGPU::TMA:
326fe6060f1SDimitry Andric         case AMDGPU::TMA_LO:
327fe6060f1SDimitry Andric         case AMDGPU::TMA_HI:
328fe6060f1SDimitry Andric           llvm_unreachable("trap handler registers should not be used");
329fe6060f1SDimitry Andric 
330fe6060f1SDimitry Andric         case AMDGPU::SRC_VCCZ:
331fe6060f1SDimitry Andric           llvm_unreachable("src_vccz register should not be used");
332fe6060f1SDimitry Andric 
333fe6060f1SDimitry Andric         case AMDGPU::SRC_EXECZ:
334fe6060f1SDimitry Andric           llvm_unreachable("src_execz register should not be used");
335fe6060f1SDimitry Andric 
336fe6060f1SDimitry Andric         case AMDGPU::SRC_SCC:
337fe6060f1SDimitry Andric           llvm_unreachable("src_scc register should not be used");
338fe6060f1SDimitry Andric 
339fe6060f1SDimitry Andric         default:
340fe6060f1SDimitry Andric           break;
341fe6060f1SDimitry Andric         }
342fe6060f1SDimitry Andric 
34306c3fb27SDimitry Andric         if (AMDGPU::SGPR_32RegClass.contains(Reg) ||
34406c3fb27SDimitry Andric             AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
345fe6060f1SDimitry Andric             AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
346fe6060f1SDimitry Andric           IsSGPR = true;
347fe6060f1SDimitry Andric           Width = 1;
348fe6060f1SDimitry Andric         } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
349*647cbc5dSDimitry Andric                    AMDGPU::VGPR_16RegClass.contains(Reg)) {
350fe6060f1SDimitry Andric           IsSGPR = false;
351fe6060f1SDimitry Andric           Width = 1;
352fe6060f1SDimitry Andric         } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
353fe6060f1SDimitry Andric                    AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
354fe6060f1SDimitry Andric           IsSGPR = false;
355fe6060f1SDimitry Andric           IsAGPR = true;
356fe6060f1SDimitry Andric           Width = 1;
35706c3fb27SDimitry Andric         } else if (AMDGPU::SGPR_64RegClass.contains(Reg)) {
358fe6060f1SDimitry Andric           IsSGPR = true;
359fe6060f1SDimitry Andric           Width = 2;
360fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
361fe6060f1SDimitry Andric           IsSGPR = false;
362fe6060f1SDimitry Andric           Width = 2;
363fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
364fe6060f1SDimitry Andric           IsSGPR = false;
365fe6060f1SDimitry Andric           IsAGPR = true;
366fe6060f1SDimitry Andric           Width = 2;
367fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
368fe6060f1SDimitry Andric           IsSGPR = false;
369fe6060f1SDimitry Andric           Width = 3;
370fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
371fe6060f1SDimitry Andric           IsSGPR = true;
372fe6060f1SDimitry Andric           Width = 3;
373fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
374fe6060f1SDimitry Andric           IsSGPR = false;
375fe6060f1SDimitry Andric           IsAGPR = true;
376fe6060f1SDimitry Andric           Width = 3;
37706c3fb27SDimitry Andric         } else if (AMDGPU::SGPR_128RegClass.contains(Reg)) {
378fe6060f1SDimitry Andric           IsSGPR = true;
379fe6060f1SDimitry Andric           Width = 4;
380fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
381fe6060f1SDimitry Andric           IsSGPR = false;
382fe6060f1SDimitry Andric           Width = 4;
383fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
384fe6060f1SDimitry Andric           IsSGPR = false;
385fe6060f1SDimitry Andric           IsAGPR = true;
386fe6060f1SDimitry Andric           Width = 4;
387fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
388fe6060f1SDimitry Andric           IsSGPR = false;
389fe6060f1SDimitry Andric           Width = 5;
390fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
391fe6060f1SDimitry Andric           IsSGPR = true;
392fe6060f1SDimitry Andric           Width = 5;
393fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
394fe6060f1SDimitry Andric           IsSGPR = false;
395fe6060f1SDimitry Andric           IsAGPR = true;
396fe6060f1SDimitry Andric           Width = 5;
397fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
398fe6060f1SDimitry Andric           IsSGPR = false;
399fe6060f1SDimitry Andric           Width = 6;
400fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
401fe6060f1SDimitry Andric           IsSGPR = true;
402fe6060f1SDimitry Andric           Width = 6;
403fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
404fe6060f1SDimitry Andric           IsSGPR = false;
405fe6060f1SDimitry Andric           IsAGPR = true;
406fe6060f1SDimitry Andric           Width = 6;
407fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
408fe6060f1SDimitry Andric           IsSGPR = false;
409fe6060f1SDimitry Andric           Width = 7;
410fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
411fe6060f1SDimitry Andric           IsSGPR = true;
412fe6060f1SDimitry Andric           Width = 7;
413fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
414fe6060f1SDimitry Andric           IsSGPR = false;
415fe6060f1SDimitry Andric           IsAGPR = true;
416fe6060f1SDimitry Andric           Width = 7;
417fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
418fe6060f1SDimitry Andric           IsSGPR = true;
419fe6060f1SDimitry Andric           Width = 8;
420fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
421fe6060f1SDimitry Andric           IsSGPR = false;
422fe6060f1SDimitry Andric           Width = 8;
423fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
424fe6060f1SDimitry Andric           IsSGPR = false;
425fe6060f1SDimitry Andric           IsAGPR = true;
426fe6060f1SDimitry Andric           Width = 8;
427bdd1243dSDimitry Andric         } else if (AMDGPU::VReg_288RegClass.contains(Reg)) {
428bdd1243dSDimitry Andric           IsSGPR = false;
429bdd1243dSDimitry Andric           Width = 9;
430bdd1243dSDimitry Andric         } else if (AMDGPU::SReg_288RegClass.contains(Reg)) {
431bdd1243dSDimitry Andric           IsSGPR = true;
432bdd1243dSDimitry Andric           Width = 9;
433bdd1243dSDimitry Andric         } else if (AMDGPU::AReg_288RegClass.contains(Reg)) {
434bdd1243dSDimitry Andric           IsSGPR = false;
435bdd1243dSDimitry Andric           IsAGPR = true;
436bdd1243dSDimitry Andric           Width = 9;
437bdd1243dSDimitry Andric         } else if (AMDGPU::VReg_320RegClass.contains(Reg)) {
438bdd1243dSDimitry Andric           IsSGPR = false;
439bdd1243dSDimitry Andric           Width = 10;
440bdd1243dSDimitry Andric         } else if (AMDGPU::SReg_320RegClass.contains(Reg)) {
441bdd1243dSDimitry Andric           IsSGPR = true;
442bdd1243dSDimitry Andric           Width = 10;
443bdd1243dSDimitry Andric         } else if (AMDGPU::AReg_320RegClass.contains(Reg)) {
444bdd1243dSDimitry Andric           IsSGPR = false;
445bdd1243dSDimitry Andric           IsAGPR = true;
446bdd1243dSDimitry Andric           Width = 10;
447bdd1243dSDimitry Andric         } else if (AMDGPU::VReg_352RegClass.contains(Reg)) {
448bdd1243dSDimitry Andric           IsSGPR = false;
449bdd1243dSDimitry Andric           Width = 11;
450bdd1243dSDimitry Andric         } else if (AMDGPU::SReg_352RegClass.contains(Reg)) {
451bdd1243dSDimitry Andric           IsSGPR = true;
452bdd1243dSDimitry Andric           Width = 11;
453bdd1243dSDimitry Andric         } else if (AMDGPU::AReg_352RegClass.contains(Reg)) {
454bdd1243dSDimitry Andric           IsSGPR = false;
455bdd1243dSDimitry Andric           IsAGPR = true;
456bdd1243dSDimitry Andric           Width = 11;
457bdd1243dSDimitry Andric         } else if (AMDGPU::VReg_384RegClass.contains(Reg)) {
458bdd1243dSDimitry Andric           IsSGPR = false;
459bdd1243dSDimitry Andric           Width = 12;
460bdd1243dSDimitry Andric         } else if (AMDGPU::SReg_384RegClass.contains(Reg)) {
461bdd1243dSDimitry Andric           IsSGPR = true;
462bdd1243dSDimitry Andric           Width = 12;
463bdd1243dSDimitry Andric         } else if (AMDGPU::AReg_384RegClass.contains(Reg)) {
464bdd1243dSDimitry Andric           IsSGPR = false;
465bdd1243dSDimitry Andric           IsAGPR = true;
466bdd1243dSDimitry Andric           Width = 12;
467fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
468fe6060f1SDimitry Andric           IsSGPR = true;
469fe6060f1SDimitry Andric           Width = 16;
470fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
471fe6060f1SDimitry Andric           IsSGPR = false;
472fe6060f1SDimitry Andric           Width = 16;
473fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
474fe6060f1SDimitry Andric           IsSGPR = false;
475fe6060f1SDimitry Andric           IsAGPR = true;
476fe6060f1SDimitry Andric           Width = 16;
477fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
478fe6060f1SDimitry Andric           IsSGPR = true;
479fe6060f1SDimitry Andric           Width = 32;
480fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
481fe6060f1SDimitry Andric           IsSGPR = false;
482fe6060f1SDimitry Andric           Width = 32;
483fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
484fe6060f1SDimitry Andric           IsSGPR = false;
485fe6060f1SDimitry Andric           IsAGPR = true;
486fe6060f1SDimitry Andric           Width = 32;
487fe6060f1SDimitry Andric         } else {
48806c3fb27SDimitry Andric           // We only expect TTMP registers or registers that do not belong to
48906c3fb27SDimitry Andric           // any RC.
49006c3fb27SDimitry Andric           assert((AMDGPU::TTMP_32RegClass.contains(Reg) ||
49106c3fb27SDimitry Andric                   AMDGPU::TTMP_64RegClass.contains(Reg) ||
49206c3fb27SDimitry Andric                   AMDGPU::TTMP_128RegClass.contains(Reg) ||
49306c3fb27SDimitry Andric                   AMDGPU::TTMP_256RegClass.contains(Reg) ||
49406c3fb27SDimitry Andric                   AMDGPU::TTMP_512RegClass.contains(Reg) ||
49506c3fb27SDimitry Andric                   !TRI.getPhysRegBaseClass(Reg)) &&
49606c3fb27SDimitry Andric                  "Unknown register class");
497fe6060f1SDimitry Andric         }
498fe6060f1SDimitry Andric         unsigned HWReg = TRI.getHWRegIndex(Reg);
499fe6060f1SDimitry Andric         int MaxUsed = HWReg + Width - 1;
500fe6060f1SDimitry Andric         if (IsSGPR) {
501fe6060f1SDimitry Andric           MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
502fe6060f1SDimitry Andric         } else if (IsAGPR) {
503fe6060f1SDimitry Andric           MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
504fe6060f1SDimitry Andric         } else {
505fe6060f1SDimitry Andric           MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
506fe6060f1SDimitry Andric         }
507fe6060f1SDimitry Andric       }
508fe6060f1SDimitry Andric 
509fe6060f1SDimitry Andric       if (MI.isCall()) {
510fe6060f1SDimitry Andric         // Pseudo used just to encode the underlying global. Is there a better
511fe6060f1SDimitry Andric         // way to track this?
512fe6060f1SDimitry Andric 
513fe6060f1SDimitry Andric         const MachineOperand *CalleeOp =
514fe6060f1SDimitry Andric             TII->getNamedOperand(MI, AMDGPU::OpName::callee);
515fe6060f1SDimitry Andric 
516fe6060f1SDimitry Andric         const Function *Callee = getCalleeFunction(*CalleeOp);
517fe6060f1SDimitry Andric         DenseMap<const Function *, SIFunctionResourceInfo>::const_iterator I =
518fe6060f1SDimitry Andric             CallGraphResourceInfo.end();
519fe6060f1SDimitry Andric 
520fe6060f1SDimitry Andric         // Avoid crashing on undefined behavior with an illegal call to a
521fe6060f1SDimitry Andric         // kernel. If a callsite's calling convention doesn't match the
522fe6060f1SDimitry Andric         // function's, it's undefined behavior. If the callsite calling
523fe6060f1SDimitry Andric         // convention does match, that would have errored earlier.
524fe6060f1SDimitry Andric         if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
525fe6060f1SDimitry Andric           report_fatal_error("invalid call to entry function");
526fe6060f1SDimitry Andric 
527fe6060f1SDimitry Andric         bool IsIndirect = !Callee || Callee->isDeclaration();
528fe6060f1SDimitry Andric         if (!IsIndirect)
529fe6060f1SDimitry Andric           I = CallGraphResourceInfo.find(Callee);
530fe6060f1SDimitry Andric 
531349cc55cSDimitry Andric         // FIXME: Call site could have norecurse on it
532349cc55cSDimitry Andric         if (!Callee || !Callee->doesNotRecurse()) {
533349cc55cSDimitry Andric           Info.HasRecursion = true;
534349cc55cSDimitry Andric 
535349cc55cSDimitry Andric           // TODO: If we happen to know there is no stack usage in the
536349cc55cSDimitry Andric           // callgraph, we don't need to assume an infinitely growing stack.
537349cc55cSDimitry Andric           if (!MI.isReturn()) {
538349cc55cSDimitry Andric             // We don't need to assume an unknown stack size for tail calls.
539349cc55cSDimitry Andric 
540349cc55cSDimitry Andric             // FIXME: This only benefits in the case where the kernel does not
541349cc55cSDimitry Andric             // directly call the tail called function. If a kernel directly
542349cc55cSDimitry Andric             // calls a tail recursive function, we'll assume maximum stack size
543349cc55cSDimitry Andric             // based on the regular call instruction.
544349cc55cSDimitry Andric             CalleeFrameSize =
545349cc55cSDimitry Andric               std::max(CalleeFrameSize,
546349cc55cSDimitry Andric                        static_cast<uint64_t>(AssumedStackSizeForExternalCall));
547349cc55cSDimitry Andric           }
548349cc55cSDimitry Andric         }
549349cc55cSDimitry Andric 
550fe6060f1SDimitry Andric         if (IsIndirect || I == CallGraphResourceInfo.end()) {
551fe6060f1SDimitry Andric           CalleeFrameSize =
552fe6060f1SDimitry Andric               std::max(CalleeFrameSize,
553fe6060f1SDimitry Andric                        static_cast<uint64_t>(AssumedStackSizeForExternalCall));
554fe6060f1SDimitry Andric 
555fe6060f1SDimitry Andric           // Register usage of indirect calls gets handled later
556fe6060f1SDimitry Andric           Info.UsesVCC = true;
557fe6060f1SDimitry Andric           Info.UsesFlatScratch = ST.hasFlatAddressSpace();
558fe6060f1SDimitry Andric           Info.HasDynamicallySizedStack = true;
559fe6060f1SDimitry Andric           Info.HasIndirectCall = true;
560fe6060f1SDimitry Andric         } else {
561fe6060f1SDimitry Andric           // We force CodeGen to run in SCC order, so the callee's register
562fe6060f1SDimitry Andric           // usage etc. should be the cumulative usage of all callees.
563fe6060f1SDimitry Andric           MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
564fe6060f1SDimitry Andric           MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
565fe6060f1SDimitry Andric           MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
566fe6060f1SDimitry Andric           CalleeFrameSize =
567fe6060f1SDimitry Andric               std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
568fe6060f1SDimitry Andric           Info.UsesVCC |= I->second.UsesVCC;
569fe6060f1SDimitry Andric           Info.UsesFlatScratch |= I->second.UsesFlatScratch;
570fe6060f1SDimitry Andric           Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
571fe6060f1SDimitry Andric           Info.HasRecursion |= I->second.HasRecursion;
572fe6060f1SDimitry Andric           Info.HasIndirectCall |= I->second.HasIndirectCall;
573fe6060f1SDimitry Andric         }
574fe6060f1SDimitry Andric       }
575fe6060f1SDimitry Andric     }
576fe6060f1SDimitry Andric   }
577fe6060f1SDimitry Andric 
578fe6060f1SDimitry Andric   Info.NumExplicitSGPR = MaxSGPR + 1;
579fe6060f1SDimitry Andric   Info.NumVGPR = MaxVGPR + 1;
580fe6060f1SDimitry Andric   Info.NumAGPR = MaxAGPR + 1;
581fe6060f1SDimitry Andric   Info.PrivateSegmentSize += CalleeFrameSize;
582fe6060f1SDimitry Andric 
583fe6060f1SDimitry Andric   return Info;
584fe6060f1SDimitry Andric }
585fe6060f1SDimitry Andric 
586fe6060f1SDimitry Andric void AMDGPUResourceUsageAnalysis::propagateIndirectCallRegisterUsage() {
587fe6060f1SDimitry Andric   // Collect the maximum number of registers from non-hardware-entrypoints.
588fe6060f1SDimitry Andric   // All these functions are potential targets for indirect calls.
589fe6060f1SDimitry Andric   int32_t NonKernelMaxSGPRs = 0;
590fe6060f1SDimitry Andric   int32_t NonKernelMaxVGPRs = 0;
591fe6060f1SDimitry Andric   int32_t NonKernelMaxAGPRs = 0;
592fe6060f1SDimitry Andric 
593fe6060f1SDimitry Andric   for (const auto &I : CallGraphResourceInfo) {
594fe6060f1SDimitry Andric     if (!AMDGPU::isEntryFunctionCC(I.getFirst()->getCallingConv())) {
595fe6060f1SDimitry Andric       auto &Info = I.getSecond();
596fe6060f1SDimitry Andric       NonKernelMaxSGPRs = std::max(NonKernelMaxSGPRs, Info.NumExplicitSGPR);
597fe6060f1SDimitry Andric       NonKernelMaxVGPRs = std::max(NonKernelMaxVGPRs, Info.NumVGPR);
598fe6060f1SDimitry Andric       NonKernelMaxAGPRs = std::max(NonKernelMaxAGPRs, Info.NumAGPR);
599fe6060f1SDimitry Andric     }
600fe6060f1SDimitry Andric   }
601fe6060f1SDimitry Andric 
602fe6060f1SDimitry Andric   // Add register usage for functions with indirect calls.
603fe6060f1SDimitry Andric   // For calls to unknown functions, we assume the maximum register usage of
604fe6060f1SDimitry Andric   // all non-hardware-entrypoints in the current module.
605fe6060f1SDimitry Andric   for (auto &I : CallGraphResourceInfo) {
606fe6060f1SDimitry Andric     auto &Info = I.getSecond();
607fe6060f1SDimitry Andric     if (Info.HasIndirectCall) {
608fe6060f1SDimitry Andric       Info.NumExplicitSGPR = std::max(Info.NumExplicitSGPR, NonKernelMaxSGPRs);
609fe6060f1SDimitry Andric       Info.NumVGPR = std::max(Info.NumVGPR, NonKernelMaxVGPRs);
610fe6060f1SDimitry Andric       Info.NumAGPR = std::max(Info.NumAGPR, NonKernelMaxAGPRs);
611fe6060f1SDimitry Andric     }
612fe6060f1SDimitry Andric   }
613fe6060f1SDimitry Andric }
614