xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
1fe6060f1SDimitry Andric //===- AMDGPUResourceUsageAnalysis.cpp -- analysis of resources -----------===//
2fe6060f1SDimitry Andric //
3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe6060f1SDimitry Andric //
7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8fe6060f1SDimitry Andric //
9fe6060f1SDimitry Andric /// \file
10fe6060f1SDimitry Andric /// \brief Analyzes how many registers and other resources are used by
11fe6060f1SDimitry Andric /// functions.
12fe6060f1SDimitry Andric ///
13fe6060f1SDimitry Andric /// The results of this analysis are used to fill the register usage, flat
14fe6060f1SDimitry Andric /// usage, etc. into hardware registers.
15fe6060f1SDimitry Andric ///
16fe6060f1SDimitry Andric /// The analysis takes callees into account. E.g. if a function A that needs 10
17fe6060f1SDimitry Andric /// VGPRs calls a function B that needs 20 VGPRs, querying the VGPR usage of A
18fe6060f1SDimitry Andric /// will return 20.
19fe6060f1SDimitry Andric /// It is assumed that an indirect call can go into any function except
20fe6060f1SDimitry Andric /// hardware-entrypoints. Therefore the register usage of functions with
21fe6060f1SDimitry Andric /// indirect calls is estimated as the maximum of all non-entrypoint functions
22fe6060f1SDimitry Andric /// in the module.
23fe6060f1SDimitry Andric ///
24fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
25fe6060f1SDimitry Andric 
26fe6060f1SDimitry Andric #include "AMDGPUResourceUsageAnalysis.h"
27fe6060f1SDimitry Andric #include "AMDGPU.h"
28fe6060f1SDimitry Andric #include "GCNSubtarget.h"
29fe6060f1SDimitry Andric #include "SIMachineFunctionInfo.h"
3081ad6265SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
31fe6060f1SDimitry Andric #include "llvm/Analysis/CallGraph.h"
3281ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
33fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
34349cc55cSDimitry Andric #include "llvm/IR/GlobalAlias.h"
35349cc55cSDimitry Andric #include "llvm/IR/GlobalValue.h"
36fe6060f1SDimitry Andric #include "llvm/Target/TargetMachine.h"
37fe6060f1SDimitry Andric 
38fe6060f1SDimitry Andric using namespace llvm;
39fe6060f1SDimitry Andric using namespace llvm::AMDGPU;
40fe6060f1SDimitry Andric 
41fe6060f1SDimitry Andric #define DEBUG_TYPE "amdgpu-resource-usage"
42fe6060f1SDimitry Andric 
43fe6060f1SDimitry Andric char llvm::AMDGPUResourceUsageAnalysis::ID = 0;
44fe6060f1SDimitry Andric char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;
45fe6060f1SDimitry Andric 
46*bdd1243dSDimitry Andric // In code object v4 and older, we need to tell the runtime some amount ahead of
47*bdd1243dSDimitry Andric // time if we don't know the true stack size. Assume a smaller number if this is
48*bdd1243dSDimitry Andric // only due to dynamic / non-entry block allocas.
49fe6060f1SDimitry Andric static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
50fe6060f1SDimitry Andric     "amdgpu-assume-external-call-stack-size",
51fe6060f1SDimitry Andric     cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
52fe6060f1SDimitry Andric     cl::init(16384));
53fe6060f1SDimitry Andric 
54fe6060f1SDimitry Andric static cl::opt<uint32_t> AssumedStackSizeForDynamicSizeObjects(
55fe6060f1SDimitry Andric     "amdgpu-assume-dynamic-stack-object-size",
56fe6060f1SDimitry Andric     cl::desc("Assumed extra stack use if there are any "
57fe6060f1SDimitry Andric              "variable sized objects (in bytes)"),
58fe6060f1SDimitry Andric     cl::Hidden, cl::init(4096));
59fe6060f1SDimitry Andric 
60fe6060f1SDimitry Andric INITIALIZE_PASS(AMDGPUResourceUsageAnalysis, DEBUG_TYPE,
61fe6060f1SDimitry Andric                 "Function register usage analysis", true, true)
62fe6060f1SDimitry Andric 
63fe6060f1SDimitry Andric static const Function *getCalleeFunction(const MachineOperand &Op) {
64fe6060f1SDimitry Andric   if (Op.isImm()) {
65fe6060f1SDimitry Andric     assert(Op.getImm() == 0);
66fe6060f1SDimitry Andric     return nullptr;
67fe6060f1SDimitry Andric   }
68349cc55cSDimitry Andric   if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
69349cc55cSDimitry Andric     return cast<Function>(GA->getOperand(0));
70fe6060f1SDimitry Andric   return cast<Function>(Op.getGlobal());
71fe6060f1SDimitry Andric }
72fe6060f1SDimitry Andric 
73fe6060f1SDimitry Andric static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
74fe6060f1SDimitry Andric                                   const SIInstrInfo &TII, unsigned Reg) {
75fe6060f1SDimitry Andric   for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
76fe6060f1SDimitry Andric     if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
77fe6060f1SDimitry Andric       return true;
78fe6060f1SDimitry Andric   }
79fe6060f1SDimitry Andric 
80fe6060f1SDimitry Andric   return false;
81fe6060f1SDimitry Andric }
82fe6060f1SDimitry Andric 
83fe6060f1SDimitry Andric int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs(
84fe6060f1SDimitry Andric     const GCNSubtarget &ST) const {
85fe6060f1SDimitry Andric   return NumExplicitSGPR +
86fe6060f1SDimitry Andric          IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch,
87fe6060f1SDimitry Andric                                    ST.getTargetID().isXnackOnOrAny());
88fe6060f1SDimitry Andric }
89fe6060f1SDimitry Andric 
90fe6060f1SDimitry Andric int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
91349cc55cSDimitry Andric     const GCNSubtarget &ST, int32_t ArgNumAGPR, int32_t ArgNumVGPR) const {
9281ad6265SDimitry Andric   return AMDGPU::getTotalNumVGPRs(ST.hasGFX90AInsts(), ArgNumAGPR, ArgNumVGPR);
93349cc55cSDimitry Andric }
94349cc55cSDimitry Andric 
95349cc55cSDimitry Andric int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
96fe6060f1SDimitry Andric     const GCNSubtarget &ST) const {
97349cc55cSDimitry Andric   return getTotalNumVGPRs(ST, NumAGPR, NumVGPR);
98fe6060f1SDimitry Andric }
99fe6060f1SDimitry Andric 
10081ad6265SDimitry Andric bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
101fe6060f1SDimitry Andric   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
102fe6060f1SDimitry Andric   if (!TPC)
103fe6060f1SDimitry Andric     return false;
104fe6060f1SDimitry Andric 
10581ad6265SDimitry Andric   MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
106fe6060f1SDimitry Andric   const TargetMachine &TM = TPC->getTM<TargetMachine>();
107fe6060f1SDimitry Andric   bool HasIndirectCall = false;
108fe6060f1SDimitry Andric 
10981ad6265SDimitry Andric   CallGraph CG = CallGraph(M);
11081ad6265SDimitry Andric   auto End = po_end(&CG);
11181ad6265SDimitry Andric 
112*bdd1243dSDimitry Andric   // By default, for code object v5 and later, track only the minimum scratch
113*bdd1243dSDimitry Andric   // size
114*bdd1243dSDimitry Andric   if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) {
115*bdd1243dSDimitry Andric     if (!AssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
116*bdd1243dSDimitry Andric       AssumedStackSizeForDynamicSizeObjects = 0;
117*bdd1243dSDimitry Andric     if (!AssumedStackSizeForExternalCall.getNumOccurrences())
118*bdd1243dSDimitry Andric       AssumedStackSizeForExternalCall = 0;
119*bdd1243dSDimitry Andric   }
120*bdd1243dSDimitry Andric 
12181ad6265SDimitry Andric   for (auto IT = po_begin(&CG); IT != End; ++IT) {
12281ad6265SDimitry Andric     Function *F = IT->getFunction();
123fe6060f1SDimitry Andric     if (!F || F->isDeclaration())
124fe6060f1SDimitry Andric       continue;
125fe6060f1SDimitry Andric 
12681ad6265SDimitry Andric     MachineFunction *MF = MMI.getMachineFunction(*F);
12781ad6265SDimitry Andric     assert(MF && "function must have been generated already");
128fe6060f1SDimitry Andric 
129*bdd1243dSDimitry Andric     auto CI =
130*bdd1243dSDimitry Andric         CallGraphResourceInfo.insert(std::pair(F, SIFunctionResourceInfo()));
131fe6060f1SDimitry Andric     SIFunctionResourceInfo &Info = CI.first->second;
132fe6060f1SDimitry Andric     assert(CI.second && "should only be called once per function");
13381ad6265SDimitry Andric     Info = analyzeResourceUsage(*MF, TM);
134fe6060f1SDimitry Andric     HasIndirectCall |= Info.HasIndirectCall;
135fe6060f1SDimitry Andric   }
136fe6060f1SDimitry Andric 
137*bdd1243dSDimitry Andric   // It's possible we have unreachable functions in the module which weren't
138*bdd1243dSDimitry Andric   // visited by the PO traversal. Make sure we have some resource counts to
139*bdd1243dSDimitry Andric   // report.
140*bdd1243dSDimitry Andric   for (const auto &IT : CG) {
141*bdd1243dSDimitry Andric     const Function *F = IT.first;
142*bdd1243dSDimitry Andric     if (!F || F->isDeclaration())
143*bdd1243dSDimitry Andric       continue;
144*bdd1243dSDimitry Andric 
145*bdd1243dSDimitry Andric     auto CI =
146*bdd1243dSDimitry Andric         CallGraphResourceInfo.insert(std::pair(F, SIFunctionResourceInfo()));
147*bdd1243dSDimitry Andric     if (!CI.second) // Skip already visited functions
148*bdd1243dSDimitry Andric       continue;
149*bdd1243dSDimitry Andric 
150*bdd1243dSDimitry Andric     SIFunctionResourceInfo &Info = CI.first->second;
151*bdd1243dSDimitry Andric     MachineFunction *MF = MMI.getMachineFunction(*F);
152*bdd1243dSDimitry Andric     assert(MF && "function must have been generated already");
153*bdd1243dSDimitry Andric     Info = analyzeResourceUsage(*MF, TM);
154*bdd1243dSDimitry Andric     HasIndirectCall |= Info.HasIndirectCall;
155*bdd1243dSDimitry Andric   }
156*bdd1243dSDimitry Andric 
157fe6060f1SDimitry Andric   if (HasIndirectCall)
158fe6060f1SDimitry Andric     propagateIndirectCallRegisterUsage();
159fe6060f1SDimitry Andric 
160fe6060f1SDimitry Andric   return false;
161fe6060f1SDimitry Andric }
162fe6060f1SDimitry Andric 
163fe6060f1SDimitry Andric AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
164fe6060f1SDimitry Andric AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
165fe6060f1SDimitry Andric     const MachineFunction &MF, const TargetMachine &TM) const {
166fe6060f1SDimitry Andric   SIFunctionResourceInfo Info;
167fe6060f1SDimitry Andric 
168fe6060f1SDimitry Andric   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
169fe6060f1SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
170fe6060f1SDimitry Andric   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
171fe6060f1SDimitry Andric   const MachineRegisterInfo &MRI = MF.getRegInfo();
172fe6060f1SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
173fe6060f1SDimitry Andric   const SIRegisterInfo &TRI = TII->getRegisterInfo();
174fe6060f1SDimitry Andric 
175fe6060f1SDimitry Andric   Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
176fe6060f1SDimitry Andric                          MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
177fe6060f1SDimitry Andric                          MRI.isLiveIn(MFI->getPreloadedReg(
178fe6060f1SDimitry Andric                              AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT));
179fe6060f1SDimitry Andric 
180fe6060f1SDimitry Andric   // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
181fe6060f1SDimitry Andric   // instructions aren't used to access the scratch buffer. Inline assembly may
182fe6060f1SDimitry Andric   // need it though.
183fe6060f1SDimitry Andric   //
184fe6060f1SDimitry Andric   // If we only have implicit uses of flat_scr on flat instructions, it is not
185fe6060f1SDimitry Andric   // really needed.
186fe6060f1SDimitry Andric   if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
187fe6060f1SDimitry Andric       (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
188fe6060f1SDimitry Andric        !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
189fe6060f1SDimitry Andric        !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
190fe6060f1SDimitry Andric     Info.UsesFlatScratch = false;
191fe6060f1SDimitry Andric   }
192fe6060f1SDimitry Andric 
193fe6060f1SDimitry Andric   Info.PrivateSegmentSize = FrameInfo.getStackSize();
194fe6060f1SDimitry Andric 
195fe6060f1SDimitry Andric   // Assume a big number if there are any unknown sized objects.
196fe6060f1SDimitry Andric   Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
197fe6060f1SDimitry Andric   if (Info.HasDynamicallySizedStack)
198fe6060f1SDimitry Andric     Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;
199fe6060f1SDimitry Andric 
200fe6060f1SDimitry Andric   if (MFI->isStackRealigned())
201fe6060f1SDimitry Andric     Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
202fe6060f1SDimitry Andric 
203fe6060f1SDimitry Andric   Info.UsesVCC =
204fe6060f1SDimitry Andric       MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);
205fe6060f1SDimitry Andric 
206fe6060f1SDimitry Andric   // If there are no calls, MachineRegisterInfo can tell us the used register
207fe6060f1SDimitry Andric   // count easily.
208fe6060f1SDimitry Andric   // A tail call isn't considered a call for MachineFrameInfo's purposes.
209fe6060f1SDimitry Andric   if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
210fe6060f1SDimitry Andric     MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
211fe6060f1SDimitry Andric     for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
212fe6060f1SDimitry Andric       if (MRI.isPhysRegUsed(Reg)) {
213fe6060f1SDimitry Andric         HighestVGPRReg = Reg;
214fe6060f1SDimitry Andric         break;
215fe6060f1SDimitry Andric       }
216fe6060f1SDimitry Andric     }
217fe6060f1SDimitry Andric 
218fe6060f1SDimitry Andric     if (ST.hasMAIInsts()) {
219fe6060f1SDimitry Andric       MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
220fe6060f1SDimitry Andric       for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
221fe6060f1SDimitry Andric         if (MRI.isPhysRegUsed(Reg)) {
222fe6060f1SDimitry Andric           HighestAGPRReg = Reg;
223fe6060f1SDimitry Andric           break;
224fe6060f1SDimitry Andric         }
225fe6060f1SDimitry Andric       }
226fe6060f1SDimitry Andric       Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister
227fe6060f1SDimitry Andric                          ? 0
228fe6060f1SDimitry Andric                          : TRI.getHWRegIndex(HighestAGPRReg) + 1;
229fe6060f1SDimitry Andric     }
230fe6060f1SDimitry Andric 
231fe6060f1SDimitry Andric     MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
232fe6060f1SDimitry Andric     for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
233fe6060f1SDimitry Andric       if (MRI.isPhysRegUsed(Reg)) {
234fe6060f1SDimitry Andric         HighestSGPRReg = Reg;
235fe6060f1SDimitry Andric         break;
236fe6060f1SDimitry Andric       }
237fe6060f1SDimitry Andric     }
238fe6060f1SDimitry Andric 
239fe6060f1SDimitry Andric     // We found the maximum register index. They start at 0, so add one to get
240fe6060f1SDimitry Andric     // the number of registers.
241fe6060f1SDimitry Andric     Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister
242fe6060f1SDimitry Andric                        ? 0
243fe6060f1SDimitry Andric                        : TRI.getHWRegIndex(HighestVGPRReg) + 1;
244fe6060f1SDimitry Andric     Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister
245fe6060f1SDimitry Andric                                ? 0
246fe6060f1SDimitry Andric                                : TRI.getHWRegIndex(HighestSGPRReg) + 1;
247fe6060f1SDimitry Andric 
248fe6060f1SDimitry Andric     return Info;
249fe6060f1SDimitry Andric   }
250fe6060f1SDimitry Andric 
251fe6060f1SDimitry Andric   int32_t MaxVGPR = -1;
252fe6060f1SDimitry Andric   int32_t MaxAGPR = -1;
253fe6060f1SDimitry Andric   int32_t MaxSGPR = -1;
254fe6060f1SDimitry Andric   uint64_t CalleeFrameSize = 0;
255fe6060f1SDimitry Andric 
256fe6060f1SDimitry Andric   for (const MachineBasicBlock &MBB : MF) {
257fe6060f1SDimitry Andric     for (const MachineInstr &MI : MBB) {
258fe6060f1SDimitry Andric       // TODO: Check regmasks? Do they occur anywhere except calls?
259fe6060f1SDimitry Andric       for (const MachineOperand &MO : MI.operands()) {
260fe6060f1SDimitry Andric         unsigned Width = 0;
261fe6060f1SDimitry Andric         bool IsSGPR = false;
262fe6060f1SDimitry Andric         bool IsAGPR = false;
263fe6060f1SDimitry Andric 
264fe6060f1SDimitry Andric         if (!MO.isReg())
265fe6060f1SDimitry Andric           continue;
266fe6060f1SDimitry Andric 
267fe6060f1SDimitry Andric         Register Reg = MO.getReg();
268fe6060f1SDimitry Andric         switch (Reg) {
269fe6060f1SDimitry Andric         case AMDGPU::EXEC:
270fe6060f1SDimitry Andric         case AMDGPU::EXEC_LO:
271fe6060f1SDimitry Andric         case AMDGPU::EXEC_HI:
272fe6060f1SDimitry Andric         case AMDGPU::SCC:
273fe6060f1SDimitry Andric         case AMDGPU::M0:
274fe6060f1SDimitry Andric         case AMDGPU::M0_LO16:
275fe6060f1SDimitry Andric         case AMDGPU::M0_HI16:
276*bdd1243dSDimitry Andric         case AMDGPU::SRC_SHARED_BASE_LO:
277fe6060f1SDimitry Andric         case AMDGPU::SRC_SHARED_BASE:
278*bdd1243dSDimitry Andric         case AMDGPU::SRC_SHARED_LIMIT_LO:
279fe6060f1SDimitry Andric         case AMDGPU::SRC_SHARED_LIMIT:
280*bdd1243dSDimitry Andric         case AMDGPU::SRC_PRIVATE_BASE_LO:
281fe6060f1SDimitry Andric         case AMDGPU::SRC_PRIVATE_BASE:
282*bdd1243dSDimitry Andric         case AMDGPU::SRC_PRIVATE_LIMIT_LO:
283fe6060f1SDimitry Andric         case AMDGPU::SRC_PRIVATE_LIMIT:
284fe6060f1SDimitry Andric         case AMDGPU::SGPR_NULL:
28581ad6265SDimitry Andric         case AMDGPU::SGPR_NULL64:
286fe6060f1SDimitry Andric         case AMDGPU::MODE:
287fe6060f1SDimitry Andric           continue;
288fe6060f1SDimitry Andric 
289fe6060f1SDimitry Andric         case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
290fe6060f1SDimitry Andric           llvm_unreachable("src_pops_exiting_wave_id should not be used");
291fe6060f1SDimitry Andric 
292fe6060f1SDimitry Andric         case AMDGPU::NoRegister:
293fe6060f1SDimitry Andric           assert(MI.isDebugInstr() &&
294fe6060f1SDimitry Andric                  "Instruction uses invalid noreg register");
295fe6060f1SDimitry Andric           continue;
296fe6060f1SDimitry Andric 
297fe6060f1SDimitry Andric         case AMDGPU::VCC:
298fe6060f1SDimitry Andric         case AMDGPU::VCC_LO:
299fe6060f1SDimitry Andric         case AMDGPU::VCC_HI:
300fe6060f1SDimitry Andric         case AMDGPU::VCC_LO_LO16:
301fe6060f1SDimitry Andric         case AMDGPU::VCC_LO_HI16:
302fe6060f1SDimitry Andric         case AMDGPU::VCC_HI_LO16:
303fe6060f1SDimitry Andric         case AMDGPU::VCC_HI_HI16:
304fe6060f1SDimitry Andric           Info.UsesVCC = true;
305fe6060f1SDimitry Andric           continue;
306fe6060f1SDimitry Andric 
307fe6060f1SDimitry Andric         case AMDGPU::FLAT_SCR:
308fe6060f1SDimitry Andric         case AMDGPU::FLAT_SCR_LO:
309fe6060f1SDimitry Andric         case AMDGPU::FLAT_SCR_HI:
310fe6060f1SDimitry Andric           continue;
311fe6060f1SDimitry Andric 
312fe6060f1SDimitry Andric         case AMDGPU::XNACK_MASK:
313fe6060f1SDimitry Andric         case AMDGPU::XNACK_MASK_LO:
314fe6060f1SDimitry Andric         case AMDGPU::XNACK_MASK_HI:
315fe6060f1SDimitry Andric           llvm_unreachable("xnack_mask registers should not be used");
316fe6060f1SDimitry Andric 
317fe6060f1SDimitry Andric         case AMDGPU::LDS_DIRECT:
318fe6060f1SDimitry Andric           llvm_unreachable("lds_direct register should not be used");
319fe6060f1SDimitry Andric 
320fe6060f1SDimitry Andric         case AMDGPU::TBA:
321fe6060f1SDimitry Andric         case AMDGPU::TBA_LO:
322fe6060f1SDimitry Andric         case AMDGPU::TBA_HI:
323fe6060f1SDimitry Andric         case AMDGPU::TMA:
324fe6060f1SDimitry Andric         case AMDGPU::TMA_LO:
325fe6060f1SDimitry Andric         case AMDGPU::TMA_HI:
326fe6060f1SDimitry Andric           llvm_unreachable("trap handler registers should not be used");
327fe6060f1SDimitry Andric 
328fe6060f1SDimitry Andric         case AMDGPU::SRC_VCCZ:
329fe6060f1SDimitry Andric           llvm_unreachable("src_vccz register should not be used");
330fe6060f1SDimitry Andric 
331fe6060f1SDimitry Andric         case AMDGPU::SRC_EXECZ:
332fe6060f1SDimitry Andric           llvm_unreachable("src_execz register should not be used");
333fe6060f1SDimitry Andric 
334fe6060f1SDimitry Andric         case AMDGPU::SRC_SCC:
335fe6060f1SDimitry Andric           llvm_unreachable("src_scc register should not be used");
336fe6060f1SDimitry Andric 
337fe6060f1SDimitry Andric         default:
338fe6060f1SDimitry Andric           break;
339fe6060f1SDimitry Andric         }
340fe6060f1SDimitry Andric 
341fe6060f1SDimitry Andric         if (AMDGPU::SReg_32RegClass.contains(Reg) ||
342fe6060f1SDimitry Andric             AMDGPU::SReg_LO16RegClass.contains(Reg) ||
343fe6060f1SDimitry Andric             AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
344fe6060f1SDimitry Andric           assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
345fe6060f1SDimitry Andric                  "trap handler registers should not be used");
346fe6060f1SDimitry Andric           IsSGPR = true;
347fe6060f1SDimitry Andric           Width = 1;
348fe6060f1SDimitry Andric         } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
349fe6060f1SDimitry Andric                    AMDGPU::VGPR_LO16RegClass.contains(Reg) ||
350fe6060f1SDimitry Andric                    AMDGPU::VGPR_HI16RegClass.contains(Reg)) {
351fe6060f1SDimitry Andric           IsSGPR = false;
352fe6060f1SDimitry Andric           Width = 1;
353fe6060f1SDimitry Andric         } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
354fe6060f1SDimitry Andric                    AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
355fe6060f1SDimitry Andric           IsSGPR = false;
356fe6060f1SDimitry Andric           IsAGPR = true;
357fe6060f1SDimitry Andric           Width = 1;
358fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
359fe6060f1SDimitry Andric           assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
360fe6060f1SDimitry Andric                  "trap handler registers should not be used");
361fe6060f1SDimitry Andric           IsSGPR = true;
362fe6060f1SDimitry Andric           Width = 2;
363fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
364fe6060f1SDimitry Andric           IsSGPR = false;
365fe6060f1SDimitry Andric           Width = 2;
366fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
367fe6060f1SDimitry Andric           IsSGPR = false;
368fe6060f1SDimitry Andric           IsAGPR = true;
369fe6060f1SDimitry Andric           Width = 2;
370fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
371fe6060f1SDimitry Andric           IsSGPR = false;
372fe6060f1SDimitry Andric           Width = 3;
373fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
374fe6060f1SDimitry Andric           IsSGPR = true;
375fe6060f1SDimitry Andric           Width = 3;
376fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
377fe6060f1SDimitry Andric           IsSGPR = false;
378fe6060f1SDimitry Andric           IsAGPR = true;
379fe6060f1SDimitry Andric           Width = 3;
380fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
381fe6060f1SDimitry Andric           assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
382fe6060f1SDimitry Andric                  "trap handler registers should not be used");
383fe6060f1SDimitry Andric           IsSGPR = true;
384fe6060f1SDimitry Andric           Width = 4;
385fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
386fe6060f1SDimitry Andric           IsSGPR = false;
387fe6060f1SDimitry Andric           Width = 4;
388fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
389fe6060f1SDimitry Andric           IsSGPR = false;
390fe6060f1SDimitry Andric           IsAGPR = true;
391fe6060f1SDimitry Andric           Width = 4;
392fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
393fe6060f1SDimitry Andric           IsSGPR = false;
394fe6060f1SDimitry Andric           Width = 5;
395fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
396fe6060f1SDimitry Andric           IsSGPR = true;
397fe6060f1SDimitry Andric           Width = 5;
398fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
399fe6060f1SDimitry Andric           IsSGPR = false;
400fe6060f1SDimitry Andric           IsAGPR = true;
401fe6060f1SDimitry Andric           Width = 5;
402fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
403fe6060f1SDimitry Andric           IsSGPR = false;
404fe6060f1SDimitry Andric           Width = 6;
405fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
406fe6060f1SDimitry Andric           IsSGPR = true;
407fe6060f1SDimitry Andric           Width = 6;
408fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
409fe6060f1SDimitry Andric           IsSGPR = false;
410fe6060f1SDimitry Andric           IsAGPR = true;
411fe6060f1SDimitry Andric           Width = 6;
412fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
413fe6060f1SDimitry Andric           IsSGPR = false;
414fe6060f1SDimitry Andric           Width = 7;
415fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
416fe6060f1SDimitry Andric           IsSGPR = true;
417fe6060f1SDimitry Andric           Width = 7;
418fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
419fe6060f1SDimitry Andric           IsSGPR = false;
420fe6060f1SDimitry Andric           IsAGPR = true;
421fe6060f1SDimitry Andric           Width = 7;
422fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
423fe6060f1SDimitry Andric           assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
424fe6060f1SDimitry Andric                  "trap handler registers should not be used");
425fe6060f1SDimitry Andric           IsSGPR = true;
426fe6060f1SDimitry Andric           Width = 8;
427fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
428fe6060f1SDimitry Andric           IsSGPR = false;
429fe6060f1SDimitry Andric           Width = 8;
430fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
431fe6060f1SDimitry Andric           IsSGPR = false;
432fe6060f1SDimitry Andric           IsAGPR = true;
433fe6060f1SDimitry Andric           Width = 8;
434*bdd1243dSDimitry Andric         } else if (AMDGPU::VReg_288RegClass.contains(Reg)) {
435*bdd1243dSDimitry Andric           IsSGPR = false;
436*bdd1243dSDimitry Andric           Width = 9;
437*bdd1243dSDimitry Andric         } else if (AMDGPU::SReg_288RegClass.contains(Reg)) {
438*bdd1243dSDimitry Andric           IsSGPR = true;
439*bdd1243dSDimitry Andric           Width = 9;
440*bdd1243dSDimitry Andric         } else if (AMDGPU::AReg_288RegClass.contains(Reg)) {
441*bdd1243dSDimitry Andric           IsSGPR = false;
442*bdd1243dSDimitry Andric           IsAGPR = true;
443*bdd1243dSDimitry Andric           Width = 9;
444*bdd1243dSDimitry Andric         } else if (AMDGPU::VReg_320RegClass.contains(Reg)) {
445*bdd1243dSDimitry Andric           IsSGPR = false;
446*bdd1243dSDimitry Andric           Width = 10;
447*bdd1243dSDimitry Andric         } else if (AMDGPU::SReg_320RegClass.contains(Reg)) {
448*bdd1243dSDimitry Andric           IsSGPR = true;
449*bdd1243dSDimitry Andric           Width = 10;
450*bdd1243dSDimitry Andric         } else if (AMDGPU::AReg_320RegClass.contains(Reg)) {
451*bdd1243dSDimitry Andric           IsSGPR = false;
452*bdd1243dSDimitry Andric           IsAGPR = true;
453*bdd1243dSDimitry Andric           Width = 10;
454*bdd1243dSDimitry Andric         } else if (AMDGPU::VReg_352RegClass.contains(Reg)) {
455*bdd1243dSDimitry Andric           IsSGPR = false;
456*bdd1243dSDimitry Andric           Width = 11;
457*bdd1243dSDimitry Andric         } else if (AMDGPU::SReg_352RegClass.contains(Reg)) {
458*bdd1243dSDimitry Andric           IsSGPR = true;
459*bdd1243dSDimitry Andric           Width = 11;
460*bdd1243dSDimitry Andric         } else if (AMDGPU::AReg_352RegClass.contains(Reg)) {
461*bdd1243dSDimitry Andric           IsSGPR = false;
462*bdd1243dSDimitry Andric           IsAGPR = true;
463*bdd1243dSDimitry Andric           Width = 11;
464*bdd1243dSDimitry Andric         } else if (AMDGPU::VReg_384RegClass.contains(Reg)) {
465*bdd1243dSDimitry Andric           IsSGPR = false;
466*bdd1243dSDimitry Andric           Width = 12;
467*bdd1243dSDimitry Andric         } else if (AMDGPU::SReg_384RegClass.contains(Reg)) {
468*bdd1243dSDimitry Andric           IsSGPR = true;
469*bdd1243dSDimitry Andric           Width = 12;
470*bdd1243dSDimitry Andric         } else if (AMDGPU::AReg_384RegClass.contains(Reg)) {
471*bdd1243dSDimitry Andric           IsSGPR = false;
472*bdd1243dSDimitry Andric           IsAGPR = true;
473*bdd1243dSDimitry Andric           Width = 12;
474fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
475fe6060f1SDimitry Andric           assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
476fe6060f1SDimitry Andric                  "trap handler registers should not be used");
477fe6060f1SDimitry Andric           IsSGPR = true;
478fe6060f1SDimitry Andric           Width = 16;
479fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
480fe6060f1SDimitry Andric           IsSGPR = false;
481fe6060f1SDimitry Andric           Width = 16;
482fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
483fe6060f1SDimitry Andric           IsSGPR = false;
484fe6060f1SDimitry Andric           IsAGPR = true;
485fe6060f1SDimitry Andric           Width = 16;
486fe6060f1SDimitry Andric         } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
487fe6060f1SDimitry Andric           IsSGPR = true;
488fe6060f1SDimitry Andric           Width = 32;
489fe6060f1SDimitry Andric         } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
490fe6060f1SDimitry Andric           IsSGPR = false;
491fe6060f1SDimitry Andric           Width = 32;
492fe6060f1SDimitry Andric         } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
493fe6060f1SDimitry Andric           IsSGPR = false;
494fe6060f1SDimitry Andric           IsAGPR = true;
495fe6060f1SDimitry Andric           Width = 32;
496fe6060f1SDimitry Andric         } else {
497fe6060f1SDimitry Andric           llvm_unreachable("Unknown register class");
498fe6060f1SDimitry Andric         }
499fe6060f1SDimitry Andric         unsigned HWReg = TRI.getHWRegIndex(Reg);
500fe6060f1SDimitry Andric         int MaxUsed = HWReg + Width - 1;
501fe6060f1SDimitry Andric         if (IsSGPR) {
502fe6060f1SDimitry Andric           MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
503fe6060f1SDimitry Andric         } else if (IsAGPR) {
504fe6060f1SDimitry Andric           MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
505fe6060f1SDimitry Andric         } else {
506fe6060f1SDimitry Andric           MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
507fe6060f1SDimitry Andric         }
508fe6060f1SDimitry Andric       }
509fe6060f1SDimitry Andric 
510fe6060f1SDimitry Andric       if (MI.isCall()) {
511fe6060f1SDimitry Andric         // Pseudo used just to encode the underlying global. Is there a better
512fe6060f1SDimitry Andric         // way to track this?
513fe6060f1SDimitry Andric 
514fe6060f1SDimitry Andric         const MachineOperand *CalleeOp =
515fe6060f1SDimitry Andric             TII->getNamedOperand(MI, AMDGPU::OpName::callee);
516fe6060f1SDimitry Andric 
517fe6060f1SDimitry Andric         const Function *Callee = getCalleeFunction(*CalleeOp);
518fe6060f1SDimitry Andric         DenseMap<const Function *, SIFunctionResourceInfo>::const_iterator I =
519fe6060f1SDimitry Andric             CallGraphResourceInfo.end();
520fe6060f1SDimitry Andric 
521fe6060f1SDimitry Andric         // Avoid crashing on undefined behavior with an illegal call to a
522fe6060f1SDimitry Andric         // kernel. If a callsite's calling convention doesn't match the
523fe6060f1SDimitry Andric         // function's, it's undefined behavior. If the callsite calling
524fe6060f1SDimitry Andric         // convention does match, that would have errored earlier.
525fe6060f1SDimitry Andric         if (Callee && AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
526fe6060f1SDimitry Andric           report_fatal_error("invalid call to entry function");
527fe6060f1SDimitry Andric 
528fe6060f1SDimitry Andric         bool IsIndirect = !Callee || Callee->isDeclaration();
529fe6060f1SDimitry Andric         if (!IsIndirect)
530fe6060f1SDimitry Andric           I = CallGraphResourceInfo.find(Callee);
531fe6060f1SDimitry Andric 
532349cc55cSDimitry Andric         // FIXME: Call site could have norecurse on it
533349cc55cSDimitry Andric         if (!Callee || !Callee->doesNotRecurse()) {
534349cc55cSDimitry Andric           Info.HasRecursion = true;
535349cc55cSDimitry Andric 
536349cc55cSDimitry Andric           // TODO: If we happen to know there is no stack usage in the
537349cc55cSDimitry Andric           // callgraph, we don't need to assume an infinitely growing stack.
538349cc55cSDimitry Andric           if (!MI.isReturn()) {
539349cc55cSDimitry Andric             // We don't need to assume an unknown stack size for tail calls.
540349cc55cSDimitry Andric 
541349cc55cSDimitry Andric             // FIXME: This only benefits in the case where the kernel does not
542349cc55cSDimitry Andric             // directly call the tail called function. If a kernel directly
543349cc55cSDimitry Andric             // calls a tail recursive function, we'll assume maximum stack size
544349cc55cSDimitry Andric             // based on the regular call instruction.
545349cc55cSDimitry Andric             CalleeFrameSize =
546349cc55cSDimitry Andric               std::max(CalleeFrameSize,
547349cc55cSDimitry Andric                        static_cast<uint64_t>(AssumedStackSizeForExternalCall));
548349cc55cSDimitry Andric           }
549349cc55cSDimitry Andric         }
550349cc55cSDimitry Andric 
551fe6060f1SDimitry Andric         if (IsIndirect || I == CallGraphResourceInfo.end()) {
552fe6060f1SDimitry Andric           CalleeFrameSize =
553fe6060f1SDimitry Andric               std::max(CalleeFrameSize,
554fe6060f1SDimitry Andric                        static_cast<uint64_t>(AssumedStackSizeForExternalCall));
555fe6060f1SDimitry Andric 
556fe6060f1SDimitry Andric           // Register usage of indirect calls gets handled later
557fe6060f1SDimitry Andric           Info.UsesVCC = true;
558fe6060f1SDimitry Andric           Info.UsesFlatScratch = ST.hasFlatAddressSpace();
559fe6060f1SDimitry Andric           Info.HasDynamicallySizedStack = true;
560fe6060f1SDimitry Andric           Info.HasIndirectCall = true;
561fe6060f1SDimitry Andric         } else {
562fe6060f1SDimitry Andric           // We force CodeGen to run in SCC order, so the callee's register
563fe6060f1SDimitry Andric           // usage etc. should be the cumulative usage of all callees.
564fe6060f1SDimitry Andric           MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
565fe6060f1SDimitry Andric           MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
566fe6060f1SDimitry Andric           MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
567fe6060f1SDimitry Andric           CalleeFrameSize =
568fe6060f1SDimitry Andric               std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
569fe6060f1SDimitry Andric           Info.UsesVCC |= I->second.UsesVCC;
570fe6060f1SDimitry Andric           Info.UsesFlatScratch |= I->second.UsesFlatScratch;
571fe6060f1SDimitry Andric           Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
572fe6060f1SDimitry Andric           Info.HasRecursion |= I->second.HasRecursion;
573fe6060f1SDimitry Andric           Info.HasIndirectCall |= I->second.HasIndirectCall;
574fe6060f1SDimitry Andric         }
575fe6060f1SDimitry Andric       }
576fe6060f1SDimitry Andric     }
577fe6060f1SDimitry Andric   }
578fe6060f1SDimitry Andric 
579fe6060f1SDimitry Andric   Info.NumExplicitSGPR = MaxSGPR + 1;
580fe6060f1SDimitry Andric   Info.NumVGPR = MaxVGPR + 1;
581fe6060f1SDimitry Andric   Info.NumAGPR = MaxAGPR + 1;
582fe6060f1SDimitry Andric   Info.PrivateSegmentSize += CalleeFrameSize;
583fe6060f1SDimitry Andric 
584fe6060f1SDimitry Andric   return Info;
585fe6060f1SDimitry Andric }
586fe6060f1SDimitry Andric 
587fe6060f1SDimitry Andric void AMDGPUResourceUsageAnalysis::propagateIndirectCallRegisterUsage() {
588fe6060f1SDimitry Andric   // Collect the maximum number of registers from non-hardware-entrypoints.
589fe6060f1SDimitry Andric   // All these functions are potential targets for indirect calls.
590fe6060f1SDimitry Andric   int32_t NonKernelMaxSGPRs = 0;
591fe6060f1SDimitry Andric   int32_t NonKernelMaxVGPRs = 0;
592fe6060f1SDimitry Andric   int32_t NonKernelMaxAGPRs = 0;
593fe6060f1SDimitry Andric 
594fe6060f1SDimitry Andric   for (const auto &I : CallGraphResourceInfo) {
595fe6060f1SDimitry Andric     if (!AMDGPU::isEntryFunctionCC(I.getFirst()->getCallingConv())) {
596fe6060f1SDimitry Andric       auto &Info = I.getSecond();
597fe6060f1SDimitry Andric       NonKernelMaxSGPRs = std::max(NonKernelMaxSGPRs, Info.NumExplicitSGPR);
598fe6060f1SDimitry Andric       NonKernelMaxVGPRs = std::max(NonKernelMaxVGPRs, Info.NumVGPR);
599fe6060f1SDimitry Andric       NonKernelMaxAGPRs = std::max(NonKernelMaxAGPRs, Info.NumAGPR);
600fe6060f1SDimitry Andric     }
601fe6060f1SDimitry Andric   }
602fe6060f1SDimitry Andric 
603fe6060f1SDimitry Andric   // Add register usage for functions with indirect calls.
604fe6060f1SDimitry Andric   // For calls to unknown functions, we assume the maximum register usage of
605fe6060f1SDimitry Andric   // all non-hardware-entrypoints in the current module.
606fe6060f1SDimitry Andric   for (auto &I : CallGraphResourceInfo) {
607fe6060f1SDimitry Andric     auto &Info = I.getSecond();
608fe6060f1SDimitry Andric     if (Info.HasIndirectCall) {
609fe6060f1SDimitry Andric       Info.NumExplicitSGPR = std::max(Info.NumExplicitSGPR, NonKernelMaxSGPRs);
610fe6060f1SDimitry Andric       Info.NumVGPR = std::max(Info.NumVGPR, NonKernelMaxVGPRs);
611fe6060f1SDimitry Andric       Info.NumAGPR = std::max(Info.NumAGPR, NonKernelMaxAGPRs);
612fe6060f1SDimitry Andric     }
613fe6060f1SDimitry Andric   }
614fe6060f1SDimitry Andric }
615