//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

// Find a scratch register that we can use in the prologue. We avoid using
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                   LivePhysRegs &LiveRegs,
                                                   const TargetRegisterClass &RC,
                                                   bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  if (Unused) {
    // We are looking for a register that can be used throughout the entire
    // function, so any use is unacceptable.
    for (MCRegister Reg : RC) {
      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
        return Reg;
    }
  } else {
    for (MCRegister Reg : RC) {
      if (LiveRegs.available(MRI, Reg))
        return Reg;
    }
  }

  return MCRegister();
}

static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
                                           LivePhysRegs &LiveRegs,
                                           Register &TempSGPR,
                                           Optional<int> &FrameIndex,
                                           bool IsFP) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // We need to save and restore the current FP/BP.

  // 1: If there is already a VGPR with free lanes, use it. We
  // may already have to pay the penalty for spilling a CSR VGPR.
  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");

    FrameIndex = NewFI;

    LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
               dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to  "
                      << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
                      << '\n');
    return;
  }

  // 2: Next, try to save the FP/BP in an unused SGPR.
  TempSGPR = findScratchNonCalleeSaveRegister(
      MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);

  if (!TempSGPR) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
      // 3: There's no free lane to spill, and no free register to save FP/BP,
      // so we're forced to spill another VGPR to use for the spill.
      FrameIndex = NewFI;

      LLVM_DEBUG(
          auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
          dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
                 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
    } else {
      // Remove dead <NewFI> index
      MF.getFrameInfo().RemoveStackObject(NewFI);
      // 4: If all else fails, spill the FP/BP to memory.
      FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
      LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
                        << (IsFP ? "FP" : "BP") << '\n');
    }
  } else {
    LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
                      << printReg(TempSGPR, TRI) << '\n');
  }
}

// We need to specially emit stack operations here because a different frame
// register is used than the one getFrameRegister would return for the rest
// of the function.
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                             const SIMachineFunctionInfo &FuncInfo,
                             LivePhysRegs &LiveRegs, MachineFunction &MF,
                             MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I, Register SpillReg,
                             int FI) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                        : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
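  // Mark SpillReg live across the store so that any scavenging done inside
  // buildSpillLoadStore cannot hand it out as a temporary register.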
  LiveRegs.addReg(SpillReg);
  TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, true,
                          FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
                          &LiveRegs);
  LiveRegs.removeReg(SpillReg);
}

static void buildEpilogRestore(const GCNSubtarget &ST,
                               const SIRegisterInfo &TRI,
                               const SIMachineFunctionInfo &FuncInfo,
                               LivePhysRegs &LiveRegs, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I, Register SpillReg,
                               int FI) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                        : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, false,
                          FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
                          &LiveRegs);
}

static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, const SIInstrInfo *TII,
                        Register TargetReg) {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);

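  // The high half of the GIT address comes from the amdgpu-git-ptr-high
  // function attribute when it is set (0xffffffff means unset); otherwise it
  // is taken from the current PC via S_GETPC_B64.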
  if (MFI->getGITPtrHigh() != 0xffffffff) {
    BuildMI(MBB, I, DL, SMovB32, TargetHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(TargetReg, RegState::ImplicitDefine);
  } else {
    const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
    BuildMI(MBB, I, DL, GetPC64, TargetReg);
  }
  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
  MF->getRegInfo().addLiveIn(GitPtrLo);
  MBB.addLiveIn(GitPtrLo);
  BuildMI(MBB, I, DL, SMovB32, TargetLo)
    .addReg(GitPtrLo);
}

// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScrInitLo;
  Register FlatScrInitHi;

  if (ST.isAmdPalOS()) {
    // Extract the scratch offset from the descriptor in the GIT
    LivePhysRegs LiveRegs;
    LiveRegs.init(*TRI);
    LiveRegs.addLiveIns(MBB);

    // Find unused reg to load flat scratch init into
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register FlatScrInit = AMDGPU::NoRegister;
    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
    AllSGPR64s = AllSGPR64s.slice(
        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPR64s) {
      if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
        FlatScrInit = Reg;
        break;
      }
    }
    assert(FlatScrInit && "Failed to find free register for scratch init");

    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);

    buildGitPtr(MBB, I, DL, TII, FlatScrInit);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        8, Align(4));
    unsigned Offset =
        MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
        .addReg(FlatScrInit)
        .addImm(EncodedOffset) // offset
        .addImm(0)             // cpol
        .addMemOperand(MMO);

    // Mask the offset in [47:0] of the descriptor
    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
    BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0xffff);
  } else {
    Register FlatScratchInitReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
    assert(FlatScratchInitReg);

    MachineRegisterInfo &MRI = MF.getRegInfo();
    MRI.addLiveIn(FlatScratchInitReg);
    MBB.addLiveIn(FlatScratchInitReg);

    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  }

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
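      // On GFX10+ the flat scratch base is programmed through S_SETREG writes
      // to the FLAT_SCR_LO/HI hardware registers rather than by writing the
      // FLAT_SCR register pair directly.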
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitLo).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitHi).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      return;
    }

    // For GFX9.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitHi)
      .addImm(0);

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
    .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
    .addReg(FlatScrInitLo)
    .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
    .addReg(FlatScrInitLo, RegState::Kill)
    .addImm(8);
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

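// When flat scratch is disabled, stack offsets are kept in swizzled per-wave
// units, so per-lane byte sizes are scaled by the wavefront size; with flat
// scratch the stack pointer is already in per-lane bytes.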
static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
  // FIXME: Hack to not crash in situations which emitted an error.
  if (!PreloadedScratchWaveOffsetReg)
    return;

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found clobbers the scratch
  // wave offset, which may be in a fixed SGPR or a free SGPR chosen by
  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
  // free SGPR.
  Register ScratchWaveOffsetReg;
  if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg);

  if (requiresStackPointerReference(MF)) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(MF.getFrameInfo().getStackSize() * getScratchScaleFactor(ST));
  }

  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  if ((MFI->hasFlatScratchInit() || ScratchRsrcReg) &&
      !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (MFI->hasFlatScratchInit()) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                           MachineMemOperand::MOInvariant |
                                           MachineMemOperand::MODereferenceable,
                                       16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
      .addReg(Rsrc01)
      .addImm(EncodedOffset) // offset
      .addImm(0) // cpol
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
      .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of the
    // descriptor / bits 22:21 of the third sub-reg will be 0b11). If the
    // shader is actually wave32, we have to modify the const_index_stride
    // field of the descriptor's 3rd sub-reg (bits 22:21) to 0b10 (stride=32).
    // The reason the driver does this is that there can be cases where it
    // presents 2 shaders with different wave sizes (e.g. VsFs).
    // TODO: convert to using SCRATCH instructions or multiple SRD buffers
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addImm(0) // offset
          .addImm(0) // cpol
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
      .addReg(ScratchRsrcSub1)
      .addImm(0)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::ScalableVector:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
                         const SIMachineFunctionInfo *FuncInfo,
                         MachineFunction &MF, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, bool IsProlog) {
  if (LiveRegs.empty()) {
    LiveRegs.init(TRI);
    if (IsProlog) {
      LiveRegs.addLiveIns(MBB);
    } else {
      // In epilog.
      LiveRegs.addLiveOuts(MBB);
      LiveRegs.stepBackward(*MBBI);
    }
  }
}

// Activate all lanes, returns saved exec.
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     bool IsProlog) {
  Register ScratchExecCopy;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  DebugLoc DL;

  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);

  ScratchExecCopy = findScratchNonCalleeSaveRegister(
      MRI, LiveRegs, *TRI.getWaveMaskRegClass());
  if (!ScratchExecCopy)
    report_fatal_error("failed to find free scratch register");

  LiveRegs.addReg(ScratchExecCopy);

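  // S_OR_SAVEEXEC with an all-ones immediate saves the current exec mask into
  // ScratchExecCopy and then enables every lane for the whole-wave spills.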
  const unsigned OrSaveExec =
      ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
  BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);

  return ScratchExecCopy;
}

// A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
// Otherwise we are spilling to memory.
static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill;
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LivePhysRegs LiveRegs;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL;

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;
  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
  // turn on all lanes before doing the spill to memory.
  Register ScratchExecCopy;

  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;

  // VGPRs used for SGPR->VGPR spills
  for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
       FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
                                             /*IsProlog*/ true);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
                     *Reg.FI);
  }

  // VGPRs used for Whole Wave Mode
  for (const auto &Reg : FuncInfo->WWMReservedRegs) {
    auto VGPR = Reg.first;
    auto FI = Reg.second;
    if (!FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
  }

  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveRegs.addReg(ScratchExecCopy);
  }

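  // An SGPR FP/BP cannot be stored to scratch directly, so when its save goes
  // to memory it is staged through a free VGPR first.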
  if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));

    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);

    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
        .addReg(FramePtrReg);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
                     FramePtrFI);
  }

  if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));

    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);

    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
        .addReg(BasePtrReg);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
                     BasePtrFI);
  }

  // In this case, spill the FP to a reserved VGPR.
  if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));

    assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
        FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
    assert(Spill.size() == 1);

    // Save FP before setting it up.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
        .addReg(FramePtrReg)
        .addImm(Spill[0].Lane)
        .addReg(Spill[0].VGPR, RegState::Undef);
  }

  // In this case, spill the BP to a reserved VGPR.
  if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));

    assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
        FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
    assert(Spill.size() == 1);

    // Save BP before setting it up.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
        .addReg(BasePtrReg)
        .addImm(Spill[0].Lane)
        .addReg(Spill[0].VGPR, RegState::Undef);
  }

  // Emit the copy if we need an FP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForFPSaveRestoreCopy)
        .addReg(FramePtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Emit the copy if we need a BP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForBPSaveRestoreCopy)
        .addReg(BasePtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If a copy has been emitted for FP and/or BP, make the SGPRs
  // used in the copy instructions live throughout the function.
  SmallVector<MCPhysReg, 2> TempSGPRs;
  if (FuncInfo->SGPRForFPSaveRestoreCopy)
    TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);

  if (FuncInfo->SGPRForBPSaveRestoreCopy)
    TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);

  if (!TempSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : TempSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveRegs.empty()) {
      LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
    }
  }

  if (TRI.hasStackRealignment(MF)) {
    HasFP = true;
    const unsigned Alignment = MFI.getMaxAlign().value();

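    // Reserve an extra full alignment unit of stack so the aligned-down FP
    // computed below still points inside this function's allocation.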
    RoundedSize += Alignment;
    if (LiveRegs.empty()) {
      LiveRegs.init(TRI);
      LiveRegs.addLiveIns(MBB);
    }

    // s_add_u32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
        .addReg(FramePtrReg, RegState::Kill)
        .addImm(-Alignment * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(RoundedSize * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
  }

  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
                     FuncInfo->FramePointerSaveIndex)) &&
         "Needed to save FP but didn't save it anywhere");

  assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
                    !FuncInfo->FramePointerSaveIndex)) &&
         "Saved FP but didn't need it");

  assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
                     FuncInfo->BasePointerSaveIndex)) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
                    !FuncInfo->BasePointerSaveIndex)) &&
         "Saved BP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  LivePhysRegs LiveRegs;
  DebugLoc DL;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  const Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();

  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;

  if (RoundedSize != 0 && hasFP(MF)) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
      .addReg(StackPtrReg)
      .addImm(RoundedSize * getScratchScaleFactor(ST))
      .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FPSaveIndex) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));
    if (spilledToMemory(MF, FramePtrFI)) {
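      // Restore from scratch memory: reload through a temporary VGPR, then
      // move the value back into the SGPR FP with V_READFIRSTLANE_B32.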
      initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);

      MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      if (!TmpVGPR)
        report_fatal_error("failed to find free scratch register");
      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
                         FramePtrFI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
          .addReg(TmpVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
      assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
          FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
      assert(Spill.size() == 1);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
          .addReg(Spill[0].VGPR)
          .addImm(Spill[0].Lane);
    }
  }

  if (BPSaveIndex) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));
    if (spilledToMemory(MF, BasePtrFI)) {
      initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);

      MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      if (!TmpVGPR)
        report_fatal_error("failed to find free scratch register");
      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
                         BasePtrFI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
          .addReg(TmpVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
      assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
          FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
      assert(Spill.size() == 1);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg)
          .addReg(Spill[0].VGPR)
          .addImm(Spill[0].Lane);
    }
  }

  Register ScratchExecCopy;
  for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
       FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);

    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
                       *Reg.FI);
  }

  for (const auto &Reg : FuncInfo->WWMReservedRegs) {
    auto VGPR = Reg.first;
    auto FI = Reg.second;
    if (!FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);

    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
  }

  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        (I != FuncInfo->FramePointerSaveIndex &&
         I != FuncInfo->BasePointerSaveIndex)) {
      return false;
    }
  }

  return true;
}
#endif

StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                    int FI,
                                                    Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
  MachineFunction &MF,
  RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  FuncInfo->removeDeadFrameIndices(MFI);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    // Add an emergency spill slot
    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
  }
}

// Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());

  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
  // gfx908 has no AGPR loads and stores, so spilling an AGPR also requires a
  // temporary VGPR.
  if (!ST.hasGFX90AInsts())
    SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
  // so don't allow the default insertion to handle them.
  for (auto SSpill : MFI->getSGPRSpillVGPRs())
    SavedVGPRs.reset(SSpill.VGPR);

  LivePhysRegs LiveRegs;
  LiveRegs.init(*TRI);

  if (WillHaveFP || hasFP(MF)) {
    assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex &&
           "Re-reserving spill slot for FP");
    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
                                   MFI->FramePointerSaveIndex, true);
  }

  if (TRI->hasBasePointer(MF)) {
    if (MFI->SGPRForFPSaveRestoreCopy)
      LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);

    assert(!MFI->SGPRForBPSaveRestoreCopy &&
           !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
                                   MFI->BasePointerSaveIndex, false);
  }
}

void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());

  const BitVector AllSavedRegs = SavedRegs;
  SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());

  // If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
  const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;

1228   // stack objects already, since we require an FP if there is a call and stack.
1229   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1230   const bool WillHaveFP = FrameInfo.hasCalls() && HaveAnyCSRVGPR;
1231 
1232   // FP will be specially managed like SP.
1233   if (WillHaveFP || hasFP(MF))
1234     SavedRegs.reset(MFI->getFrameOffsetReg());
1235 }
1236 
assignCalleeSavedSpillSlots(MachineFunction & MF,const TargetRegisterInfo * TRI,std::vector<CalleeSavedInfo> & CSI) const1237 bool SIFrameLowering::assignCalleeSavedSpillSlots(
1238     MachineFunction &MF, const TargetRegisterInfo *TRI,
1239     std::vector<CalleeSavedInfo> &CSI) const {
1240   if (CSI.empty())
1241     return true; // Early exit if no callee saved registers are modified!
1242 
1243   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1244   if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
1245       !FuncInfo->SGPRForBPSaveRestoreCopy)
1246     return false;
1247 
1248   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1249   const SIRegisterInfo *RI = ST.getRegisterInfo();
1250   Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1251   Register BasePtrReg = RI->getBaseRegister();
1252   unsigned NumModifiedRegs = 0;
1253 
1254   if (FuncInfo->SGPRForFPSaveRestoreCopy)
1255     NumModifiedRegs++;
1256   if (FuncInfo->SGPRForBPSaveRestoreCopy)
1257     NumModifiedRegs++;
1258 
1259   for (auto &CS : CSI) {
1260     if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
1261       CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
1262       if (--NumModifiedRegs)
1263         break;
1264     } else if (CS.getReg() == BasePtrReg &&
1265                FuncInfo->SGPRForBPSaveRestoreCopy) {
1266       CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
1267       if (--NumModifiedRegs)
1268         break;
1269     }
1270   }
1271 
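  // Returning false keeps the generic spill slot assignment for the remaining
  // CSRs; the FP/BP entries redirected above are saved via register copies
  // instead of memory.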
  return false;
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
  MachineFunction &MF,
  MachineBasicBlock &MBB,
  MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

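  // With no reserved call frame, the SP must be adjusted around each call
  // site; the amount is scaled by getScratchScaleFactor to match the units
  // the stack pointer is maintained in.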
  if (!hasReservedCallFrame(MF)) {
    Amount = alignTo(Amount, getStackAlign());
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    Register SPReg = MFI->getStackPtrOffsetReg();

    unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
    BuildMI(MBB, I, DL, TII->get(Op), SPReg)
      .addReg(SPReg)
      .addImm(Amount * getScratchScaleFactor(ST));
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}

/// Returns true if the frame will require a reference to the stack pointer.
///
/// This is the set of conditions common to setting up the stack pointer in a
/// kernel, and for using a frame pointer in a callable function.
///
/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
/// references SP.
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
}

// The FP for kernels is always known to be 0, so we never really need to set
// up an explicit register for it. However, DisableFramePointerElim will force
// us to use a register for it.
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // For entry functions we can use an immediate offset in most cases, so the
  // presence of calls doesn't imply we need a distinct frame pointer.
  if (MFI.hasCalls() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.

    // FIXME: This function is pretty broken, since it can be called before the
    // frame layout is determined or CSR spills are inserted.
    return MFI.getStackSize() != 0;
  }

  return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
         MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
             MF) ||
         MF.getTarget().Options.DisableFramePointerElim(MF);
}

// This is essentially a reduced version of hasFP for entry functions. Since
// the stack pointer is known to be 0 on entry to kernels, we never really need
// an FP register. We may need to initialize the stack pointer depending on the
// frame properties, which logically overlap many of the cases where an
// ordinary function would require an FP.
bool SIFrameLowering::requiresStackPointerReference(
    const MachineFunction &MF) const {
  // Callable functions always require a stack pointer reference.
  assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
         "only expected to call this for entry points");

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Entry points ordinarily don't need to initialize SP. We have to set it up
  // for callees if there are any. Also note tail calls are impossible/don't
  // make any sense for kernels.
  if (MFI.hasCalls())
    return true;

  // We still need to initialize the SP if we're doing anything weird that
  // references the SP, like variable sized stack objects.
  return frameTriviallyRequiresSP(MFI);
}