1 //===----------------------- SIFrameLowering.cpp --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8
9 #include "SIFrameLowering.h"
10 #include "AMDGPU.h"
11 #include "GCNSubtarget.h"
12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13 #include "SIMachineFunctionInfo.h"
14 #include "llvm/CodeGen/LivePhysRegs.h"
15 #include "llvm/CodeGen/MachineFrameInfo.h"
16 #include "llvm/CodeGen/RegisterScavenging.h"
17 #include "llvm/Target/TargetMachine.h"
18
19 using namespace llvm;
20
21 #define DEBUG_TYPE "frame-info"
22
23 // Find a scratch register that we can use in the prologue. We avoid using
24 // callee-save registers since they may appear to be free when this is called
25 // from canUseAsPrologue (during shrink wrapping), but then no longer be free
26 // when this is called from emitPrologue.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
28 LivePhysRegs &LiveRegs,
29 const TargetRegisterClass &RC,
30 bool Unused = false) {
31 // Mark callee saved registers as used so we will not choose them.
32 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
33 for (unsigned i = 0; CSRegs[i]; ++i)
34 LiveRegs.addReg(CSRegs[i]);
35
36 if (Unused) {
37 // We are looking for a register that can be used throughout the entire
38 // function, so any use is unacceptable.
39 for (MCRegister Reg : RC) {
40 if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
41 return Reg;
42 }
43 } else {
44 for (MCRegister Reg : RC) {
45 if (LiveRegs.available(MRI, Reg))
46 return Reg;
47 }
48 }
49
50 return MCRegister();
51 }
52
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
54 LivePhysRegs &LiveRegs,
55 Register &TempSGPR,
56 Optional<int> &FrameIndex,
57 bool IsFP) {
58 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
59 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
60
61 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
62 const SIRegisterInfo *TRI = ST.getRegisterInfo();
63
64 // We need to save and restore the current FP/BP.
65
66 // 1: If there is already a VGPR with free lanes, use it. We
67 // may already have to pay the penalty for spilling a CSR VGPR.
68 if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
69 int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
70 TargetStackID::SGPRSpill);
71
72 if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
73 llvm_unreachable("allocate SGPR spill should have worked");
74
75 FrameIndex = NewFI;
76
77 LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
78 dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
79 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
80 << '\n');
81 return;
82 }
83
84 // 2: Next, try to save the FP/BP in an unused SGPR.
85 TempSGPR = findScratchNonCalleeSaveRegister(
86 MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
87
88 if (!TempSGPR) {
89 int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
90 TargetStackID::SGPRSpill);
91
92 if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
93 // 3: There's no free lane to spill, and no free register to save FP/BP,
94 // so we're forced to spill another VGPR to use for the spill.
95 FrameIndex = NewFI;
96
97 LLVM_DEBUG(
98 auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
99 dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
100 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
101 } else {
102 // Remove dead <NewFI> index
103 MF.getFrameInfo().RemoveStackObject(NewFI);
104 // 4: If all else fails, spill the FP/BP to memory.
105 FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
106 LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
107 << (IsFP ? "FP" : "BP") << '\n');
108 }
109 } else {
110 LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
111 << printReg(TempSGPR, TRI) << '\n');
112 }
113 }
114
115 // We need to emit stack operations specially here because the frame register
116 // used here differs from the one getFrameRegister would return in the rest of
117 // the function.
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
119 const SIMachineFunctionInfo &FuncInfo,
120 LivePhysRegs &LiveRegs, MachineFunction &MF,
121 MachineBasicBlock &MBB,
122 MachineBasicBlock::iterator I, Register SpillReg,
123 int FI) {
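  // With flat scratch enabled, spill through a scratch store; otherwise fall
  // back to a MUBUF store with an immediate offset.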
124 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
125 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
126
127 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
128 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
129 MachineMemOperand *MMO = MF.getMachineMemOperand(
130 PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
131 FrameInfo.getObjectAlign(FI));
132 LiveRegs.addReg(SpillReg);
133 TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, true,
134 FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
135 &LiveRegs);
136 LiveRegs.removeReg(SpillReg);
137 }
138
static void buildEpilogRestore(const GCNSubtarget &ST,
140 const SIRegisterInfo &TRI,
141 const SIMachineFunctionInfo &FuncInfo,
142 LivePhysRegs &LiveRegs, MachineFunction &MF,
143 MachineBasicBlock &MBB,
144 MachineBasicBlock::iterator I, Register SpillReg,
145 int FI) {
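  // Mirror buildPrologSpill: scratch load when flat scratch is enabled,
  // MUBUF offset load otherwise.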
146 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
147 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
148
149 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
150 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
151 MachineMemOperand *MMO = MF.getMachineMemOperand(
152 PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
153 FrameInfo.getObjectAlign(FI));
154 TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, false,
155 FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
156 &LiveRegs);
157 }
158
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
160 const DebugLoc &DL, const SIInstrInfo *TII,
161 Register TargetReg) {
162 MachineFunction *MF = MBB.getParent();
163 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
164 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
165 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
166 Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
167 Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
168
169 if (MFI->getGITPtrHigh() != 0xffffffff) {
170 BuildMI(MBB, I, DL, SMovB32, TargetHi)
171 .addImm(MFI->getGITPtrHigh())
172 .addReg(TargetReg, RegState::ImplicitDefine);
173 } else {
174 const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
175 BuildMI(MBB, I, DL, GetPC64, TargetReg);
176 }
177 Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
178 MF->getRegInfo().addLiveIn(GitPtrLo);
179 MBB.addLiveIn(GitPtrLo);
180 BuildMI(MBB, I, DL, SMovB32, TargetLo)
181 .addReg(GitPtrLo);
182 }
183
184 // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
186 MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
187 const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
188 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
189 const SIInstrInfo *TII = ST.getInstrInfo();
190 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
191 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
192
193 // We don't need this if we only have spills since there is no user facing
194 // scratch.
195
196 // TODO: If we know we don't have flat instructions earlier, we can omit
197 // this from the input registers.
198 //
199 // TODO: We only need to know if we access scratch space through a flat
200 // pointer. Because we only detect if flat instructions are used at all,
201 // this will be used more often than necessary on VI.
202
203 Register FlatScrInitLo;
204 Register FlatScrInitHi;
205
206 if (ST.isAmdPalOS()) {
207 // Extract the scratch offset from the descriptor in the GIT
208 LivePhysRegs LiveRegs;
209 LiveRegs.init(*TRI);
210 LiveRegs.addLiveIns(MBB);
211
212 // Find unused reg to load flat scratch init into
213 MachineRegisterInfo &MRI = MF.getRegInfo();
214 Register FlatScrInit = AMDGPU::NoRegister;
215 ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
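    // Round the preloaded SGPR count up to whole 64-bit pairs and skip them so
    // the register pair we pick does not overlap any preloaded arguments.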
216 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
217 AllSGPR64s = AllSGPR64s.slice(
218 std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
219 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
220 for (MCPhysReg Reg : AllSGPR64s) {
221 if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
222 !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
223 FlatScrInit = Reg;
224 break;
225 }
226 }
227 assert(FlatScrInit && "Failed to find free register for scratch init");
228
229 FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
230 FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
231
232 buildGitPtr(MBB, I, DL, TII, FlatScrInit);
233
234 // We now have the GIT ptr - now get the scratch descriptor from the entry
235 // at offset 0 (or offset 16 for a compute shader).
236 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
237 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
238 auto *MMO = MF.getMachineMemOperand(
239 PtrInfo,
240 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
241 MachineMemOperand::MODereferenceable,
242 8, Align(4));
243 unsigned Offset =
244 MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
245 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
246 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
247 BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
248 .addReg(FlatScrInit)
249 .addImm(EncodedOffset) // offset
250 .addImm(0) // cpol
251 .addMemOperand(MMO);
252
253 // Mask the offset in [47:0] of the descriptor
254 const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
255 BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
256 .addReg(FlatScrInitHi)
257 .addImm(0xffff);
258 } else {
259 Register FlatScratchInitReg =
260 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
261 assert(FlatScratchInitReg);
262
263 MachineRegisterInfo &MRI = MF.getRegInfo();
264 MRI.addLiveIn(FlatScratchInitReg);
265 MBB.addLiveIn(FlatScratchInitReg);
266
267 FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
268 FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
269 }
270
271 // Do a 64-bit pointer add.
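  // On GFX9 the result can be written to FLAT_SCR_LO/HI directly; on GFX10+
  // the flat scratch base is only settable through s_setreg of the FLAT_SCR
  // hardware registers, as done below.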
272 if (ST.flatScratchIsPointer()) {
273 if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
274 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
275 .addReg(FlatScrInitLo)
276 .addReg(ScratchWaveOffsetReg);
277 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
278 .addReg(FlatScrInitHi)
279 .addImm(0);
280 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
281 addReg(FlatScrInitLo).
282 addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
283 (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
284 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
285 addReg(FlatScrInitHi).
286 addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
287 (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
288 return;
289 }
290
291 // For GFX9.
292 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
293 .addReg(FlatScrInitLo)
294 .addReg(ScratchWaveOffsetReg);
295 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
296 .addReg(FlatScrInitHi)
297 .addImm(0);
298
299 return;
300 }
301
302 assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
303
304 // Copy the size in bytes.
305 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
306 .addReg(FlatScrInitHi, RegState::Kill);
307
308 // Add wave offset in bytes to private base offset.
309 // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
310 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
311 .addReg(FlatScrInitLo)
312 .addReg(ScratchWaveOffsetReg);
313
314 // Convert offset to 256-byte units.
315 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
316 .addReg(FlatScrInitLo, RegState::Kill)
317 .addImm(8);
318 }
319
320 // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
321 // memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
323 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
324 I != E; ++I) {
325 if (!MFI.isDeadObjectIndex(I))
326 return false;
327 }
328
329 return true;
330 }
331
332 // Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
334 MachineFunction &MF) const {
335
336 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
337 const SIInstrInfo *TII = ST.getInstrInfo();
338 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
339 MachineRegisterInfo &MRI = MF.getRegInfo();
340 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
341
342 assert(MFI->isEntryFunction());
343
344 Register ScratchRsrcReg = MFI->getScratchRSrcReg();
345
346 if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
347 allStackObjectsAreDead(MF.getFrameInfo())))
348 return Register();
349
350 if (ST.hasSGPRInitBug() ||
351 ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
352 return ScratchRsrcReg;
353
354 // We reserved the last registers for this. Shift it down to the end of those
355 // which were actually used.
356 //
357 // FIXME: It might be safer to use a pseudoregister before replacement.
358
359 // FIXME: We should be able to eliminate unused input registers. We only
360 // cannot do this for the resources required for scratch access. For now we
361 // skip over user SGPRs and may leave unused holes.
362
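  // Skip over the preloaded SGPRs, rounded up to whole 4-register quads, so the
  // SGPR128 chosen for the resource descriptor does not overlap them.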
363 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
364 ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
365 AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
366
367 // Skip the last N reserved elements because they should have already been
368 // reserved for VCC etc.
369 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
370 for (MCPhysReg Reg : AllSGPR128s) {
371 // Pick the first unallocated one. Make sure we don't clobber the other
372 // reserved input we needed. Also for PAL, make sure we don't clobber
373 // the GIT pointer passed in SGPR0 or SGPR8.
374 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
375 !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
376 MRI.replaceRegWith(ScratchRsrcReg, Reg);
377 MFI->setScratchRSrcReg(Reg);
378 return Reg;
379 }
380 }
381
382 return ScratchRsrcReg;
383 }
384
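// Stack sizes and offsets are computed in per-lane bytes. With flat scratch the
// SGPR stack registers hold per-lane offsets directly; with MUBUF scratch they
// hold per-wave offsets, so per-lane values are scaled by the wavefront size.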
static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
386 return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
387 }
388
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
390 MachineBasicBlock &MBB) const {
391 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
392
393 // FIXME: If we only have SGPR spills, we won't actually be using scratch
394 // memory since these spill to VGPRs. We should be cleaning up these unused
395 // SGPR spill frame indices somewhere.
396
397 // FIXME: We still have implicit uses on SGPR spill instructions in case they
398 // need to spill to vector memory. It's likely that will not happen, but at
399 // this point it appears we need the setup. This part of the prolog should be
400 // emitted after frame indices are eliminated.
401
402 // FIXME: Remove all of the isPhysRegUsed checks
403
404 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
405 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
406 const SIInstrInfo *TII = ST.getInstrInfo();
407 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
408 MachineRegisterInfo &MRI = MF.getRegInfo();
409 const Function &F = MF.getFunction();
410
411 assert(MFI->isEntryFunction());
412
413 Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
414 AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
415 // FIXME: Hack to not crash in situations which emitted an error.
416 if (!PreloadedScratchWaveOffsetReg)
417 return;
418
419 // We need to do the replacement of the private segment buffer register even
420 // if there are no stack objects. There could be stores to undef or a
421 // constant without an associated object.
422 //
423 // This will return `Register()` in cases where there are no actual
424 // uses of the SRSRC.
425 Register ScratchRsrcReg;
426 if (!ST.enableFlatScratch())
427 ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
428
429 // Make the selected register live throughout the function.
430 if (ScratchRsrcReg) {
431 for (MachineBasicBlock &OtherBB : MF) {
432 if (&OtherBB != &MBB) {
433 OtherBB.addLiveIn(ScratchRsrcReg);
434 }
435 }
436 }
437
438 // Now that we have fixed the reserved SRSRC we need to locate the
439 // (potentially) preloaded SRSRC.
440 Register PreloadedScratchRsrcReg;
441 if (ST.isAmdHsaOrMesa(F)) {
442 PreloadedScratchRsrcReg =
443 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
444 if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
445 // We added live-ins during argument lowering, but since they were not
446 // used they were deleted. We're adding the uses now, so add them back.
447 MRI.addLiveIn(PreloadedScratchRsrcReg);
448 MBB.addLiveIn(PreloadedScratchRsrcReg);
449 }
450 }
451
452 // Debug location must be unknown since the first debug location is used to
453 // determine the end of the prologue.
454 DebugLoc DL;
455 MachineBasicBlock::iterator I = MBB.begin();
456
457 // We found the SRSRC first because it needs four registers and has an
458 // alignment requirement. If the SRSRC that we found clobbers
459 // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
460 // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
461 // wave offset to a free SGPR.
462 Register ScratchWaveOffsetReg;
463 if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
464 ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
465 unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
466 AllSGPRs = AllSGPRs.slice(
467 std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
468 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
469 for (MCPhysReg Reg : AllSGPRs) {
470 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
471 !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
472 ScratchWaveOffsetReg = Reg;
473 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
474 .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
475 break;
476 }
477 }
478 } else {
479 ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
480 }
481 assert(ScratchWaveOffsetReg);
482
483 if (requiresStackPointerReference(MF)) {
484 Register SPReg = MFI->getStackPtrOffsetReg();
485 assert(SPReg != AMDGPU::SP_REG);
486 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
487 .addImm(MF.getFrameInfo().getStackSize() * getScratchScaleFactor(ST));
488 }
489
490 if (hasFP(MF)) {
491 Register FPReg = MFI->getFrameOffsetReg();
492 assert(FPReg != AMDGPU::FP_REG);
493 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
494 }
495
496 if ((MFI->hasFlatScratchInit() || ScratchRsrcReg) &&
497 !ST.flatScratchIsArchitected()) {
498 MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
499 MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
500 }
501
502 if (MFI->hasFlatScratchInit()) {
503 emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
504 }
505
506 if (ScratchRsrcReg) {
507 emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
508 PreloadedScratchRsrcReg,
509 ScratchRsrcReg, ScratchWaveOffsetReg);
510 }
511 }
512
513 // Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
515 MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
516 const DebugLoc &DL, Register PreloadedScratchRsrcReg,
517 Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
518
519 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
520 const SIInstrInfo *TII = ST.getInstrInfo();
521 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
522 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
523 const Function &Fn = MF.getFunction();
524
525 if (ST.isAmdPalOS()) {
526 // The pointer to the GIT is formed from the offset passed in and either
527 // the amdgpu-git-ptr-high function attribute or the top part of the PC
528 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
529 Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
530
531 buildGitPtr(MBB, I, DL, TII, Rsrc01);
532
533 // We now have the GIT ptr - now get the scratch descriptor from the entry
534 // at offset 0 (or offset 16 for a compute shader).
535 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
536 const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
537 auto MMO = MF.getMachineMemOperand(PtrInfo,
538 MachineMemOperand::MOLoad |
539 MachineMemOperand::MOInvariant |
540 MachineMemOperand::MODereferenceable,
541 16, Align(4));
542 unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
543 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
544 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
545 BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
546 .addReg(Rsrc01)
547 .addImm(EncodedOffset) // offset
548 .addImm(0) // cpol
549 .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
550 .addMemOperand(MMO);
551
552 // The driver will always set the SRD for wave 64 (bits 118:117 of
553 // descriptor / bits 22:21 of third sub-reg will be 0b11)
554 // If the shader is actually wave32 we have to modify the const_index_stride
555 // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
556 // reason the driver does this is that there can be cases where it presents
557 // 2 shaders with different wave size (e.g. VsFs).
558 // TODO: convert to using SCRATCH instructions or multiple SRD buffers
559 if (ST.isWave32()) {
560 const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
561 BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
562 .addImm(21)
563 .addReg(Rsrc03);
564 }
565 } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
566 assert(!ST.isAmdHsaOrMesa(Fn));
567 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
568
569 Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
570 Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
571
572 // Use relocations to get the pointer, and setup the other bits manually.
573 uint64_t Rsrc23 = TII->getScratchRsrcWords23();
574
575 if (MFI->hasImplicitBufferPtr()) {
576 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
577
578 if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
579 const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
580
581 BuildMI(MBB, I, DL, Mov64, Rsrc01)
582 .addReg(MFI->getImplicitBufferPtrUserSGPR())
583 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
584 } else {
585 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
586
587 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
588 auto MMO = MF.getMachineMemOperand(
589 PtrInfo,
590 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
591 MachineMemOperand::MODereferenceable,
592 8, Align(4));
593 BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
594 .addReg(MFI->getImplicitBufferPtrUserSGPR())
595 .addImm(0) // offset
596 .addImm(0) // cpol
597 .addMemOperand(MMO)
598 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
599
600 MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
601 MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
602 }
603 } else {
604 Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
605 Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
606
607 BuildMI(MBB, I, DL, SMovB32, Rsrc0)
608 .addExternalSymbol("SCRATCH_RSRC_DWORD0")
609 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
610
611 BuildMI(MBB, I, DL, SMovB32, Rsrc1)
612 .addExternalSymbol("SCRATCH_RSRC_DWORD1")
613 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
614
615 }
616
617 BuildMI(MBB, I, DL, SMovB32, Rsrc2)
618 .addImm(Rsrc23 & 0xffffffff)
619 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
620
621 BuildMI(MBB, I, DL, SMovB32, Rsrc3)
622 .addImm(Rsrc23 >> 32)
623 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
624 } else if (ST.isAmdHsaOrMesa(Fn)) {
625 assert(PreloadedScratchRsrcReg);
626
627 if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
628 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
629 .addReg(PreloadedScratchRsrcReg, RegState::Kill);
630 }
631 }
632
633 // Add the scratch wave offset into the scratch RSRC.
634 //
635 // We only want to update the first 48 bits, which is the base address
636 // pointer, without touching the adjacent 16 bits of flags. We know this add
637 // cannot carry-out from bit 47, otherwise the scratch allocation would be
638 // impossible to fit in the 48-bit global address space.
639 //
640 // TODO: Evaluate if it is better to just construct an SRD using the flat
641 // scratch init and some constants rather than update the one we are passed.
642 Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
643 Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
644
645 // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
646 // the kernel body via inreg arguments.
647 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
648 .addReg(ScratchRsrcSub0)
649 .addReg(ScratchWaveOffsetReg)
650 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
651 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
652 .addReg(ScratchRsrcSub1)
653 .addImm(0)
654 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
655 }
656
bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
658 switch (ID) {
659 case TargetStackID::Default:
660 case TargetStackID::NoAlloc:
661 case TargetStackID::SGPRSpill:
662 return true;
663 case TargetStackID::ScalableVector:
664 return false;
665 }
666 llvm_unreachable("Invalid TargetStackID::Value");
667 }
668
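// Lazily compute the live registers at the prolog/epilog insertion point so
// later searches for a free scratch register see an accurate picture.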
static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
670 const SIMachineFunctionInfo *FuncInfo,
671 MachineFunction &MF, MachineBasicBlock &MBB,
672 MachineBasicBlock::iterator MBBI, bool IsProlog) {
673 if (LiveRegs.empty()) {
674 LiveRegs.init(TRI);
675 if (IsProlog) {
676 LiveRegs.addLiveIns(MBB);
677 } else {
678 // In epilog.
679 LiveRegs.addLiveOuts(MBB);
680 LiveRegs.stepBackward(*MBBI);
681 }
682 }
683 }
684
685 // Activate all lanes, returns saved exec.
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
687 MachineFunction &MF,
688 MachineBasicBlock &MBB,
689 MachineBasicBlock::iterator MBBI,
690 bool IsProlog) {
691 Register ScratchExecCopy;
692 MachineRegisterInfo &MRI = MF.getRegInfo();
693 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
694 const SIInstrInfo *TII = ST.getInstrInfo();
695 const SIRegisterInfo &TRI = TII->getRegisterInfo();
696 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
697 DebugLoc DL;
698
699 initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
700
701 ScratchExecCopy = findScratchNonCalleeSaveRegister(
702 MRI, LiveRegs, *TRI.getWaveMaskRegClass());
703 if (!ScratchExecCopy)
704 report_fatal_error("failed to find free scratch register");
705
706 LiveRegs.addReg(ScratchExecCopy);
707
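  // S_OR_SAVEEXEC saves the current exec mask into ScratchExecCopy and ORs in
  // the all-ones immediate, enabling every lane for the whole-wave spills and
  // restores that follow.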
708 const unsigned OrSaveExec =
709 ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
710 BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);
711
712 return ScratchExecCopy;
713 }
714
715 // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
716 // Otherwise we are spilling to memory.
static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) {
718 const MachineFrameInfo &MFI = MF.getFrameInfo();
719 return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill;
720 }
721
void SIFrameLowering::emitPrologue(MachineFunction &MF,
723 MachineBasicBlock &MBB) const {
724 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
725 if (FuncInfo->isEntryFunction()) {
726 emitEntryFunctionPrologue(MF, MBB);
727 return;
728 }
729
730 const MachineFrameInfo &MFI = MF.getFrameInfo();
731 MachineRegisterInfo &MRI = MF.getRegInfo();
732 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
733 const SIInstrInfo *TII = ST.getInstrInfo();
734 const SIRegisterInfo &TRI = TII->getRegisterInfo();
735
736 Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
737 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
738 Register BasePtrReg =
739 TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
740 LivePhysRegs LiveRegs;
741
742 MachineBasicBlock::iterator MBBI = MBB.begin();
743 DebugLoc DL;
744
745 bool HasFP = false;
746 bool HasBP = false;
747 uint32_t NumBytes = MFI.getStackSize();
748 uint32_t RoundedSize = NumBytes;
749 // To avoid clobbering VGPRs in lanes that weren't active on function entry,
750 // turn on all lanes before doing the spill to memory.
751 Register ScratchExecCopy;
752
753 Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
754 Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
755
756 // VGPRs used for SGPR->VGPR spills
757 for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
758 FuncInfo->getSGPRSpillVGPRs()) {
759 if (!Reg.FI)
760 continue;
761
762 if (!ScratchExecCopy)
763 ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
764 /*IsProlog*/ true);
765
766 buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
767 *Reg.FI);
768 }
769
770 // VGPRs used for Whole Wave Mode
771 for (const auto &Reg : FuncInfo->WWMReservedRegs) {
772 auto VGPR = Reg.first;
773 auto FI = Reg.second;
774 if (!FI)
775 continue;
776
777 if (!ScratchExecCopy)
778 ScratchExecCopy =
779 buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);
780
781 buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
782 }
783
784 if (ScratchExecCopy) {
785 // FIXME: Split block and make terminator.
786 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
787 MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
788 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
789 .addReg(ScratchExecCopy, RegState::Kill);
790 LiveRegs.addReg(ScratchExecCopy);
791 }
792
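  // The FP is an SGPR, but scratch stores take VGPR data, so stage the value
  // through a free VGPR before spilling it to its stack slot.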
793 if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) {
794 const int FramePtrFI = *FPSaveIndex;
795 assert(!MFI.isDeadObjectIndex(FramePtrFI));
796
797 initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
798
799 MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
800 MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
801 if (!TmpVGPR)
802 report_fatal_error("failed to find free scratch register");
803
804 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
805 .addReg(FramePtrReg);
806
807 buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
808 FramePtrFI);
809 }
810
811 if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) {
812 const int BasePtrFI = *BPSaveIndex;
813 assert(!MFI.isDeadObjectIndex(BasePtrFI));
814
815 initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
816
817 MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
818 MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
819 if (!TmpVGPR)
820 report_fatal_error("failed to find free scratch register");
821
822 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
823 .addReg(BasePtrReg);
824
825 buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
826 BasePtrFI);
827 }
828
829 // In this case, spill the FP to a reserved VGPR.
830 if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) {
831 const int FramePtrFI = *FPSaveIndex;
832 assert(!MFI.isDeadObjectIndex(FramePtrFI));
833
834 assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
835 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
836 FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
837 assert(Spill.size() == 1);
838
839 // Save FP before setting it up.
840 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
841 .addReg(FramePtrReg)
842 .addImm(Spill[0].Lane)
843 .addReg(Spill[0].VGPR, RegState::Undef);
844 }
845
846 // In this case, spill the BP to a reserved VGPR.
847 if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) {
848 const int BasePtrFI = *BPSaveIndex;
849 assert(!MFI.isDeadObjectIndex(BasePtrFI));
850
851 assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
852 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
853 FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
854 assert(Spill.size() == 1);
855
856 // Save BP before setting it up.
857 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
858 .addReg(BasePtrReg)
859 .addImm(Spill[0].Lane)
860 .addReg(Spill[0].VGPR, RegState::Undef);
861 }
862
863 // Emit the copy if we need an FP, and are using a free SGPR to save it.
864 if (FuncInfo->SGPRForFPSaveRestoreCopy) {
865 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
866 FuncInfo->SGPRForFPSaveRestoreCopy)
867 .addReg(FramePtrReg)
868 .setMIFlag(MachineInstr::FrameSetup);
869 }
870
871 // Emit the copy if we need a BP, and are using a free SGPR to save it.
872 if (FuncInfo->SGPRForBPSaveRestoreCopy) {
873 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
874 FuncInfo->SGPRForBPSaveRestoreCopy)
875 .addReg(BasePtrReg)
876 .setMIFlag(MachineInstr::FrameSetup);
877 }
878
879 // If a copy has been emitted for FP and/or BP, make the SGPRs
880 // used in the copy instructions live throughout the function.
881 SmallVector<MCPhysReg, 2> TempSGPRs;
882 if (FuncInfo->SGPRForFPSaveRestoreCopy)
883 TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
884
885 if (FuncInfo->SGPRForBPSaveRestoreCopy)
886 TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
887
888 if (!TempSGPRs.empty()) {
889 for (MachineBasicBlock &MBB : MF) {
890 for (MCPhysReg Reg : TempSGPRs)
891 MBB.addLiveIn(Reg);
892
893 MBB.sortUniqueLiveIns();
894 }
895 if (!LiveRegs.empty()) {
896 LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
897 LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
898 }
899 }
900
901 if (TRI.hasStackRealignment(MF)) {
902 HasFP = true;
903 const unsigned Alignment = MFI.getMaxAlign().value();
904
905 RoundedSize += Alignment;
906 if (LiveRegs.empty()) {
907 LiveRegs.init(TRI);
908 LiveRegs.addLiveIns(MBB);
909 }
910
911 // s_add_u32 s33, s32, NumBytes
912 // s_and_b32 s33, s33, 0b111...0000
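    // Adding (Alignment - 1) and masking with -Alignment (both scaled to the
    // SP's offset units) rounds the new FP up to the requested alignment.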
913 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), FramePtrReg)
914 .addReg(StackPtrReg)
915 .addImm((Alignment - 1) * getScratchScaleFactor(ST))
916 .setMIFlag(MachineInstr::FrameSetup);
917 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
918 .addReg(FramePtrReg, RegState::Kill)
919 .addImm(-Alignment * getScratchScaleFactor(ST))
920 .setMIFlag(MachineInstr::FrameSetup);
921 FuncInfo->setIsStackRealigned(true);
922 } else if ((HasFP = hasFP(MF))) {
923 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
924 .addReg(StackPtrReg)
925 .setMIFlag(MachineInstr::FrameSetup);
926 }
927
928 // If we need a base pointer, set it up here. It's whatever the value of
929 // the stack pointer is at this point. Any variable size objects will be
930 // allocated after this, so we can still use the base pointer to reference
931 // the incoming arguments.
932 if ((HasBP = TRI.hasBasePointer(MF))) {
933 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
934 .addReg(StackPtrReg)
935 .setMIFlag(MachineInstr::FrameSetup);
936 }
937
938 if (HasFP && RoundedSize != 0) {
939 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
940 .addReg(StackPtrReg)
941 .addImm(RoundedSize * getScratchScaleFactor(ST))
942 .setMIFlag(MachineInstr::FrameSetup);
943 }
944
945 assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
946 FuncInfo->FramePointerSaveIndex)) &&
947 "Needed to save FP but didn't save it anywhere");
948
949 assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
950 !FuncInfo->FramePointerSaveIndex)) &&
951 "Saved FP but didn't need it");
952
953 assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
954 FuncInfo->BasePointerSaveIndex)) &&
955 "Needed to save BP but didn't save it anywhere");
956
957 assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
958 !FuncInfo->BasePointerSaveIndex)) &&
959 "Saved BP but didn't need it");
960 }
961
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
963 MachineBasicBlock &MBB) const {
964 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
965 if (FuncInfo->isEntryFunction())
966 return;
967
968 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
969 const SIInstrInfo *TII = ST.getInstrInfo();
970 MachineRegisterInfo &MRI = MF.getRegInfo();
971 const SIRegisterInfo &TRI = TII->getRegisterInfo();
972 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
973 LivePhysRegs LiveRegs;
974 DebugLoc DL;
975
976 const MachineFrameInfo &MFI = MF.getFrameInfo();
977 uint32_t NumBytes = MFI.getStackSize();
978 uint32_t RoundedSize = FuncInfo->isStackRealigned()
979 ? NumBytes + MFI.getMaxAlign().value()
980 : NumBytes;
981 const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
982 const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
983 const Register BasePtrReg =
984 TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
985
986 Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
987 Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
988
989 if (RoundedSize != 0 && hasFP(MF)) {
990 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
991 .addReg(StackPtrReg)
992 .addImm(RoundedSize * getScratchScaleFactor(ST))
993 .setMIFlag(MachineInstr::FrameDestroy);
994 }
995
996 if (FuncInfo->SGPRForFPSaveRestoreCopy) {
997 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
998 .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
999 .setMIFlag(MachineInstr::FrameDestroy);
1000 }
1001
1002 if (FuncInfo->SGPRForBPSaveRestoreCopy) {
1003 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
1004 .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
1005 .setMIFlag(MachineInstr::FrameDestroy);
1006 }
1007
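  // Restore the FP either from its scratch-memory slot, via a temporary VGPR
  // and v_readfirstlane, or from its reserved VGPR spill lane.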
1008 if (FPSaveIndex) {
1009 const int FramePtrFI = *FPSaveIndex;
1010 assert(!MFI.isDeadObjectIndex(FramePtrFI));
1011 if (spilledToMemory(MF, FramePtrFI)) {
1012 initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
1013
1014 MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
1015 MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
1016 if (!TmpVGPR)
1017 report_fatal_error("failed to find free scratch register");
1018 buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
1019 FramePtrFI);
1020 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
1021 .addReg(TmpVGPR, RegState::Kill);
1022 } else {
1023 // Reload from VGPR spill.
1024 assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
1025 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
1026 FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
1027 assert(Spill.size() == 1);
1028 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
1029 .addReg(Spill[0].VGPR)
1030 .addImm(Spill[0].Lane);
1031 }
1032 }
1033
1034 if (BPSaveIndex) {
1035 const int BasePtrFI = *BPSaveIndex;
1036 assert(!MFI.isDeadObjectIndex(BasePtrFI));
1037 if (spilledToMemory(MF, BasePtrFI)) {
1038 initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
1039
1040 MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
1041 MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
1042 if (!TmpVGPR)
1043 report_fatal_error("failed to find free scratch register");
1044 buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
1045 BasePtrFI);
1046 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
1047 .addReg(TmpVGPR, RegState::Kill);
1048 } else {
1049 // Reload from VGPR spill.
1050 assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
1051 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
1052 FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
1053 assert(Spill.size() == 1);
1054 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg)
1055 .addReg(Spill[0].VGPR)
1056 .addImm(Spill[0].Lane);
1057 }
1058 }
1059
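  // Reload the VGPRs used for SGPR->VGPR spills and the whole wave mode
  // registers, with all lanes enabled while the loads execute.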
1060 Register ScratchExecCopy;
1061 for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
1062 FuncInfo->getSGPRSpillVGPRs()) {
1063 if (!Reg.FI)
1064 continue;
1065
1066 if (!ScratchExecCopy)
1067 ScratchExecCopy =
1068 buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
1069
1070 buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
1071 *Reg.FI);
1072 }
1073
1074 for (const auto &Reg : FuncInfo->WWMReservedRegs) {
1075 auto VGPR = Reg.first;
1076 auto FI = Reg.second;
1077 if (!FI)
1078 continue;
1079
1080 if (!ScratchExecCopy)
1081 ScratchExecCopy =
1082 buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
1083
1084 buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
1085 }
1086
1087 if (ScratchExecCopy) {
1088 // FIXME: Split block and make terminator.
1089 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1090 MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
1091 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
1092 .addReg(ScratchExecCopy, RegState::Kill);
1093 }
1094 }
1095
1096 #ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
1098 const MachineFrameInfo &MFI = MF.getFrameInfo();
1099 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1100 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
1101 I != E; ++I) {
1102 if (!MFI.isDeadObjectIndex(I) &&
1103 MFI.getStackID(I) == TargetStackID::SGPRSpill &&
1104 (I != FuncInfo->FramePointerSaveIndex &&
1105 I != FuncInfo->BasePointerSaveIndex)) {
1106 return false;
1107 }
1108 }
1109
1110 return true;
1111 }
1112 #endif
1113
StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1115 int FI,
1116 Register &FrameReg) const {
1117 const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1118
1119 FrameReg = RI->getFrameRegister(MF);
1120 return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
1121 }
1122
void SIFrameLowering::processFunctionBeforeFrameFinalized(
1124 MachineFunction &MF,
1125 RegScavenger *RS) const {
1126 MachineFrameInfo &MFI = MF.getFrameInfo();
1127
1128 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1129 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1130 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1131
1132 FuncInfo->removeDeadFrameIndices(MFI);
1133 assert(allSGPRSpillsAreDead(MF) &&
1134 "SGPR spill should have been removed in SILowerSGPRSpills");
1135
1136 // FIXME: The other checks should be redundant with allStackObjectsAreDead,
1137 // but currently hasNonSpillStackObjects is set only from source
1138 // allocas. Stack temps produced from legalization are not counted currently.
1139 if (!allStackObjectsAreDead(MFI)) {
1140 assert(RS && "RegScavenger required if spilling");
1141
1142 // Add an emergency spill slot
1143 RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
1144 }
1145 }
1146
1147 // Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
1149 BitVector &SavedVGPRs,
1150 RegScavenger *RS) const {
1151 TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
1152 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1153 if (MFI->isEntryFunction())
1154 return;
1155
1156 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1157 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1158 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1159
1160 // Ignore the SGPRs the default implementation found.
1161 SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1162
1163 // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
1164 // In gfx908 there are no AGPR loads and stores, so spilling an AGPR also
1165 // requires a temporary VGPR.
1166 if (!ST.hasGFX90AInsts())
1167 SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1168
1169 // hasFP only knows about stack objects that already exist. We're now
1170 // determining the stack slots that will be created, so we have to predict
1171 // them. Stack objects force FP usage with calls.
1172 //
1173 // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1174 // don't want to report it here.
1175 //
1176 // FIXME: Is this really hasReservedCallFrame?
1177 const bool WillHaveFP =
1178 FrameInfo.hasCalls() &&
1179 (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1180
1181 // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
1182 // so don't allow the default insertion to handle them.
1183 for (auto SSpill : MFI->getSGPRSpillVGPRs())
1184 SavedVGPRs.reset(SSpill.VGPR);
1185
1186 LivePhysRegs LiveRegs;
1187 LiveRegs.init(*TRI);
1188
1189 if (WillHaveFP || hasFP(MF)) {
1190 assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex &&
1191 "Re-reserving spill slot for FP");
1192 getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
1193 MFI->FramePointerSaveIndex, true);
1194 }
1195
1196 if (TRI->hasBasePointer(MF)) {
1197 if (MFI->SGPRForFPSaveRestoreCopy)
1198 LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);
1199
1200 assert(!MFI->SGPRForBPSaveRestoreCopy &&
1201 !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
1202 getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
1203 MFI->BasePointerSaveIndex, false);
1204 }
1205 }
1206
void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
1208 BitVector &SavedRegs,
1209 RegScavenger *RS) const {
1210 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1211 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1212 if (MFI->isEntryFunction())
1213 return;
1214
1215 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1216 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1217
1218 // The SP is specifically managed and we don't want extra spills of it.
1219 SavedRegs.reset(MFI->getStackPtrOffsetReg());
1220
1221 const BitVector AllSavedRegs = SavedRegs;
1222 SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1223
1224 // If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
1225 const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;
1226
1227 // We have to anticipate introducing CSR VGPR spills if we don't have any
1228 // stack objects already, since we require an FP if there is a call and stack.
1229 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1230 const bool WillHaveFP = FrameInfo.hasCalls() && HaveAnyCSRVGPR;
1231
1232 // FP will be specially managed like SP.
1233 if (WillHaveFP || hasFP(MF))
1234 SavedRegs.reset(MFI->getFrameOffsetReg());
1235 }
1236
bool SIFrameLowering::assignCalleeSavedSpillSlots(
1238 MachineFunction &MF, const TargetRegisterInfo *TRI,
1239 std::vector<CalleeSavedInfo> &CSI) const {
1240 if (CSI.empty())
1241 return true; // Early exit if no callee saved registers are modified!
1242
1243 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1244 if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
1245 !FuncInfo->SGPRForBPSaveRestoreCopy)
1246 return false;
1247
1248 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1249 const SIRegisterInfo *RI = ST.getRegisterInfo();
1250 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1251 Register BasePtrReg = RI->getBaseRegister();
1252 unsigned NumModifiedRegs = 0;
1253
1254 if (FuncInfo->SGPRForFPSaveRestoreCopy)
1255 NumModifiedRegs++;
1256 if (FuncInfo->SGPRForBPSaveRestoreCopy)
1257 NumModifiedRegs++;
1258
1259 for (auto &CS : CSI) {
1260 if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
1261 CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
1262 if (--NumModifiedRegs)
1263 break;
1264 } else if (CS.getReg() == BasePtrReg &&
1265 FuncInfo->SGPRForBPSaveRestoreCopy) {
1266 CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
1267 if (--NumModifiedRegs)
1268 break;
1269 }
1270 }
1271
1272 return false;
1273 }
1274
MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
1276 MachineFunction &MF,
1277 MachineBasicBlock &MBB,
1278 MachineBasicBlock::iterator I) const {
1279 int64_t Amount = I->getOperand(0).getImm();
1280 if (Amount == 0)
1281 return MBB.erase(I);
1282
1283 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1284 const SIInstrInfo *TII = ST.getInstrInfo();
1285 const DebugLoc &DL = I->getDebugLoc();
1286 unsigned Opc = I->getOpcode();
1287 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
1288 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
1289
1290 if (!hasReservedCallFrame(MF)) {
1291 Amount = alignTo(Amount, getStackAlign());
1292 assert(isUInt<32>(Amount) && "exceeded stack address space size");
1293 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1294 Register SPReg = MFI->getStackPtrOffsetReg();
1295
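    // No reserved call frame: adjust SP here, scaling the per-lane byte amount
    // to the SP's offset units.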
1296 unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
1297 BuildMI(MBB, I, DL, TII->get(Op), SPReg)
1298 .addReg(SPReg)
1299 .addImm(Amount * getScratchScaleFactor(ST));
1300 } else if (CalleePopAmount != 0) {
1301 llvm_unreachable("is this used?");
1302 }
1303
1304 return MBB.erase(I);
1305 }
1306
1307 /// Returns true if the frame will require a reference to the stack pointer.
1308 ///
1309 /// This is the set of conditions common to setting up the stack pointer in a
1310 /// kernel, and for using a frame pointer in a callable function.
1311 ///
1312 /// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
1313 /// references SP.
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
1315 return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
1316 }
1317
1318 // The FP for kernels is always known 0, so we never really need to setup an
1319 // explicit register for it. However, DisableFramePointerElim will force us to
1320 // use a register for it.
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
1322 const MachineFrameInfo &MFI = MF.getFrameInfo();
1323
1324 // For entry functions we can use an immediate offset in most cases, so the
1325 // presence of calls doesn't imply we need a distinct frame pointer.
1326 if (MFI.hasCalls() &&
1327 !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
1328 // All offsets are unsigned, so need to be addressed in the same direction
1329 // as stack growth.
1330
1331 // FIXME: This function is pretty broken, since it can be called before the
1332 // frame layout is determined or CSR spills are inserted.
1333 return MFI.getStackSize() != 0;
1334 }
1335
1336 return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
1337 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
1338 MF) ||
1339 MF.getTarget().Options.DisableFramePointerElim(MF);
1340 }
1341
1342 // This is essentially a reduced version of hasFP for entry functions. Since the
1343 // stack pointer is known 0 on entry to kernels, we never really need an FP
1344 // register. We may need to initialize the stack pointer depending on the frame
1345 // properties, which logically overlaps many of the cases where an ordinary
1346 // function would require an FP.
bool SIFrameLowering::requiresStackPointerReference(
1348 const MachineFunction &MF) const {
1349 // Callable functions always require a stack pointer reference.
1350 assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
1351 "only expected to call this for entry points");
1352
1353 const MachineFrameInfo &MFI = MF.getFrameInfo();
1354
1355 // Entry points ordinarily don't need to initialize SP. We have to set it up
1356 // for callees if there are any. Also note tail calls are impossible/don't
1357 // make any sense for kernels.
1358 if (MFI.hasCalls())
1359 return true;
1360
1361 // We still need to initialize the SP if we're doing anything weird that
1362 // references the SP, like variable sized stack objects.
1363 return frameTriviallyRequiresSP(MFI);
1364 }
1365