//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
10 // SGPR spills, so must insert CSR SGPR spills as well as expand them.
11 //
12 // This pass must never create new SGPR virtual registers.
13 //
14 // FIXME: Must stop RegScavenger spills in later passes.
15 //
16 //===----------------------------------------------------------------------===//
17
18 #include "AMDGPU.h"
19 #include "GCNSubtarget.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "llvm/CodeGen/LiveIntervals.h"
23 #include "llvm/CodeGen/RegisterScavenging.h"
24 #include "llvm/InitializePasses.h"
25
26 using namespace llvm;
27
28 #define DEBUG_TYPE "si-lower-sgpr-spills"
29
30 using MBBVector = SmallVector<MachineBasicBlock *, 4>;
31
32 namespace {
33
34 static cl::opt<bool> EnableSpillVGPRToAGPR(
35 "amdgpu-spill-vgpr-to-agpr",
36 cl::desc("Enable spilling VGPRs to AGPRs"),
37 cl::ReallyHidden,
38 cl::init(true));
39
40 class SILowerSGPRSpills : public MachineFunctionPass {
41 private:
42 const SIRegisterInfo *TRI = nullptr;
43 const SIInstrInfo *TII = nullptr;
44 VirtRegMap *VRM = nullptr;
45 LiveIntervals *LIS = nullptr;
46
47 // Save and Restore blocks of the current function. Typically there is a
48 // single save block, unless Windows EH funclets are involved.
49 MBBVector SaveBlocks;
50 MBBVector RestoreBlocks;
51
52 public:
53 static char ID;
54
SILowerSGPRSpills()55 SILowerSGPRSpills() : MachineFunctionPass(ID) {}
56
57 void calculateSaveRestoreBlocks(MachineFunction &MF);
58 bool spillCalleeSavedRegs(MachineFunction &MF);
59
60 bool runOnMachineFunction(MachineFunction &MF) override;
61
getAnalysisUsage(AnalysisUsage & AU) const62 void getAnalysisUsage(AnalysisUsage &AU) const override {
63 AU.setPreservesAll();
64 MachineFunctionPass::getAnalysisUsage(AU);
65 }
66 };
67
68 } // end anonymous namespace
69
70 char SILowerSGPRSpills::ID = 0;
71
72 INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
73 "SI lower SGPR spill instructions", false, false)
74 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
75 INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
76 "SI lower SGPR spill instructions", false, false)
77
78 char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
79
80 /// Insert restore code for the callee-saved registers used in the function.
insertCSRSaves(MachineBasicBlock & SaveBlock,ArrayRef<CalleeSavedInfo> CSI,LiveIntervals * LIS)81 static void insertCSRSaves(MachineBasicBlock &SaveBlock,
82 ArrayRef<CalleeSavedInfo> CSI,
83 LiveIntervals *LIS) {
84 MachineFunction &MF = *SaveBlock.getParent();
85 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
86 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
87 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
88
89 MachineBasicBlock::iterator I = SaveBlock.begin();
90 if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
91 const MachineRegisterInfo &MRI = MF.getRegInfo();
92
93 for (const CalleeSavedInfo &CS : CSI) {
94 // Insert the spill to the stack frame.
95 MCRegister Reg = CS.getReg();
96
97 MachineInstrSpan MIS(I, &SaveBlock);
98 const TargetRegisterClass *RC =
99 TRI->getMinimalPhysRegClass(Reg, MVT::i32);
100
101 // If this value was already livein, we probably have a direct use of the
102 // incoming register value, so don't kill at the spill point. This happens
103 // since we pass some special inputs (workgroup IDs) in the callee saved
104 // range.
105 const bool IsLiveIn = MRI.isLiveIn(Reg);
106 TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
107 RC, TRI);
108
109 if (LIS) {
110 assert(std::distance(MIS.begin(), I) == 1);
111 MachineInstr &Inst = *std::prev(I);
112
113 LIS->InsertMachineInstrInMaps(Inst);
114 LIS->removeAllRegUnitsForPhysReg(Reg);
115 }
116 }
117 }
118 }
119
120 /// Insert restore code for the callee-saved registers used in the function.
insertCSRRestores(MachineBasicBlock & RestoreBlock,MutableArrayRef<CalleeSavedInfo> CSI,LiveIntervals * LIS)121 static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
122 MutableArrayRef<CalleeSavedInfo> CSI,
123 LiveIntervals *LIS) {
124 MachineFunction &MF = *RestoreBlock.getParent();
125 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
126 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
127 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
128
129 // Restore all registers immediately before the return and any
130 // terminators that precede it.
131 MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
132
133 // FIXME: Just emit the readlane/writelane directly
134 if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
135 for (const CalleeSavedInfo &CI : reverse(CSI)) {
136 unsigned Reg = CI.getReg();
137 const TargetRegisterClass *RC =
138 TRI->getMinimalPhysRegClass(Reg, MVT::i32);
139
140 TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
141 assert(I != RestoreBlock.begin() &&
142 "loadRegFromStackSlot didn't insert any code!");
143 // Insert in reverse order. loadRegFromStackSlot can insert
144 // multiple instructions.
145
146 if (LIS) {
147 MachineInstr &Inst = *std::prev(I);
148 LIS->InsertMachineInstrInMaps(Inst);
149 LIS->removeAllRegUnitsForPhysReg(Reg);
150 }
151 }
152 }
153 }
154
155 /// Compute the sets of entry and return blocks for saving and restoring
156 /// callee-saved registers, and placing prolog and epilog code.
calculateSaveRestoreBlocks(MachineFunction & MF)157 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
158 const MachineFrameInfo &MFI = MF.getFrameInfo();
159
160 // Even when we do not change any CSR, we still want to insert the
161 // prologue and epilogue of the function.
162 // So set the save points for those.
163
164 // Use the points found by shrink-wrapping, if any.
165 if (MFI.getSavePoint()) {
166 SaveBlocks.push_back(MFI.getSavePoint());
167 assert(MFI.getRestorePoint() && "Both restore and save must be set");
168 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
169 // If RestoreBlock does not have any successor and is not a return block
170 // then the end point is unreachable and we do not need to insert any
171 // epilogue.
172 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
173 RestoreBlocks.push_back(RestoreBlock);
174 return;
175 }
176
177 // Save refs to entry and return blocks.
178 SaveBlocks.push_back(&MF.front());
179 for (MachineBasicBlock &MBB : MF) {
180 if (MBB.isEHFuncletEntry())
181 SaveBlocks.push_back(&MBB);
182 if (MBB.isReturnBlock())
183 RestoreBlocks.push_back(&MBB);
184 }
185 }
186
187 // TODO: To support shrink wrapping, this would need to copy
188 // PrologEpilogInserter's updateLiveness.
updateLiveness(MachineFunction & MF,ArrayRef<CalleeSavedInfo> CSI)189 static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
190 MachineBasicBlock &EntryBB = MF.front();
191
192 for (const CalleeSavedInfo &CSIReg : CSI)
193 EntryBB.addLiveIn(CSIReg.getReg());
194 EntryBB.sortUniqueLiveIns();
195 }
196
spillCalleeSavedRegs(MachineFunction & MF)197 bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
198 MachineRegisterInfo &MRI = MF.getRegInfo();
199 const Function &F = MF.getFunction();
200 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
201 const SIFrameLowering *TFI = ST.getFrameLowering();
202 MachineFrameInfo &MFI = MF.getFrameInfo();
203 RegScavenger *RS = nullptr;
204
205 // Determine which of the registers in the callee save list should be saved.
206 BitVector SavedRegs;
207 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
208
209 // Add the code to save and restore the callee saved registers.
210 if (!F.hasFnAttribute(Attribute::Naked)) {
211 // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
212 // necessary for verifier liveness checks.
213 MFI.setCalleeSavedInfoValid(true);
214
215 std::vector<CalleeSavedInfo> CSI;
216 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
217
218 for (unsigned I = 0; CSRegs[I]; ++I) {
219 MCRegister Reg = CSRegs[I];
220
221 if (SavedRegs.test(Reg)) {
222 const TargetRegisterClass *RC =
223 TRI->getMinimalPhysRegClass(Reg, MVT::i32);
224 int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
225 TRI->getSpillAlign(*RC), true);
226
227 CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
228 }
229 }
230
231 if (!CSI.empty()) {
232 for (MachineBasicBlock *SaveBlock : SaveBlocks)
233 insertCSRSaves(*SaveBlock, CSI, LIS);
234
235 // Add live ins to save blocks.
236 assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
237 updateLiveness(MF, CSI);
238
239 for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
240 insertCSRRestores(*RestoreBlock, CSI, LIS);
241 return true;
242 }
243 }
244
245 return false;
246 }
247
248 // Find lowest available VGPR and use it as VGPR reserved for SGPR spills.
lowerShiftReservedVGPR(MachineFunction & MF,const GCNSubtarget & ST)249 static bool lowerShiftReservedVGPR(MachineFunction &MF,
250 const GCNSubtarget &ST) {
251 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
252 const Register PreReservedVGPR = FuncInfo->VGPRReservedForSGPRSpill;
253 // Early out if pre-reservation of a VGPR for SGPR spilling is disabled.
254 if (!PreReservedVGPR)
255 return false;
256
257 // If there are no free lower VGPRs available, default to using the
258 // pre-reserved register instead.
259 const SIRegisterInfo *TRI = ST.getRegisterInfo();
260 Register LowestAvailableVGPR =
261 TRI->findUnusedRegister(MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF);
262 if (!LowestAvailableVGPR)
263 LowestAvailableVGPR = PreReservedVGPR;
264
265 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
266 // Create a stack object for a possible spill in the function prologue.
267 // Note Non-CSR VGPR also need this as we may overwrite inactive lanes.
268 Optional<int> FI = FrameInfo.CreateSpillStackObject(4, Align(4));
269
270 // Find saved info about the pre-reserved register.
271 const auto *ReservedVGPRInfoItr =
272 llvm::find_if(FuncInfo->getSGPRSpillVGPRs(),
273 [PreReservedVGPR](const auto &SpillRegInfo) {
274 return SpillRegInfo.VGPR == PreReservedVGPR;
275 });
276
277 assert(ReservedVGPRInfoItr != FuncInfo->getSGPRSpillVGPRs().end());
278 auto Index =
279 std::distance(FuncInfo->getSGPRSpillVGPRs().begin(), ReservedVGPRInfoItr);
280
281 FuncInfo->setSGPRSpillVGPRs(LowestAvailableVGPR, FI, Index);
282
283 for (MachineBasicBlock &MBB : MF) {
284 assert(LowestAvailableVGPR.isValid() && "Did not find an available VGPR");
285 MBB.addLiveIn(LowestAvailableVGPR);
286 MBB.sortUniqueLiveIns();
287 }
288
289 return true;
290 }
291
runOnMachineFunction(MachineFunction & MF)292 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
293 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
294 TII = ST.getInstrInfo();
295 TRI = &TII->getRegisterInfo();
296
297 VRM = getAnalysisIfAvailable<VirtRegMap>();
298
299 assert(SaveBlocks.empty() && RestoreBlocks.empty());
300
301 // First, expose any CSR SGPR spills. This is mostly the same as what PEI
302 // does, but somewhat simpler.
303 calculateSaveRestoreBlocks(MF);
304 bool HasCSRs = spillCalleeSavedRegs(MF);
305
306 MachineFrameInfo &MFI = MF.getFrameInfo();
307 MachineRegisterInfo &MRI = MF.getRegInfo();
308 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
309
310 if (!MFI.hasStackObjects() && !HasCSRs) {
311 SaveBlocks.clear();
312 RestoreBlocks.clear();
313 if (FuncInfo->VGPRReservedForSGPRSpill) {
314 // Free the reserved VGPR for later possible use by frame lowering.
315 FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF);
316 MRI.freezeReservedRegs(MF);
317 }
318 return false;
319 }
320
321 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
322 && EnableSpillVGPRToAGPR;
323
324 bool MadeChange = false;
325
326 const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts();
327 std::unique_ptr<RegScavenger> RS;
328
329 bool NewReservedRegs = false;
330
331 // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
332 // handled as SpilledToReg in regular PrologEpilogInserter.
333 const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
334 (HasCSRs || FuncInfo->hasSpilledSGPRs());
335 if (HasSGPRSpillToVGPR || SpillVGPRToAGPR) {
336 // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
337 // are spilled to VGPRs, in which case we can eliminate the stack usage.
338 //
339 // This operates under the assumption that only other SGPR spills are users
340 // of the frame index.
341
342 lowerShiftReservedVGPR(MF, ST);
343
344 // To track the spill frame indices handled in this pass.
345 BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
346
347 for (MachineBasicBlock &MBB : MF) {
348 MachineBasicBlock::iterator Next;
349 for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
350 MachineInstr &MI = *I;
351 Next = std::next(I);
352
353 if (SpillToAGPR && TII->isVGPRSpill(MI)) {
354 // Try to eliminate stack used by VGPR spills before frame
355 // finalization.
356 unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
357 AMDGPU::OpName::vaddr);
358 int FI = MI.getOperand(FIOp).getIndex();
359 Register VReg =
360 TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
361 if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
362 TRI->isAGPR(MRI, VReg))) {
363 NewReservedRegs = true;
364 if (!RS)
365 RS.reset(new RegScavenger());
366
367 // FIXME: change to enterBasicBlockEnd()
368 RS->enterBasicBlock(MBB);
369 TRI->eliminateFrameIndex(MI, 0, FIOp, RS.get());
370 SpillFIs.set(FI);
371 continue;
372 }
373 }
374
375 if (!TII->isSGPRSpill(MI) || !TRI->spillSGPRToVGPR())
376 continue;
377
378 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
379 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
380 if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
381 NewReservedRegs = true;
382 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
383 (void)Spilled;
384 assert(Spilled && "failed to spill SGPR to VGPR when allocated");
385 SpillFIs.set(FI);
386 }
387 }
388 }
389
390 for (MachineBasicBlock &MBB : MF) {
391 for (auto SSpill : FuncInfo->getSGPRSpillVGPRs())
392 MBB.addLiveIn(SSpill.VGPR);
393
394 for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
395 MBB.addLiveIn(Reg);
396
397 for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
398 MBB.addLiveIn(Reg);
399
400 MBB.sortUniqueLiveIns();
401
402 // FIXME: The dead frame indices are replaced with a null register from
403 // the debug value instructions. We should instead, update it with the
404 // correct register value. But not sure the register value alone is
405 // adequate to lower the DIExpression. It should be worked out later.
406 for (MachineInstr &MI : MBB) {
407 if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
408 SpillFIs[MI.getOperand(0).getIndex()]) {
409 MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
410 MI.getOperand(0).setIsDebug();
411 }
412 }
413 }
414
415 MadeChange = true;
416 } else if (FuncInfo->VGPRReservedForSGPRSpill) {
417 FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF);
418 }
419
420 SaveBlocks.clear();
421 RestoreBlocks.clear();
422
423 // Updated the reserved registers with any VGPRs added for SGPR spills.
424 if (NewReservedRegs)
425 MRI.freezeReservedRegs(MF);
426
427 return MadeChange;
428 }
429