//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
10 // SGPR spills, so must insert CSR SGPR spills as well as expand them.
11 //
12 // This pass must never create new SGPR virtual registers.
13 //
14 // FIXME: Must stop RegScavenger spills in later passes.
15 //
16 //===----------------------------------------------------------------------===//
17
18 #include "AMDGPU.h"
19 #include "GCNSubtarget.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "llvm/CodeGen/LiveIntervals.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/RegisterScavenging.h"
25 #include "llvm/InitializePasses.h"
26
27 using namespace llvm;
28
29 #define DEBUG_TYPE "si-lower-sgpr-spills"
30
31 using MBBVector = SmallVector<MachineBasicBlock *, 4>;
32
33 namespace {
34
35 class SILowerSGPRSpills : public MachineFunctionPass {
36 private:
37 const SIRegisterInfo *TRI = nullptr;
38 const SIInstrInfo *TII = nullptr;
39 LiveIntervals *LIS = nullptr;
40 SlotIndexes *Indexes = nullptr;
41
42 // Save and Restore blocks of the current function. Typically there is a
43 // single save block, unless Windows EH funclets are involved.
44 MBBVector SaveBlocks;
45 MBBVector RestoreBlocks;
46
47 public:
48 static char ID;
49
SILowerSGPRSpills()50 SILowerSGPRSpills() : MachineFunctionPass(ID) {}
51
52 void calculateSaveRestoreBlocks(MachineFunction &MF);
53 bool spillCalleeSavedRegs(MachineFunction &MF);
54
55 bool runOnMachineFunction(MachineFunction &MF) override;
56
getAnalysisUsage(AnalysisUsage & AU) const57 void getAnalysisUsage(AnalysisUsage &AU) const override {
58 AU.setPreservesAll();
59 MachineFunctionPass::getAnalysisUsage(AU);
60 }
61 };
62
63 } // end anonymous namespace
64
65 char SILowerSGPRSpills::ID = 0;
66
67 INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
68 "SI lower SGPR spill instructions", false, false)
69 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
70 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
71 INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
72 "SI lower SGPR spill instructions", false, false)
73
74 char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
75
76 /// Insert spill code for the callee-saved registers used in the function.
insertCSRSaves(MachineBasicBlock & SaveBlock,ArrayRef<CalleeSavedInfo> CSI,SlotIndexes * Indexes,LiveIntervals * LIS)77 static void insertCSRSaves(MachineBasicBlock &SaveBlock,
78 ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes,
79 LiveIntervals *LIS) {
80 MachineFunction &MF = *SaveBlock.getParent();
81 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
82 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
83 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
84 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
85 const SIRegisterInfo *RI = ST.getRegisterInfo();
86
87 MachineBasicBlock::iterator I = SaveBlock.begin();
88 if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
89 const MachineRegisterInfo &MRI = MF.getRegInfo();
90
91 for (const CalleeSavedInfo &CS : CSI) {
92 // Insert the spill to the stack frame.
93 MCRegister Reg = CS.getReg();
94
95 MachineInstrSpan MIS(I, &SaveBlock);
96 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
97 Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
98
99 // If this value was already livein, we probably have a direct use of the
100 // incoming register value, so don't kill at the spill point. This happens
101 // since we pass some special inputs (workgroup IDs) in the callee saved
102 // range.
103 const bool IsLiveIn = MRI.isLiveIn(Reg);
104 TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
105 RC, TRI, Register());
106
107 if (Indexes) {
108 assert(std::distance(MIS.begin(), I) == 1);
109 MachineInstr &Inst = *std::prev(I);
110 Indexes->insertMachineInstrInMaps(Inst);
111 }
112
113 if (LIS)
114 LIS->removeAllRegUnitsForPhysReg(Reg);
115 }
116 }
117 }
118
119 /// Insert restore code for the callee-saved registers used in the function.
insertCSRRestores(MachineBasicBlock & RestoreBlock,MutableArrayRef<CalleeSavedInfo> CSI,SlotIndexes * Indexes,LiveIntervals * LIS)120 static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
121 MutableArrayRef<CalleeSavedInfo> CSI,
122 SlotIndexes *Indexes, LiveIntervals *LIS) {
123 MachineFunction &MF = *RestoreBlock.getParent();
124 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
125 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
126 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
127 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
128 const SIRegisterInfo *RI = ST.getRegisterInfo();
129 // Restore all registers immediately before the return and any
130 // terminators that precede it.
131 MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
132
133 // FIXME: Just emit the readlane/writelane directly
134 if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
135 for (const CalleeSavedInfo &CI : reverse(CSI)) {
136 Register Reg = CI.getReg();
137 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
138 Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
139
140 TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI,
141 Register());
142 assert(I != RestoreBlock.begin() &&
143 "loadRegFromStackSlot didn't insert any code!");
144 // Insert in reverse order. loadRegFromStackSlot can insert
145 // multiple instructions.
146
147 if (Indexes) {
148 MachineInstr &Inst = *std::prev(I);
149 Indexes->insertMachineInstrInMaps(Inst);
150 }
151
152 if (LIS)
153 LIS->removeAllRegUnitsForPhysReg(Reg);
154 }
155 }
156 }
157
158 /// Compute the sets of entry and return blocks for saving and restoring
159 /// callee-saved registers, and placing prolog and epilog code.
calculateSaveRestoreBlocks(MachineFunction & MF)160 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
161 const MachineFrameInfo &MFI = MF.getFrameInfo();
162
163 // Even when we do not change any CSR, we still want to insert the
164 // prologue and epilogue of the function.
165 // So set the save points for those.
166
167 // Use the points found by shrink-wrapping, if any.
168 if (MFI.getSavePoint()) {
169 SaveBlocks.push_back(MFI.getSavePoint());
170 assert(MFI.getRestorePoint() && "Both restore and save must be set");
171 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
172 // If RestoreBlock does not have any successor and is not a return block
173 // then the end point is unreachable and we do not need to insert any
174 // epilogue.
175 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
176 RestoreBlocks.push_back(RestoreBlock);
177 return;
178 }
179
180 // Save refs to entry and return blocks.
181 SaveBlocks.push_back(&MF.front());
182 for (MachineBasicBlock &MBB : MF) {
183 if (MBB.isEHFuncletEntry())
184 SaveBlocks.push_back(&MBB);
185 if (MBB.isReturnBlock())
186 RestoreBlocks.push_back(&MBB);
187 }
188 }
189
190 // TODO: To support shrink wrapping, this would need to copy
191 // PrologEpilogInserter's updateLiveness.
updateLiveness(MachineFunction & MF,ArrayRef<CalleeSavedInfo> CSI)192 static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
193 MachineBasicBlock &EntryBB = MF.front();
194
195 for (const CalleeSavedInfo &CSIReg : CSI)
196 EntryBB.addLiveIn(CSIReg.getReg());
197 EntryBB.sortUniqueLiveIns();
198 }
199
spillCalleeSavedRegs(MachineFunction & MF)200 bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
201 MachineRegisterInfo &MRI = MF.getRegInfo();
202 const Function &F = MF.getFunction();
203 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
204 const SIFrameLowering *TFI = ST.getFrameLowering();
205 MachineFrameInfo &MFI = MF.getFrameInfo();
206 RegScavenger *RS = nullptr;
207
208 // Determine which of the registers in the callee save list should be saved.
209 BitVector SavedRegs;
210 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
211
212 // Add the code to save and restore the callee saved registers.
213 if (!F.hasFnAttribute(Attribute::Naked)) {
214 // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
215 // necessary for verifier liveness checks.
216 MFI.setCalleeSavedInfoValid(true);
217
218 std::vector<CalleeSavedInfo> CSI;
219 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
220
221 for (unsigned I = 0; CSRegs[I]; ++I) {
222 MCRegister Reg = CSRegs[I];
223
224 if (SavedRegs.test(Reg)) {
225 const TargetRegisterClass *RC =
226 TRI->getMinimalPhysRegClass(Reg, MVT::i32);
227 int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
228 TRI->getSpillAlign(*RC), true);
229
230 CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
231 }
232 }
233
234 if (!CSI.empty()) {
235 for (MachineBasicBlock *SaveBlock : SaveBlocks)
236 insertCSRSaves(*SaveBlock, CSI, Indexes, LIS);
237
238 // Add live ins to save blocks.
239 assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
240 updateLiveness(MF, CSI);
241
242 for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
243 insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS);
244 return true;
245 }
246 }
247
248 return false;
249 }
250
runOnMachineFunction(MachineFunction & MF)251 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
252 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
253 TII = ST.getInstrInfo();
254 TRI = &TII->getRegisterInfo();
255
256 LIS = getAnalysisIfAvailable<LiveIntervals>();
257 Indexes = getAnalysisIfAvailable<SlotIndexes>();
258
259 assert(SaveBlocks.empty() && RestoreBlocks.empty());
260
261 // First, expose any CSR SGPR spills. This is mostly the same as what PEI
262 // does, but somewhat simpler.
263 calculateSaveRestoreBlocks(MF);
264 bool HasCSRs = spillCalleeSavedRegs(MF);
265
266 MachineFrameInfo &MFI = MF.getFrameInfo();
267 MachineRegisterInfo &MRI = MF.getRegInfo();
268 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
269
270 if (!MFI.hasStackObjects() && !HasCSRs) {
271 SaveBlocks.clear();
272 RestoreBlocks.clear();
273 return false;
274 }
275
276 bool MadeChange = false;
277 bool NewReservedRegs = false;
278
279 // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
280 // handled as SpilledToReg in regular PrologEpilogInserter.
281 const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
282 (HasCSRs || FuncInfo->hasSpilledSGPRs());
283 if (HasSGPRSpillToVGPR) {
284 // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
285 // are spilled to VGPRs, in which case we can eliminate the stack usage.
286 //
287 // This operates under the assumption that only other SGPR spills are users
288 // of the frame index.
289
290 // To track the spill frame indices handled in this pass.
291 BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
292
293 for (MachineBasicBlock &MBB : MF) {
294 for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
295 if (!TII->isSGPRSpill(MI))
296 continue;
297
298 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
299 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
300 if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
301 NewReservedRegs = true;
302 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
303 MI, FI, nullptr, Indexes, LIS);
304 (void)Spilled;
305 assert(Spilled && "failed to spill SGPR to VGPR when allocated");
306 SpillFIs.set(FI);
307 }
308 }
309 }
310
311 // FIXME: Adding to live-ins redundant with reserving registers.
312 for (MachineBasicBlock &MBB : MF) {
313 for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
314 MBB.addLiveIn(Reg);
315 MBB.sortUniqueLiveIns();
316
317 // FIXME: The dead frame indices are replaced with a null register from
318 // the debug value instructions. We should instead, update it with the
319 // correct register value. But not sure the register value alone is
320 // adequate to lower the DIExpression. It should be worked out later.
321 for (MachineInstr &MI : MBB) {
322 if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
323 !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) &&
324 SpillFIs[MI.getOperand(0).getIndex()]) {
325 MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
326 }
327 }
328 }
329
330 // All those frame indices which are dead by now should be removed from the
331 // function frame. Otherwise, there is a side effect such as re-mapping of
332 // free frame index ids by the later pass(es) like "stack slot coloring"
333 // which in turn could mess-up with the book keeping of "frame index to VGPR
334 // lane".
335 FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);
336
337 MadeChange = true;
338 }
339
340 SaveBlocks.clear();
341 RestoreBlocks.clear();
342
343 // Updated the reserved registers with any VGPRs added for SGPR spills.
344 if (NewReservedRegs)
345 MRI.freezeReservedRegs(MF);
346
347 return MadeChange;
348 }
349