//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
// SGPR spills, so it must insert CSR SGPR spills as well as expand them.
//
// This pass must never create new SGPR virtual registers.
//
// FIXME: Must stop RegScavenger spills in later passes.
//
//===----------------------------------------------------------------------===//

#include "SILowerSGPRSpills.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;

#define DEBUG_TYPE "si-lower-sgpr-spills"

using MBBVector = SmallVector<MachineBasicBlock *, 4>;

namespace {

static cl::opt<unsigned> MaxNumVGPRsForWwmAllocation(
    "amdgpu-num-vgprs-for-wwm-alloc",
    cl::desc("Max num VGPRs for whole-wave register allocation."),
    cl::ReallyHidden, cl::init(10));
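// Note: this option caps how many VGPRs determineRegsForWWMAllocation() below
// will set aside for whole-wave (WWM) values. As a cl::opt it can be tuned on
// the llc command line, e.g. -amdgpu-num-vgprs-for-wwm-alloc=8 (example value,
// not from the original source).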

class SILowerSGPRSpills {
private:
  const SIRegisterInfo *TRI = nullptr;
  const SIInstrInfo *TII = nullptr;
  LiveIntervals *LIS = nullptr;
  SlotIndexes *Indexes = nullptr;
  MachineDominatorTree *MDT = nullptr;

  // Save and Restore blocks of the current function. Typically there is a
  // single save block, unless Windows EH funclets are involved.
  MBBVector SaveBlocks;
  MBBVector RestoreBlocks;

public:
  SILowerSGPRSpills(LiveIntervals *LIS, SlotIndexes *Indexes,
                    MachineDominatorTree *MDT)
      : LIS(LIS), Indexes(Indexes), MDT(MDT) {}
  bool run(MachineFunction &MF);
  void calculateSaveRestoreBlocks(MachineFunction &MF);
  bool spillCalleeSavedRegs(MachineFunction &MF,
                            SmallVectorImpl<int> &CalleeSavedFIs);
  void updateLaneVGPRDomInstr(
      int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
      DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr);
  void determineRegsForWWMAllocation(MachineFunction &MF, BitVector &RegMask);
};

class SILowerSGPRSpillsLegacy : public MachineFunctionPass {
public:
  static char ID;

  SILowerSGPRSpillsLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getClearedProperties() const override {
    // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs.
    return MachineFunctionProperties()
        .set(MachineFunctionProperties::Property::IsSSA)
        .set(MachineFunctionProperties::Property::NoVRegs);
  }
};

} // end anonymous namespace

char SILowerSGPRSpillsLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
                      "SI lower SGPR spill instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_END(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
                    "SI lower SGPR spill instructions", false, false)

char &llvm::SILowerSGPRSpillsLegacyID = SILowerSGPRSpillsLegacy::ID;

/// Insert spill code for the callee-saved registers used in the function.
static void insertCSRSaves(MachineBasicBlock &SaveBlock,
                           ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes,
                           LiveIntervals *LIS) {
  MachineFunction &MF = *SaveBlock.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *RI = ST.getRegisterInfo();

  MachineBasicBlock::iterator I = SaveBlock.begin();
  if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
    const MachineRegisterInfo &MRI = MF.getRegInfo();

    for (const CalleeSavedInfo &CS : CSI) {
      // Insert the spill to the stack frame.
      MCRegister Reg = CS.getReg();

      MachineInstrSpan MIS(I, &SaveBlock);
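      // The return address lives in a 64-bit SGPR pair, so it needs a 64-bit
      // register class; every other CSR SGPR is spilled as a 32-bit register.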
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);

      // If this value was already livein, we probably have a direct use of the
      // incoming register value, so don't kill at the spill point. This happens
      // since we pass some special inputs (workgroup IDs) in the callee saved
      // range.
      const bool IsLiveIn = MRI.isLiveIn(Reg);
      TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
                              RC, TRI, Register());

      if (Indexes) {
        assert(std::distance(MIS.begin(), I) == 1);
        MachineInstr &Inst = *std::prev(I);
        Indexes->insertMachineInstrInMaps(Inst);
      }

      if (LIS)
        LIS->removeAllRegUnitsForPhysReg(Reg);
    }
  }
}

/// Insert restore code for the callee-saved registers used in the function.
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
                              MutableArrayRef<CalleeSavedInfo> CSI,
                              SlotIndexes *Indexes, LiveIntervals *LIS) {
  MachineFunction &MF = *RestoreBlock.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *RI = ST.getRegisterInfo();
  // Restore all registers immediately before the return and any
  // terminators that precede it.
  MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();

  // FIXME: Just emit the readlane/writelane directly
  if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
    for (const CalleeSavedInfo &CI : reverse(CSI)) {
      Register Reg = CI.getReg();
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);

      TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI,
                               Register());
      assert(I != RestoreBlock.begin() &&
             "loadRegFromStackSlot didn't insert any code!");
      // Insert in reverse order.  loadRegFromStackSlot can insert
      // multiple instructions.

      if (Indexes) {
        MachineInstr &Inst = *std::prev(I);
        Indexes->insertMachineInstrInMaps(Inst);
      }

      if (LIS)
        LIS->removeAllRegUnitsForPhysReg(Reg);
    }
  }
}

/// Compute the sets of entry and return blocks for saving and restoring
/// callee-saved registers, and placing prolog and epilog code.
void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Even when we do not change any CSR, we still want to insert the
  // prologue and epilogue of the function.
  // So set the save points for those.

  // Use the points found by shrink-wrapping, if any.
  if (MFI.getSavePoint()) {
    SaveBlocks.push_back(MFI.getSavePoint());
    assert(MFI.getRestorePoint() && "Both restore and save must be set");
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    // If RestoreBlock does not have any successor and is not a return block
    // then the end point is unreachable and we do not need to insert any
    // epilogue.
    if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
      RestoreBlocks.push_back(RestoreBlock);
    return;
  }

  // Save refs to entry and return blocks.
  SaveBlocks.push_back(&MF.front());
  for (MachineBasicBlock &MBB : MF) {
    if (MBB.isEHFuncletEntry())
      SaveBlocks.push_back(&MBB);
    if (MBB.isReturnBlock())
      RestoreBlocks.push_back(&MBB);
  }
}

// TODO: To support shrink wrapping, this would need to copy
// PrologEpilogInserter's updateLiveness.
static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
  MachineBasicBlock &EntryBB = MF.front();

  for (const CalleeSavedInfo &CSIReg : CSI)
    EntryBB.addLiveIn(CSIReg.getReg());
  EntryBB.sortUniqueLiveIns();
}

bool SILowerSGPRSpills::spillCalleeSavedRegs(
    MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIFrameLowering *TFI = ST.getFrameLowering();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  RegScavenger *RS = nullptr;

  // Determine which of the registers in the callee save list should be saved.
  BitVector SavedRegs;
  TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);

  // Add the code to save and restore the callee saved registers.
  if (!F.hasFnAttribute(Attribute::Naked)) {
    // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
    // necessary for verifier liveness checks.
    MFI.setCalleeSavedInfoValid(true);

    std::vector<CalleeSavedInfo> CSI;
    const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

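    // For each SGPR in the callee-save list that is actually clobbered, create
    // a stack object now and remember its frame index in CalleeSavedFIs; run()
    // later recognizes these frame indices and, when possible, lowers the
    // spills to physical VGPR lanes.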
    for (unsigned I = 0; CSRegs[I]; ++I) {
      MCRegister Reg = CSRegs[I];

      if (SavedRegs.test(Reg)) {
        const TargetRegisterClass *RC =
          TRI->getMinimalPhysRegClass(Reg, MVT::i32);
        int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
                                           TRI->getSpillAlign(*RC), true);

        CSI.emplace_back(Reg, JunkFI);
        CalleeSavedFIs.push_back(JunkFI);
      }
    }

    if (!CSI.empty()) {
      for (MachineBasicBlock *SaveBlock : SaveBlocks)
        insertCSRSaves(*SaveBlock, CSI, Indexes, LIS);

      // Add live ins to save blocks.
      assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
      updateLiveness(MF, CSI);

      for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
        insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS);
      return true;
    }
  }

  return false;
}

void SILowerSGPRSpills::updateLaneVGPRDomInstr(
    int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
    DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr) {
  // For the def of a virtual LaneVGPR to dominate all its uses, we should
  // insert an IMPLICIT_DEF before the dominating spill. Switching to a
  // depth-first order doesn't really help since the machine function can have
  // unstructured control flow post-SSA. Hence, for each virtual register, find
  // the common dominator to get either the dominating spill or a block
  // dominating all spills.
  SIMachineFunctionInfo *FuncInfo =
      MBB->getParent()->getInfo<SIMachineFunctionInfo>();
  ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills =
      FuncInfo->getSGPRSpillToVirtualVGPRLanes(FI);
  Register PrevLaneVGPR;
  for (auto &Spill : VGPRSpills) {
    if (PrevLaneVGPR == Spill.VGPR)
      continue;

    PrevLaneVGPR = Spill.VGPR;
    auto I = LaneVGPRDomInstr.find(Spill.VGPR);
    if (Spill.Lane == 0 && I == LaneVGPRDomInstr.end()) {
      // Initially, use the spill instruction itself as the insertion point.
      LaneVGPRDomInstr[Spill.VGPR] = InsertPt;
    } else {
      assert(I != LaneVGPRDomInstr.end());
      auto PrevInsertPt = I->second;
      MachineBasicBlock *DomMBB = PrevInsertPt->getParent();
      if (DomMBB == MBB) {
        // The insertion point was selected earlier and now falls in the block
        // whose spills are currently being lowered. That earlier InsertPt is
        // the one just before the block terminator, and it should be changed
        // if we insert any new spill before it.
        if (MDT->dominates(&*InsertPt, &*PrevInsertPt))
          I->second = InsertPt;

        continue;
      }

      // Find the common dominator block between PrevInsertPt and the
      // current spill.
      DomMBB = MDT->findNearestCommonDominator(DomMBB, MBB);
      if (DomMBB == MBB)
        I->second = InsertPt;
      else if (DomMBB != PrevInsertPt->getParent())
        I->second = &(*DomMBB->getFirstTerminator());
    }
  }
}

void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
                                                      BitVector &RegMask) {
  // Determine an optimal number of VGPRs for WWM allocation. The complement
  // list will be available for allocating other VGPR virtual registers.
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  BitVector ReservedRegs = TRI->getReservedRegs(MF);
  BitVector NonWwmAllocMask(TRI->getNumRegs());

  // FIXME: MaxNumVGPRsForWwmAllocation might need to be adjusted in the future
  // to have a balanced allocation between WWM values and per-thread vector
  // register operands.
  unsigned NumRegs = MaxNumVGPRsForWwmAllocation;
  NumRegs =
      std::min(static_cast<unsigned>(MFI->getSGPRSpillVGPRs().size()), NumRegs);

  auto [MaxNumVGPRs, MaxNumAGPRs] = TRI->getMaxNumVectorRegs(MF);
  // Try to use the highest available registers for now. Later after
  // vgpr-regalloc, they can be shifted to the lowest range.
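  // For example, with MaxNumVGPRs == 32 the scan below starts at VGPR31 and
  // walks downwards, skipping registers that are reserved or already used as
  // physical registers.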
  unsigned I = 0;
  for (unsigned Reg = AMDGPU::VGPR0 + MaxNumVGPRs - 1;
       (I < NumRegs) && (Reg >= AMDGPU::VGPR0); --Reg) {
    if (!ReservedRegs.test(Reg) &&
        !MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/true)) {
      TRI->markSuperRegs(RegMask, Reg);
      ++I;
    }
  }

  if (I != NumRegs) {
    // Reserve an arbitrary register and report the error.
    TRI->markSuperRegs(RegMask, AMDGPU::VGPR0);
    MF.getFunction().getContext().emitError(
        "can't find enough VGPRs for wwm-regalloc");
  }
}

bool SILowerSGPRSpillsLegacy::runOnMachineFunction(MachineFunction &MF) {
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
  auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
  SlotIndexes *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
  MachineDominatorTree *MDT =
      &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  return SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);
}

bool SILowerSGPRSpills::run(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();

  assert(SaveBlocks.empty() && RestoreBlocks.empty());

  // First, expose any CSR SGPR spills. This is mostly the same as what PEI
  // does, but somewhat simpler.
  calculateSaveRestoreBlocks(MF);
  SmallVector<int> CalleeSavedFIs;
  bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  if (!MFI.hasStackObjects() && !HasCSRs) {
    SaveBlocks.clear();
    RestoreBlocks.clear();
    return false;
  }

  bool MadeChange = false;
  bool SpilledToVirtVGPRLanes = false;

  // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
  // handled as SpilledToReg in regular PrologEpilogInserter.
  const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
                                  (HasCSRs || FuncInfo->hasSpilledSGPRs());
  if (HasSGPRSpillToVGPR) {
    // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
    // are spilled to VGPRs, in which case we can eliminate the stack usage.
    //
    // This operates under the assumption that only other SGPR spills are users
    // of the frame index.

    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);

    // To track the IMPLICIT_DEF insertion point for the lane vgprs.
    DenseMap<Register, MachineBasicBlock::iterator> LaneVGPRDomInstr;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        if (!TII->isSGPRSpill(MI))
          continue;

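        // A spill of an undef SGPR has no value worth preserving; just delete
        // the instruction (and its SlotIndexes entry, if any).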
        if (MI.getOperand(0).isUndef()) {
          if (Indexes)
            Indexes->removeMachineInstrFromMaps(MI);
          MI.eraseFromParent();
          continue;
        }

        int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
        assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);

        bool IsCalleeSaveSGPRSpill = llvm::is_contained(CalleeSavedFIs, FI);
        if (IsCalleeSaveSGPRSpill) {
          // Spill callee-saved SGPRs into physical VGPR lanes.

          // TODO: This is to ensure the CFIs are static for efficient frame
          // unwinding in the debugger. Spilling them into virtual VGPR lanes
          // would involve regalloc to allocate the physical VGPRs, and that
          // might cause intermediate spills/splits of such live ranges for
          // successful allocation. This would result in broken CFI encoding
          // unless regalloc-aware CFI generation, which inserts new CFIs along
          // with the intermediate spills, is implemented. No such support
          // currently exists in the LLVM compiler.
          if (FuncInfo->allocateSGPRSpillToVGPRLane(
                  MF, FI, /*SpillToPhysVGPRLane=*/true)) {
            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
                MI, FI, nullptr, Indexes, LIS, true);
            if (!Spilled)
              llvm_unreachable(
                  "failed to spill SGPR to physical VGPR lane when allocated");
          }
        } else {
          MachineInstrSpan MIS(&MI, &MBB);
          if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
                MI, FI, nullptr, Indexes, LIS);
            if (!Spilled)
              llvm_unreachable(
                  "failed to spill SGPR to virtual VGPR lane when allocated");
            SpillFIs.set(FI);
            updateLaneVGPRDomInstr(FI, &MBB, MIS.begin(), LaneVGPRDomInstr);
            SpilledToVirtVGPRLanes = true;
          }
        }
      }
    }

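    // Each virtual lane VGPR created above needs a def that dominates all of
    // its uses by the lowered spills; emit an IMPLICIT_DEF at the insertion
    // point computed by updateLaneVGPRDomInstr for that register.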
    for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) {
      auto InsertPt = LaneVGPRDomInstr[Reg];
      // Insert the IMPLICIT_DEF at the identified points.
      MachineBasicBlock &Block = *InsertPt->getParent();
      DebugLoc DL = Block.findDebugLoc(InsertPt);
      auto MIB =
          BuildMI(Block, *InsertPt, DL, TII->get(AMDGPU::IMPLICIT_DEF), Reg);

      // Add WWM flag to the virtual register.
      FuncInfo->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);

      // Set SGPR_SPILL asm printer flag
      MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
      if (LIS) {
        LIS->InsertMachineInstrInMaps(*MIB);
        LIS->createAndComputeVirtRegInterval(Reg);
      }
    }

    // Determine the registers for WWM allocation and also compute the register
    // mask for non-wwm VGPR allocation.
    if (FuncInfo->getSGPRSpillVGPRs().size()) {
      BitVector WwmRegMask(TRI->getNumRegs());

      determineRegsForWWMAllocation(MF, WwmRegMask);

      BitVector NonWwmRegMask(WwmRegMask);
      NonWwmRegMask.flip().clearBitsNotInMask(TRI->getAllVGPRRegMask());

      // The complement set will be the registers for non-wwm (per-thread) vgpr
      // allocation.
      FuncInfo->updateNonWWMRegMask(NonWwmRegMask);
    }

    for (MachineBasicBlock &MBB : MF) {
      // FIXME: The dead frame indices in the debug value instructions are
      // replaced with a null register. We should instead update them with the
      // correct register value, but it is not clear that the register value
      // alone is adequate to lower the DIExpression. This should be worked out
      // later.
      for (MachineInstr &MI : MBB) {
        if (MI.isDebugValue()) {
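          // For a plain DBG_VALUE the location is operand 0; for a
          // DBG_VALUE_LIST the location operands start at index 2.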
          uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
          if (MI.getOperand(StackOperandIdx).isFI() &&
              !MFI.isFixedObjectIndex(
                  MI.getOperand(StackOperandIdx).getIndex()) &&
              SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
            MI.getOperand(StackOperandIdx)
                .ChangeToRegister(Register(), false /*isDef*/);
          }
        }
      }
    }

    // All frame indices that are dead by now should be removed from the
    // function frame. Otherwise, later passes such as "stack slot coloring"
    // may re-map the freed frame index ids, which in turn could mess up the
    // bookkeeping of the "frame index to VGPR lane" mapping.
    FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);

    MadeChange = true;
  }

  if (SpilledToVirtVGPRLanes) {
    const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
    // Shift the SGPR reserved for the EXEC copy back into the lowest available
    // range. This SGPR is reserved to handle the whole-wave spill/copy
    // operations that might get inserted during vgpr regalloc.
    Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF);
    if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) <
                             TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy()))
      FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR);
  } else {
    // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM
    // spills/copies. Reset the SGPR reserved for EXEC copy.
    FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
  }

  SaveBlocks.clear();
  RestoreBlocks.clear();

  return MadeChange;
}

PreservedAnalyses
SILowerSGPRSpillsPass::run(MachineFunction &MF,
                           MachineFunctionAnalysisManager &MFAM) {
  MFPropsModifier _(*this, MF);
  auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
  auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(MF);
  MachineDominatorTree *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
  SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);
  return PreservedAnalyses::all();
}
565