1 //===-- SILowerSGPRSPills.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all 10 // SGPR spills, so must insert CSR SGPR spills as well as expand them. 11 // 12 // This pass must never create new SGPR virtual registers. 13 // 14 // FIXME: Must stop RegScavenger spills in later passes. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "SILowerSGPRSpills.h" 19 #include "AMDGPU.h" 20 #include "GCNSubtarget.h" 21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 22 #include "SIMachineFunctionInfo.h" 23 #include "llvm/CodeGen/LiveIntervals.h" 24 #include "llvm/CodeGen/MachineDominators.h" 25 #include "llvm/CodeGen/MachineFrameInfo.h" 26 #include "llvm/CodeGen/RegisterScavenging.h" 27 28 using namespace llvm; 29 30 #define DEBUG_TYPE "si-lower-sgpr-spills" 31 32 using MBBVector = SmallVector<MachineBasicBlock *, 4>; 33 34 namespace { 35 36 static cl::opt<unsigned> MaxNumVGPRsForWwmAllocation( 37 "amdgpu-num-vgprs-for-wwm-alloc", 38 cl::desc("Max num VGPRs for whole-wave register allocation."), 39 cl::ReallyHidden, cl::init(10)); 40 41 class SILowerSGPRSpills { 42 private: 43 const SIRegisterInfo *TRI = nullptr; 44 const SIInstrInfo *TII = nullptr; 45 LiveIntervals *LIS = nullptr; 46 SlotIndexes *Indexes = nullptr; 47 MachineDominatorTree *MDT = nullptr; 48 49 // Save and Restore blocks of the current function. Typically there is a 50 // single save block, unless Windows EH funclets are involved. 51 MBBVector SaveBlocks; 52 MBBVector RestoreBlocks; 53 54 public: 55 SILowerSGPRSpills(LiveIntervals *LIS, SlotIndexes *Indexes, 56 MachineDominatorTree *MDT) 57 : LIS(LIS), Indexes(Indexes), MDT(MDT) {} 58 bool run(MachineFunction &MF); 59 void calculateSaveRestoreBlocks(MachineFunction &MF); 60 bool spillCalleeSavedRegs(MachineFunction &MF, 61 SmallVectorImpl<int> &CalleeSavedFIs); 62 void updateLaneVGPRDomInstr( 63 int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt, 64 DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr); 65 void determineRegsForWWMAllocation(MachineFunction &MF, BitVector &RegMask); 66 }; 67 68 class SILowerSGPRSpillsLegacy : public MachineFunctionPass { 69 public: 70 static char ID; 71 72 SILowerSGPRSpillsLegacy() : MachineFunctionPass(ID) {} 73 74 bool runOnMachineFunction(MachineFunction &MF) override; 75 76 void getAnalysisUsage(AnalysisUsage &AU) const override { 77 AU.addRequired<MachineDominatorTreeWrapperPass>(); 78 AU.setPreservesAll(); 79 MachineFunctionPass::getAnalysisUsage(AU); 80 } 81 82 MachineFunctionProperties getClearedProperties() const override { 83 // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs. 
    return MachineFunctionProperties()
        .set(MachineFunctionProperties::Property::IsSSA)
        .set(MachineFunctionProperties::Property::NoVRegs);
  }
};

} // end anonymous namespace

char SILowerSGPRSpillsLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
                      "SI lower SGPR spill instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_END(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
                    "SI lower SGPR spill instructions", false, false)

char &llvm::SILowerSGPRSpillsLegacyID = SILowerSGPRSpillsLegacy::ID;

/// Insert spill code for the callee-saved registers used in the function.
static void insertCSRSaves(MachineBasicBlock &SaveBlock,
                           ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes,
                           LiveIntervals *LIS) {
  MachineFunction &MF = *SaveBlock.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *RI = ST.getRegisterInfo();

  MachineBasicBlock::iterator I = SaveBlock.begin();
  if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
    const MachineRegisterInfo &MRI = MF.getRegInfo();

    for (const CalleeSavedInfo &CS : CSI) {
      // Insert the spill to the stack frame.
      MCRegister Reg = CS.getReg();

      MachineInstrSpan MIS(I, &SaveBlock);
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);

      // If this value was already live-in, we probably have a direct use of
      // the incoming register value, so don't kill at the spill point. This
      // happens since we pass some special inputs (workgroup IDs) in the
      // callee saved range.
      const bool IsLiveIn = MRI.isLiveIn(Reg);
      TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
                              RC, TRI, Register());

      if (Indexes) {
        assert(std::distance(MIS.begin(), I) == 1);
        MachineInstr &Inst = *std::prev(I);
        Indexes->insertMachineInstrInMaps(Inst);
      }

      if (LIS)
        LIS->removeAllRegUnitsForPhysReg(Reg);
    }
  }
}

/// Insert restore code for the callee-saved registers used in the function.
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
                              MutableArrayRef<CalleeSavedInfo> CSI,
                              SlotIndexes *Indexes, LiveIntervals *LIS) {
  MachineFunction &MF = *RestoreBlock.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *RI = ST.getRegisterInfo();
  // Restore all registers immediately before the return and any
  // terminators that precede it.
  MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();

  // FIXME: Just emit the readlane/writelane directly.
  if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
    for (const CalleeSavedInfo &CI : reverse(CSI)) {
      Register Reg = CI.getReg();
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);

      TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI,
                               Register());
      assert(I != RestoreBlock.begin() &&
             "loadRegFromStackSlot didn't insert any code!");
      // Insert in reverse order. loadRegFromStackSlot can insert
      // multiple instructions.

      if (Indexes) {
        MachineInstr &Inst = *std::prev(I);
        Indexes->insertMachineInstrInMaps(Inst);
      }

      if (LIS)
        LIS->removeAllRegUnitsForPhysReg(Reg);
    }
  }
}

/// Compute the sets of entry and return blocks for saving and restoring
/// callee-saved registers, and placing prolog and epilog code.
void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Even when we do not change any CSR, we still want to insert the
  // prologue and epilogue of the function.
  // So set the save points for those.

  // Use the points found by shrink-wrapping, if any.
  if (MFI.getSavePoint()) {
    SaveBlocks.push_back(MFI.getSavePoint());
    assert(MFI.getRestorePoint() && "Both restore and save must be set");
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    // If RestoreBlock does not have any successor and is not a return block
    // then the end point is unreachable and we do not need to insert any
    // epilogue.
    if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
      RestoreBlocks.push_back(RestoreBlock);
    return;
  }

  // Save refs to entry and return blocks.
  SaveBlocks.push_back(&MF.front());
  for (MachineBasicBlock &MBB : MF) {
    if (MBB.isEHFuncletEntry())
      SaveBlocks.push_back(&MBB);
    if (MBB.isReturnBlock())
      RestoreBlocks.push_back(&MBB);
  }
}

// TODO: To support shrink wrapping, this would need to copy
// PrologEpilogInserter's updateLiveness.
static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
  MachineBasicBlock &EntryBB = MF.front();

  for (const CalleeSavedInfo &CSIReg : CSI)
    EntryBB.addLiveIn(CSIReg.getReg());
  EntryBB.sortUniqueLiveIns();
}

bool SILowerSGPRSpills::spillCalleeSavedRegs(
    MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIFrameLowering *TFI = ST.getFrameLowering();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  RegScavenger *RS = nullptr;

  // Determine which of the registers in the callee save list should be saved.
  BitVector SavedRegs;
  TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);

  // Add the code to save and restore the callee saved registers.
  if (!F.hasFnAttribute(Attribute::Naked)) {
    // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
    // necessary for verifier liveness checks.
    MFI.setCalleeSavedInfoValid(true);

    std::vector<CalleeSavedInfo> CSI;
    const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

    for (unsigned I = 0; CSRegs[I]; ++I) {
      MCRegister Reg = CSRegs[I];

      if (SavedRegs.test(Reg)) {
        const TargetRegisterClass *RC =
            TRI->getMinimalPhysRegClass(Reg, MVT::i32);
        int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
                                           TRI->getSpillAlign(*RC), true);

        CSI.emplace_back(Reg, JunkFI);
        CalleeSavedFIs.push_back(JunkFI);
      }
    }

    if (!CSI.empty()) {
      for (MachineBasicBlock *SaveBlock : SaveBlocks)
        insertCSRSaves(*SaveBlock, CSI, Indexes, LIS);

      // Add live ins to save blocks.
      assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
      updateLiveness(MF, CSI);

      for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
        insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS);
      return true;
    }
  }

  return false;
}

void SILowerSGPRSpills::updateLaneVGPRDomInstr(
    int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
    DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr) {
  // For the def of a virtual LaneVGPR to dominate all its uses, we should
  // insert an IMPLICIT_DEF before the dominating spill. Switching to a
  // depth-first order doesn't really help since the machine function can
  // contain unstructured control flow post-SSA. Hence, for each virtual
  // register, find the common dominator to get either the dominating spill
  // or a block dominating all spills.
  SIMachineFunctionInfo *FuncInfo =
      MBB->getParent()->getInfo<SIMachineFunctionInfo>();
  ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills =
      FuncInfo->getSGPRSpillToVirtualVGPRLanes(FI);
  Register PrevLaneVGPR;
  for (auto &Spill : VGPRSpills) {
    if (PrevLaneVGPR == Spill.VGPR)
      continue;

    PrevLaneVGPR = Spill.VGPR;
    auto I = LaneVGPRDomInstr.find(Spill.VGPR);
    if (Spill.Lane == 0 && I == LaneVGPRDomInstr.end()) {
      // Initially add the spill instruction itself as the insertion point.
      LaneVGPRDomInstr[Spill.VGPR] = InsertPt;
    } else {
      assert(I != LaneVGPRDomInstr.end());
      auto PrevInsertPt = I->second;
      MachineBasicBlock *DomMBB = PrevInsertPt->getParent();
      if (DomMBB == MBB) {
        // The insertion point was earlier selected in a predecessor block
        // whose spills are currently being lowered. The earlier InsertPt
        // would be the one just before the block terminator, and it should
        // be changed if we insert any new spill in it.
        if (MDT->dominates(&*InsertPt, &*PrevInsertPt))
          I->second = InsertPt;

        continue;
      }

      // Find the common dominator block between PrevInsertPt and the
      // current spill.
      DomMBB = MDT->findNearestCommonDominator(DomMBB, MBB);
      if (DomMBB == MBB)
        I->second = InsertPt;
      else if (DomMBB != PrevInsertPt->getParent())
        I->second = &(*DomMBB->getFirstTerminator());
    }
  }
}

void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
                                                      BitVector &RegMask) {
  // Determine an optimal number of VGPRs for WWM allocation. The complement
  // list will be available for allocating other VGPR virtual registers.
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  BitVector ReservedRegs = TRI->getReservedRegs(MF);
  BitVector NonWwmAllocMask(TRI->getNumRegs());

  // FIXME: MaxNumVGPRsForWwmAllocation might need to be adjusted in the future
  // to have a balanced allocation between WWM values and per-thread vector
  // register operands.
  unsigned NumRegs = MaxNumVGPRsForWwmAllocation;
  NumRegs =
      std::min(static_cast<unsigned>(MFI->getSGPRSpillVGPRs().size()), NumRegs);

  auto [MaxNumVGPRs, MaxNumAGPRs] = TRI->getMaxNumVectorRegs(MF);
  // Try to use the highest available registers for now. Later after
  // vgpr-regalloc, they can be shifted to the lowest range.
  unsigned I = 0;
  for (unsigned Reg = AMDGPU::VGPR0 + MaxNumVGPRs - 1;
       (I < NumRegs) && (Reg >= AMDGPU::VGPR0); --Reg) {
    if (!ReservedRegs.test(Reg) &&
        !MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/true)) {
      TRI->markSuperRegs(RegMask, Reg);
      ++I;
    }
  }

  if (I != NumRegs) {
    // Reserve an arbitrary register and report the error.
    TRI->markSuperRegs(RegMask, AMDGPU::VGPR0);
    MF.getFunction().getContext().emitError(
        "can't find enough VGPRs for wwm-regalloc");
  }
}

bool SILowerSGPRSpillsLegacy::runOnMachineFunction(MachineFunction &MF) {
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
  auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
  SlotIndexes *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
  MachineDominatorTree *MDT =
      &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  return SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);
}

bool SILowerSGPRSpills::run(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();

  assert(SaveBlocks.empty() && RestoreBlocks.empty());

  // First, expose any CSR SGPR spills. This is mostly the same as what PEI
  // does, but somewhat simpler.
  calculateSaveRestoreBlocks(MF);
  SmallVector<int> CalleeSavedFIs;
  bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  if (!MFI.hasStackObjects() && !HasCSRs) {
    SaveBlocks.clear();
    RestoreBlocks.clear();
    return false;
  }

  bool MadeChange = false;
  bool SpilledToVirtVGPRLanes = false;

  // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
  // handled as SpilledToReg in regular PrologEpilogInserter.
  const bool HasSGPRSpillToVGPR =
      TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs());
  if (HasSGPRSpillToVGPR) {
    // Process all SGPR spills before frame offsets are finalized. Ideally
    // SGPRs are spilled to VGPRs, in which case we can eliminate the stack
    // usage.
    //
    // This operates under the assumption that only other SGPR spills are
    // users of the frame index.

    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);

    // To track the IMPLICIT_DEF insertion point for the lane VGPRs.
    DenseMap<Register, MachineBasicBlock::iterator> LaneVGPRDomInstr;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        if (!TII->isSGPRSpill(MI))
          continue;

        if (MI.getOperand(0).isUndef()) {
          if (Indexes)
            Indexes->removeMachineInstrFromMaps(MI);
          MI.eraseFromParent();
          continue;
        }

        int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
        assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);

        bool IsCalleeSaveSGPRSpill = llvm::is_contained(CalleeSavedFIs, FI);
        if (IsCalleeSaveSGPRSpill) {
          // Spill callee-saved SGPRs into physical VGPR lanes.

          // TODO: This is to ensure the CFIs are static for efficient frame
          // unwinding in the debugger. Spilling them into virtual VGPR lanes
          // involves regalloc to allocate the physical VGPRs, and that might
          // cause intermediate spills/splits of such live ranges for a
          // successful allocation. This would result in broken CFI encoding
          // unless regalloc-aware CFI generation is implemented to insert new
          // CFIs along with the intermediate spills. No such support currently
          // exists in the LLVM compiler.
          if (FuncInfo->allocateSGPRSpillToVGPRLane(
                  MF, FI, /*SpillToPhysVGPRLane=*/true)) {
            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
                MI, FI, nullptr, Indexes, LIS, true);
            if (!Spilled)
              llvm_unreachable(
                  "failed to spill SGPR to physical VGPR lane when allocated");
          }
        } else {
          MachineInstrSpan MIS(&MI, &MBB);
          if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
                MI, FI, nullptr, Indexes, LIS);
            if (!Spilled)
              llvm_unreachable(
                  "failed to spill SGPR to virtual VGPR lane when allocated");
            SpillFIs.set(FI);
            updateLaneVGPRDomInstr(FI, &MBB, MIS.begin(), LaneVGPRDomInstr);
            SpilledToVirtVGPRLanes = true;
          }
        }
      }
    }

    for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) {
      auto InsertPt = LaneVGPRDomInstr[Reg];
      // Insert the IMPLICIT_DEF at the identified points.
      MachineBasicBlock &Block = *InsertPt->getParent();
      DebugLoc DL = Block.findDebugLoc(InsertPt);
      auto MIB =
          BuildMI(Block, *InsertPt, DL, TII->get(AMDGPU::IMPLICIT_DEF), Reg);

      // Add the WWM flag to the virtual register.
      FuncInfo->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);

      // Set the SGPR_SPILL asm printer flag.
      MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
      if (LIS) {
        LIS->InsertMachineInstrInMaps(*MIB);
        LIS->createAndComputeVirtRegInterval(Reg);
      }
    }

    // Determine the registers for WWM allocation and also compute the
    // register mask for non-wwm VGPR allocation.
    if (FuncInfo->getSGPRSpillVGPRs().size()) {
      BitVector WwmRegMask(TRI->getNumRegs());

      determineRegsForWWMAllocation(MF, WwmRegMask);

      BitVector NonWwmRegMask(WwmRegMask);
      NonWwmRegMask.flip().clearBitsNotInMask(TRI->getAllVGPRRegMask());

      // The complement set will be the registers for non-wwm (per-thread)
      // vgpr allocation.
      FuncInfo->updateNonWWMRegMask(NonWwmRegMask);
    }

    for (MachineBasicBlock &MBB : MF) {
      // FIXME: The dead frame indices are replaced with a null register in
      // the debug value instructions. We should instead update them with the
      // correct register values. But it is not clear whether the register
      // value alone is adequate to lower the DIExpression; it should be
      // worked out later.
      for (MachineInstr &MI : MBB) {
        if (MI.isDebugValue()) {
          uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
          if (MI.getOperand(StackOperandIdx).isFI() &&
              !MFI.isFixedObjectIndex(
                  MI.getOperand(StackOperandIdx).getIndex()) &&
              SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
            MI.getOperand(StackOperandIdx)
                .ChangeToRegister(Register(), false /*isDef*/);
          }
        }
      }
    }

    // All frame indices that are dead by now should be removed from the
    // function frame. Otherwise, there is a side effect such as re-mapping of
    // free frame index IDs by later passes like "stack slot coloring", which
    // in turn could mess up the bookkeeping of "frame index to VGPR lane".
    FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);

    MadeChange = true;
  }

  if (SpilledToVirtVGPRLanes) {
    const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
    // Shift back the reserved SGPR for EXEC copy into the lowest range.
    // This SGPR is reserved to handle the whole-wave spill/copy operations
    // that might get inserted during vgpr regalloc.
    Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF);
    if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) <
                             TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy()))
      FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR);
  } else {
    // No SGPR spills to virtual VGPR lanes, and hence there won't be any WWM
    // spills/copies. Reset the SGPR reserved for the EXEC copy.
    FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
  }

  SaveBlocks.clear();
  RestoreBlocks.clear();

  return MadeChange;
}

PreservedAnalyses
SILowerSGPRSpillsPass::run(MachineFunction &MF,
                           MachineFunctionAnalysisManager &MFAM) {
  MFPropsModifier _(*this, MF);
  auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
  auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(MF);
  MachineDominatorTree *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
  SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);
  return PreservedAnalyses::all();
}