10b57cec5SDimitry Andric //===-- SILowerSGPRSPills.cpp ---------------------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all 100b57cec5SDimitry Andric // SGPR spills, so must insert CSR SGPR spills as well as expand them. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric // This pass must never create new SGPR virtual registers. 130b57cec5SDimitry Andric // 140b57cec5SDimitry Andric // FIXME: Must stop RegScavenger spills in later passes. 150b57cec5SDimitry Andric // 160b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 170b57cec5SDimitry Andric 180b57cec5SDimitry Andric #include "AMDGPU.h" 19e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 20e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 210b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h" 220b57cec5SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h" 2381ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 24e8d8bef9SDimitry Andric #include "llvm/CodeGen/RegisterScavenging.h" 25480093f4SDimitry Andric #include "llvm/InitializePasses.h" 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric using namespace llvm; 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric #define DEBUG_TYPE "si-lower-sgpr-spills" 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric using MBBVector = SmallVector<MachineBasicBlock *, 4>; 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric namespace { 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric class SILowerSGPRSpills : public MachineFunctionPass { 360b57cec5SDimitry Andric private: 370b57cec5SDimitry Andric const SIRegisterInfo *TRI = nullptr; 380b57cec5SDimitry Andric const SIInstrInfo *TII = nullptr; 390b57cec5SDimitry Andric LiveIntervals *LIS = nullptr; 40bdd1243dSDimitry Andric SlotIndexes *Indexes = nullptr; 410b57cec5SDimitry Andric 420b57cec5SDimitry Andric // Save and Restore blocks of the current function. Typically there is a 430b57cec5SDimitry Andric // single save block, unless Windows EH funclets are involved. 440b57cec5SDimitry Andric MBBVector SaveBlocks; 450b57cec5SDimitry Andric MBBVector RestoreBlocks; 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric public: 480b57cec5SDimitry Andric static char ID; 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric SILowerSGPRSpills() : MachineFunctionPass(ID) {} 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric void calculateSaveRestoreBlocks(MachineFunction &MF); 535f757f3fSDimitry Andric bool spillCalleeSavedRegs(MachineFunction &MF, 545f757f3fSDimitry Andric SmallVectorImpl<int> &CalleeSavedFIs); 555f757f3fSDimitry Andric void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS); 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 600b57cec5SDimitry Andric AU.setPreservesAll(); 610b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 620b57cec5SDimitry Andric } 635f757f3fSDimitry Andric 645f757f3fSDimitry Andric MachineFunctionProperties getClearedProperties() const override { 655f757f3fSDimitry Andric // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs. 665f757f3fSDimitry Andric return MachineFunctionProperties() 675f757f3fSDimitry Andric .set(MachineFunctionProperties::Property::IsSSA) 685f757f3fSDimitry Andric .set(MachineFunctionProperties::Property::NoVRegs); 695f757f3fSDimitry Andric } 700b57cec5SDimitry Andric }; 710b57cec5SDimitry Andric 720b57cec5SDimitry Andric } // end anonymous namespace 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric char SILowerSGPRSpills::ID = 0; 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE, 770b57cec5SDimitry Andric "SI lower SGPR spill instructions", false, false) 78*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) 790b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 800b57cec5SDimitry Andric INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE, 810b57cec5SDimitry Andric "SI lower SGPR spill instructions", false, false) 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID; 840b57cec5SDimitry Andric 85753f127fSDimitry Andric /// Insert spill code for the callee-saved registers used in the function. 860b57cec5SDimitry Andric static void insertCSRSaves(MachineBasicBlock &SaveBlock, 87bdd1243dSDimitry Andric ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes, 880b57cec5SDimitry Andric LiveIntervals *LIS) { 890b57cec5SDimitry Andric MachineFunction &MF = *SaveBlock.getParent(); 900b57cec5SDimitry Andric const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 910b57cec5SDimitry Andric const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 920b57cec5SDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 9381ad6265SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 9481ad6265SDimitry Andric const SIRegisterInfo *RI = ST.getRegisterInfo(); 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric MachineBasicBlock::iterator I = SaveBlock.begin(); 970b57cec5SDimitry Andric if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { 98fe6060f1SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 99fe6060f1SDimitry Andric 1000b57cec5SDimitry Andric for (const CalleeSavedInfo &CS : CSI) { 1010b57cec5SDimitry Andric // Insert the spill to the stack frame. 102e8d8bef9SDimitry Andric MCRegister Reg = CS.getReg(); 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric MachineInstrSpan MIS(I, &SaveBlock); 10581ad6265SDimitry Andric const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( 10681ad6265SDimitry Andric Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32); 1070b57cec5SDimitry Andric 108fe6060f1SDimitry Andric // If this value was already livein, we probably have a direct use of the 109fe6060f1SDimitry Andric // incoming register value, so don't kill at the spill point. This happens 110fe6060f1SDimitry Andric // since we pass some special inputs (workgroup IDs) in the callee saved 111fe6060f1SDimitry Andric // range. 112fe6060f1SDimitry Andric const bool IsLiveIn = MRI.isLiveIn(Reg); 113fe6060f1SDimitry Andric TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(), 114bdd1243dSDimitry Andric RC, TRI, Register()); 1150b57cec5SDimitry Andric 116bdd1243dSDimitry Andric if (Indexes) { 1170b57cec5SDimitry Andric assert(std::distance(MIS.begin(), I) == 1); 1180b57cec5SDimitry Andric MachineInstr &Inst = *std::prev(I); 119bdd1243dSDimitry Andric Indexes->insertMachineInstrInMaps(Inst); 1200b57cec5SDimitry Andric } 121bdd1243dSDimitry Andric 122bdd1243dSDimitry Andric if (LIS) 123bdd1243dSDimitry Andric LIS->removeAllRegUnitsForPhysReg(Reg); 1240b57cec5SDimitry Andric } 1250b57cec5SDimitry Andric } 1260b57cec5SDimitry Andric } 1270b57cec5SDimitry Andric 1280b57cec5SDimitry Andric /// Insert restore code for the callee-saved registers used in the function. 1290b57cec5SDimitry Andric static void insertCSRRestores(MachineBasicBlock &RestoreBlock, 1305ffd83dbSDimitry Andric MutableArrayRef<CalleeSavedInfo> CSI, 131bdd1243dSDimitry Andric SlotIndexes *Indexes, LiveIntervals *LIS) { 1320b57cec5SDimitry Andric MachineFunction &MF = *RestoreBlock.getParent(); 1330b57cec5SDimitry Andric const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 1340b57cec5SDimitry Andric const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 1350b57cec5SDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 13681ad6265SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 13781ad6265SDimitry Andric const SIRegisterInfo *RI = ST.getRegisterInfo(); 1380b57cec5SDimitry Andric // Restore all registers immediately before the return and any 1390b57cec5SDimitry Andric // terminators that precede it. 1400b57cec5SDimitry Andric MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); 1410b57cec5SDimitry Andric 1420b57cec5SDimitry Andric // FIXME: Just emit the readlane/writelane directly 1430b57cec5SDimitry Andric if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { 1440b57cec5SDimitry Andric for (const CalleeSavedInfo &CI : reverse(CSI)) { 14504eeddc0SDimitry Andric Register Reg = CI.getReg(); 14681ad6265SDimitry Andric const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( 14781ad6265SDimitry Andric Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32); 1480b57cec5SDimitry Andric 149bdd1243dSDimitry Andric TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI, 150bdd1243dSDimitry Andric Register()); 1510b57cec5SDimitry Andric assert(I != RestoreBlock.begin() && 1520b57cec5SDimitry Andric "loadRegFromStackSlot didn't insert any code!"); 1530b57cec5SDimitry Andric // Insert in reverse order. loadRegFromStackSlot can insert 1540b57cec5SDimitry Andric // multiple instructions. 1550b57cec5SDimitry Andric 156bdd1243dSDimitry Andric if (Indexes) { 1570b57cec5SDimitry Andric MachineInstr &Inst = *std::prev(I); 158bdd1243dSDimitry Andric Indexes->insertMachineInstrInMaps(Inst); 1590b57cec5SDimitry Andric } 160bdd1243dSDimitry Andric 161bdd1243dSDimitry Andric if (LIS) 162bdd1243dSDimitry Andric LIS->removeAllRegUnitsForPhysReg(Reg); 1630b57cec5SDimitry Andric } 1640b57cec5SDimitry Andric } 1650b57cec5SDimitry Andric } 1660b57cec5SDimitry Andric 1670b57cec5SDimitry Andric /// Compute the sets of entry and return blocks for saving and restoring 1680b57cec5SDimitry Andric /// callee-saved registers, and placing prolog and epilog code. 1690b57cec5SDimitry Andric void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) { 1700b57cec5SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 1710b57cec5SDimitry Andric 1720b57cec5SDimitry Andric // Even when we do not change any CSR, we still want to insert the 1730b57cec5SDimitry Andric // prologue and epilogue of the function. 1740b57cec5SDimitry Andric // So set the save points for those. 1750b57cec5SDimitry Andric 1760b57cec5SDimitry Andric // Use the points found by shrink-wrapping, if any. 1770b57cec5SDimitry Andric if (MFI.getSavePoint()) { 1780b57cec5SDimitry Andric SaveBlocks.push_back(MFI.getSavePoint()); 1790b57cec5SDimitry Andric assert(MFI.getRestorePoint() && "Both restore and save must be set"); 1800b57cec5SDimitry Andric MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 1810b57cec5SDimitry Andric // If RestoreBlock does not have any successor and is not a return block 1820b57cec5SDimitry Andric // then the end point is unreachable and we do not need to insert any 1830b57cec5SDimitry Andric // epilogue. 1840b57cec5SDimitry Andric if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) 1850b57cec5SDimitry Andric RestoreBlocks.push_back(RestoreBlock); 1860b57cec5SDimitry Andric return; 1870b57cec5SDimitry Andric } 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric // Save refs to entry and return blocks. 1900b57cec5SDimitry Andric SaveBlocks.push_back(&MF.front()); 1910b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF) { 1920b57cec5SDimitry Andric if (MBB.isEHFuncletEntry()) 1930b57cec5SDimitry Andric SaveBlocks.push_back(&MBB); 1940b57cec5SDimitry Andric if (MBB.isReturnBlock()) 1950b57cec5SDimitry Andric RestoreBlocks.push_back(&MBB); 1960b57cec5SDimitry Andric } 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric 199e8d8bef9SDimitry Andric // TODO: To support shrink wrapping, this would need to copy 200e8d8bef9SDimitry Andric // PrologEpilogInserter's updateLiveness. 201e8d8bef9SDimitry Andric static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) { 202e8d8bef9SDimitry Andric MachineBasicBlock &EntryBB = MF.front(); 203e8d8bef9SDimitry Andric 204e8d8bef9SDimitry Andric for (const CalleeSavedInfo &CSIReg : CSI) 205e8d8bef9SDimitry Andric EntryBB.addLiveIn(CSIReg.getReg()); 206e8d8bef9SDimitry Andric EntryBB.sortUniqueLiveIns(); 207e8d8bef9SDimitry Andric } 208e8d8bef9SDimitry Andric 2095f757f3fSDimitry Andric bool SILowerSGPRSpills::spillCalleeSavedRegs( 2105f757f3fSDimitry Andric MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) { 2110b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 2120b57cec5SDimitry Andric const Function &F = MF.getFunction(); 2130b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 2140b57cec5SDimitry Andric const SIFrameLowering *TFI = ST.getFrameLowering(); 2150b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 2160b57cec5SDimitry Andric RegScavenger *RS = nullptr; 2170b57cec5SDimitry Andric 2180b57cec5SDimitry Andric // Determine which of the registers in the callee save list should be saved. 2190b57cec5SDimitry Andric BitVector SavedRegs; 2200b57cec5SDimitry Andric TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS); 2210b57cec5SDimitry Andric 2220b57cec5SDimitry Andric // Add the code to save and restore the callee saved registers. 2230b57cec5SDimitry Andric if (!F.hasFnAttribute(Attribute::Naked)) { 2240b57cec5SDimitry Andric // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is 2250b57cec5SDimitry Andric // necessary for verifier liveness checks. 2260b57cec5SDimitry Andric MFI.setCalleeSavedInfoValid(true); 2270b57cec5SDimitry Andric 2280b57cec5SDimitry Andric std::vector<CalleeSavedInfo> CSI; 2290b57cec5SDimitry Andric const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); 2300b57cec5SDimitry Andric 2310b57cec5SDimitry Andric for (unsigned I = 0; CSRegs[I]; ++I) { 232e8d8bef9SDimitry Andric MCRegister Reg = CSRegs[I]; 233e8d8bef9SDimitry Andric 2340b57cec5SDimitry Andric if (SavedRegs.test(Reg)) { 2355ffd83dbSDimitry Andric const TargetRegisterClass *RC = 2365ffd83dbSDimitry Andric TRI->getMinimalPhysRegClass(Reg, MVT::i32); 2370b57cec5SDimitry Andric int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), 2385ffd83dbSDimitry Andric TRI->getSpillAlign(*RC), true); 2390b57cec5SDimitry Andric 240*0fca6ea1SDimitry Andric CSI.emplace_back(Reg, JunkFI); 2415f757f3fSDimitry Andric CalleeSavedFIs.push_back(JunkFI); 2420b57cec5SDimitry Andric } 2430b57cec5SDimitry Andric } 2440b57cec5SDimitry Andric 2450b57cec5SDimitry Andric if (!CSI.empty()) { 2460b57cec5SDimitry Andric for (MachineBasicBlock *SaveBlock : SaveBlocks) 247bdd1243dSDimitry Andric insertCSRSaves(*SaveBlock, CSI, Indexes, LIS); 2480b57cec5SDimitry Andric 249e8d8bef9SDimitry Andric // Add live ins to save blocks. 250e8d8bef9SDimitry Andric assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented"); 251e8d8bef9SDimitry Andric updateLiveness(MF, CSI); 252e8d8bef9SDimitry Andric 2530b57cec5SDimitry Andric for (MachineBasicBlock *RestoreBlock : RestoreBlocks) 254bdd1243dSDimitry Andric insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS); 2550b57cec5SDimitry Andric return true; 2560b57cec5SDimitry Andric } 2570b57cec5SDimitry Andric } 2580b57cec5SDimitry Andric 2590b57cec5SDimitry Andric return false; 2600b57cec5SDimitry Andric } 2610b57cec5SDimitry Andric 2625f757f3fSDimitry Andric void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF, 2635f757f3fSDimitry Andric LiveIntervals *LIS) { 2645f757f3fSDimitry Andric // TODO: This is a workaround to avoid the unmodelled liveness computed with 2655f757f3fSDimitry Andric // whole-wave virtual registers when allocated together with the regular VGPR 2665f757f3fSDimitry Andric // virtual registers. Presently, the liveness computed during the regalloc is 2675f757f3fSDimitry Andric // only uniform (or single lane aware) and it doesn't take account of the 2685f757f3fSDimitry Andric // divergent control flow that exists for our GPUs. Since the WWM registers 2695f757f3fSDimitry Andric // can modify inactive lanes, the wave-aware liveness should be computed for 2705f757f3fSDimitry Andric // the virtual registers to accurately plot their interferences. Without 2715f757f3fSDimitry Andric // having the divergent CFG for the function, it is difficult to implement the 2725f757f3fSDimitry Andric // wave-aware liveness info. Until then, we conservatively extend the liveness 2735f757f3fSDimitry Andric // of the wwm registers into the entire function so that they won't be reused 2745f757f3fSDimitry Andric // without first spilling/splitting their liveranges. 2755f757f3fSDimitry Andric SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 2765f757f3fSDimitry Andric 2775f757f3fSDimitry Andric // Insert the IMPLICIT_DEF for the wwm-registers in the entry blocks. 2785f757f3fSDimitry Andric for (auto Reg : MFI->getSGPRSpillVGPRs()) { 2795f757f3fSDimitry Andric for (MachineBasicBlock *SaveBlock : SaveBlocks) { 2805f757f3fSDimitry Andric MachineBasicBlock::iterator InsertBefore = SaveBlock->begin(); 281*0fca6ea1SDimitry Andric DebugLoc DL = SaveBlock->findDebugLoc(InsertBefore); 282*0fca6ea1SDimitry Andric auto MIB = BuildMI(*SaveBlock, InsertBefore, DL, 2835f757f3fSDimitry Andric TII->get(AMDGPU::IMPLICIT_DEF), Reg); 2845f757f3fSDimitry Andric MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG); 2855f757f3fSDimitry Andric // Set SGPR_SPILL asm printer flag 2865f757f3fSDimitry Andric MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL); 2875f757f3fSDimitry Andric if (LIS) { 2885f757f3fSDimitry Andric LIS->InsertMachineInstrInMaps(*MIB); 2895f757f3fSDimitry Andric } 2905f757f3fSDimitry Andric } 2915f757f3fSDimitry Andric } 2925f757f3fSDimitry Andric 2935f757f3fSDimitry Andric // Insert the KILL in the return blocks to extend their liveness untill the 2945f757f3fSDimitry Andric // end of function. Insert a separate KILL for each VGPR. 2955f757f3fSDimitry Andric for (MachineBasicBlock *RestoreBlock : RestoreBlocks) { 2965f757f3fSDimitry Andric MachineBasicBlock::iterator InsertBefore = 2975f757f3fSDimitry Andric RestoreBlock->getFirstTerminator(); 298*0fca6ea1SDimitry Andric DebugLoc DL = RestoreBlock->findDebugLoc(InsertBefore); 2995f757f3fSDimitry Andric for (auto Reg : MFI->getSGPRSpillVGPRs()) { 300*0fca6ea1SDimitry Andric auto MIB = BuildMI(*RestoreBlock, InsertBefore, DL, 3015f757f3fSDimitry Andric TII->get(TargetOpcode::KILL)); 3025f757f3fSDimitry Andric MIB.addReg(Reg); 3035f757f3fSDimitry Andric if (LIS) 3045f757f3fSDimitry Andric LIS->InsertMachineInstrInMaps(*MIB); 3055f757f3fSDimitry Andric } 3065f757f3fSDimitry Andric } 3075f757f3fSDimitry Andric } 3085f757f3fSDimitry Andric 3090b57cec5SDimitry Andric bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { 3100b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 3110b57cec5SDimitry Andric TII = ST.getInstrInfo(); 3120b57cec5SDimitry Andric TRI = &TII->getRegisterInfo(); 3130b57cec5SDimitry Andric 314*0fca6ea1SDimitry Andric auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); 315*0fca6ea1SDimitry Andric LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; 316*0fca6ea1SDimitry Andric auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>(); 317*0fca6ea1SDimitry Andric Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr; 3180b57cec5SDimitry Andric 3190b57cec5SDimitry Andric assert(SaveBlocks.empty() && RestoreBlocks.empty()); 3200b57cec5SDimitry Andric 3210b57cec5SDimitry Andric // First, expose any CSR SGPR spills. This is mostly the same as what PEI 3220b57cec5SDimitry Andric // does, but somewhat simpler. 3230b57cec5SDimitry Andric calculateSaveRestoreBlocks(MF); 3245f757f3fSDimitry Andric SmallVector<int> CalleeSavedFIs; 3255f757f3fSDimitry Andric bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs); 3260b57cec5SDimitry Andric 3270b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 328fe6060f1SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 329fe6060f1SDimitry Andric SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 330fe6060f1SDimitry Andric 3310b57cec5SDimitry Andric if (!MFI.hasStackObjects() && !HasCSRs) { 3320b57cec5SDimitry Andric SaveBlocks.clear(); 3330b57cec5SDimitry Andric RestoreBlocks.clear(); 3340b57cec5SDimitry Andric return false; 3350b57cec5SDimitry Andric } 3360b57cec5SDimitry Andric 3370b57cec5SDimitry Andric bool MadeChange = false; 3385f757f3fSDimitry Andric bool SpilledToVirtVGPRLanes = false; 3390b57cec5SDimitry Andric 3400b57cec5SDimitry Andric // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be 3410b57cec5SDimitry Andric // handled as SpilledToReg in regular PrologEpilogInserter. 342e8d8bef9SDimitry Andric const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() && 343e8d8bef9SDimitry Andric (HasCSRs || FuncInfo->hasSpilledSGPRs()); 344fe6060f1SDimitry Andric if (HasSGPRSpillToVGPR) { 3450b57cec5SDimitry Andric // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs 3460b57cec5SDimitry Andric // are spilled to VGPRs, in which case we can eliminate the stack usage. 3470b57cec5SDimitry Andric // 3480b57cec5SDimitry Andric // This operates under the assumption that only other SGPR spills are users 3490b57cec5SDimitry Andric // of the frame index. 3505ffd83dbSDimitry Andric 351fe6060f1SDimitry Andric // To track the spill frame indices handled in this pass. 352fe6060f1SDimitry Andric BitVector SpillFIs(MFI.getObjectIndexEnd(), false); 353fe6060f1SDimitry Andric 3540b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF) { 355349cc55cSDimitry Andric for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { 3560b57cec5SDimitry Andric if (!TII->isSGPRSpill(MI)) 3570b57cec5SDimitry Andric continue; 3580b57cec5SDimitry Andric 3590b57cec5SDimitry Andric int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex(); 3600b57cec5SDimitry Andric assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 3615f757f3fSDimitry Andric 3625f757f3fSDimitry Andric bool IsCalleeSaveSGPRSpill = llvm::is_contained(CalleeSavedFIs, FI); 3635f757f3fSDimitry Andric if (IsCalleeSaveSGPRSpill) { 3645f757f3fSDimitry Andric // Spill callee-saved SGPRs into physical VGPR lanes. 3655f757f3fSDimitry Andric 3665f757f3fSDimitry Andric // TODO: This is to ensure the CFIs are static for efficient frame 3675f757f3fSDimitry Andric // unwinding in the debugger. Spilling them into virtual VGPR lanes 3685f757f3fSDimitry Andric // involve regalloc to allocate the physical VGPRs and that might 3695f757f3fSDimitry Andric // cause intermediate spill/split of such liveranges for successful 3705f757f3fSDimitry Andric // allocation. This would result in broken CFI encoding unless the 3715f757f3fSDimitry Andric // regalloc aware CFI generation to insert new CFIs along with the 3725f757f3fSDimitry Andric // intermediate spills is implemented. There is no such support 3735f757f3fSDimitry Andric // currently exist in the LLVM compiler. 3747a6dacacSDimitry Andric if (FuncInfo->allocateSGPRSpillToVGPRLane( 3757a6dacacSDimitry Andric MF, FI, /*SpillToPhysVGPRLane=*/true)) { 376bdd1243dSDimitry Andric bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex( 3775f757f3fSDimitry Andric MI, FI, nullptr, Indexes, LIS, true); 3785f757f3fSDimitry Andric if (!Spilled) 3795f757f3fSDimitry Andric llvm_unreachable( 3805f757f3fSDimitry Andric "failed to spill SGPR to physical VGPR lane when allocated"); 3815f757f3fSDimitry Andric } 3825f757f3fSDimitry Andric } else { 3835f757f3fSDimitry Andric if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) { 3845f757f3fSDimitry Andric bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex( 385bdd1243dSDimitry Andric MI, FI, nullptr, Indexes, LIS); 3865f757f3fSDimitry Andric if (!Spilled) 3875f757f3fSDimitry Andric llvm_unreachable( 3885f757f3fSDimitry Andric "failed to spill SGPR to virtual VGPR lane when allocated"); 389fe6060f1SDimitry Andric SpillFIs.set(FI); 3905f757f3fSDimitry Andric SpilledToVirtVGPRLanes = true; 3915f757f3fSDimitry Andric } 3920b57cec5SDimitry Andric } 3930b57cec5SDimitry Andric } 3940b57cec5SDimitry Andric } 3950b57cec5SDimitry Andric 3965f757f3fSDimitry Andric if (SpilledToVirtVGPRLanes) { 3975f757f3fSDimitry Andric extendWWMVirtRegLiveness(MF, LIS); 3985f757f3fSDimitry Andric if (LIS) { 3995f757f3fSDimitry Andric // Compute the LiveInterval for the newly created virtual registers. 4008a4dda33SDimitry Andric for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) 4015f757f3fSDimitry Andric LIS->createAndComputeVirtRegInterval(Reg); 4025f757f3fSDimitry Andric } 4035f757f3fSDimitry Andric } 4048a4dda33SDimitry Andric 4055f757f3fSDimitry Andric for (MachineBasicBlock &MBB : MF) { 406fe6060f1SDimitry Andric // FIXME: The dead frame indices are replaced with a null register from 407fe6060f1SDimitry Andric // the debug value instructions. We should instead, update it with the 408fe6060f1SDimitry Andric // correct register value. But not sure the register value alone is 409fe6060f1SDimitry Andric // adequate to lower the DIExpression. It should be worked out later. 410fe6060f1SDimitry Andric for (MachineInstr &MI : MBB) { 411fe6060f1SDimitry Andric if (MI.isDebugValue() && MI.getOperand(0).isFI() && 412bdd1243dSDimitry Andric !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) && 413fe6060f1SDimitry Andric SpillFIs[MI.getOperand(0).getIndex()]) { 414fe6060f1SDimitry Andric MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/); 415fe6060f1SDimitry Andric } 416fe6060f1SDimitry Andric } 4170b57cec5SDimitry Andric } 4180b57cec5SDimitry Andric 419349cc55cSDimitry Andric // All those frame indices which are dead by now should be removed from the 420349cc55cSDimitry Andric // function frame. Otherwise, there is a side effect such as re-mapping of 421349cc55cSDimitry Andric // free frame index ids by the later pass(es) like "stack slot coloring" 422349cc55cSDimitry Andric // which in turn could mess-up with the book keeping of "frame index to VGPR 423349cc55cSDimitry Andric // lane". 42481ad6265SDimitry Andric FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false); 425349cc55cSDimitry Andric 4265f757f3fSDimitry Andric MadeChange = true; 4275f757f3fSDimitry Andric } 4285f757f3fSDimitry Andric 4295f757f3fSDimitry Andric if (SpilledToVirtVGPRLanes) { 43006c3fb27SDimitry Andric const TargetRegisterClass *RC = TRI->getWaveMaskRegClass(); 43106c3fb27SDimitry Andric // Shift back the reserved SGPR for EXEC copy into the lowest range. 43206c3fb27SDimitry Andric // This SGPR is reserved to handle the whole-wave spill/copy operations 43306c3fb27SDimitry Andric // that might get inserted during vgpr regalloc. 43406c3fb27SDimitry Andric Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF); 43506c3fb27SDimitry Andric if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) < 43606c3fb27SDimitry Andric TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy())) 43706c3fb27SDimitry Andric FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR); 43806c3fb27SDimitry Andric } else { 4395f757f3fSDimitry Andric // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM 4405f757f3fSDimitry Andric // spills/copies. Reset the SGPR reserved for EXEC copy. 44106c3fb27SDimitry Andric FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister); 44206c3fb27SDimitry Andric } 44306c3fb27SDimitry Andric 4440b57cec5SDimitry Andric SaveBlocks.clear(); 4450b57cec5SDimitry Andric RestoreBlocks.clear(); 4460b57cec5SDimitry Andric 4470b57cec5SDimitry Andric return MadeChange; 4480b57cec5SDimitry Andric } 449