10b57cec5SDimitry Andric //===----------------------- SIFrameLowering.cpp --------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //==-----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "SIFrameLowering.h" 10e8d8bef9SDimitry Andric #include "AMDGPU.h" 11e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 120b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 13e8d8bef9SDimitry Andric #include "SIMachineFunctionInfo.h" 145f757f3fSDimitry Andric #include "llvm/CodeGen/LiveRegUnits.h" 150b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 160b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterScavenging.h" 17e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h" 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric using namespace llvm; 200b57cec5SDimitry Andric 210b57cec5SDimitry Andric #define DEBUG_TYPE "frame-info" 220b57cec5SDimitry Andric 23fe6060f1SDimitry Andric static cl::opt<bool> EnableSpillVGPRToAGPR( 24fe6060f1SDimitry Andric "amdgpu-spill-vgpr-to-agpr", 25fe6060f1SDimitry Andric cl::desc("Enable spilling VGPRs to AGPRs"), 26fe6060f1SDimitry Andric cl::ReallyHidden, 27fe6060f1SDimitry Andric cl::init(true)); 280b57cec5SDimitry Andric 295f757f3fSDimitry Andric // Find a register matching \p RC from \p LiveUnits which is unused and 305f757f3fSDimitry Andric // available throughout the function. On failure, returns AMDGPU::NoRegister. 315f757f3fSDimitry Andric // TODO: Rewrite the loop here to iterate over MCRegUnits instead of 325f757f3fSDimitry Andric // MCRegisters. 
This should reduce the number of iterations and avoid redundant 335f757f3fSDimitry Andric // checking. 34bdd1243dSDimitry Andric static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, 355f757f3fSDimitry Andric const LiveRegUnits &LiveUnits, 36bdd1243dSDimitry Andric const TargetRegisterClass &RC) { 37bdd1243dSDimitry Andric for (MCRegister Reg : RC) { 385f757f3fSDimitry Andric if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) && 395f757f3fSDimitry Andric !MRI.isReserved(Reg)) 40bdd1243dSDimitry Andric return Reg; 41bdd1243dSDimitry Andric } 42bdd1243dSDimitry Andric return MCRegister(); 43bdd1243dSDimitry Andric } 44bdd1243dSDimitry Andric 45fe6060f1SDimitry Andric // Find a scratch register that we can use in the prologue. We avoid using 46fe6060f1SDimitry Andric // callee-save registers since they may appear to be free when this is called 47fe6060f1SDimitry Andric // from canUseAsPrologue (during shrink wrapping), but then no longer be free 48fe6060f1SDimitry Andric // when this is called from emitPrologue. 495f757f3fSDimitry Andric static MCRegister findScratchNonCalleeSaveRegister( 505f757f3fSDimitry Andric MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits, 515f757f3fSDimitry Andric const TargetRegisterClass &RC, bool Unused = false) { 520b57cec5SDimitry Andric // Mark callee saved registers as used so we will not choose them. 530b57cec5SDimitry Andric const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); 540b57cec5SDimitry Andric for (unsigned i = 0; CSRegs[i]; ++i) 555f757f3fSDimitry Andric LiveUnits.addReg(CSRegs[i]); 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric // We are looking for a register that can be used throughout the entire 580b57cec5SDimitry Andric // function, so any use is unacceptable. 
59bdd1243dSDimitry Andric if (Unused) 605f757f3fSDimitry Andric return findUnusedRegister(MRI, LiveUnits, RC); 61bdd1243dSDimitry Andric 625ffd83dbSDimitry Andric for (MCRegister Reg : RC) { 635f757f3fSDimitry Andric if (LiveUnits.available(Reg) && !MRI.isReserved(Reg)) 640b57cec5SDimitry Andric return Reg; 650b57cec5SDimitry Andric } 660b57cec5SDimitry Andric 675ffd83dbSDimitry Andric return MCRegister(); 680b57cec5SDimitry Andric } 690b57cec5SDimitry Andric 7006c3fb27SDimitry Andric /// Query target location for spilling SGPRs 7106c3fb27SDimitry Andric /// \p IncludeScratchCopy : Also look for free scratch SGPRs 72bdd1243dSDimitry Andric static void getVGPRSpillLaneOrTempRegister( 735f757f3fSDimitry Andric MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR, 7406c3fb27SDimitry Andric const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass, 7506c3fb27SDimitry Andric bool IncludeScratchCopy = true) { 765ffd83dbSDimitry Andric SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 775ffd83dbSDimitry Andric MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 785ffd83dbSDimitry Andric 795ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 805ffd83dbSDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 81bdd1243dSDimitry Andric unsigned Size = TRI->getSpillSize(RC); 82bdd1243dSDimitry Andric Align Alignment = TRI->getSpillAlign(RC); 835ffd83dbSDimitry Andric 84bdd1243dSDimitry Andric // We need to save and restore the given SGPR. 855ffd83dbSDimitry Andric 8606c3fb27SDimitry Andric Register ScratchSGPR; 875f757f3fSDimitry Andric // 1: Try to save the given register into an unused scratch SGPR. The 885f757f3fSDimitry Andric // LiveUnits should have all the callee saved registers marked as used. For 895f757f3fSDimitry Andric // certain cases we skip copy to scratch SGPR. 
9006c3fb27SDimitry Andric if (IncludeScratchCopy) 915f757f3fSDimitry Andric ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC); 92bdd1243dSDimitry Andric 93bdd1243dSDimitry Andric if (!ScratchSGPR) { 94bdd1243dSDimitry Andric int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr, 955ffd83dbSDimitry Andric TargetStackID::SGPRSpill); 965ffd83dbSDimitry Andric 97bdd1243dSDimitry Andric if (TRI->spillSGPRToVGPR() && 987a6dacacSDimitry Andric MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true, 997a6dacacSDimitry Andric /*IsPrologEpilog=*/true)) { 100bdd1243dSDimitry Andric // 2: There's no free lane to spill, and no free register to save the 101bdd1243dSDimitry Andric // SGPR, so we're forced to take another VGPR to use for the spill. 102bdd1243dSDimitry Andric MFI->addToPrologEpilogSGPRSpills( 103bdd1243dSDimitry Andric SGPR, PrologEpilogSGPRSaveRestoreInfo( 104bdd1243dSDimitry Andric SGPRSaveKind::SPILL_TO_VGPR_LANE, FI)); 105e8d8bef9SDimitry Andric 1065f757f3fSDimitry Andric LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front(); 107bdd1243dSDimitry Andric dbgs() << printReg(SGPR, TRI) << " requires fallback spill to " 1085f757f3fSDimitry Andric << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane 1095f757f3fSDimitry Andric << '\n';); 1105ffd83dbSDimitry Andric } else { 111bdd1243dSDimitry Andric // Remove dead <FI> index 112bdd1243dSDimitry Andric MF.getFrameInfo().RemoveStackObject(FI); 113bdd1243dSDimitry Andric // 3: If all else fails, spill the register to memory. 
114bdd1243dSDimitry Andric FI = FrameInfo.CreateSpillStackObject(Size, Alignment); 115bdd1243dSDimitry Andric MFI->addToPrologEpilogSGPRSpills( 116bdd1243dSDimitry Andric SGPR, 117bdd1243dSDimitry Andric PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::SPILL_TO_MEM, FI)); 118bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling " 119bdd1243dSDimitry Andric << printReg(SGPR, TRI) << '\n'); 1205ffd83dbSDimitry Andric } 1215ffd83dbSDimitry Andric } else { 122bdd1243dSDimitry Andric MFI->addToPrologEpilogSGPRSpills( 123bdd1243dSDimitry Andric SGPR, PrologEpilogSGPRSaveRestoreInfo( 124bdd1243dSDimitry Andric SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR)); 1255f757f3fSDimitry Andric LiveUnits.addReg(ScratchSGPR); 126bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to " 127bdd1243dSDimitry Andric << printReg(ScratchSGPR, TRI) << '\n'); 1285ffd83dbSDimitry Andric } 1290b57cec5SDimitry Andric } 1300b57cec5SDimitry Andric 1310b57cec5SDimitry Andric // We need to specially emit stack operations here because a different frame 1320b57cec5SDimitry Andric // register is used than in the rest of the function, as getFrameRegister would 1330b57cec5SDimitry Andric // use. 134fe6060f1SDimitry Andric static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, 135fe6060f1SDimitry Andric const SIMachineFunctionInfo &FuncInfo, 1365f757f3fSDimitry Andric LiveRegUnits &LiveUnits, MachineFunction &MF, 137e8d8bef9SDimitry Andric MachineBasicBlock &MBB, 138349cc55cSDimitry Andric MachineBasicBlock::iterator I, const DebugLoc &DL, 139bdd1243dSDimitry Andric Register SpillReg, int FI, Register FrameReg, 140bdd1243dSDimitry Andric int64_t DwordOff = 0) { 141fe6060f1SDimitry Andric unsigned Opc = ST.enableFlatScratch() ? 
AMDGPU::SCRATCH_STORE_DWORD_SADDR 142fe6060f1SDimitry Andric : AMDGPU::BUFFER_STORE_DWORD_OFFSET; 1430b57cec5SDimitry Andric 144fe6060f1SDimitry Andric MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 145fe6060f1SDimitry Andric MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 146fe6060f1SDimitry Andric MachineMemOperand *MMO = MF.getMachineMemOperand( 147fe6060f1SDimitry Andric PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI), 148fe6060f1SDimitry Andric FrameInfo.getObjectAlign(FI)); 1495f757f3fSDimitry Andric LiveUnits.addReg(SpillReg); 150bdd1243dSDimitry Andric bool IsKill = !MBB.isLiveIn(SpillReg); 151bdd1243dSDimitry Andric TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg, 1525f757f3fSDimitry Andric DwordOff, MMO, nullptr, &LiveUnits); 153bdd1243dSDimitry Andric if (IsKill) 1545f757f3fSDimitry Andric LiveUnits.removeReg(SpillReg); 155e8d8bef9SDimitry Andric } 156e8d8bef9SDimitry Andric 157fe6060f1SDimitry Andric static void buildEpilogRestore(const GCNSubtarget &ST, 158fe6060f1SDimitry Andric const SIRegisterInfo &TRI, 159fe6060f1SDimitry Andric const SIMachineFunctionInfo &FuncInfo, 1605f757f3fSDimitry Andric LiveRegUnits &LiveUnits, MachineFunction &MF, 161e8d8bef9SDimitry Andric MachineBasicBlock &MBB, 162349cc55cSDimitry Andric MachineBasicBlock::iterator I, 163bdd1243dSDimitry Andric const DebugLoc &DL, Register SpillReg, int FI, 164bdd1243dSDimitry Andric Register FrameReg, int64_t DwordOff = 0) { 165fe6060f1SDimitry Andric unsigned Opc = ST.enableFlatScratch() ? 
AMDGPU::SCRATCH_LOAD_DWORD_SADDR 166fe6060f1SDimitry Andric : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; 1670b57cec5SDimitry Andric 168fe6060f1SDimitry Andric MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 169fe6060f1SDimitry Andric MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); 170fe6060f1SDimitry Andric MachineMemOperand *MMO = MF.getMachineMemOperand( 171fe6060f1SDimitry Andric PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI), 172fe6060f1SDimitry Andric FrameInfo.getObjectAlign(FI)); 173bdd1243dSDimitry Andric TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg, 1745f757f3fSDimitry Andric DwordOff, MMO, nullptr, &LiveUnits); 1750b57cec5SDimitry Andric } 1760b57cec5SDimitry Andric 177e8d8bef9SDimitry Andric static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 178e8d8bef9SDimitry Andric const DebugLoc &DL, const SIInstrInfo *TII, 179e8d8bef9SDimitry Andric Register TargetReg) { 180e8d8bef9SDimitry Andric MachineFunction *MF = MBB.getParent(); 181e8d8bef9SDimitry Andric const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 182e8d8bef9SDimitry Andric const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 183e8d8bef9SDimitry Andric const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); 184e8d8bef9SDimitry Andric Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0); 185e8d8bef9SDimitry Andric Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1); 186e8d8bef9SDimitry Andric 187e8d8bef9SDimitry Andric if (MFI->getGITPtrHigh() != 0xffffffff) { 188e8d8bef9SDimitry Andric BuildMI(MBB, I, DL, SMovB32, TargetHi) 189e8d8bef9SDimitry Andric .addImm(MFI->getGITPtrHigh()) 190e8d8bef9SDimitry Andric .addReg(TargetReg, RegState::ImplicitDefine); 191e8d8bef9SDimitry Andric } else { 1927a6dacacSDimitry Andric const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo); 193e8d8bef9SDimitry Andric BuildMI(MBB, I, DL, GetPC64, TargetReg); 194e8d8bef9SDimitry Andric } 
195e8d8bef9SDimitry Andric Register GitPtrLo = MFI->getGITPtrLoReg(*MF); 196e8d8bef9SDimitry Andric MF->getRegInfo().addLiveIn(GitPtrLo); 197e8d8bef9SDimitry Andric MBB.addLiveIn(GitPtrLo); 198e8d8bef9SDimitry Andric BuildMI(MBB, I, DL, SMovB32, TargetLo) 199e8d8bef9SDimitry Andric .addReg(GitPtrLo); 200e8d8bef9SDimitry Andric } 201e8d8bef9SDimitry Andric 2025f757f3fSDimitry Andric static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI, 203bdd1243dSDimitry Andric const SIMachineFunctionInfo *FuncInfo, 204bdd1243dSDimitry Andric MachineFunction &MF, MachineBasicBlock &MBB, 205bdd1243dSDimitry Andric MachineBasicBlock::iterator MBBI, bool IsProlog) { 2065f757f3fSDimitry Andric if (LiveUnits.empty()) { 2075f757f3fSDimitry Andric LiveUnits.init(TRI); 208bdd1243dSDimitry Andric if (IsProlog) { 2095f757f3fSDimitry Andric LiveUnits.addLiveIns(MBB); 210bdd1243dSDimitry Andric } else { 211bdd1243dSDimitry Andric // In epilog. 2125f757f3fSDimitry Andric LiveUnits.addLiveOuts(MBB); 2135f757f3fSDimitry Andric LiveUnits.stepBackward(*MBBI); 214bdd1243dSDimitry Andric } 215bdd1243dSDimitry Andric } 216bdd1243dSDimitry Andric } 217bdd1243dSDimitry Andric 218bdd1243dSDimitry Andric namespace llvm { 219bdd1243dSDimitry Andric 220bdd1243dSDimitry Andric // SpillBuilder to save/restore special SGPR spills like the one needed for FP, 221bdd1243dSDimitry Andric // BP, etc. These spills are delayed until the current function's frame is 222bdd1243dSDimitry Andric // finalized. For a given register, the builder uses the 223bdd1243dSDimitry Andric // PrologEpilogSGPRSaveRestoreInfo to decide the spill method. 
224bdd1243dSDimitry Andric class PrologEpilogSGPRSpillBuilder { 225bdd1243dSDimitry Andric MachineBasicBlock::iterator MI; 226bdd1243dSDimitry Andric MachineBasicBlock &MBB; 227bdd1243dSDimitry Andric MachineFunction &MF; 228bdd1243dSDimitry Andric const GCNSubtarget &ST; 229bdd1243dSDimitry Andric MachineFrameInfo &MFI; 230bdd1243dSDimitry Andric SIMachineFunctionInfo *FuncInfo; 231bdd1243dSDimitry Andric const SIInstrInfo *TII; 232bdd1243dSDimitry Andric const SIRegisterInfo &TRI; 233bdd1243dSDimitry Andric Register SuperReg; 234bdd1243dSDimitry Andric const PrologEpilogSGPRSaveRestoreInfo SI; 2355f757f3fSDimitry Andric LiveRegUnits &LiveUnits; 236bdd1243dSDimitry Andric const DebugLoc &DL; 237bdd1243dSDimitry Andric Register FrameReg; 238bdd1243dSDimitry Andric ArrayRef<int16_t> SplitParts; 239bdd1243dSDimitry Andric unsigned NumSubRegs; 240bdd1243dSDimitry Andric unsigned EltSize = 4; 241bdd1243dSDimitry Andric 242bdd1243dSDimitry Andric void saveToMemory(const int FI) const { 243bdd1243dSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 244bdd1243dSDimitry Andric assert(!MFI.isDeadObjectIndex(FI)); 245bdd1243dSDimitry Andric 2465f757f3fSDimitry Andric initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true); 247bdd1243dSDimitry Andric 248bdd1243dSDimitry Andric MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( 2495f757f3fSDimitry Andric MRI, LiveUnits, AMDGPU::VGPR_32RegClass); 250bdd1243dSDimitry Andric if (!TmpVGPR) 251bdd1243dSDimitry Andric report_fatal_error("failed to find free scratch register"); 252bdd1243dSDimitry Andric 253bdd1243dSDimitry Andric for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) { 254bdd1243dSDimitry Andric Register SubReg = NumSubRegs == 1 255bdd1243dSDimitry Andric ? 
SuperReg 256bdd1243dSDimitry Andric : Register(TRI.getSubReg(SuperReg, SplitParts[I])); 257bdd1243dSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) 258bdd1243dSDimitry Andric .addReg(SubReg); 259bdd1243dSDimitry Andric 2605f757f3fSDimitry Andric buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR, 261bdd1243dSDimitry Andric FI, FrameReg, DwordOff); 262bdd1243dSDimitry Andric DwordOff += 4; 263bdd1243dSDimitry Andric } 264bdd1243dSDimitry Andric } 265bdd1243dSDimitry Andric 266bdd1243dSDimitry Andric void saveToVGPRLane(const int FI) const { 267bdd1243dSDimitry Andric assert(!MFI.isDeadObjectIndex(FI)); 268bdd1243dSDimitry Andric 269bdd1243dSDimitry Andric assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 270bdd1243dSDimitry Andric ArrayRef<SIRegisterInfo::SpilledReg> Spill = 2715f757f3fSDimitry Andric FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI); 272bdd1243dSDimitry Andric assert(Spill.size() == NumSubRegs); 273bdd1243dSDimitry Andric 274bdd1243dSDimitry Andric for (unsigned I = 0; I < NumSubRegs; ++I) { 275bdd1243dSDimitry Andric Register SubReg = NumSubRegs == 1 276bdd1243dSDimitry Andric ? 
SuperReg 277bdd1243dSDimitry Andric : Register(TRI.getSubReg(SuperReg, SplitParts[I])); 2785f757f3fSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR), 2795f757f3fSDimitry Andric Spill[I].VGPR) 280bdd1243dSDimitry Andric .addReg(SubReg) 281bdd1243dSDimitry Andric .addImm(Spill[I].Lane) 282bdd1243dSDimitry Andric .addReg(Spill[I].VGPR, RegState::Undef); 283bdd1243dSDimitry Andric } 284bdd1243dSDimitry Andric } 285bdd1243dSDimitry Andric 286bdd1243dSDimitry Andric void copyToScratchSGPR(Register DstReg) const { 287bdd1243dSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg) 288bdd1243dSDimitry Andric .addReg(SuperReg) 289bdd1243dSDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 290bdd1243dSDimitry Andric } 291bdd1243dSDimitry Andric 292bdd1243dSDimitry Andric void restoreFromMemory(const int FI) { 293bdd1243dSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 294bdd1243dSDimitry Andric 2955f757f3fSDimitry Andric initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false); 296bdd1243dSDimitry Andric MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( 2975f757f3fSDimitry Andric MRI, LiveUnits, AMDGPU::VGPR_32RegClass); 298bdd1243dSDimitry Andric if (!TmpVGPR) 299bdd1243dSDimitry Andric report_fatal_error("failed to find free scratch register"); 300bdd1243dSDimitry Andric 301bdd1243dSDimitry Andric for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) { 302bdd1243dSDimitry Andric Register SubReg = NumSubRegs == 1 303bdd1243dSDimitry Andric ? 
SuperReg 304bdd1243dSDimitry Andric : Register(TRI.getSubReg(SuperReg, SplitParts[I])); 305bdd1243dSDimitry Andric 3065f757f3fSDimitry Andric buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, 3075f757f3fSDimitry Andric TmpVGPR, FI, FrameReg, DwordOff); 308bdd1243dSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) 309bdd1243dSDimitry Andric .addReg(TmpVGPR, RegState::Kill); 310bdd1243dSDimitry Andric DwordOff += 4; 311bdd1243dSDimitry Andric } 312bdd1243dSDimitry Andric } 313bdd1243dSDimitry Andric 314bdd1243dSDimitry Andric void restoreFromVGPRLane(const int FI) { 315bdd1243dSDimitry Andric assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 316bdd1243dSDimitry Andric ArrayRef<SIRegisterInfo::SpilledReg> Spill = 3175f757f3fSDimitry Andric FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI); 318bdd1243dSDimitry Andric assert(Spill.size() == NumSubRegs); 319bdd1243dSDimitry Andric 320bdd1243dSDimitry Andric for (unsigned I = 0; I < NumSubRegs; ++I) { 321bdd1243dSDimitry Andric Register SubReg = NumSubRegs == 1 322bdd1243dSDimitry Andric ? 
SuperReg 323bdd1243dSDimitry Andric : Register(TRI.getSubReg(SuperReg, SplitParts[I])); 3245f757f3fSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg) 325bdd1243dSDimitry Andric .addReg(Spill[I].VGPR) 326bdd1243dSDimitry Andric .addImm(Spill[I].Lane); 327bdd1243dSDimitry Andric } 328bdd1243dSDimitry Andric } 329bdd1243dSDimitry Andric 330bdd1243dSDimitry Andric void copyFromScratchSGPR(Register SrcReg) const { 331bdd1243dSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg) 332bdd1243dSDimitry Andric .addReg(SrcReg) 333bdd1243dSDimitry Andric .setMIFlag(MachineInstr::FrameDestroy); 334bdd1243dSDimitry Andric } 335bdd1243dSDimitry Andric 336bdd1243dSDimitry Andric public: 337bdd1243dSDimitry Andric PrologEpilogSGPRSpillBuilder(Register Reg, 338bdd1243dSDimitry Andric const PrologEpilogSGPRSaveRestoreInfo SI, 339bdd1243dSDimitry Andric MachineBasicBlock &MBB, 340bdd1243dSDimitry Andric MachineBasicBlock::iterator MI, 341bdd1243dSDimitry Andric const DebugLoc &DL, const SIInstrInfo *TII, 342bdd1243dSDimitry Andric const SIRegisterInfo &TRI, 3435f757f3fSDimitry Andric LiveRegUnits &LiveUnits, Register FrameReg) 344bdd1243dSDimitry Andric : MI(MI), MBB(MBB), MF(*MBB.getParent()), 345bdd1243dSDimitry Andric ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()), 346bdd1243dSDimitry Andric FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI), 3475f757f3fSDimitry Andric SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL), 3485f757f3fSDimitry Andric FrameReg(FrameReg) { 349bdd1243dSDimitry Andric const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg); 350bdd1243dSDimitry Andric SplitParts = TRI.getRegSplitParts(RC, EltSize); 351bdd1243dSDimitry Andric NumSubRegs = SplitParts.empty() ? 
1 : SplitParts.size(); 352bdd1243dSDimitry Andric 353bdd1243dSDimitry Andric assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); 354bdd1243dSDimitry Andric } 355bdd1243dSDimitry Andric 356bdd1243dSDimitry Andric void save() { 357bdd1243dSDimitry Andric switch (SI.getKind()) { 358bdd1243dSDimitry Andric case SGPRSaveKind::SPILL_TO_MEM: 359bdd1243dSDimitry Andric return saveToMemory(SI.getIndex()); 360bdd1243dSDimitry Andric case SGPRSaveKind::SPILL_TO_VGPR_LANE: 361bdd1243dSDimitry Andric return saveToVGPRLane(SI.getIndex()); 362bdd1243dSDimitry Andric case SGPRSaveKind::COPY_TO_SCRATCH_SGPR: 363bdd1243dSDimitry Andric return copyToScratchSGPR(SI.getReg()); 364bdd1243dSDimitry Andric } 365bdd1243dSDimitry Andric } 366bdd1243dSDimitry Andric 367bdd1243dSDimitry Andric void restore() { 368bdd1243dSDimitry Andric switch (SI.getKind()) { 369bdd1243dSDimitry Andric case SGPRSaveKind::SPILL_TO_MEM: 370bdd1243dSDimitry Andric return restoreFromMemory(SI.getIndex()); 371bdd1243dSDimitry Andric case SGPRSaveKind::SPILL_TO_VGPR_LANE: 372bdd1243dSDimitry Andric return restoreFromVGPRLane(SI.getIndex()); 373bdd1243dSDimitry Andric case SGPRSaveKind::COPY_TO_SCRATCH_SGPR: 374bdd1243dSDimitry Andric return copyFromScratchSGPR(SI.getReg()); 375bdd1243dSDimitry Andric } 376bdd1243dSDimitry Andric } 377bdd1243dSDimitry Andric }; 378bdd1243dSDimitry Andric 379bdd1243dSDimitry Andric } // namespace llvm 380bdd1243dSDimitry Andric 3815ffd83dbSDimitry Andric // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()` 3825ffd83dbSDimitry Andric void SIFrameLowering::emitEntryFunctionFlatScratchInit( 3835ffd83dbSDimitry Andric MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 3845ffd83dbSDimitry Andric const DebugLoc &DL, Register ScratchWaveOffsetReg) const { 3855ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 3860b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 3870b57cec5SDimitry Andric const 
SIRegisterInfo *TRI = &TII->getRegisterInfo(); 3880b57cec5SDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 3890b57cec5SDimitry Andric 3900b57cec5SDimitry Andric // We don't need this if we only have spills since there is no user facing 3910b57cec5SDimitry Andric // scratch. 3920b57cec5SDimitry Andric 3930b57cec5SDimitry Andric // TODO: If we know we don't have flat instructions earlier, we can omit 3940b57cec5SDimitry Andric // this from the input registers. 3950b57cec5SDimitry Andric // 3960b57cec5SDimitry Andric // TODO: We only need to know if we access scratch space through a flat 3970b57cec5SDimitry Andric // pointer. Because we only detect if flat instructions are used at all, 3980b57cec5SDimitry Andric // this will be used more often than necessary on VI. 3990b57cec5SDimitry Andric 400e8d8bef9SDimitry Andric Register FlatScrInitLo; 401e8d8bef9SDimitry Andric Register FlatScrInitHi; 402e8d8bef9SDimitry Andric 403e8d8bef9SDimitry Andric if (ST.isAmdPalOS()) { 404e8d8bef9SDimitry Andric // Extract the scratch offset from the descriptor in the GIT 4055f757f3fSDimitry Andric LiveRegUnits LiveUnits; 4065f757f3fSDimitry Andric LiveUnits.init(*TRI); 4075f757f3fSDimitry Andric LiveUnits.addLiveIns(MBB); 408e8d8bef9SDimitry Andric 409e8d8bef9SDimitry Andric // Find unused reg to load flat scratch init into 410e8d8bef9SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 411e8d8bef9SDimitry Andric Register FlatScrInit = AMDGPU::NoRegister; 412e8d8bef9SDimitry Andric ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF); 413e8d8bef9SDimitry Andric unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2; 414e8d8bef9SDimitry Andric AllSGPR64s = AllSGPR64s.slice( 415e8d8bef9SDimitry Andric std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded)); 416e8d8bef9SDimitry Andric Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); 417e8d8bef9SDimitry Andric for (MCPhysReg Reg : AllSGPR64s) { 4185f757f3fSDimitry Andric if 
(LiveUnits.available(Reg) && !MRI.isReserved(Reg) && 4195f757f3fSDimitry Andric MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) { 420e8d8bef9SDimitry Andric FlatScrInit = Reg; 421e8d8bef9SDimitry Andric break; 422e8d8bef9SDimitry Andric } 423e8d8bef9SDimitry Andric } 424e8d8bef9SDimitry Andric assert(FlatScrInit && "Failed to find free register for scratch init"); 425e8d8bef9SDimitry Andric 426e8d8bef9SDimitry Andric FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0); 427e8d8bef9SDimitry Andric FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1); 428e8d8bef9SDimitry Andric 429e8d8bef9SDimitry Andric buildGitPtr(MBB, I, DL, TII, FlatScrInit); 430e8d8bef9SDimitry Andric 431e8d8bef9SDimitry Andric // We now have the GIT ptr - now get the scratch descriptor from the entry 432e8d8bef9SDimitry Andric // at offset 0 (or offset 16 for a compute shader). 433e8d8bef9SDimitry Andric MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); 434e8d8bef9SDimitry Andric const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); 435e8d8bef9SDimitry Andric auto *MMO = MF.getMachineMemOperand( 436e8d8bef9SDimitry Andric PtrInfo, 437e8d8bef9SDimitry Andric MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | 438e8d8bef9SDimitry Andric MachineMemOperand::MODereferenceable, 439e8d8bef9SDimitry Andric 8, Align(4)); 440e8d8bef9SDimitry Andric unsigned Offset = 441e8d8bef9SDimitry Andric MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 
16 : 0; 442e8d8bef9SDimitry Andric const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>(); 443e8d8bef9SDimitry Andric unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); 444e8d8bef9SDimitry Andric BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit) 445e8d8bef9SDimitry Andric .addReg(FlatScrInit) 446e8d8bef9SDimitry Andric .addImm(EncodedOffset) // offset 447fe6060f1SDimitry Andric .addImm(0) // cpol 448e8d8bef9SDimitry Andric .addMemOperand(MMO); 449e8d8bef9SDimitry Andric 450e8d8bef9SDimitry Andric // Mask the offset in [47:0] of the descriptor 451e8d8bef9SDimitry Andric const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32); 452349cc55cSDimitry Andric auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi) 453e8d8bef9SDimitry Andric .addReg(FlatScrInitHi) 454e8d8bef9SDimitry Andric .addImm(0xffff); 455349cc55cSDimitry Andric And->getOperand(3).setIsDead(); // Mark SCC as dead. 456e8d8bef9SDimitry Andric } else { 4578bcb0991SDimitry Andric Register FlatScratchInitReg = 4588bcb0991SDimitry Andric MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT); 459e8d8bef9SDimitry Andric assert(FlatScratchInitReg); 4600b57cec5SDimitry Andric 4610b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 4620b57cec5SDimitry Andric MRI.addLiveIn(FlatScratchInitReg); 4630b57cec5SDimitry Andric MBB.addLiveIn(FlatScratchInitReg); 4640b57cec5SDimitry Andric 465e8d8bef9SDimitry Andric FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0); 466e8d8bef9SDimitry Andric FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1); 467e8d8bef9SDimitry Andric } 4680b57cec5SDimitry Andric 4690b57cec5SDimitry Andric // Do a 64-bit pointer add. 
4700b57cec5SDimitry Andric if (ST.flatScratchIsPointer()) { 4710b57cec5SDimitry Andric if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { 4720b57cec5SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) 4730b57cec5SDimitry Andric .addReg(FlatScrInitLo) 4740b57cec5SDimitry Andric .addReg(ScratchWaveOffsetReg); 475349cc55cSDimitry Andric auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), 476349cc55cSDimitry Andric FlatScrInitHi) 4770b57cec5SDimitry Andric .addReg(FlatScrInitHi) 4780b57cec5SDimitry Andric .addImm(0); 479349cc55cSDimitry Andric Addc->getOperand(3).setIsDead(); // Mark SCC as dead. 480349cc55cSDimitry Andric 481*0fca6ea1SDimitry Andric using namespace AMDGPU::Hwreg; 482*0fca6ea1SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)) 483*0fca6ea1SDimitry Andric .addReg(FlatScrInitLo) 484*0fca6ea1SDimitry Andric .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32))); 485*0fca6ea1SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)) 486*0fca6ea1SDimitry Andric .addReg(FlatScrInitHi) 487*0fca6ea1SDimitry Andric .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32))); 4880b57cec5SDimitry Andric return; 4890b57cec5SDimitry Andric } 4900b57cec5SDimitry Andric 491e8d8bef9SDimitry Andric // For GFX9. 4920b57cec5SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO) 4930b57cec5SDimitry Andric .addReg(FlatScrInitLo) 4940b57cec5SDimitry Andric .addReg(ScratchWaveOffsetReg); 495349cc55cSDimitry Andric auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), 496349cc55cSDimitry Andric AMDGPU::FLAT_SCR_HI) 4970b57cec5SDimitry Andric .addReg(FlatScrInitHi) 4980b57cec5SDimitry Andric .addImm(0); 499349cc55cSDimitry Andric Addc->getOperand(3).setIsDead(); // Mark SCC as dead. 
5000b57cec5SDimitry Andric 5010b57cec5SDimitry Andric return; 5020b57cec5SDimitry Andric } 5030b57cec5SDimitry Andric 504e8d8bef9SDimitry Andric assert(ST.getGeneration() < AMDGPUSubtarget::GFX9); 5050b57cec5SDimitry Andric 5060b57cec5SDimitry Andric // Copy the size in bytes. 5070b57cec5SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO) 5080b57cec5SDimitry Andric .addReg(FlatScrInitHi, RegState::Kill); 5090b57cec5SDimitry Andric 5100b57cec5SDimitry Andric // Add wave offset in bytes to private base offset. 5110b57cec5SDimitry Andric // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init. 512fe6060f1SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo) 5130b57cec5SDimitry Andric .addReg(FlatScrInitLo) 5140b57cec5SDimitry Andric .addReg(ScratchWaveOffsetReg); 5150b57cec5SDimitry Andric 5160b57cec5SDimitry Andric // Convert offset to 256-byte units. 517349cc55cSDimitry Andric auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), 518349cc55cSDimitry Andric AMDGPU::FLAT_SCR_HI) 5190b57cec5SDimitry Andric .addReg(FlatScrInitLo, RegState::Kill) 5200b57cec5SDimitry Andric .addImm(8); 521bdd1243dSDimitry Andric LShr->getOperand(3).setIsDead(); // Mark SCC as dead. 5220b57cec5SDimitry Andric } 5230b57cec5SDimitry Andric 524e8d8bef9SDimitry Andric // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not 525e8d8bef9SDimitry Andric // memory. They should have been removed by now. 
// Return true if every frame object in \p MFI has been marked dead, i.e. no
// real stack allocation remains.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

// Shift down registers reserved for the scratch RSRC.
//
// Returns the SGPR128 that should hold the scratch resource descriptor for
// this entry function, or Register() when no SRSRC is actually needed (no
// uses and no live stack objects). May rewrite all uses of the originally
// reserved register via replaceRegWith.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  // Nothing to do if there is no SRSRC, or it is provably unused.
  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  // Preloaded SGPRs are counted in units of 4 since we search SGPR128 tuples.
  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      MRI.reserveReg(Reg, TRI);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

// Frame sizes are tracked per lane; when MUBUF scratch (non-flat) is used the
// wave-level offset must be scaled by the wavefront size, while flat scratch
// addressing is already per-lane (factor 1).
static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}

// Emit the entry-function (kernel) prologue: pick and set up the scratch
// resource descriptor, locate/copy the scratch wave offset, materialize
// FP/SP when required, and initialize flat scratch if needed.
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found clobbers the scratch
  // wave offset, which may be in a fixed SGPR or a free SGPR chosen by
  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
  // free SGPR.
  Register ScratchWaveOffsetReg;
  if (PreloadedScratchWaveOffsetReg &&
      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }

    // FIXME: We can spill incoming arguments and restore at the end of the
    // prolog.
    if (!ScratchWaveOffsetReg)
      report_fatal_error(
          "could not find temporary scratch offset register in prolog");
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);

  // In an entry function the frame pointer, when required, is simply zero.
  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  // SP starts at the (scaled) static frame size; see getScratchScaleFactor.
  if (requiresStackPointerReference(MF)) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
  }

  // Flat scratch must be initialized when FLAT_SCR is referenced, when calls
  // are present, or when live stack objects exist under flat-scratch mode.
  bool NeedsFlatScratchInit =
      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`.
//
// Fills \p ScratchRsrcReg with a valid scratch resource descriptor for the
// current OS/ABI (PAL via GIT load, Mesa/unknown via relocations or implicit
// buffer pointer, HSA/Mesa via the preloaded SRSRC) and then folds
// \p ScratchWaveOffsetReg into the descriptor's 48-bit base address.
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                       MachineMemOperand::MOInvariant |
                                       MachineMemOperand::MODereferenceable,
                                       16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
      .addReg(Rsrc01)
      .addImm(EncodedOffset) // offset
      .addImm(0) // cpol
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
      .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of
    // descriptor / bits 22:21 of third sub-reg will be 0b11)
    // If the shader is actually wave32 we have to modify the const_index_stride
    // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
    // reason the driver does this is that there can be cases where it presents
    // 2 shaders with different wave size (e.g. VsFs).
    // TODO: convert to using SCRATCH instructions or multiple SRD buffers
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->getUserSGPRInfo().hasImplicitBufferPtr()) {
      // The base pointer comes from the implicit buffer pointer user SGPR,
      // either copied directly (compute) or loaded through it (graphics).
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addImm(0) // offset
          .addImm(0) // cpol
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      // No implicit buffer pointer: resolve the base address via external
      // symbols patched by the loader.
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
      .addReg(ScratchRsrcSub1)
      .addImm(0)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
}

// Report which stack IDs this target can lower. SGPRSpill is supported (it
// is resolved to VGPR lanes, not memory); scalable-vector and Wasm-local IDs
// have no meaning for AMDGPU.
bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::ScalableVector:
  case TargetStackID::WasmLocal:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

// Activate only the inactive lanes when \p EnableInactiveLanes is true.
// Otherwise, activate all lanes. It returns the saved exec.
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, bool IsProlog,
                                     bool EnableInactiveLanes) {
  Register ScratchExecCopy;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);

  // The copy of EXEC must itself live in a register that is free here and
  // not callee-saved (see findScratchNonCalleeSaveRegister).
  ScratchExecCopy = findScratchNonCalleeSaveRegister(
      MRI, LiveUnits, *TRI.getWaveMaskRegClass());
  if (!ScratchExecCopy)
    report_fatal_error("failed to find free scratch register");

  LiveUnits.addReg(ScratchExecCopy);

  // S_XOR_SAVEEXEC flips to the inactive lanes; S_OR_SAVEEXEC with -1
  // activates every lane. Either way the old EXEC lands in ScratchExecCopy.
  const unsigned SaveExecOpc =
      ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
                                           : AMDGPU::S_OR_SAVEEXEC_B32)
                    : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
                                           : AMDGPU::S_OR_SAVEEXEC_B64);
  auto SaveExec =
      BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
  SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.

  return ScratchExecCopy;
}

// Emit the prologue stores for all callee-saved and whole-wave-mode (WWM)
// registers, including prolog/epilog SGPR spills. \p FramePtrRegScratchCopy,
// when valid, is the temporary holding the caller's FP value to be spilled in
// place of the frame pointer register itself.
void SIFrameLowering::emitCSRSpillStores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
  // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
  // might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ true, /*EnableInactiveLanes*/ true);

  auto StoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                           VGPR, FI, FrameReg);
        }
      };

  StoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      // EXEC was already saved above; just enable every lane for the CSR
      // VGPR stores.
      unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
      BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ true,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  StoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveUnits.addReg(ScratchExecCopy);
  }

  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP spill:
    // Skip if FP is saved to a scratch SGPR, the save has already been emitted.
    // Otherwise, FP has been moved to a temporary register and spill it
    // instead.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.save();
  }

  // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
  // such scratch registers live throughout the function.
  SmallVector<Register, 1> ScratchSGPRs;
  FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
  if (!ScratchSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : ScratchSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveUnits.empty()) {
      for (MCPhysReg Reg : ScratchSGPRs)
        LiveUnits.addReg(Reg);
    }
  }
}

// Emit the epilogue restores mirroring emitCSRSpillStores: SGPR spills first,
// then WWM VGPRs (inactive lanes of scratch registers, all lanes of
// callee-saved ones).
void SIFrameLowering::emitCSRSpillRestores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP restore:
    // Skip if FP needs to be restored from the scratch SGPR. Otherwise, restore
    // the FP value to a temporary register. The frame pointer should be
    // overwritten only at the end when all other spills are restored from
    // current frame.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.restore();
  }

  // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
  // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to
  // this, we might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ false, /*EnableInactiveLanes*/ true);

  auto RestoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                             VGPR, FI, FrameReg);
        }
      };

  RestoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      // EXEC was already saved above; just enable every lane for the CSR
      // VGPR restores.
      unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
      BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ false,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  RestoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LiveRegUnits LiveUnits;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  // DebugLoc must be unknown since the first instruction with DebugLoc is used
  // to determine the end of the prologue.
10980b57cec5SDimitry Andric DebugLoc DL; 10990b57cec5SDimitry Andric 11005f757f3fSDimitry Andric if (FuncInfo->isChainFunction()) { 11015f757f3fSDimitry Andric // Functions with the amdgpu_cs_chain[_preserve] CC don't receive a SP, but 11025f757f3fSDimitry Andric // are free to set one up if they need it. 11035f757f3fSDimitry Andric bool UseSP = requiresStackPointerReference(MF); 11045f757f3fSDimitry Andric if (UseSP) { 11055f757f3fSDimitry Andric assert(StackPtrReg != AMDGPU::SP_REG); 11065f757f3fSDimitry Andric 11075f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg) 11085f757f3fSDimitry Andric .addImm(MFI.getStackSize() * getScratchScaleFactor(ST)); 11095f757f3fSDimitry Andric } 11105f757f3fSDimitry Andric } 11115f757f3fSDimitry Andric 11120b57cec5SDimitry Andric bool HasFP = false; 11135ffd83dbSDimitry Andric bool HasBP = false; 11140b57cec5SDimitry Andric uint32_t NumBytes = MFI.getStackSize(); 11150b57cec5SDimitry Andric uint32_t RoundedSize = NumBytes; 11165ffd83dbSDimitry Andric 1117bdd1243dSDimitry Andric if (TRI.hasStackRealignment(MF)) 1118bdd1243dSDimitry Andric HasFP = true; 1119fe6060f1SDimitry Andric 1120bdd1243dSDimitry Andric Register FramePtrRegScratchCopy; 1121bdd1243dSDimitry Andric if (!HasFP && !hasFP(MF)) { 1122bdd1243dSDimitry Andric // Emit the CSR spill stores with SP base register. 11235f757f3fSDimitry Andric emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, 11245f757f3fSDimitry Andric FuncInfo->isChainFunction() ? Register() : StackPtrReg, 1125bdd1243dSDimitry Andric FramePtrRegScratchCopy); 1126bdd1243dSDimitry Andric } else { 1127bdd1243dSDimitry Andric // CSR spill stores will use FP as base register. 
1128bdd1243dSDimitry Andric Register SGPRForFPSaveRestoreCopy = 1129bdd1243dSDimitry Andric FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg); 1130fe6060f1SDimitry Andric 11315f757f3fSDimitry Andric initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true); 1132bdd1243dSDimitry Andric if (SGPRForFPSaveRestoreCopy) { 1133bdd1243dSDimitry Andric // Copy FP to the scratch register now and emit the CFI entry. It avoids 1134bdd1243dSDimitry Andric // the extra FP copy needed in the other two cases when FP is spilled to 1135bdd1243dSDimitry Andric // memory or to a VGPR lane. 1136bdd1243dSDimitry Andric PrologEpilogSGPRSpillBuilder SB( 1137bdd1243dSDimitry Andric FramePtrReg, 1138bdd1243dSDimitry Andric FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI, 11395f757f3fSDimitry Andric DL, TII, TRI, LiveUnits, FramePtrReg); 1140bdd1243dSDimitry Andric SB.save(); 11415f757f3fSDimitry Andric LiveUnits.addReg(SGPRForFPSaveRestoreCopy); 1142bdd1243dSDimitry Andric } else { 1143bdd1243dSDimitry Andric // Copy FP into a new scratch register so that its previous value can be 1144bdd1243dSDimitry Andric // spilled after setting up the new frame. 
1145bdd1243dSDimitry Andric FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister( 11465f757f3fSDimitry Andric MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass); 1147bdd1243dSDimitry Andric if (!FramePtrRegScratchCopy) 1148fe6060f1SDimitry Andric report_fatal_error("failed to find free scratch register"); 1149fe6060f1SDimitry Andric 11505f757f3fSDimitry Andric LiveUnits.addReg(FramePtrRegScratchCopy); 1151bdd1243dSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy) 1152bdd1243dSDimitry Andric .addReg(FramePtrReg); 1153fe6060f1SDimitry Andric } 11545ffd83dbSDimitry Andric } 11555ffd83dbSDimitry Andric 1156bdd1243dSDimitry Andric if (HasFP) { 11575ffd83dbSDimitry Andric const unsigned Alignment = MFI.getMaxAlign().value(); 11580b57cec5SDimitry Andric 11590b57cec5SDimitry Andric RoundedSize += Alignment; 11605f757f3fSDimitry Andric if (LiveUnits.empty()) { 11615f757f3fSDimitry Andric LiveUnits.init(TRI); 11625f757f3fSDimitry Andric LiveUnits.addLiveIns(MBB); 11630b57cec5SDimitry Andric } 11640b57cec5SDimitry Andric 1165fe6060f1SDimitry Andric // s_add_i32 s33, s32, NumBytes 1166fe6060f1SDimitry Andric // s_and_b32 s33, s33, 0b111...0000 1167fe6060f1SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg) 11680b57cec5SDimitry Andric .addReg(StackPtrReg) 1169e8d8bef9SDimitry Andric .addImm((Alignment - 1) * getScratchScaleFactor(ST)) 11700b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 1171349cc55cSDimitry Andric auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg) 1172fe6060f1SDimitry Andric .addReg(FramePtrReg, RegState::Kill) 1173e8d8bef9SDimitry Andric .addImm(-Alignment * getScratchScaleFactor(ST)) 11740b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 1175349cc55cSDimitry Andric And->getOperand(3).setIsDead(); // Mark SCC as dead. 
11760b57cec5SDimitry Andric FuncInfo->setIsStackRealigned(true); 11770b57cec5SDimitry Andric } else if ((HasFP = hasFP(MF))) { 11785ffd83dbSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) 11795ffd83dbSDimitry Andric .addReg(StackPtrReg) 11805ffd83dbSDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 11815ffd83dbSDimitry Andric } 11825ffd83dbSDimitry Andric 1183bdd1243dSDimitry Andric // If FP is used, emit the CSR spills with FP base register. 1184bdd1243dSDimitry Andric if (HasFP) { 11855f757f3fSDimitry Andric emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg, 1186bdd1243dSDimitry Andric FramePtrRegScratchCopy); 1187bdd1243dSDimitry Andric if (FramePtrRegScratchCopy) 11885f757f3fSDimitry Andric LiveUnits.removeReg(FramePtrRegScratchCopy); 1189bdd1243dSDimitry Andric } 1190bdd1243dSDimitry Andric 11910b57cec5SDimitry Andric // If we need a base pointer, set it up here. It's whatever the value of 11920b57cec5SDimitry Andric // the stack pointer is at this point. Any variable size objects will be 11930b57cec5SDimitry Andric // allocated after this, so we can still use the base pointer to reference 11945ffd83dbSDimitry Andric // the incoming arguments. 11955ffd83dbSDimitry Andric if ((HasBP = TRI.hasBasePointer(MF))) { 11965ffd83dbSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg) 11970b57cec5SDimitry Andric .addReg(StackPtrReg) 11980b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 11990b57cec5SDimitry Andric } 12000b57cec5SDimitry Andric 12010b57cec5SDimitry Andric if (HasFP && RoundedSize != 0) { 1202349cc55cSDimitry Andric auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) 12030b57cec5SDimitry Andric .addReg(StackPtrReg) 1204e8d8bef9SDimitry Andric .addImm(RoundedSize * getScratchScaleFactor(ST)) 12050b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 1206349cc55cSDimitry Andric Add->getOperand(3).setIsDead(); // Mark SCC as dead. 
12070b57cec5SDimitry Andric } 12080b57cec5SDimitry Andric 1209bdd1243dSDimitry Andric bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg); 1210bdd1243dSDimitry Andric (void)FPSaved; 1211bdd1243dSDimitry Andric assert((!HasFP || FPSaved) && 12120b57cec5SDimitry Andric "Needed to save FP but didn't save it anywhere"); 12130b57cec5SDimitry Andric 1214349cc55cSDimitry Andric // If we allow spilling to AGPRs we may have saved FP but then spill 1215349cc55cSDimitry Andric // everything into AGPRs instead of the stack. 1216bdd1243dSDimitry Andric assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) && 12170b57cec5SDimitry Andric "Saved FP but didn't need it"); 12185ffd83dbSDimitry Andric 1219bdd1243dSDimitry Andric bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg); 1220bdd1243dSDimitry Andric (void)BPSaved; 1221bdd1243dSDimitry Andric assert((!HasBP || BPSaved) && 12225ffd83dbSDimitry Andric "Needed to save BP but didn't save it anywhere"); 12235ffd83dbSDimitry Andric 1224bdd1243dSDimitry Andric assert((HasBP || !BPSaved) && "Saved BP but didn't need it"); 12250b57cec5SDimitry Andric } 12260b57cec5SDimitry Andric 12270b57cec5SDimitry Andric void SIFrameLowering::emitEpilogue(MachineFunction &MF, 12280b57cec5SDimitry Andric MachineBasicBlock &MBB) const { 12290b57cec5SDimitry Andric const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 12300b57cec5SDimitry Andric if (FuncInfo->isEntryFunction()) 12310b57cec5SDimitry Andric return; 12320b57cec5SDimitry Andric 12330b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 12340b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 12355ffd83dbSDimitry Andric const SIRegisterInfo &TRI = TII->getRegisterInfo(); 1236bdd1243dSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 12375f757f3fSDimitry Andric LiveRegUnits LiveUnits; 1238bdd1243dSDimitry Andric // Get the insert location for the epilogue. 
If there were no terminators in 1239bdd1243dSDimitry Andric // the block, get the last instruction. 1240bdd1243dSDimitry Andric MachineBasicBlock::iterator MBBI = MBB.end(); 12410b57cec5SDimitry Andric DebugLoc DL; 1242bdd1243dSDimitry Andric if (!MBB.empty()) { 1243bdd1243dSDimitry Andric MBBI = MBB.getLastNonDebugInstr(); 1244bdd1243dSDimitry Andric if (MBBI != MBB.end()) 1245bdd1243dSDimitry Andric DL = MBBI->getDebugLoc(); 1246bdd1243dSDimitry Andric 1247bdd1243dSDimitry Andric MBBI = MBB.getFirstTerminator(); 1248bdd1243dSDimitry Andric } 12490b57cec5SDimitry Andric 12500b57cec5SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 12510b57cec5SDimitry Andric uint32_t NumBytes = MFI.getStackSize(); 12525ffd83dbSDimitry Andric uint32_t RoundedSize = FuncInfo->isStackRealigned() 12535ffd83dbSDimitry Andric ? NumBytes + MFI.getMaxAlign().value() 12545ffd83dbSDimitry Andric : NumBytes; 12555ffd83dbSDimitry Andric const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); 1256bdd1243dSDimitry Andric Register FramePtrReg = FuncInfo->getFrameOffsetReg(); 1257bdd1243dSDimitry Andric bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg); 12585ffd83dbSDimitry Andric 1259bdd1243dSDimitry Andric Register FramePtrRegScratchCopy; 1260bdd1243dSDimitry Andric Register SGPRForFPSaveRestoreCopy = 1261bdd1243dSDimitry Andric FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg); 1262bdd1243dSDimitry Andric if (FPSaved) { 1263bdd1243dSDimitry Andric // CSR spill restores should use FP as base register. If 1264bdd1243dSDimitry Andric // SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP 1265bdd1243dSDimitry Andric // into a new scratch register and copy to FP later when other registers are 1266bdd1243dSDimitry Andric // restored from the current stack frame. 
12675f757f3fSDimitry Andric initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false); 1268bdd1243dSDimitry Andric if (SGPRForFPSaveRestoreCopy) { 12695f757f3fSDimitry Andric LiveUnits.addReg(SGPRForFPSaveRestoreCopy); 1270bdd1243dSDimitry Andric } else { 1271bdd1243dSDimitry Andric FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister( 12725f757f3fSDimitry Andric MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass); 1273bdd1243dSDimitry Andric if (!FramePtrRegScratchCopy) 1274bdd1243dSDimitry Andric report_fatal_error("failed to find free scratch register"); 1275bdd1243dSDimitry Andric 12765f757f3fSDimitry Andric LiveUnits.addReg(FramePtrRegScratchCopy); 1277bdd1243dSDimitry Andric } 1278bdd1243dSDimitry Andric 12795f757f3fSDimitry Andric emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg, 1280bdd1243dSDimitry Andric FramePtrRegScratchCopy); 1281bdd1243dSDimitry Andric } 12820b57cec5SDimitry Andric 12830b57cec5SDimitry Andric if (RoundedSize != 0 && hasFP(MF)) { 1284349cc55cSDimitry Andric auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) 12850b57cec5SDimitry Andric .addReg(StackPtrReg) 1286fe6060f1SDimitry Andric .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST))) 12870b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameDestroy); 1288349cc55cSDimitry Andric Add->getOperand(3).setIsDead(); // Mark SCC as dead. 12890b57cec5SDimitry Andric } 12900b57cec5SDimitry Andric 1291bdd1243dSDimitry Andric if (FPSaved) { 1292bdd1243dSDimitry Andric // Insert the copy to restore FP. 1293bdd1243dSDimitry Andric Register SrcReg = SGPRForFPSaveRestoreCopy ? 
SGPRForFPSaveRestoreCopy 1294bdd1243dSDimitry Andric : FramePtrRegScratchCopy; 1295bdd1243dSDimitry Andric MachineInstrBuilder MIB = 12965ffd83dbSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) 1297bdd1243dSDimitry Andric .addReg(SrcReg); 1298bdd1243dSDimitry Andric if (SGPRForFPSaveRestoreCopy) 1299bdd1243dSDimitry Andric MIB.setMIFlag(MachineInstr::FrameDestroy); 1300bdd1243dSDimitry Andric } else { 1301bdd1243dSDimitry Andric // Insert the CSR spill restores with SP as the base register. 13025f757f3fSDimitry Andric emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg, 1303bdd1243dSDimitry Andric FramePtrRegScratchCopy); 13040b57cec5SDimitry Andric } 13050b57cec5SDimitry Andric } 13060b57cec5SDimitry Andric 13070b57cec5SDimitry Andric #ifndef NDEBUG 1308e8d8bef9SDimitry Andric static bool allSGPRSpillsAreDead(const MachineFunction &MF) { 1309e8d8bef9SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 1310e8d8bef9SDimitry Andric const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 13110b57cec5SDimitry Andric for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); 13120b57cec5SDimitry Andric I != E; ++I) { 13130b57cec5SDimitry Andric if (!MFI.isDeadObjectIndex(I) && 13140b57cec5SDimitry Andric MFI.getStackID(I) == TargetStackID::SGPRSpill && 1315bdd1243dSDimitry Andric !FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) { 13160b57cec5SDimitry Andric return false; 13170b57cec5SDimitry Andric } 13180b57cec5SDimitry Andric } 13190b57cec5SDimitry Andric 13200b57cec5SDimitry Andric return true; 13210b57cec5SDimitry Andric } 13220b57cec5SDimitry Andric #endif 13230b57cec5SDimitry Andric 1324e8d8bef9SDimitry Andric StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, 1325e8d8bef9SDimitry Andric int FI, 13265ffd83dbSDimitry Andric Register &FrameReg) const { 13270b57cec5SDimitry Andric const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); 
13280b57cec5SDimitry Andric 13290b57cec5SDimitry Andric FrameReg = RI->getFrameRegister(MF); 1330e8d8bef9SDimitry Andric return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI)); 13310b57cec5SDimitry Andric } 13320b57cec5SDimitry Andric 13330b57cec5SDimitry Andric void SIFrameLowering::processFunctionBeforeFrameFinalized( 13340b57cec5SDimitry Andric MachineFunction &MF, 13350b57cec5SDimitry Andric RegScavenger *RS) const { 13360b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 13370b57cec5SDimitry Andric 13380b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1339fe6060f1SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 13400b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 1341fe6060f1SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 13420b57cec5SDimitry Andric SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 13430b57cec5SDimitry Andric 1344bdd1243dSDimitry Andric // Allocate spill slots for WWM reserved VGPRs. 13455f757f3fSDimitry Andric // For chain functions, we only need to do this if we have calls to 13465f757f3fSDimitry Andric // llvm.amdgcn.cs.chain. 
13475f757f3fSDimitry Andric bool IsChainWithoutCalls = 13485f757f3fSDimitry Andric FuncInfo->isChainFunction() && !MF.getFrameInfo().hasTailCall(); 13495f757f3fSDimitry Andric if (!FuncInfo->isEntryFunction() && !IsChainWithoutCalls) { 1350bdd1243dSDimitry Andric for (Register Reg : FuncInfo->getWWMReservedRegs()) { 1351bdd1243dSDimitry Andric const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg); 1352bdd1243dSDimitry Andric FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC), 1353bdd1243dSDimitry Andric TRI->getSpillAlign(*RC)); 1354bdd1243dSDimitry Andric } 135581ad6265SDimitry Andric } 135681ad6265SDimitry Andric 1357fe6060f1SDimitry Andric const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() 1358fe6060f1SDimitry Andric && EnableSpillVGPRToAGPR; 1359fe6060f1SDimitry Andric 1360fe6060f1SDimitry Andric if (SpillVGPRToAGPR) { 1361fe6060f1SDimitry Andric // To track the spill frame indices handled in this pass. 1362fe6060f1SDimitry Andric BitVector SpillFIs(MFI.getObjectIndexEnd(), false); 13630eae32dcSDimitry Andric BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false); 1364fe6060f1SDimitry Andric 1365fe6060f1SDimitry Andric bool SeenDbgInstr = false; 1366fe6060f1SDimitry Andric 1367fe6060f1SDimitry Andric for (MachineBasicBlock &MBB : MF) { 1368349cc55cSDimitry Andric for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { 13690eae32dcSDimitry Andric int FrameIndex; 1370fe6060f1SDimitry Andric if (MI.isDebugInstr()) 1371fe6060f1SDimitry Andric SeenDbgInstr = true; 1372fe6060f1SDimitry Andric 1373fe6060f1SDimitry Andric if (TII->isVGPRSpill(MI)) { 1374fe6060f1SDimitry Andric // Try to eliminate stack used by VGPR spills before frame 1375fe6060f1SDimitry Andric // finalization. 
1376fe6060f1SDimitry Andric unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 1377fe6060f1SDimitry Andric AMDGPU::OpName::vaddr); 1378fe6060f1SDimitry Andric int FI = MI.getOperand(FIOp).getIndex(); 1379fe6060f1SDimitry Andric Register VReg = 1380fe6060f1SDimitry Andric TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); 1381fe6060f1SDimitry Andric if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, 1382fe6060f1SDimitry Andric TRI->isAGPR(MRI, VReg))) { 138306c3fb27SDimitry Andric assert(RS != nullptr); 13845f757f3fSDimitry Andric RS->enterBasicBlockEnd(MBB); 13855f757f3fSDimitry Andric RS->backward(std::next(MI.getIterator())); 1386fe6060f1SDimitry Andric TRI->eliminateFrameIndex(MI, 0, FIOp, RS); 1387fe6060f1SDimitry Andric SpillFIs.set(FI); 1388fe6060f1SDimitry Andric continue; 1389fe6060f1SDimitry Andric } 13900eae32dcSDimitry Andric } else if (TII->isStoreToStackSlot(MI, FrameIndex) || 13910eae32dcSDimitry Andric TII->isLoadFromStackSlot(MI, FrameIndex)) 139204eeddc0SDimitry Andric if (!MFI.isFixedObjectIndex(FrameIndex)) 13930eae32dcSDimitry Andric NonVGPRSpillFIs.set(FrameIndex); 1394fe6060f1SDimitry Andric } 1395fe6060f1SDimitry Andric } 13960eae32dcSDimitry Andric 139781ad6265SDimitry Andric // Stack slot coloring may assign different objects to the same stack slot. 13980eae32dcSDimitry Andric // If not, then the VGPR to AGPR spill slot is dead. 
13990eae32dcSDimitry Andric for (unsigned FI : SpillFIs.set_bits()) 14000eae32dcSDimitry Andric if (!NonVGPRSpillFIs.test(FI)) 14010eae32dcSDimitry Andric FuncInfo->setVGPRToAGPRSpillDead(FI); 1402fe6060f1SDimitry Andric 1403fe6060f1SDimitry Andric for (MachineBasicBlock &MBB : MF) { 1404fe6060f1SDimitry Andric for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs()) 1405fe6060f1SDimitry Andric MBB.addLiveIn(Reg); 1406fe6060f1SDimitry Andric 1407fe6060f1SDimitry Andric for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs()) 1408fe6060f1SDimitry Andric MBB.addLiveIn(Reg); 1409fe6060f1SDimitry Andric 1410fe6060f1SDimitry Andric MBB.sortUniqueLiveIns(); 1411fe6060f1SDimitry Andric 1412fe6060f1SDimitry Andric if (!SpillFIs.empty() && SeenDbgInstr) { 1413fe6060f1SDimitry Andric // FIXME: The dead frame indices are replaced with a null register from 1414fe6060f1SDimitry Andric // the debug value instructions. We should instead, update it with the 1415fe6060f1SDimitry Andric // correct register value. But not sure the register value alone is 1416fe6060f1SDimitry Andric for (MachineInstr &MI : MBB) { 1417fe6060f1SDimitry Andric if (MI.isDebugValue() && MI.getOperand(0).isFI() && 1418bdd1243dSDimitry Andric !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) && 1419fe6060f1SDimitry Andric SpillFIs[MI.getOperand(0).getIndex()]) { 1420fe6060f1SDimitry Andric MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/); 1421fe6060f1SDimitry Andric } 1422fe6060f1SDimitry Andric } 1423fe6060f1SDimitry Andric } 1424fe6060f1SDimitry Andric } 1425fe6060f1SDimitry Andric } 1426fe6060f1SDimitry Andric 142781ad6265SDimitry Andric // At this point we've already allocated all spilled SGPRs to VGPRs if we 142881ad6265SDimitry Andric // can. Any remaining SGPR spills will go to memory, so move them back to the 142981ad6265SDimitry Andric // default stack. 
143081ad6265SDimitry Andric bool HaveSGPRToVMemSpill = 143181ad6265SDimitry Andric FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true); 1432e8d8bef9SDimitry Andric assert(allSGPRSpillsAreDead(MF) && 14330b57cec5SDimitry Andric "SGPR spill should have been removed in SILowerSGPRSpills"); 14340b57cec5SDimitry Andric 14350b57cec5SDimitry Andric // FIXME: The other checks should be redundant with allStackObjectsAreDead, 14360b57cec5SDimitry Andric // but currently hasNonSpillStackObjects is set only from source 14370b57cec5SDimitry Andric // allocas. Stack temps produced from legalization are not counted currently. 14380b57cec5SDimitry Andric if (!allStackObjectsAreDead(MFI)) { 14390b57cec5SDimitry Andric assert(RS && "RegScavenger required if spilling"); 14400b57cec5SDimitry Andric 1441fe6060f1SDimitry Andric // Add an emergency spill slot 1442fe6060f1SDimitry Andric RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI)); 144381ad6265SDimitry Andric 144481ad6265SDimitry Andric // If we are spilling SGPRs to memory with a large frame, we may need a 144581ad6265SDimitry Andric // second VGPR emergency frame index. 
144681ad6265SDimitry Andric if (HaveSGPRToVMemSpill && 144781ad6265SDimitry Andric allocateScavengingFrameIndexesNearIncomingSP(MF)) { 144881ad6265SDimitry Andric RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false)); 144981ad6265SDimitry Andric } 145081ad6265SDimitry Andric } 145181ad6265SDimitry Andric } 145281ad6265SDimitry Andric 145381ad6265SDimitry Andric void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced( 145481ad6265SDimitry Andric MachineFunction &MF, RegScavenger *RS) const { 145581ad6265SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 145681ad6265SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 145781ad6265SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 145881ad6265SDimitry Andric SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 145981ad6265SDimitry Andric 146081ad6265SDimitry Andric if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) { 146181ad6265SDimitry Andric // On gfx908, we had initially reserved highest available VGPR for AGPR 146281ad6265SDimitry Andric // copy. Now since we are done with RA, check if there exist an unused VGPR 146381ad6265SDimitry Andric // which is lower than the eariler reserved VGPR before RA. If one exist, 146481ad6265SDimitry Andric // use it for AGPR copy instead of one reserved before RA. 
146581ad6265SDimitry Andric Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy(); 146681ad6265SDimitry Andric Register UnusedLowVGPR = 146781ad6265SDimitry Andric TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); 146881ad6265SDimitry Andric if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) < 146981ad6265SDimitry Andric TRI->getHWRegIndex(VGPRForAGPRCopy))) { 147006c3fb27SDimitry Andric // Reserve this newly identified VGPR (for AGPR copy) 147106c3fb27SDimitry Andric // reserved registers should already be frozen at this point 147206c3fb27SDimitry Andric // so we can avoid calling MRI.freezeReservedRegs and just use 147306c3fb27SDimitry Andric // MRI.reserveReg 147481ad6265SDimitry Andric FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR); 147506c3fb27SDimitry Andric MRI.reserveReg(UnusedLowVGPR, TRI); 147681ad6265SDimitry Andric } 14770b57cec5SDimitry Andric } 147806c3fb27SDimitry Andric // We initally reserved the highest available SGPR pair for long branches 147906c3fb27SDimitry Andric // now, after RA, we shift down to a lower unused one if one exists 148006c3fb27SDimitry Andric Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg(); 148106c3fb27SDimitry Andric Register UnusedLowSGPR = 148206c3fb27SDimitry Andric TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF); 148306c3fb27SDimitry Andric // If LongBranchReservedReg is null then we didn't find a long branch 148406c3fb27SDimitry Andric // and never reserved a register to begin with so there is nothing to 148506c3fb27SDimitry Andric // shift down. Then if UnusedLowSGPR is null, there isn't available lower 148606c3fb27SDimitry Andric // register to use so just keep the original one we set. 
148706c3fb27SDimitry Andric if (LongBranchReservedReg && UnusedLowSGPR) { 148806c3fb27SDimitry Andric FuncInfo->setLongBranchReservedReg(UnusedLowSGPR); 148906c3fb27SDimitry Andric MRI.reserveReg(UnusedLowSGPR, TRI); 149006c3fb27SDimitry Andric } 14910b57cec5SDimitry Andric } 14920b57cec5SDimitry Andric 1493bdd1243dSDimitry Andric // The special SGPR spills like the one needed for FP, BP or any reserved 1494bdd1243dSDimitry Andric // registers delayed until frame lowering. 1495bdd1243dSDimitry Andric void SIFrameLowering::determinePrologEpilogSGPRSaves( 149606c3fb27SDimitry Andric MachineFunction &MF, BitVector &SavedVGPRs, 149706c3fb27SDimitry Andric bool NeedExecCopyReservedReg) const { 14985ffd83dbSDimitry Andric MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 149906c3fb27SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 1500bdd1243dSDimitry Andric SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 15010b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 15020b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo(); 15035f757f3fSDimitry Andric LiveRegUnits LiveUnits; 15045f757f3fSDimitry Andric LiveUnits.init(*TRI); 1505bdd1243dSDimitry Andric // Initially mark callee saved registers as used so we will not choose them 1506bdd1243dSDimitry Andric // while looking for scratch SGPRs. 
1507bdd1243dSDimitry Andric const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); 1508bdd1243dSDimitry Andric for (unsigned I = 0; CSRegs[I]; ++I) 15095f757f3fSDimitry Andric LiveUnits.addReg(CSRegs[I]); 15100b57cec5SDimitry Andric 151106c3fb27SDimitry Andric const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass(); 151206c3fb27SDimitry Andric 1513*0fca6ea1SDimitry Andric Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy(); 1514*0fca6ea1SDimitry Andric if (NeedExecCopyReservedReg || 1515*0fca6ea1SDimitry Andric (ReservedRegForExecCopy && 1516*0fca6ea1SDimitry Andric MRI.isPhysRegUsed(ReservedRegForExecCopy, /*SkipRegMaskTest=*/true))) { 1517*0fca6ea1SDimitry Andric MRI.reserveReg(ReservedRegForExecCopy, TRI); 15185f757f3fSDimitry Andric Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC); 151906c3fb27SDimitry Andric if (UnusedScratchReg) { 152006c3fb27SDimitry Andric // If found any unused scratch SGPR, reserve the register itself for Exec 152106c3fb27SDimitry Andric // copy and there is no need for any spills in that case. 152206c3fb27SDimitry Andric MFI->setSGPRForEXECCopy(UnusedScratchReg); 1523*0fca6ea1SDimitry Andric MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg); 15245f757f3fSDimitry Andric LiveUnits.addReg(UnusedScratchReg); 152506c3fb27SDimitry Andric } else { 152606c3fb27SDimitry Andric // Needs spill. 1527*0fca6ea1SDimitry Andric assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) && 152806c3fb27SDimitry Andric "Re-reserving spill slot for EXEC copy register"); 1529*0fca6ea1SDimitry Andric getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC, 153006c3fb27SDimitry Andric /*IncludeScratchCopy=*/false); 153106c3fb27SDimitry Andric } 1532*0fca6ea1SDimitry Andric } else if (ReservedRegForExecCopy) { 1533*0fca6ea1SDimitry Andric // Reset it at this point. There are no whole-wave copies and spills 1534*0fca6ea1SDimitry Andric // encountered. 
1535*0fca6ea1SDimitry Andric MFI->setSGPRForEXECCopy(AMDGPU::NoRegister); 153606c3fb27SDimitry Andric } 153706c3fb27SDimitry Andric 15380b57cec5SDimitry Andric // hasFP only knows about stack objects that already exist. We're now 15390b57cec5SDimitry Andric // determining the stack slots that will be created, so we have to predict 15400b57cec5SDimitry Andric // them. Stack objects force FP usage with calls. 15410b57cec5SDimitry Andric // 15420b57cec5SDimitry Andric // Note a new VGPR CSR may be introduced if one is used for the spill, but we 15430b57cec5SDimitry Andric // don't want to report it here. 15440b57cec5SDimitry Andric // 15450b57cec5SDimitry Andric // FIXME: Is this really hasReservedCallFrame? 15460b57cec5SDimitry Andric const bool WillHaveFP = 15470b57cec5SDimitry Andric FrameInfo.hasCalls() && 15480b57cec5SDimitry Andric (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo)); 15490b57cec5SDimitry Andric 15505ffd83dbSDimitry Andric if (WillHaveFP || hasFP(MF)) { 1551bdd1243dSDimitry Andric Register FramePtrReg = MFI->getFrameOffsetReg(); 1552bdd1243dSDimitry Andric assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) && 1553e8d8bef9SDimitry Andric "Re-reserving spill slot for FP"); 15545f757f3fSDimitry Andric getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg); 15550b57cec5SDimitry Andric } 15560b57cec5SDimitry Andric 15575ffd83dbSDimitry Andric if (TRI->hasBasePointer(MF)) { 1558bdd1243dSDimitry Andric Register BasePtrReg = TRI->getBaseRegister(); 1559bdd1243dSDimitry Andric assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) && 1560bdd1243dSDimitry Andric "Re-reserving spill slot for BP"); 15615f757f3fSDimitry Andric getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg); 1562bdd1243dSDimitry Andric } 1563bdd1243dSDimitry Andric } 1564e8d8bef9SDimitry Andric 1565bdd1243dSDimitry Andric // Only report VGPRs to generic code. 
// Only report VGPRs to generic code. SGPR callee saves are handled separately
// by determinePrologEpilogSGPRSaves / determineCalleeSavesSGPR.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // If this is a function with the amdgpu_cs_chain[_preserve] calling
  // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
  // we don't need to save and restore anything.
  if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
    return;

  MFI->shiftSpillPhysVGPRsToLowestRange(MF);

  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  // Entry functions have no callers, so nothing needs saving.
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  bool NeedExecCopyReservedReg = false;

  // Scan the whole function once to (a) allocate WWM spill slots for the lane
  // VGPRs used by SGPR-spill pseudos, (b) note whether an SGPR is needed to
  // save/restore EXEC around WWM spills, and (c) find a representative return
  // instruction so return-value registers can be excluded from the save set.
  MachineInstr *ReturnMI = nullptr;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // WRITELANE instructions used for SGPR spills can overwrite the inactive
      // lanes of VGPRs and callee must spill and restore them even if they are
      // marked Caller-saved.

      // TODO: Handle this elsewhere at an early point. Walking through all
      // MBBs here would be a bad heuristic. A better way should be by calling
      // allocateWWMSpill during the regalloc pipeline whenever a physical
      // register is allocated for the intended virtual registers.
      if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
        MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
      else if (MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
        MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
      else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
        NeedExecCopyReservedReg = true;
      else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
               MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
               (MFI->isChainFunction() &&
                TII->isChainCallOpcode(MI.getOpcode()))) {
        // We expect all return to be the same size.
        assert(!ReturnMI ||
               (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
                count_if(ReturnMI->operands(),
                         [](auto Op) { return Op.isReg(); })));
        ReturnMI = &MI;
      }
    }
  }

  // Remove any VGPRs used in the return value because these do not need to be
  // saved. This prevents CSR restore from clobbering return VGPRs.
  if (ReturnMI) {
    for (auto &Op : ReturnMI->operands()) {
      if (Op.isReg())
        SavedVGPRs.reset(Op.getReg());
    }
  }

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());

  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
  // In gfx908 there were no AGPR loads and stores and thus spilling also
  // requires a temporary VGPR.
  if (!ST.hasGFX90AInsts())
    SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());

  determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);

  // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
  // allow the default insertion to handle them.
  for (auto &Reg : MFI->getWWMSpills())
    SavedVGPRs.reset(Reg.first);

  // Mark all lane VGPRs as BB LiveIns.
  for (MachineBasicBlock &MBB : MF) {
    for (auto &Reg : MFI->getWWMSpills())
      MBB.addLiveIn(Reg.first);

    MBB.sortUniqueLiveIns();
  }
}

// Determine the SGPR callee saves. Mirrors determineCalleeSaves but keeps only
// scalar registers and handles SP/FP/return-address specially.
void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());

  // Snapshot the full save set (including vector regs) before filtering, so
  // WillHaveFP below can account for VGPR CSR spills that will create stack
  // objects.
  const BitVector AllSavedRegs = SavedRegs;
  SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());

  // We have to anticipate introducing CSR VGPR spills or spill of caller
  // save VGPR reserved for SGPR spills as we now always create stack entry
  // for it, if we don't have any stack objects already, since we require a FP
  // if there is a call and stack. We will allocate a VGPR for SGPR spills if
  // there are any SGPR spills. Whether they are CSR spills or otherwise.
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const bool WillHaveFP =
      FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());

  // FP will be specially managed like SP.
  if (WillHaveFP || hasFP(MF))
    SavedRegs.reset(MFI->getFrameOffsetReg());

  // Return address use with return instruction is hidden through the SI_RETURN
  // pseudo. Given that and since the IPRA computes actual register usage and
  // does not use CSR list, the clobbering of return address by function calls
  // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's register
  // usage collection. This will ensure save/restore of return address happens
  // in those scenarios.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  Register RetAddrReg = TRI->getReturnAddressReg(MF);
  if (!MFI->isEntryFunction() &&
      (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
    SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
    SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
  }
}

// Redirect FP/BP callee saves to their scratch-SGPR copies (if any were
// reserved) instead of memory spill slots. Always returns false so generic
// code still assigns slots for the remaining CSRs.
bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *RI = ST.getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg = RI->getBaseRegister();
  Register SGPRForFPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
  Register SGPRForBPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
  // Nothing to redirect; let the default spill-slot assignment run.
  if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
    return false;

  unsigned NumModifiedRegs = 0;

  if (SGPRForFPSaveRestoreCopy)
    NumModifiedRegs++;
  if (SGPRForBPSaveRestoreCopy)
    NumModifiedRegs++;

  for (auto &CS : CSI) {
    if (CS.getReg() == FramePtrReg && SGPRForFPSaveRestoreCopy) {
      CS.setDstReg(SGPRForFPSaveRestoreCopy);
      // NOTE(review): `if (--NumModifiedRegs) break;` breaks while a count
      // REMAINS, i.e. when both FP and BP copies exist the loop stops after
      // redirecting only the first one found. If the intent is "stop once all
      // are handled", this condition looks inverted (== 0) — confirm against
      // upstream before changing; FP and BP copies may never co-occur here.
      if (--NumModifiedRegs)
        break;
    } else if (CS.getReg() == BasePtrReg && SGPRForBPSaveRestoreCopy) {
      CS.setDstReg(SGPRForBPSaveRestoreCopy);
      if (--NumModifiedRegs)
        break;
    }
  }

  return false;
}

// Returns true when the emergency scavenging frame indexes must be placed near
// the incoming SP, i.e. when the estimated frame is too large for the
// MUBUF/flat-scratch immediate offset to reach slots at the far end.
bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
    const MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  uint64_t EstStackSize = MFI.estimateStackSize(MF);
  // Worst-case offset of a slot at the end of the estimated frame.
  uint64_t MaxOffset = EstStackSize - 1;

  // We need the emergency stack slots to be allocated in range of the
  // MUBUF/flat scratch immediate offset from the base register, so assign these
  // first at the incoming SP position.
  //
  // TODO: We could try sorting the objects to find a hole in the first bytes
  // rather than allocating as close to possible. This could save a lot of space
  // on frames with alignment requirements.
  if (ST.enableFlatScratch()) {
    if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
                               SIInstrFlags::FlatScratch))
      return false;
  } else {
    if (TII->isLegalMUBUFImmOffset(MaxOffset))
      return false;
  }

  return true;
}

// Replace ADJCALLSTACKUP/DOWN pseudos. With no reserved call frame, the stack
// pointer is adjusted in place (scaled to the wave-level scratch units);
// otherwise the pseudo is simply deleted.
MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    Amount = alignTo(Amount, getStackAlign());
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    Register SPReg = MFI->getStackPtrOffsetReg();

    // SP is tracked in per-lane byte units scaled by the wavefront size.
    Amount *= getScratchScaleFactor(ST);
    if (IsDestroy)
      Amount = -Amount;
    auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
                   .addReg(SPReg)
                   .addImm(Amount);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}

/// Returns true if the frame will require a reference to the stack pointer.
///
/// This is the set of conditions common to setting up the stack pointer in a
/// kernel, and for using a frame pointer in a callable function.
///
/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
/// references SP.
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
}

// The FP for kernels is always known 0, so we never really need to setup an
// explicit register for it. However, DisableFramePointerElim will force us to
// use a register for it.
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // For entry & chain functions we can use an immediate offset in most cases,
  // so the presence of calls doesn't imply we need a distinct frame pointer.
  if (MFI.hasCalls() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.

    // FIXME: This function is pretty broken, since it can be called before the
    // frame layout is determined or CSR spills are inserted.
    return MFI.getStackSize() != 0;
  }

  return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
         MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
             MF) ||
         MF.getTarget().Options.DisableFramePointerElim(MF);
}

// This is essentially a reduced version of hasFP for entry functions. Since the
// stack pointer is known 0 on entry to kernels, we never really need an FP
// register. We may need to initialize the stack pointer depending on the frame
// properties, which logically overlaps many of the cases where an ordinary
// function would require an FP.
// Also used for chain functions. While not technically entry functions, chain
// functions may need to set up a stack pointer in some situations.
bool SIFrameLowering::requiresStackPointerReference(
    const MachineFunction &MF) const {
  // Callable functions always require a stack pointer reference.
  assert((MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() ||
          MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) &&
         "only expected to call this for entry points and chain functions");

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Entry points ordinarily don't need to initialize SP. We have to set it up
  // for callees if there are any. Also note tail calls are impossible/don't
  // make any sense for kernels.
  if (MFI.hasCalls())
    return true;

  // We still need to initialize the SP if we're doing anything weird that
  // references the SP, like variable sized stack objects.
  return frameTriviallyRequiresSP(MFI);
}