xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===----------------------- SIFrameLowering.cpp --------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //==-----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "SIFrameLowering.h"
10e8d8bef9SDimitry Andric #include "AMDGPU.h"
11e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
120b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13e8d8bef9SDimitry Andric #include "SIMachineFunctionInfo.h"
145f757f3fSDimitry Andric #include "llvm/CodeGen/LiveRegUnits.h"
150b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
160b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterScavenging.h"
17e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
180b57cec5SDimitry Andric 
190b57cec5SDimitry Andric using namespace llvm;
200b57cec5SDimitry Andric 
210b57cec5SDimitry Andric #define DEBUG_TYPE "frame-info"
220b57cec5SDimitry Andric 
// Command-line override (default on): permit the spiller to place VGPR spills
// into free AGPRs instead of scratch memory.
static cl::opt<bool> EnableSpillVGPRToAGPR(
  "amdgpu-spill-vgpr-to-agpr",
  cl::desc("Enable spilling VGPRs to AGPRs"),
  cl::ReallyHidden,
  cl::init(true));
280b57cec5SDimitry Andric 
295f757f3fSDimitry Andric // Find a register matching \p RC from \p LiveUnits which is unused and
305f757f3fSDimitry Andric // available throughout the function. On failure, returns AMDGPU::NoRegister.
315f757f3fSDimitry Andric // TODO: Rewrite the loop here to iterate over MCRegUnits instead of
325f757f3fSDimitry Andric // MCRegisters. This should reduce the number of iterations and avoid redundant
335f757f3fSDimitry Andric // checking.
34bdd1243dSDimitry Andric static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
355f757f3fSDimitry Andric                                      const LiveRegUnits &LiveUnits,
36bdd1243dSDimitry Andric                                      const TargetRegisterClass &RC) {
37bdd1243dSDimitry Andric   for (MCRegister Reg : RC) {
385f757f3fSDimitry Andric     if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
395f757f3fSDimitry Andric         !MRI.isReserved(Reg))
40bdd1243dSDimitry Andric       return Reg;
41bdd1243dSDimitry Andric   }
42bdd1243dSDimitry Andric   return MCRegister();
43bdd1243dSDimitry Andric }
44bdd1243dSDimitry Andric 
45fe6060f1SDimitry Andric // Find a scratch register that we can use in the prologue. We avoid using
46fe6060f1SDimitry Andric // callee-save registers since they may appear to be free when this is called
47fe6060f1SDimitry Andric // from canUseAsPrologue (during shrink wrapping), but then no longer be free
48fe6060f1SDimitry Andric // when this is called from emitPrologue.
495f757f3fSDimitry Andric static MCRegister findScratchNonCalleeSaveRegister(
505f757f3fSDimitry Andric     MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
515f757f3fSDimitry Andric     const TargetRegisterClass &RC, bool Unused = false) {
520b57cec5SDimitry Andric   // Mark callee saved registers as used so we will not choose them.
530b57cec5SDimitry Andric   const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
540b57cec5SDimitry Andric   for (unsigned i = 0; CSRegs[i]; ++i)
555f757f3fSDimitry Andric     LiveUnits.addReg(CSRegs[i]);
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric   // We are looking for a register that can be used throughout the entire
580b57cec5SDimitry Andric   // function, so any use is unacceptable.
59bdd1243dSDimitry Andric   if (Unused)
605f757f3fSDimitry Andric     return findUnusedRegister(MRI, LiveUnits, RC);
61bdd1243dSDimitry Andric 
625ffd83dbSDimitry Andric   for (MCRegister Reg : RC) {
635f757f3fSDimitry Andric     if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
640b57cec5SDimitry Andric       return Reg;
650b57cec5SDimitry Andric   }
660b57cec5SDimitry Andric 
675ffd83dbSDimitry Andric   return MCRegister();
680b57cec5SDimitry Andric }
690b57cec5SDimitry Andric 
7006c3fb27SDimitry Andric /// Query target location for spilling SGPRs
7106c3fb27SDimitry Andric /// \p IncludeScratchCopy : Also look for free scratch SGPRs
72bdd1243dSDimitry Andric static void getVGPRSpillLaneOrTempRegister(
735f757f3fSDimitry Andric     MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
7406c3fb27SDimitry Andric     const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
7506c3fb27SDimitry Andric     bool IncludeScratchCopy = true) {
765ffd83dbSDimitry Andric   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
775ffd83dbSDimitry Andric   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
785ffd83dbSDimitry Andric 
795ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
805ffd83dbSDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
81bdd1243dSDimitry Andric   unsigned Size = TRI->getSpillSize(RC);
82bdd1243dSDimitry Andric   Align Alignment = TRI->getSpillAlign(RC);
835ffd83dbSDimitry Andric 
84bdd1243dSDimitry Andric   // We need to save and restore the given SGPR.
855ffd83dbSDimitry Andric 
8606c3fb27SDimitry Andric   Register ScratchSGPR;
875f757f3fSDimitry Andric   // 1: Try to save the given register into an unused scratch SGPR. The
885f757f3fSDimitry Andric   // LiveUnits should have all the callee saved registers marked as used. For
895f757f3fSDimitry Andric   // certain cases we skip copy to scratch SGPR.
9006c3fb27SDimitry Andric   if (IncludeScratchCopy)
915f757f3fSDimitry Andric     ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);
92bdd1243dSDimitry Andric 
93bdd1243dSDimitry Andric   if (!ScratchSGPR) {
94bdd1243dSDimitry Andric     int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
955ffd83dbSDimitry Andric                                          TargetStackID::SGPRSpill);
965ffd83dbSDimitry Andric 
97bdd1243dSDimitry Andric     if (TRI->spillSGPRToVGPR() &&
987a6dacacSDimitry Andric         MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
997a6dacacSDimitry Andric                                          /*IsPrologEpilog=*/true)) {
100bdd1243dSDimitry Andric       // 2: There's no free lane to spill, and no free register to save the
101bdd1243dSDimitry Andric       // SGPR, so we're forced to take another VGPR to use for the spill.
102bdd1243dSDimitry Andric       MFI->addToPrologEpilogSGPRSpills(
103bdd1243dSDimitry Andric           SGPR, PrologEpilogSGPRSaveRestoreInfo(
104bdd1243dSDimitry Andric                     SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));
105e8d8bef9SDimitry Andric 
1065f757f3fSDimitry Andric       LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
107bdd1243dSDimitry Andric                  dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
1085f757f3fSDimitry Andric                         << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
1095f757f3fSDimitry Andric                         << '\n';);
1105ffd83dbSDimitry Andric     } else {
111bdd1243dSDimitry Andric       // Remove dead <FI> index
112bdd1243dSDimitry Andric       MF.getFrameInfo().RemoveStackObject(FI);
113bdd1243dSDimitry Andric       // 3: If all else fails, spill the register to memory.
114bdd1243dSDimitry Andric       FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
115bdd1243dSDimitry Andric       MFI->addToPrologEpilogSGPRSpills(
116bdd1243dSDimitry Andric           SGPR,
117bdd1243dSDimitry Andric           PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::SPILL_TO_MEM, FI));
118bdd1243dSDimitry Andric       LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
119bdd1243dSDimitry Andric                         << printReg(SGPR, TRI) << '\n');
1205ffd83dbSDimitry Andric     }
1215ffd83dbSDimitry Andric   } else {
122bdd1243dSDimitry Andric     MFI->addToPrologEpilogSGPRSpills(
123bdd1243dSDimitry Andric         SGPR, PrologEpilogSGPRSaveRestoreInfo(
124bdd1243dSDimitry Andric                   SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
1255f757f3fSDimitry Andric     LiveUnits.addReg(ScratchSGPR);
126bdd1243dSDimitry Andric     LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
127bdd1243dSDimitry Andric                       << printReg(ScratchSGPR, TRI) << '\n');
1285ffd83dbSDimitry Andric   }
1290b57cec5SDimitry Andric }
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric // We need to specially emit stack operations here because a different frame
1320b57cec5SDimitry Andric // register is used than in the rest of the function, as getFrameRegister would
1330b57cec5SDimitry Andric // use.
134fe6060f1SDimitry Andric static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
135fe6060f1SDimitry Andric                              const SIMachineFunctionInfo &FuncInfo,
1365f757f3fSDimitry Andric                              LiveRegUnits &LiveUnits, MachineFunction &MF,
137e8d8bef9SDimitry Andric                              MachineBasicBlock &MBB,
138349cc55cSDimitry Andric                              MachineBasicBlock::iterator I, const DebugLoc &DL,
139bdd1243dSDimitry Andric                              Register SpillReg, int FI, Register FrameReg,
140bdd1243dSDimitry Andric                              int64_t DwordOff = 0) {
141fe6060f1SDimitry Andric   unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
142fe6060f1SDimitry Andric                                         : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1430b57cec5SDimitry Andric 
144fe6060f1SDimitry Andric   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
145fe6060f1SDimitry Andric   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
146fe6060f1SDimitry Andric   MachineMemOperand *MMO = MF.getMachineMemOperand(
147fe6060f1SDimitry Andric       PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
148fe6060f1SDimitry Andric       FrameInfo.getObjectAlign(FI));
1495f757f3fSDimitry Andric   LiveUnits.addReg(SpillReg);
150bdd1243dSDimitry Andric   bool IsKill = !MBB.isLiveIn(SpillReg);
151bdd1243dSDimitry Andric   TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
1525f757f3fSDimitry Andric                           DwordOff, MMO, nullptr, &LiveUnits);
153bdd1243dSDimitry Andric   if (IsKill)
1545f757f3fSDimitry Andric     LiveUnits.removeReg(SpillReg);
155e8d8bef9SDimitry Andric }
156e8d8bef9SDimitry Andric 
157fe6060f1SDimitry Andric static void buildEpilogRestore(const GCNSubtarget &ST,
158fe6060f1SDimitry Andric                                const SIRegisterInfo &TRI,
159fe6060f1SDimitry Andric                                const SIMachineFunctionInfo &FuncInfo,
1605f757f3fSDimitry Andric                                LiveRegUnits &LiveUnits, MachineFunction &MF,
161e8d8bef9SDimitry Andric                                MachineBasicBlock &MBB,
162349cc55cSDimitry Andric                                MachineBasicBlock::iterator I,
163bdd1243dSDimitry Andric                                const DebugLoc &DL, Register SpillReg, int FI,
164bdd1243dSDimitry Andric                                Register FrameReg, int64_t DwordOff = 0) {
165fe6060f1SDimitry Andric   unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
166fe6060f1SDimitry Andric                                         : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1670b57cec5SDimitry Andric 
168fe6060f1SDimitry Andric   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
169fe6060f1SDimitry Andric   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
170fe6060f1SDimitry Andric   MachineMemOperand *MMO = MF.getMachineMemOperand(
171fe6060f1SDimitry Andric       PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
172fe6060f1SDimitry Andric       FrameInfo.getObjectAlign(FI));
173bdd1243dSDimitry Andric   TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
1745f757f3fSDimitry Andric                           DwordOff, MMO, nullptr, &LiveUnits);
1750b57cec5SDimitry Andric }
1760b57cec5SDimitry Andric 
177e8d8bef9SDimitry Andric static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
178e8d8bef9SDimitry Andric                         const DebugLoc &DL, const SIInstrInfo *TII,
179e8d8bef9SDimitry Andric                         Register TargetReg) {
180e8d8bef9SDimitry Andric   MachineFunction *MF = MBB.getParent();
181e8d8bef9SDimitry Andric   const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
182e8d8bef9SDimitry Andric   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
183e8d8bef9SDimitry Andric   const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
184e8d8bef9SDimitry Andric   Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
185e8d8bef9SDimitry Andric   Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
186e8d8bef9SDimitry Andric 
187e8d8bef9SDimitry Andric   if (MFI->getGITPtrHigh() != 0xffffffff) {
188e8d8bef9SDimitry Andric     BuildMI(MBB, I, DL, SMovB32, TargetHi)
189e8d8bef9SDimitry Andric         .addImm(MFI->getGITPtrHigh())
190e8d8bef9SDimitry Andric         .addReg(TargetReg, RegState::ImplicitDefine);
191e8d8bef9SDimitry Andric   } else {
1927a6dacacSDimitry Andric     const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
193e8d8bef9SDimitry Andric     BuildMI(MBB, I, DL, GetPC64, TargetReg);
194e8d8bef9SDimitry Andric   }
195e8d8bef9SDimitry Andric   Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
196e8d8bef9SDimitry Andric   MF->getRegInfo().addLiveIn(GitPtrLo);
197e8d8bef9SDimitry Andric   MBB.addLiveIn(GitPtrLo);
198e8d8bef9SDimitry Andric   BuildMI(MBB, I, DL, SMovB32, TargetLo)
199e8d8bef9SDimitry Andric     .addReg(GitPtrLo);
200e8d8bef9SDimitry Andric }
201e8d8bef9SDimitry Andric 
2025f757f3fSDimitry Andric static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
203bdd1243dSDimitry Andric                           const SIMachineFunctionInfo *FuncInfo,
204bdd1243dSDimitry Andric                           MachineFunction &MF, MachineBasicBlock &MBB,
205bdd1243dSDimitry Andric                           MachineBasicBlock::iterator MBBI, bool IsProlog) {
2065f757f3fSDimitry Andric   if (LiveUnits.empty()) {
2075f757f3fSDimitry Andric     LiveUnits.init(TRI);
208bdd1243dSDimitry Andric     if (IsProlog) {
2095f757f3fSDimitry Andric       LiveUnits.addLiveIns(MBB);
210bdd1243dSDimitry Andric     } else {
211bdd1243dSDimitry Andric       // In epilog.
2125f757f3fSDimitry Andric       LiveUnits.addLiveOuts(MBB);
2135f757f3fSDimitry Andric       LiveUnits.stepBackward(*MBBI);
214bdd1243dSDimitry Andric     }
215bdd1243dSDimitry Andric   }
216bdd1243dSDimitry Andric }
217bdd1243dSDimitry Andric 
namespace llvm {

// SpillBuilder to save/restore special SGPR spills like the one needed for FP,
// BP, etc. These spills are delayed until the current function's frame is
// finalized. For a given register, the builder uses the
// PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
class PrologEpilogSGPRSpillBuilder {
  MachineBasicBlock::iterator MI; // Insertion point for emitted instructions.
  MachineBasicBlock &MBB;
  MachineFunction &MF;
  const GCNSubtarget &ST;
  MachineFrameInfo &MFI;
  SIMachineFunctionInfo *FuncInfo;
  const SIInstrInfo *TII;
  const SIRegisterInfo &TRI;
  Register SuperReg; // The (possibly multi-dword) SGPR being saved/restored.
  const PrologEpilogSGPRSaveRestoreInfo SI;
  LiveRegUnits &LiveUnits;
  const DebugLoc &DL;
  Register FrameReg;
  ArrayRef<int16_t> SplitParts; // Sub-register indices of SuperReg, per dword.
  unsigned NumSubRegs;          // 1 if SuperReg has no split parts.
  unsigned EltSize = 4;         // Bytes per spilled element (one dword).

  // Save SuperReg to the stack object FI, one dword at a time, bouncing each
  // sub-register through a free scratch VGPR. Fatal if no VGPR is free.
  void saveToMemory(const int FI) const {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    assert(!MFI.isDeadObjectIndex(FI));

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);

    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      // Copy the SGPR dword into the scratch VGPR, then store it.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
          .addReg(SubReg);

      buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
                       FI, FrameReg, DwordOff);
      DwordOff += 4;
    }
  }

  // Save SuperReg into VGPR lanes previously allocated for FI, one
  // sub-register per (VGPR, lane) pair.
  void saveToVGPRLane(const int FI) const {
    assert(!MFI.isDeadObjectIndex(FI));

    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
              Spill[I].VGPR)
          .addReg(SubReg)
          .addImm(Spill[I].Lane)
          .addReg(Spill[I].VGPR, RegState::Undef);
    }
  }

  // Save SuperReg with a plain copy into the scratch SGPR chosen earlier.
  void copyToScratchSGPR(Register DstReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
        .addReg(SuperReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Reload SuperReg from the stack object FI: load each dword into a scratch
  // VGPR and move it back with V_READFIRSTLANE_B32. Fatal if no VGPR is free.
  void restoreFromMemory(const int FI) {
    MachineRegisterInfo &MRI = MF.getRegInfo();

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));

      buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
                         TmpVGPR, FI, FrameReg, DwordOff);
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
          .addReg(TmpVGPR, RegState::Kill);
      DwordOff += 4;
    }
  }

  // Reload SuperReg from the VGPR lanes allocated for FI.
  void restoreFromVGPRLane(const int FI) {
    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
          .addReg(Spill[I].VGPR)
          .addImm(Spill[I].Lane);
    }
  }

  // Restore SuperReg with a plain copy from the scratch SGPR it was saved to.
  void copyFromScratchSGPR(Register SrcReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
        .addReg(SrcReg)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

public:
  // \p Reg is the SGPR to save/restore; \p SI describes where (memory, VGPR
  // lane, or scratch SGPR). Instructions are inserted at \p MI in \p MBB.
  PrologEpilogSGPRSpillBuilder(Register Reg,
                               const PrologEpilogSGPRSaveRestoreInfo SI,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, const SIInstrInfo *TII,
                               const SIRegisterInfo &TRI,
                               LiveRegUnits &LiveUnits, Register FrameReg)
      : MI(MI), MBB(MBB), MF(*MBB.getParent()),
        ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
        FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
        SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
        FrameReg(FrameReg) {
    // Decompose the register into dword-sized parts for element-wise spills.
    const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
    SplitParts = TRI.getRegSplitParts(RC, EltSize);
    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

    assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
  }

  // Emit the save sequence selected by the save/restore info.
  void save() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return saveToMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return saveToVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyToScratchSGPR(SI.getReg());
    }
  }

  // Emit the matching restore sequence.
  void restore() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return restoreFromMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return restoreFromVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyFromScratchSGPR(SI.getReg());
    }
  }
};

} // namespace llvm
380bdd1243dSDimitry Andric 
3815ffd83dbSDimitry Andric // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
3825ffd83dbSDimitry Andric void SIFrameLowering::emitEntryFunctionFlatScratchInit(
3835ffd83dbSDimitry Andric     MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
3845ffd83dbSDimitry Andric     const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
3855ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
3860b57cec5SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
3870b57cec5SDimitry Andric   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
3880b57cec5SDimitry Andric   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
3890b57cec5SDimitry Andric 
3900b57cec5SDimitry Andric   // We don't need this if we only have spills since there is no user facing
3910b57cec5SDimitry Andric   // scratch.
3920b57cec5SDimitry Andric 
3930b57cec5SDimitry Andric   // TODO: If we know we don't have flat instructions earlier, we can omit
3940b57cec5SDimitry Andric   // this from the input registers.
3950b57cec5SDimitry Andric   //
3960b57cec5SDimitry Andric   // TODO: We only need to know if we access scratch space through a flat
3970b57cec5SDimitry Andric   // pointer. Because we only detect if flat instructions are used at all,
3980b57cec5SDimitry Andric   // this will be used more often than necessary on VI.
3990b57cec5SDimitry Andric 
400e8d8bef9SDimitry Andric   Register FlatScrInitLo;
401e8d8bef9SDimitry Andric   Register FlatScrInitHi;
402e8d8bef9SDimitry Andric 
403e8d8bef9SDimitry Andric   if (ST.isAmdPalOS()) {
404e8d8bef9SDimitry Andric     // Extract the scratch offset from the descriptor in the GIT
4055f757f3fSDimitry Andric     LiveRegUnits LiveUnits;
4065f757f3fSDimitry Andric     LiveUnits.init(*TRI);
4075f757f3fSDimitry Andric     LiveUnits.addLiveIns(MBB);
408e8d8bef9SDimitry Andric 
409e8d8bef9SDimitry Andric     // Find unused reg to load flat scratch init into
410e8d8bef9SDimitry Andric     MachineRegisterInfo &MRI = MF.getRegInfo();
411e8d8bef9SDimitry Andric     Register FlatScrInit = AMDGPU::NoRegister;
412e8d8bef9SDimitry Andric     ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
413e8d8bef9SDimitry Andric     unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
414e8d8bef9SDimitry Andric     AllSGPR64s = AllSGPR64s.slice(
415e8d8bef9SDimitry Andric         std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
416e8d8bef9SDimitry Andric     Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
417e8d8bef9SDimitry Andric     for (MCPhysReg Reg : AllSGPR64s) {
4185f757f3fSDimitry Andric       if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
4195f757f3fSDimitry Andric           MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
420e8d8bef9SDimitry Andric         FlatScrInit = Reg;
421e8d8bef9SDimitry Andric         break;
422e8d8bef9SDimitry Andric       }
423e8d8bef9SDimitry Andric     }
424e8d8bef9SDimitry Andric     assert(FlatScrInit && "Failed to find free register for scratch init");
425e8d8bef9SDimitry Andric 
426e8d8bef9SDimitry Andric     FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
427e8d8bef9SDimitry Andric     FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
428e8d8bef9SDimitry Andric 
429e8d8bef9SDimitry Andric     buildGitPtr(MBB, I, DL, TII, FlatScrInit);
430e8d8bef9SDimitry Andric 
431e8d8bef9SDimitry Andric     // We now have the GIT ptr - now get the scratch descriptor from the entry
432e8d8bef9SDimitry Andric     // at offset 0 (or offset 16 for a compute shader).
433e8d8bef9SDimitry Andric     MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
434e8d8bef9SDimitry Andric     const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
435e8d8bef9SDimitry Andric     auto *MMO = MF.getMachineMemOperand(
436e8d8bef9SDimitry Andric         PtrInfo,
437e8d8bef9SDimitry Andric         MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
438e8d8bef9SDimitry Andric             MachineMemOperand::MODereferenceable,
439e8d8bef9SDimitry Andric         8, Align(4));
440e8d8bef9SDimitry Andric     unsigned Offset =
441e8d8bef9SDimitry Andric         MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
442e8d8bef9SDimitry Andric     const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
443e8d8bef9SDimitry Andric     unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
444e8d8bef9SDimitry Andric     BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
445e8d8bef9SDimitry Andric         .addReg(FlatScrInit)
446e8d8bef9SDimitry Andric         .addImm(EncodedOffset) // offset
447fe6060f1SDimitry Andric         .addImm(0)             // cpol
448e8d8bef9SDimitry Andric         .addMemOperand(MMO);
449e8d8bef9SDimitry Andric 
450e8d8bef9SDimitry Andric     // Mask the offset in [47:0] of the descriptor
451e8d8bef9SDimitry Andric     const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
452349cc55cSDimitry Andric     auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
453e8d8bef9SDimitry Andric         .addReg(FlatScrInitHi)
454e8d8bef9SDimitry Andric         .addImm(0xffff);
455349cc55cSDimitry Andric     And->getOperand(3).setIsDead(); // Mark SCC as dead.
456e8d8bef9SDimitry Andric   } else {
4578bcb0991SDimitry Andric     Register FlatScratchInitReg =
4588bcb0991SDimitry Andric         MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
459e8d8bef9SDimitry Andric     assert(FlatScratchInitReg);
4600b57cec5SDimitry Andric 
4610b57cec5SDimitry Andric     MachineRegisterInfo &MRI = MF.getRegInfo();
4620b57cec5SDimitry Andric     MRI.addLiveIn(FlatScratchInitReg);
4630b57cec5SDimitry Andric     MBB.addLiveIn(FlatScratchInitReg);
4640b57cec5SDimitry Andric 
465e8d8bef9SDimitry Andric     FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
466e8d8bef9SDimitry Andric     FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
467e8d8bef9SDimitry Andric   }
4680b57cec5SDimitry Andric 
4690b57cec5SDimitry Andric   // Do a 64-bit pointer add.
4700b57cec5SDimitry Andric   if (ST.flatScratchIsPointer()) {
4710b57cec5SDimitry Andric     if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
4720b57cec5SDimitry Andric       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
4730b57cec5SDimitry Andric         .addReg(FlatScrInitLo)
4740b57cec5SDimitry Andric         .addReg(ScratchWaveOffsetReg);
475349cc55cSDimitry Andric       auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
476349cc55cSDimitry Andric                           FlatScrInitHi)
4770b57cec5SDimitry Andric         .addReg(FlatScrInitHi)
4780b57cec5SDimitry Andric         .addImm(0);
479349cc55cSDimitry Andric       Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
480349cc55cSDimitry Andric 
481*0fca6ea1SDimitry Andric       using namespace AMDGPU::Hwreg;
482*0fca6ea1SDimitry Andric       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
483*0fca6ea1SDimitry Andric           .addReg(FlatScrInitLo)
484*0fca6ea1SDimitry Andric           .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
485*0fca6ea1SDimitry Andric       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
486*0fca6ea1SDimitry Andric           .addReg(FlatScrInitHi)
487*0fca6ea1SDimitry Andric           .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
4880b57cec5SDimitry Andric       return;
4890b57cec5SDimitry Andric     }
4900b57cec5SDimitry Andric 
491e8d8bef9SDimitry Andric     // For GFX9.
4920b57cec5SDimitry Andric     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
4930b57cec5SDimitry Andric       .addReg(FlatScrInitLo)
4940b57cec5SDimitry Andric       .addReg(ScratchWaveOffsetReg);
495349cc55cSDimitry Andric     auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
496349cc55cSDimitry Andric                         AMDGPU::FLAT_SCR_HI)
4970b57cec5SDimitry Andric       .addReg(FlatScrInitHi)
4980b57cec5SDimitry Andric       .addImm(0);
499349cc55cSDimitry Andric     Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
5000b57cec5SDimitry Andric 
5010b57cec5SDimitry Andric     return;
5020b57cec5SDimitry Andric   }
5030b57cec5SDimitry Andric 
504e8d8bef9SDimitry Andric   assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
5050b57cec5SDimitry Andric 
5060b57cec5SDimitry Andric   // Copy the size in bytes.
5070b57cec5SDimitry Andric   BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
5080b57cec5SDimitry Andric     .addReg(FlatScrInitHi, RegState::Kill);
5090b57cec5SDimitry Andric 
5100b57cec5SDimitry Andric   // Add wave offset in bytes to private base offset.
5110b57cec5SDimitry Andric   // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
512fe6060f1SDimitry Andric   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
5130b57cec5SDimitry Andric       .addReg(FlatScrInitLo)
5140b57cec5SDimitry Andric       .addReg(ScratchWaveOffsetReg);
5150b57cec5SDimitry Andric 
5160b57cec5SDimitry Andric   // Convert offset to 256-byte units.
517349cc55cSDimitry Andric   auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
518349cc55cSDimitry Andric                       AMDGPU::FLAT_SCR_HI)
5190b57cec5SDimitry Andric     .addReg(FlatScrInitLo, RegState::Kill)
5200b57cec5SDimitry Andric     .addImm(8);
521bdd1243dSDimitry Andric   LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
5220b57cec5SDimitry Andric }
5230b57cec5SDimitry Andric 
524e8d8bef9SDimitry Andric // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
525e8d8bef9SDimitry Andric // memory. They should have been removed by now.
526e8d8bef9SDimitry Andric static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
527e8d8bef9SDimitry Andric   for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
528e8d8bef9SDimitry Andric        I != E; ++I) {
529e8d8bef9SDimitry Andric     if (!MFI.isDeadObjectIndex(I))
530e8d8bef9SDimitry Andric       return false;
531e8d8bef9SDimitry Andric   }
532e8d8bef9SDimitry Andric 
533e8d8bef9SDimitry Andric   return true;
534e8d8bef9SDimitry Andric }
535e8d8bef9SDimitry Andric 
// Shift down registers reserved for the scratch RSRC.
//
// Returns the (possibly relocated) scratch resource register for an entry
// function, or Register() when scratch access is provably not needed. When the
// RSRC currently sits in the default reserved SGPR quad, it is moved down to
// the first free, allocatable SGPR128 above the preloaded user/system SGPRs so
// the registers above it are not wasted.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  // Nothing to do if no RSRC was reserved, or it was reserved but is unused
  // and there are no live stack objects that would require scratch access.
  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  // Keep the register as-is when shifting is not possible (SGPR init bug) or
  // the RSRC is not in the default reserved location anyway.
  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  // Round the preloaded SGPR count up to SGPR128 granularity, then skip that
  // many quads from the start of the SGPR128 list.
  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
      // Rewrite all existing uses to the new register and record/reserve it.
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      MRI.reserveReg(Reg, TRI);
      return Reg;
    }
  }

  // No better candidate found; fall back to the original reserved register.
  return ScratchRsrcReg;
}
5890b57cec5SDimitry Andric 
590e8d8bef9SDimitry Andric static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
591e8d8bef9SDimitry Andric   return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
592e8d8bef9SDimitry Andric }
593e8d8bef9SDimitry Andric 
// Emit the prologue for an entry function (kernel/shader): pick and set up the
// scratch resource descriptor, the scratch wave offset, the FP/SP registers,
// and flat-scratch initialization, all at the top of the entry block.
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found is clobbering with
  // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
  // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
  // wave offset to a free SGPR.
  Register ScratchWaveOffsetReg;
  if (PreloadedScratchWaveOffsetReg &&
      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    // Search the SGPRs above the preloaded ones for a free, allocatable
    // register that does not overlap the SRSRC or the GIT pointer.
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }

    // FIXME: We can spill incoming arguments and restore at the end of the
    // prolog.
    if (!ScratchWaveOffsetReg)
      report_fatal_error(
          "could not find temporary scratch offset register in prolog");
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);

  // Entry functions materialize a zero frame pointer.
  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  // Initialize SP to the static frame size, scaled by the wavefront size
  // unless flat scratch is enabled (see getScratchScaleFactor).
  if (requiresStackPointerReference(MF)) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
  }

  // Flat scratch must be initialized when the FLAT_SCR register is used, the
  // function makes calls, or (with flat scratch enabled) there is any live
  // stack object -- and only when the flat-scratch-init user SGPR exists.
  bool NeedsFlatScratchInit =
      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  // The preloaded wave offset is consumed by both the flat scratch init and
  // the RSRC setup below, so mark it live-in if either will run.
  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}
7270b57cec5SDimitry Andric 
// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
//
// Fills \p ScratchRsrcReg with a valid scratch resource descriptor by one of
// three strategies (PAL: load from the GIT; no preloaded SRSRC: build from
// relocations and constants; HSA/Mesa: copy the preloaded SRSRC), then folds
// \p ScratchWaveOffsetReg into the descriptor's 48-bit base address.
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                           MachineMemOperand::MOInvariant |
                                           MachineMemOperand::MODereferenceable,
                                       16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
      .addReg(Rsrc01)
      .addImm(EncodedOffset) // offset
      .addImm(0) // cpol
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
      .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of
    // descriptor / bits 22:21 of third sub-reg will be 0b11)
    // If the shader is actually wave32 we have to modify the const_index_stride
    // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
    // reason the driver does this is that there can be cases where it presents
    // 2 shaders with different wave size (e.g. VsFs).
    // TODO: convert to using SCRATCH instructions or multiple SRD buffers
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    // No descriptor was preloaded: synthesize one. The base pointer (words
    // 0-1) comes from the implicit buffer pointer or external relocations,
    // and words 2-3 are target-known constants.
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->getUserSGPRInfo().hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        // Compute: the implicit buffer pointer SGPR pair is the base itself.
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        // Graphics: load the 8-byte base through the implicit buffer pointer.
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addImm(0) // offset
          .addImm(0) // cpol
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      // No implicit buffer pointer either: the linker resolves the base via
      // the SCRATCH_RSRC_DWORD0/1 external symbols.
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    // A preloaded descriptor exists; copy it into place if it is not already
    // in the chosen register.
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
      .addReg(ScratchRsrcSub1)
      .addImm(0)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
}
8715ffd83dbSDimitry Andric 
8720b57cec5SDimitry Andric bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
8730b57cec5SDimitry Andric   switch (ID) {
8740b57cec5SDimitry Andric   case TargetStackID::Default:
8750b57cec5SDimitry Andric   case TargetStackID::NoAlloc:
8760b57cec5SDimitry Andric   case TargetStackID::SGPRSpill:
8770b57cec5SDimitry Andric     return true;
878e8d8bef9SDimitry Andric   case TargetStackID::ScalableVector:
879fe6060f1SDimitry Andric   case TargetStackID::WasmLocal:
8808bcb0991SDimitry Andric     return false;
8810b57cec5SDimitry Andric   }
8820b57cec5SDimitry Andric   llvm_unreachable("Invalid TargetStackID::Value");
8830b57cec5SDimitry Andric }
8840b57cec5SDimitry Andric 
885bdd1243dSDimitry Andric // Activate only the inactive lanes when \p EnableInactiveLanes is true.
886bdd1243dSDimitry Andric // Otherwise, activate all lanes. It returns the saved exec.
8875f757f3fSDimitry Andric static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
8885ffd83dbSDimitry Andric                                      MachineFunction &MF,
8895ffd83dbSDimitry Andric                                      MachineBasicBlock &MBB,
8905ffd83dbSDimitry Andric                                      MachineBasicBlock::iterator MBBI,
891bdd1243dSDimitry Andric                                      const DebugLoc &DL, bool IsProlog,
892bdd1243dSDimitry Andric                                      bool EnableInactiveLanes) {
8935ffd83dbSDimitry Andric   Register ScratchExecCopy;
8945ffd83dbSDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
8955ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
8965ffd83dbSDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
8975ffd83dbSDimitry Andric   const SIRegisterInfo &TRI = TII->getRegisterInfo();
8985ffd83dbSDimitry Andric   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
8995ffd83dbSDimitry Andric 
9005f757f3fSDimitry Andric   initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
9015ffd83dbSDimitry Andric 
9025ffd83dbSDimitry Andric   ScratchExecCopy = findScratchNonCalleeSaveRegister(
9035f757f3fSDimitry Andric       MRI, LiveUnits, *TRI.getWaveMaskRegClass());
904fe6060f1SDimitry Andric   if (!ScratchExecCopy)
905fe6060f1SDimitry Andric     report_fatal_error("failed to find free scratch register");
9065ffd83dbSDimitry Andric 
9075f757f3fSDimitry Andric   LiveUnits.addReg(ScratchExecCopy);
9085ffd83dbSDimitry Andric 
909bdd1243dSDimitry Andric   const unsigned SaveExecOpc =
910bdd1243dSDimitry Andric       ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
911bdd1243dSDimitry Andric                                            : AMDGPU::S_OR_SAVEEXEC_B32)
912bdd1243dSDimitry Andric                     : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
913bdd1243dSDimitry Andric                                            : AMDGPU::S_OR_SAVEEXEC_B64);
914bdd1243dSDimitry Andric   auto SaveExec =
915bdd1243dSDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
916349cc55cSDimitry Andric   SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.
9175ffd83dbSDimitry Andric 
9185ffd83dbSDimitry Andric   return ScratchExecCopy;
9195ffd83dbSDimitry Andric }
9205ffd83dbSDimitry Andric 
// Emit the callee-saved-register (CSR) spill stores at the start of a
// non-entry function's prologue. \p FrameReg is the base register used to
// address the spill slots (SP or FP, chosen by the caller).
// \p FramePtrRegScratchCopy, if valid, holds the caller's FP value that was
// copied aside before FP was repurposed; it is spilled in place of FP below.
// \p LiveUnits tracks register liveness at \p MBBI so scratch registers can
// be picked safely; it is updated as registers are defined here.
void SIFrameLowering::emitCSRSpillStores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
  // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
  // might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    // Save EXEC and set only the inactive lanes so the scratch-VGPR stores
    // below write just the lanes the caller may still be using.
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ true, /*EnableInactiveLanes*/ true);

  // Helper: store each (VGPR, frame-index) pair in \p WWMRegs to its slot,
  // addressed via FrameReg.
  auto StoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                           VGPR, FI, FrameReg);
        }
      };

  StoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      // EXEC was already saved above; just switch to all-lanes-active for the
      // callee-saved VGPR stores.
      unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
      BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
    } else {
      // No scratch WWM spills happened, so EXEC has not been saved yet.
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ true,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  StoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // Restore the original EXEC mask from the saved copy.
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveUnits.addReg(ScratchExecCopy);
  }

  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  // Emit the prolog/epilog SGPR spills (to VGPR lanes, memory, or scratch
  // SGPR copies, as recorded in FuncInfo).
  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP spill:
    // Skip if FP is saved to a scratch SGPR, the save has already been emitted.
    // Otherwise, FP has been moved to a temporary register and spill it
    // instead.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.save();
  }

  // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
  // such scratch registers live throughout the function.
  SmallVector<Register, 1> ScratchSGPRs;
  FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
  if (!ScratchSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : ScratchSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveUnits.empty()) {
      for (MCPhysReg Reg : ScratchSGPRs)
        LiveUnits.addReg(Reg);
    }
  }
}
1006bdd1243dSDimitry Andric 
// Emit the CSR restores in a non-entry function's epilogue; the mirror of
// emitCSRSpillStores. SGPRs are restored first (while the frame is still
// addressable via \p FrameReg), then the WWM VGPRs with the appropriate EXEC
// manipulation. \p FramePtrRegScratchCopy, if valid, receives the saved FP
// value in place of FP itself; the caller copies it into FP afterwards.
void SIFrameLowering::emitCSRSpillRestores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP restore:
    // Skip if FP needs to be restored from the scratch SGPR. Otherwise, restore
    // the FP value to a temporary register. The frame pointer should be
    // overwritten only at the end when all other spills are restored from
    // current frame.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.restore();
  }

  // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
  // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to
  // this, we might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    // Save EXEC and enable only the inactive lanes for the scratch-VGPR
    // reloads.
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ false, /*EnableInactiveLanes*/ true);

  // Helper: reload each (VGPR, frame-index) pair in \p WWMRegs from its slot,
  // addressed via FrameReg.
  auto RestoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                             VGPR, FI, FrameReg);
        }
      };

  RestoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      // EXEC was already saved above; switch to all-lanes-active for the
      // callee-saved VGPR reloads.
      unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
      BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
    } else {
      // No scratch WWM reloads happened, so EXEC has not been saved yet.
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ false,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  RestoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // Restore the original EXEC mask from the saved copy.
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}
1074fe6060f1SDimitry Andric 
// Insert the prologue for \p MBB: set up SP for chain functions if needed,
// establish the frame pointer (with stack realignment when required), emit
// the CSR spill stores, set up the base pointer, and finally bump SP past the
// local frame. Entry functions take a separate path.
void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LiveRegUnits LiveUnits;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  // DebugLoc must be unknown since the first instruction with DebugLoc is used
  // to determine the end of the prologue.
  DebugLoc DL;

  if (FuncInfo->isChainFunction()) {
    // Functions with the amdgpu_cs_chain[_preserve] CC don't receive a SP, but
    // are free to set one up if they need it.
    bool UseSP = requiresStackPointerReference(MF);
    if (UseSP) {
      assert(StackPtrReg != AMDGPU::SP_REG);

      // SP is expressed in scratch units, hence the scale factor.
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg)
          .addImm(MFI.getStackSize() * getScratchScaleFactor(ST));
    }
  }

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;

  // Stack realignment always forces a frame pointer.
  if (TRI.hasStackRealignment(MF))
    HasFP = true;

  Register FramePtrRegScratchCopy;
  if (!HasFP && !hasFP(MF)) {
    // Emit the CSR spill stores with SP base register.
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits,
                       FuncInfo->isChainFunction() ? Register() : StackPtrReg,
                       FramePtrRegScratchCopy);
  } else {
    // CSR spill stores will use FP as base register.
    Register SGPRForFPSaveRestoreCopy =
        FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
    if (SGPRForFPSaveRestoreCopy) {
      // Copy FP to the scratch register now and emit the CFI entry. It avoids
      // the extra FP copy needed in the other two cases when FP is spilled to
      // memory or to a VGPR lane.
      PrologEpilogSGPRSpillBuilder SB(
          FramePtrReg,
          FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
          DL, TII, TRI, LiveUnits, FramePtrReg);
      SB.save();
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      // Copy FP into a new scratch register so that its previous value can be
      // spilled after setting up the new frame.
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
          .addReg(FramePtrReg);
    }
  }

  if (HasFP) {
    // Realign: round FP up to the maximum alignment, and grow the frame by
    // one extra alignment's worth so the rounding never runs out of room.
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveUnits.empty()) {
      LiveUnits.init(TRI);
      LiveUnits.addLiveIns(MBB);
    }

    // s_add_i32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
        .addReg(FramePtrReg, RegState::Kill)
        .addImm(-Alignment * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    // No realignment needed; FP is just the incoming SP.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If FP is used, emit the CSR spills with FP base register.
  if (HasFP) {
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                       FramePtrRegScratchCopy);
    if (FramePtrRegScratchCopy)
      // The old FP value has been spilled; the scratch copy is free again.
      LiveUnits.removeReg(FramePtrRegScratchCopy);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Allocate the local frame by advancing SP (scaled to scratch units).
  if (HasFP && RoundedSize != 0) {
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(RoundedSize * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  // Consistency checks: FP/BP must be saved exactly when they are needed.
  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
  (void)FPSaved;
  assert((!HasFP || FPSaved) &&
         "Needed to save FP but didn't save it anywhere");

  // If we allow spilling to AGPRs we may have saved FP but then spill
  // everything into AGPRs instead of the stack.
  assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) &&
         "Saved FP but didn't need it");

  bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
  (void)BPSaved;
  assert((!HasBP || BPSaved) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
}
12260b57cec5SDimitry Andric 
// Insert the epilogue for \p MBB: restore the CSRs, pop the local frame by
// subtracting its (possibly realignment-padded) size from SP, and restore the
// caller's frame pointer. Entry functions need no epilogue.
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  LiveRegUnits LiveUnits;
  // Get the insert location for the epilogue. If there were no terminators in
  // the block, get the last instruction.
  MachineBasicBlock::iterator MBBI = MBB.end();
  DebugLoc DL;
  if (!MBB.empty()) {
    // Take the DebugLoc from the last real instruction, but insert before the
    // first terminator.
    MBBI = MBB.getLastNonDebugInstr();
    if (MBBI != MBB.end())
      DL = MBBI->getDebugLoc();

    MBBI = MBB.getFirstTerminator();
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  // Undo the extra alignment padding the prologue added when it realigned.
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);

  Register FramePtrRegScratchCopy;
  Register SGPRForFPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
  if (FPSaved) {
    // CSR spill restores should use FP as base register. If
    // SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
    // into a new scratch register and copy to FP later when other registers are
    // restored from the current stack frame.
    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
    if (SGPRForFPSaveRestoreCopy) {
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
    }

    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                         FramePtrRegScratchCopy);
  }

  // Deallocate the local frame (sizes are scaled to scratch units).
  if (RoundedSize != 0 && hasFP(MF)) {
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
        .setMIFlag(MachineInstr::FrameDestroy);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  if (FPSaved) {
    // Insert the copy to restore FP.
    Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
                                               : FramePtrRegScratchCopy;
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
            .addReg(SrcReg);
    if (SGPRForFPSaveRestoreCopy)
      MIB.setMIFlag(MachineInstr::FrameDestroy);
  } else {
    // Insert the CSR spill restores with SP as the base register.
    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
                         FramePtrRegScratchCopy);
  }
}
13060b57cec5SDimitry Andric 
13070b57cec5SDimitry Andric #ifndef NDEBUG
1308e8d8bef9SDimitry Andric static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
1309e8d8bef9SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
1310e8d8bef9SDimitry Andric   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
13110b57cec5SDimitry Andric   for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
13120b57cec5SDimitry Andric        I != E; ++I) {
13130b57cec5SDimitry Andric     if (!MFI.isDeadObjectIndex(I) &&
13140b57cec5SDimitry Andric         MFI.getStackID(I) == TargetStackID::SGPRSpill &&
1315bdd1243dSDimitry Andric         !FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) {
13160b57cec5SDimitry Andric       return false;
13170b57cec5SDimitry Andric     }
13180b57cec5SDimitry Andric   }
13190b57cec5SDimitry Andric 
13200b57cec5SDimitry Andric   return true;
13210b57cec5SDimitry Andric }
13220b57cec5SDimitry Andric #endif
13230b57cec5SDimitry Andric 
1324e8d8bef9SDimitry Andric StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1325e8d8bef9SDimitry Andric                                                     int FI,
13265ffd83dbSDimitry Andric                                                     Register &FrameReg) const {
13270b57cec5SDimitry Andric   const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
13280b57cec5SDimitry Andric 
13290b57cec5SDimitry Andric   FrameReg = RI->getFrameRegister(MF);
1330e8d8bef9SDimitry Andric   return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
13310b57cec5SDimitry Andric }
13320b57cec5SDimitry Andric 
13330b57cec5SDimitry Andric void SIFrameLowering::processFunctionBeforeFrameFinalized(
13340b57cec5SDimitry Andric   MachineFunction &MF,
13350b57cec5SDimitry Andric   RegScavenger *RS) const {
13360b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
13370b57cec5SDimitry Andric 
13380b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1339fe6060f1SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
13400b57cec5SDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
1341fe6060f1SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
13420b57cec5SDimitry Andric   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
13430b57cec5SDimitry Andric 
1344bdd1243dSDimitry Andric   // Allocate spill slots for WWM reserved VGPRs.
13455f757f3fSDimitry Andric   // For chain functions, we only need to do this if we have calls to
13465f757f3fSDimitry Andric   // llvm.amdgcn.cs.chain.
13475f757f3fSDimitry Andric   bool IsChainWithoutCalls =
13485f757f3fSDimitry Andric       FuncInfo->isChainFunction() && !MF.getFrameInfo().hasTailCall();
13495f757f3fSDimitry Andric   if (!FuncInfo->isEntryFunction() && !IsChainWithoutCalls) {
1350bdd1243dSDimitry Andric     for (Register Reg : FuncInfo->getWWMReservedRegs()) {
1351bdd1243dSDimitry Andric       const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1352bdd1243dSDimitry Andric       FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
1353bdd1243dSDimitry Andric                                  TRI->getSpillAlign(*RC));
1354bdd1243dSDimitry Andric     }
135581ad6265SDimitry Andric   }
135681ad6265SDimitry Andric 
1357fe6060f1SDimitry Andric   const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
1358fe6060f1SDimitry Andric                                && EnableSpillVGPRToAGPR;
1359fe6060f1SDimitry Andric 
1360fe6060f1SDimitry Andric   if (SpillVGPRToAGPR) {
1361fe6060f1SDimitry Andric     // To track the spill frame indices handled in this pass.
1362fe6060f1SDimitry Andric     BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
13630eae32dcSDimitry Andric     BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);
1364fe6060f1SDimitry Andric 
1365fe6060f1SDimitry Andric     bool SeenDbgInstr = false;
1366fe6060f1SDimitry Andric 
1367fe6060f1SDimitry Andric     for (MachineBasicBlock &MBB : MF) {
1368349cc55cSDimitry Andric       for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
13690eae32dcSDimitry Andric         int FrameIndex;
1370fe6060f1SDimitry Andric         if (MI.isDebugInstr())
1371fe6060f1SDimitry Andric           SeenDbgInstr = true;
1372fe6060f1SDimitry Andric 
1373fe6060f1SDimitry Andric         if (TII->isVGPRSpill(MI)) {
1374fe6060f1SDimitry Andric           // Try to eliminate stack used by VGPR spills before frame
1375fe6060f1SDimitry Andric           // finalization.
1376fe6060f1SDimitry Andric           unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1377fe6060f1SDimitry Andric                                                      AMDGPU::OpName::vaddr);
1378fe6060f1SDimitry Andric           int FI = MI.getOperand(FIOp).getIndex();
1379fe6060f1SDimitry Andric           Register VReg =
1380fe6060f1SDimitry Andric             TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
1381fe6060f1SDimitry Andric           if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
1382fe6060f1SDimitry Andric                                                 TRI->isAGPR(MRI, VReg))) {
138306c3fb27SDimitry Andric             assert(RS != nullptr);
13845f757f3fSDimitry Andric             RS->enterBasicBlockEnd(MBB);
13855f757f3fSDimitry Andric             RS->backward(std::next(MI.getIterator()));
1386fe6060f1SDimitry Andric             TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
1387fe6060f1SDimitry Andric             SpillFIs.set(FI);
1388fe6060f1SDimitry Andric             continue;
1389fe6060f1SDimitry Andric           }
13900eae32dcSDimitry Andric         } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
13910eae32dcSDimitry Andric                    TII->isLoadFromStackSlot(MI, FrameIndex))
139204eeddc0SDimitry Andric           if (!MFI.isFixedObjectIndex(FrameIndex))
13930eae32dcSDimitry Andric             NonVGPRSpillFIs.set(FrameIndex);
1394fe6060f1SDimitry Andric       }
1395fe6060f1SDimitry Andric     }
13960eae32dcSDimitry Andric 
139781ad6265SDimitry Andric     // Stack slot coloring may assign different objects to the same stack slot.
13980eae32dcSDimitry Andric     // If not, then the VGPR to AGPR spill slot is dead.
13990eae32dcSDimitry Andric     for (unsigned FI : SpillFIs.set_bits())
14000eae32dcSDimitry Andric       if (!NonVGPRSpillFIs.test(FI))
14010eae32dcSDimitry Andric         FuncInfo->setVGPRToAGPRSpillDead(FI);
1402fe6060f1SDimitry Andric 
1403fe6060f1SDimitry Andric     for (MachineBasicBlock &MBB : MF) {
1404fe6060f1SDimitry Andric       for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
1405fe6060f1SDimitry Andric         MBB.addLiveIn(Reg);
1406fe6060f1SDimitry Andric 
1407fe6060f1SDimitry Andric       for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
1408fe6060f1SDimitry Andric         MBB.addLiveIn(Reg);
1409fe6060f1SDimitry Andric 
1410fe6060f1SDimitry Andric       MBB.sortUniqueLiveIns();
1411fe6060f1SDimitry Andric 
1412fe6060f1SDimitry Andric       if (!SpillFIs.empty() && SeenDbgInstr) {
1413fe6060f1SDimitry Andric         // FIXME: The dead frame indices are replaced with a null register from
1414fe6060f1SDimitry Andric         // the debug value instructions. We should instead, update it with the
1415fe6060f1SDimitry Andric         // correct register value. But not sure the register value alone is
1416fe6060f1SDimitry Andric         for (MachineInstr &MI : MBB) {
1417fe6060f1SDimitry Andric           if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
1418bdd1243dSDimitry Andric               !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) &&
1419fe6060f1SDimitry Andric               SpillFIs[MI.getOperand(0).getIndex()]) {
1420fe6060f1SDimitry Andric             MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
1421fe6060f1SDimitry Andric           }
1422fe6060f1SDimitry Andric         }
1423fe6060f1SDimitry Andric       }
1424fe6060f1SDimitry Andric     }
1425fe6060f1SDimitry Andric   }
1426fe6060f1SDimitry Andric 
142781ad6265SDimitry Andric   // At this point we've already allocated all spilled SGPRs to VGPRs if we
142881ad6265SDimitry Andric   // can. Any remaining SGPR spills will go to memory, so move them back to the
142981ad6265SDimitry Andric   // default stack.
143081ad6265SDimitry Andric   bool HaveSGPRToVMemSpill =
143181ad6265SDimitry Andric       FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
1432e8d8bef9SDimitry Andric   assert(allSGPRSpillsAreDead(MF) &&
14330b57cec5SDimitry Andric          "SGPR spill should have been removed in SILowerSGPRSpills");
14340b57cec5SDimitry Andric 
14350b57cec5SDimitry Andric   // FIXME: The other checks should be redundant with allStackObjectsAreDead,
14360b57cec5SDimitry Andric   // but currently hasNonSpillStackObjects is set only from source
14370b57cec5SDimitry Andric   // allocas. Stack temps produced from legalization are not counted currently.
14380b57cec5SDimitry Andric   if (!allStackObjectsAreDead(MFI)) {
14390b57cec5SDimitry Andric     assert(RS && "RegScavenger required if spilling");
14400b57cec5SDimitry Andric 
1441fe6060f1SDimitry Andric     // Add an emergency spill slot
1442fe6060f1SDimitry Andric     RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
144381ad6265SDimitry Andric 
144481ad6265SDimitry Andric     // If we are spilling SGPRs to memory with a large frame, we may need a
144581ad6265SDimitry Andric     // second VGPR emergency frame index.
144681ad6265SDimitry Andric     if (HaveSGPRToVMemSpill &&
144781ad6265SDimitry Andric         allocateScavengingFrameIndexesNearIncomingSP(MF)) {
144881ad6265SDimitry Andric       RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false));
144981ad6265SDimitry Andric     }
145081ad6265SDimitry Andric   }
145181ad6265SDimitry Andric }
145281ad6265SDimitry Andric 
145381ad6265SDimitry Andric void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
145481ad6265SDimitry Andric     MachineFunction &MF, RegScavenger *RS) const {
145581ad6265SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
145681ad6265SDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
145781ad6265SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
145881ad6265SDimitry Andric   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
145981ad6265SDimitry Andric 
146081ad6265SDimitry Andric   if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
146181ad6265SDimitry Andric     // On gfx908, we had initially reserved highest available VGPR for AGPR
146281ad6265SDimitry Andric     // copy. Now since we are done with RA, check if there exist an unused VGPR
146381ad6265SDimitry Andric     // which is lower than the eariler reserved VGPR before RA. If one exist,
146481ad6265SDimitry Andric     // use it for AGPR copy instead of one reserved before RA.
146581ad6265SDimitry Andric     Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
146681ad6265SDimitry Andric     Register UnusedLowVGPR =
146781ad6265SDimitry Andric         TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
146881ad6265SDimitry Andric     if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
146981ad6265SDimitry Andric                           TRI->getHWRegIndex(VGPRForAGPRCopy))) {
147006c3fb27SDimitry Andric       // Reserve this newly identified VGPR (for AGPR copy)
147106c3fb27SDimitry Andric       // reserved registers should already be frozen at this point
147206c3fb27SDimitry Andric       // so we can avoid calling MRI.freezeReservedRegs and just use
147306c3fb27SDimitry Andric       // MRI.reserveReg
147481ad6265SDimitry Andric       FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
147506c3fb27SDimitry Andric       MRI.reserveReg(UnusedLowVGPR, TRI);
147681ad6265SDimitry Andric     }
14770b57cec5SDimitry Andric   }
147806c3fb27SDimitry Andric   // We initally reserved the highest available SGPR pair for long branches
147906c3fb27SDimitry Andric   // now, after RA, we shift down to a lower unused one if one exists
148006c3fb27SDimitry Andric   Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
148106c3fb27SDimitry Andric   Register UnusedLowSGPR =
148206c3fb27SDimitry Andric       TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
148306c3fb27SDimitry Andric   // If LongBranchReservedReg is null then we didn't find a long branch
148406c3fb27SDimitry Andric   // and never reserved a register to begin with so there is nothing to
148506c3fb27SDimitry Andric   // shift down. Then if UnusedLowSGPR is null, there isn't available lower
148606c3fb27SDimitry Andric   // register to use so just keep the original one we set.
148706c3fb27SDimitry Andric   if (LongBranchReservedReg && UnusedLowSGPR) {
148806c3fb27SDimitry Andric     FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
148906c3fb27SDimitry Andric     MRI.reserveReg(UnusedLowSGPR, TRI);
149006c3fb27SDimitry Andric   }
14910b57cec5SDimitry Andric }
14920b57cec5SDimitry Andric 
1493bdd1243dSDimitry Andric // The special SGPR spills like the one needed for FP, BP or any reserved
1494bdd1243dSDimitry Andric // registers delayed until frame lowering.
1495bdd1243dSDimitry Andric void SIFrameLowering::determinePrologEpilogSGPRSaves(
149606c3fb27SDimitry Andric     MachineFunction &MF, BitVector &SavedVGPRs,
149706c3fb27SDimitry Andric     bool NeedExecCopyReservedReg) const {
14985ffd83dbSDimitry Andric   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
149906c3fb27SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
1500bdd1243dSDimitry Andric   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
15010b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
15020b57cec5SDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
15035f757f3fSDimitry Andric   LiveRegUnits LiveUnits;
15045f757f3fSDimitry Andric   LiveUnits.init(*TRI);
1505bdd1243dSDimitry Andric   // Initially mark callee saved registers as used so we will not choose them
1506bdd1243dSDimitry Andric   // while looking for scratch SGPRs.
1507bdd1243dSDimitry Andric   const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
1508bdd1243dSDimitry Andric   for (unsigned I = 0; CSRegs[I]; ++I)
15095f757f3fSDimitry Andric     LiveUnits.addReg(CSRegs[I]);
15100b57cec5SDimitry Andric 
151106c3fb27SDimitry Andric   const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
151206c3fb27SDimitry Andric 
1513*0fca6ea1SDimitry Andric   Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy();
1514*0fca6ea1SDimitry Andric   if (NeedExecCopyReservedReg ||
1515*0fca6ea1SDimitry Andric       (ReservedRegForExecCopy &&
1516*0fca6ea1SDimitry Andric        MRI.isPhysRegUsed(ReservedRegForExecCopy, /*SkipRegMaskTest=*/true))) {
1517*0fca6ea1SDimitry Andric     MRI.reserveReg(ReservedRegForExecCopy, TRI);
15185f757f3fSDimitry Andric     Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
151906c3fb27SDimitry Andric     if (UnusedScratchReg) {
152006c3fb27SDimitry Andric       // If found any unused scratch SGPR, reserve the register itself for Exec
152106c3fb27SDimitry Andric       // copy and there is no need for any spills in that case.
152206c3fb27SDimitry Andric       MFI->setSGPRForEXECCopy(UnusedScratchReg);
1523*0fca6ea1SDimitry Andric       MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
15245f757f3fSDimitry Andric       LiveUnits.addReg(UnusedScratchReg);
152506c3fb27SDimitry Andric     } else {
152606c3fb27SDimitry Andric       // Needs spill.
1527*0fca6ea1SDimitry Andric       assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) &&
152806c3fb27SDimitry Andric              "Re-reserving spill slot for EXEC copy register");
1529*0fca6ea1SDimitry Andric       getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC,
153006c3fb27SDimitry Andric                                      /*IncludeScratchCopy=*/false);
153106c3fb27SDimitry Andric     }
1532*0fca6ea1SDimitry Andric   } else if (ReservedRegForExecCopy) {
1533*0fca6ea1SDimitry Andric     // Reset it at this point. There are no whole-wave copies and spills
1534*0fca6ea1SDimitry Andric     // encountered.
1535*0fca6ea1SDimitry Andric     MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
153606c3fb27SDimitry Andric   }
153706c3fb27SDimitry Andric 
15380b57cec5SDimitry Andric   // hasFP only knows about stack objects that already exist. We're now
15390b57cec5SDimitry Andric   // determining the stack slots that will be created, so we have to predict
15400b57cec5SDimitry Andric   // them. Stack objects force FP usage with calls.
15410b57cec5SDimitry Andric   //
15420b57cec5SDimitry Andric   // Note a new VGPR CSR may be introduced if one is used for the spill, but we
15430b57cec5SDimitry Andric   // don't want to report it here.
15440b57cec5SDimitry Andric   //
15450b57cec5SDimitry Andric   // FIXME: Is this really hasReservedCallFrame?
15460b57cec5SDimitry Andric   const bool WillHaveFP =
15470b57cec5SDimitry Andric       FrameInfo.hasCalls() &&
15480b57cec5SDimitry Andric       (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
15490b57cec5SDimitry Andric 
15505ffd83dbSDimitry Andric   if (WillHaveFP || hasFP(MF)) {
1551bdd1243dSDimitry Andric     Register FramePtrReg = MFI->getFrameOffsetReg();
1552bdd1243dSDimitry Andric     assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
1553e8d8bef9SDimitry Andric            "Re-reserving spill slot for FP");
15545f757f3fSDimitry Andric     getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
15550b57cec5SDimitry Andric   }
15560b57cec5SDimitry Andric 
15575ffd83dbSDimitry Andric   if (TRI->hasBasePointer(MF)) {
1558bdd1243dSDimitry Andric     Register BasePtrReg = TRI->getBaseRegister();
1559bdd1243dSDimitry Andric     assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
1560bdd1243dSDimitry Andric            "Re-reserving spill slot for BP");
15615f757f3fSDimitry Andric     getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
1562bdd1243dSDimitry Andric   }
1563bdd1243dSDimitry Andric }
1564e8d8bef9SDimitry Andric 
1565bdd1243dSDimitry Andric // Only report VGPRs to generic code.
1566bdd1243dSDimitry Andric void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
1567bdd1243dSDimitry Andric                                            BitVector &SavedVGPRs,
1568bdd1243dSDimitry Andric                                            RegScavenger *RS) const {
1569bdd1243dSDimitry Andric   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
15705f757f3fSDimitry Andric 
15715f757f3fSDimitry Andric   // If this is a function with the amdgpu_cs_chain[_preserve] calling
15725f757f3fSDimitry Andric   // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
15735f757f3fSDimitry Andric   // we don't need to save and restore anything.
15745f757f3fSDimitry Andric   if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
15755f757f3fSDimitry Andric     return;
15765f757f3fSDimitry Andric 
15777a6dacacSDimitry Andric   MFI->shiftSpillPhysVGPRsToLowestRange(MF);
15787a6dacacSDimitry Andric 
15795f757f3fSDimitry Andric   TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
1580bdd1243dSDimitry Andric   if (MFI->isEntryFunction())
1581bdd1243dSDimitry Andric     return;
1582bdd1243dSDimitry Andric 
1583bdd1243dSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1584bdd1243dSDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
158506c3fb27SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
158606c3fb27SDimitry Andric   bool NeedExecCopyReservedReg = false;
1587bdd1243dSDimitry Andric 
158806c3fb27SDimitry Andric   MachineInstr *ReturnMI = nullptr;
1589bdd1243dSDimitry Andric   for (MachineBasicBlock &MBB : MF) {
1590bdd1243dSDimitry Andric     for (MachineInstr &MI : MBB) {
1591bdd1243dSDimitry Andric       // WRITELANE instructions used for SGPR spills can overwrite the inactive
1592bdd1243dSDimitry Andric       // lanes of VGPRs and callee must spill and restore them even if they are
1593bdd1243dSDimitry Andric       // marked Caller-saved.
1594bdd1243dSDimitry Andric 
1595bdd1243dSDimitry Andric       // TODO: Handle this elsewhere at an early point. Walking through all MBBs
1596bdd1243dSDimitry Andric       // here would be a bad heuristic. A better way should be by calling
1597bdd1243dSDimitry Andric       // allocateWWMSpill during the regalloc pipeline whenever a physical
15985f757f3fSDimitry Andric       // register is allocated for the intended virtual registers.
15995f757f3fSDimitry Andric       if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
1600bdd1243dSDimitry Andric         MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
16015f757f3fSDimitry Andric       else if (MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
1602bdd1243dSDimitry Andric         MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
160306c3fb27SDimitry Andric       else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
160406c3fb27SDimitry Andric         NeedExecCopyReservedReg = true;
160506c3fb27SDimitry Andric       else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
16065f757f3fSDimitry Andric                MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
16075f757f3fSDimitry Andric                (MFI->isChainFunction() &&
16085f757f3fSDimitry Andric                 TII->isChainCallOpcode(MI.getOpcode()))) {
160906c3fb27SDimitry Andric         // We expect all return to be the same size.
161006c3fb27SDimitry Andric         assert(!ReturnMI ||
161106c3fb27SDimitry Andric                (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
161206c3fb27SDimitry Andric                 count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
161306c3fb27SDimitry Andric         ReturnMI = &MI;
161406c3fb27SDimitry Andric       }
161506c3fb27SDimitry Andric     }
161606c3fb27SDimitry Andric   }
161706c3fb27SDimitry Andric 
161806c3fb27SDimitry Andric   // Remove any VGPRs used in the return value because these do not need to be saved.
161906c3fb27SDimitry Andric   // This prevents CSR restore from clobbering return VGPRs.
162006c3fb27SDimitry Andric   if (ReturnMI) {
162106c3fb27SDimitry Andric     for (auto &Op : ReturnMI->operands()) {
162206c3fb27SDimitry Andric       if (Op.isReg())
162306c3fb27SDimitry Andric         SavedVGPRs.reset(Op.getReg());
1624bdd1243dSDimitry Andric     }
1625bdd1243dSDimitry Andric   }
1626bdd1243dSDimitry Andric 
1627bdd1243dSDimitry Andric   // Ignore the SGPRs the default implementation found.
1628bdd1243dSDimitry Andric   SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1629bdd1243dSDimitry Andric 
1630bdd1243dSDimitry Andric   // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
1631bdd1243dSDimitry Andric   // In gfx908 there was do AGPR loads and stores and thus spilling also
1632bdd1243dSDimitry Andric   // require a temporary VGPR.
1633bdd1243dSDimitry Andric   if (!ST.hasGFX90AInsts())
1634bdd1243dSDimitry Andric     SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1635bdd1243dSDimitry Andric 
163606c3fb27SDimitry Andric   determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);
1637bdd1243dSDimitry Andric 
1638bdd1243dSDimitry Andric   // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
1639bdd1243dSDimitry Andric   // allow the default insertion to handle them.
1640bdd1243dSDimitry Andric   for (auto &Reg : MFI->getWWMSpills())
1641bdd1243dSDimitry Andric     SavedVGPRs.reset(Reg.first);
1642bdd1243dSDimitry Andric 
1643bdd1243dSDimitry Andric   // Mark all lane VGPRs as BB LiveIns.
1644bdd1243dSDimitry Andric   for (MachineBasicBlock &MBB : MF) {
1645bdd1243dSDimitry Andric     for (auto &Reg : MFI->getWWMSpills())
1646bdd1243dSDimitry Andric       MBB.addLiveIn(Reg.first);
1647bdd1243dSDimitry Andric 
1648bdd1243dSDimitry Andric     MBB.sortUniqueLiveIns();
16490b57cec5SDimitry Andric   }
16500b57cec5SDimitry Andric }
16510b57cec5SDimitry Andric 
16520b57cec5SDimitry Andric void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
16530b57cec5SDimitry Andric                                                BitVector &SavedRegs,
16540b57cec5SDimitry Andric                                                RegScavenger *RS) const {
16550b57cec5SDimitry Andric   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
16560b57cec5SDimitry Andric   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
16570b57cec5SDimitry Andric   if (MFI->isEntryFunction())
16580b57cec5SDimitry Andric     return;
16590b57cec5SDimitry Andric 
16600b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
16610b57cec5SDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
16620b57cec5SDimitry Andric 
16630b57cec5SDimitry Andric   // The SP is specifically managed and we don't want extra spills of it.
16640b57cec5SDimitry Andric   SavedRegs.reset(MFI->getStackPtrOffsetReg());
1665e8d8bef9SDimitry Andric 
1666e8d8bef9SDimitry Andric   const BitVector AllSavedRegs = SavedRegs;
1667fe6060f1SDimitry Andric   SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1668e8d8bef9SDimitry Andric 
1669349cc55cSDimitry Andric   // We have to anticipate introducing CSR VGPR spills or spill of caller
1670349cc55cSDimitry Andric   // save VGPR reserved for SGPR spills as we now always create stack entry
167104eeddc0SDimitry Andric   // for it, if we don't have any stack objects already, since we require a FP
167204eeddc0SDimitry Andric   // if there is a call and stack. We will allocate a VGPR for SGPR spills if
167304eeddc0SDimitry Andric   // there are any SGPR spills. Whether they are CSR spills or otherwise.
1674e8d8bef9SDimitry Andric   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1675349cc55cSDimitry Andric   const bool WillHaveFP =
167604eeddc0SDimitry Andric       FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
1677e8d8bef9SDimitry Andric 
1678e8d8bef9SDimitry Andric   // FP will be specially managed like SP.
1679e8d8bef9SDimitry Andric   if (WillHaveFP || hasFP(MF))
1680e8d8bef9SDimitry Andric     SavedRegs.reset(MFI->getFrameOffsetReg());
168181ad6265SDimitry Andric 
168281ad6265SDimitry Andric   // Return address use with return instruction is hidden through the SI_RETURN
168381ad6265SDimitry Andric   // pseudo. Given that and since the IPRA computes actual register usage and
168481ad6265SDimitry Andric   // does not use CSR list, the clobbering of return address by function calls
168581ad6265SDimitry Andric   // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's register
168681ad6265SDimitry Andric   // usage collection. This will ensure save/restore of return address happens
168781ad6265SDimitry Andric   // in those scenarios.
168881ad6265SDimitry Andric   const MachineRegisterInfo &MRI = MF.getRegInfo();
168981ad6265SDimitry Andric   Register RetAddrReg = TRI->getReturnAddressReg(MF);
169081ad6265SDimitry Andric   if (!MFI->isEntryFunction() &&
169181ad6265SDimitry Andric       (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
169281ad6265SDimitry Andric     SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
169381ad6265SDimitry Andric     SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
169481ad6265SDimitry Andric   }
16950b57cec5SDimitry Andric }
16960b57cec5SDimitry Andric 
16970b57cec5SDimitry Andric bool SIFrameLowering::assignCalleeSavedSpillSlots(
16980b57cec5SDimitry Andric     MachineFunction &MF, const TargetRegisterInfo *TRI,
16990b57cec5SDimitry Andric     std::vector<CalleeSavedInfo> &CSI) const {
17000b57cec5SDimitry Andric   if (CSI.empty())
17010b57cec5SDimitry Andric     return true; // Early exit if no callee saved registers are modified!
17020b57cec5SDimitry Andric 
17030b57cec5SDimitry Andric   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
17045ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
17055ffd83dbSDimitry Andric   const SIRegisterInfo *RI = ST.getRegisterInfo();
17065ffd83dbSDimitry Andric   Register FramePtrReg = FuncInfo->getFrameOffsetReg();
17075ffd83dbSDimitry Andric   Register BasePtrReg = RI->getBaseRegister();
1708bdd1243dSDimitry Andric   Register SGPRForFPSaveRestoreCopy =
1709bdd1243dSDimitry Andric       FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1710bdd1243dSDimitry Andric   Register SGPRForBPSaveRestoreCopy =
1711bdd1243dSDimitry Andric       FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
1712bdd1243dSDimitry Andric   if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1713bdd1243dSDimitry Andric     return false;
1714bdd1243dSDimitry Andric 
17155ffd83dbSDimitry Andric   unsigned NumModifiedRegs = 0;
17165ffd83dbSDimitry Andric 
1717bdd1243dSDimitry Andric   if (SGPRForFPSaveRestoreCopy)
17185ffd83dbSDimitry Andric     NumModifiedRegs++;
1719bdd1243dSDimitry Andric   if (SGPRForBPSaveRestoreCopy)
17205ffd83dbSDimitry Andric     NumModifiedRegs++;
17215ffd83dbSDimitry Andric 
17220b57cec5SDimitry Andric   for (auto &CS : CSI) {
1723bdd1243dSDimitry Andric     if (CS.getReg() == FramePtrReg && SGPRForFPSaveRestoreCopy) {
1724bdd1243dSDimitry Andric       CS.setDstReg(SGPRForFPSaveRestoreCopy);
17255ffd83dbSDimitry Andric       if (--NumModifiedRegs)
17265ffd83dbSDimitry Andric         break;
1727bdd1243dSDimitry Andric     } else if (CS.getReg() == BasePtrReg && SGPRForBPSaveRestoreCopy) {
1728bdd1243dSDimitry Andric       CS.setDstReg(SGPRForBPSaveRestoreCopy);
17295ffd83dbSDimitry Andric       if (--NumModifiedRegs)
17300b57cec5SDimitry Andric         break;
17310b57cec5SDimitry Andric     }
17320b57cec5SDimitry Andric   }
17330b57cec5SDimitry Andric 
17340b57cec5SDimitry Andric   return false;
17350b57cec5SDimitry Andric }
17360b57cec5SDimitry Andric 
17374824e7fdSDimitry Andric bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
17384824e7fdSDimitry Andric   const MachineFunction &MF) const {
17394824e7fdSDimitry Andric 
17404824e7fdSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
17414824e7fdSDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
17425f757f3fSDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
17434824e7fdSDimitry Andric   uint64_t EstStackSize = MFI.estimateStackSize(MF);
17444824e7fdSDimitry Andric   uint64_t MaxOffset = EstStackSize - 1;
17454824e7fdSDimitry Andric 
17464824e7fdSDimitry Andric   // We need the emergency stack slots to be allocated in range of the
17474824e7fdSDimitry Andric   // MUBUF/flat scratch immediate offset from the base register, so assign these
17484824e7fdSDimitry Andric   // first at the incoming SP position.
17494824e7fdSDimitry Andric   //
17504824e7fdSDimitry Andric   // TODO: We could try sorting the objects to find a hole in the first bytes
17514824e7fdSDimitry Andric   // rather than allocating as close to possible. This could save a lot of space
17524824e7fdSDimitry Andric   // on frames with alignment requirements.
17534824e7fdSDimitry Andric   if (ST.enableFlatScratch()) {
17544824e7fdSDimitry Andric     if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
17554824e7fdSDimitry Andric                                SIInstrFlags::FlatScratch))
17564824e7fdSDimitry Andric       return false;
17574824e7fdSDimitry Andric   } else {
17585f757f3fSDimitry Andric     if (TII->isLegalMUBUFImmOffset(MaxOffset))
17594824e7fdSDimitry Andric       return false;
17604824e7fdSDimitry Andric   }
17614824e7fdSDimitry Andric 
17624824e7fdSDimitry Andric   return true;
17634824e7fdSDimitry Andric }
17644824e7fdSDimitry Andric 
17650b57cec5SDimitry Andric MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
17660b57cec5SDimitry Andric   MachineFunction &MF,
17670b57cec5SDimitry Andric   MachineBasicBlock &MBB,
17680b57cec5SDimitry Andric   MachineBasicBlock::iterator I) const {
17690b57cec5SDimitry Andric   int64_t Amount = I->getOperand(0).getImm();
17700b57cec5SDimitry Andric   if (Amount == 0)
17710b57cec5SDimitry Andric     return MBB.erase(I);
17720b57cec5SDimitry Andric 
17730b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
17740b57cec5SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
17750b57cec5SDimitry Andric   const DebugLoc &DL = I->getDebugLoc();
17760b57cec5SDimitry Andric   unsigned Opc = I->getOpcode();
17770b57cec5SDimitry Andric   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
17780b57cec5SDimitry Andric   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
17790b57cec5SDimitry Andric 
17800b57cec5SDimitry Andric   if (!hasReservedCallFrame(MF)) {
17815ffd83dbSDimitry Andric     Amount = alignTo(Amount, getStackAlign());
17820b57cec5SDimitry Andric     assert(isUInt<32>(Amount) && "exceeded stack address space size");
17830b57cec5SDimitry Andric     const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
17845ffd83dbSDimitry Andric     Register SPReg = MFI->getStackPtrOffsetReg();
17850b57cec5SDimitry Andric 
1786fe6060f1SDimitry Andric     Amount *= getScratchScaleFactor(ST);
1787fe6060f1SDimitry Andric     if (IsDestroy)
1788fe6060f1SDimitry Andric       Amount = -Amount;
1789349cc55cSDimitry Andric     auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
17900b57cec5SDimitry Andric         .addReg(SPReg)
1791fe6060f1SDimitry Andric         .addImm(Amount);
1792349cc55cSDimitry Andric     Add->getOperand(3).setIsDead(); // Mark SCC as dead.
17930b57cec5SDimitry Andric   } else if (CalleePopAmount != 0) {
17940b57cec5SDimitry Andric     llvm_unreachable("is this used?");
17950b57cec5SDimitry Andric   }
17960b57cec5SDimitry Andric 
17970b57cec5SDimitry Andric   return MBB.erase(I);
17980b57cec5SDimitry Andric }
17990b57cec5SDimitry Andric 
1800e8d8bef9SDimitry Andric /// Returns true if the frame will require a reference to the stack pointer.
1801e8d8bef9SDimitry Andric ///
1802e8d8bef9SDimitry Andric /// This is the set of conditions common to setting up the stack pointer in a
1803e8d8bef9SDimitry Andric /// kernel, and for using a frame pointer in a callable function.
1804e8d8bef9SDimitry Andric ///
1805e8d8bef9SDimitry Andric /// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
1806e8d8bef9SDimitry Andric /// references SP.
1807e8d8bef9SDimitry Andric static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
1808e8d8bef9SDimitry Andric   return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
1809e8d8bef9SDimitry Andric }
1810e8d8bef9SDimitry Andric 
1811e8d8bef9SDimitry Andric // The FP for kernels is always known 0, so we never really need to setup an
1812e8d8bef9SDimitry Andric // explicit register for it. However, DisableFramePointerElim will force us to
1813e8d8bef9SDimitry Andric // use a register for it.
18140b57cec5SDimitry Andric bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
18150b57cec5SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
18165ffd83dbSDimitry Andric 
18175f757f3fSDimitry Andric   // For entry & chain functions we can use an immediate offset in most cases,
18185f757f3fSDimitry Andric   // so the presence of calls doesn't imply we need a distinct frame pointer.
18195ffd83dbSDimitry Andric   if (MFI.hasCalls() &&
18205f757f3fSDimitry Andric       !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
18215f757f3fSDimitry Andric       !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) {
18220b57cec5SDimitry Andric     // All offsets are unsigned, so need to be addressed in the same direction
18230b57cec5SDimitry Andric     // as stack growth.
18240b57cec5SDimitry Andric 
18250b57cec5SDimitry Andric     // FIXME: This function is pretty broken, since it can be called before the
18260b57cec5SDimitry Andric     // frame layout is determined or CSR spills are inserted.
18275ffd83dbSDimitry Andric     return MFI.getStackSize() != 0;
18280b57cec5SDimitry Andric   }
18290b57cec5SDimitry Andric 
1830e8d8bef9SDimitry Andric   return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
1831fe6060f1SDimitry Andric          MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
1832fe6060f1SDimitry Andric              MF) ||
18330b57cec5SDimitry Andric          MF.getTarget().Options.DisableFramePointerElim(MF);
18340b57cec5SDimitry Andric }
1835e8d8bef9SDimitry Andric 
1836e8d8bef9SDimitry Andric // This is essentially a reduced version of hasFP for entry functions. Since the
1837e8d8bef9SDimitry Andric // stack pointer is known 0 on entry to kernels, we never really need an FP
1838e8d8bef9SDimitry Andric // register. We may need to initialize the stack pointer depending on the frame
1839e8d8bef9SDimitry Andric // properties, which logically overlaps many of the cases where an ordinary
1840e8d8bef9SDimitry Andric // function would require an FP.
18415f757f3fSDimitry Andric // Also used for chain functions. While not technically entry functions, chain
18425f757f3fSDimitry Andric // functions may need to set up a stack pointer in some situations.
1843e8d8bef9SDimitry Andric bool SIFrameLowering::requiresStackPointerReference(
1844e8d8bef9SDimitry Andric     const MachineFunction &MF) const {
1845e8d8bef9SDimitry Andric   // Callable functions always require a stack pointer reference.
18465f757f3fSDimitry Andric   assert((MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() ||
18475f757f3fSDimitry Andric           MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) &&
18485f757f3fSDimitry Andric          "only expected to call this for entry points and chain functions");
1849e8d8bef9SDimitry Andric 
1850e8d8bef9SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
1851e8d8bef9SDimitry Andric 
1852e8d8bef9SDimitry Andric   // Entry points ordinarily don't need to initialize SP. We have to set it up
1853e8d8bef9SDimitry Andric   // for callees if there are any. Also note tail calls are impossible/don't
1854e8d8bef9SDimitry Andric   // make any sense for kernels.
1855e8d8bef9SDimitry Andric   if (MFI.hasCalls())
1856e8d8bef9SDimitry Andric     return true;
1857e8d8bef9SDimitry Andric 
1858e8d8bef9SDimitry Andric   // We still need to initialize the SP if we're doing anything weird that
1859e8d8bef9SDimitry Andric   // references the SP, like variable sized stack objects.
1860e8d8bef9SDimitry Andric   return frameTriviallyRequiresSP(MFI);
1861e8d8bef9SDimitry Andric }
1862