xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// SI implementation of the TargetRegisterInfo class.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
14e8d8bef9SDimitry Andric #include "AMDGPU.h"
150b57cec5SDimitry Andric #include "AMDGPURegisterBankInfo.h"
16e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
170b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUInstPrinter.h"
180b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19e8d8bef9SDimitry Andric #include "SIMachineFunctionInfo.h"
20bdd1243dSDimitry Andric #include "SIRegisterInfo.h"
210b57cec5SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
225f757f3fSDimitry Andric #include "llvm/CodeGen/LiveRegUnits.h"
230b57cec5SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
2481ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterScavenging.h"
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric using namespace llvm;
280b57cec5SDimitry Andric 
295ffd83dbSDimitry Andric #define GET_REGINFO_TARGET_DESC
305ffd83dbSDimitry Andric #include "AMDGPUGenRegisterInfo.inc"
310b57cec5SDimitry Andric 
320b57cec5SDimitry Andric static cl::opt<bool> EnableSpillSGPRToVGPR(
330b57cec5SDimitry Andric   "amdgpu-spill-sgpr-to-vgpr",
3406c3fb27SDimitry Andric   cl::desc("Enable spilling SGPRs to VGPRs"),
350b57cec5SDimitry Andric   cl::ReallyHidden,
360b57cec5SDimitry Andric   cl::init(true));
370b57cec5SDimitry Andric 
385ffd83dbSDimitry Andric std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
39e8d8bef9SDimitry Andric std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
40e8d8bef9SDimitry Andric 
41e8d8bef9SDimitry Andric // Map numbers of DWORDs to indexes in SubRegFromChannelTable.
42e8d8bef9SDimitry Andric // Valid indexes are shifted by 1, such that a 0 mapping means unsupported.
43e8d8bef9SDimitry Andric // e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
44e8d8bef9SDimitry Andric //      meaning index 7 in SubRegFromChannelTable.
45e8d8bef9SDimitry Andric static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
46e8d8bef9SDimitry Andric     0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
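// As a concrete example of how this table is consumed by getSubRegFromChannel
// below: a 2-DWORD (64-bit) request maps through
// SubRegFromChannelTableWidthMap[2] == 2 to row 1 of SubRegFromChannelTable,
// so getSubRegFromChannel(/*Channel=*/2, /*NumRegs=*/2) returns
// AMDGPU::sub2_sub3.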
470b57cec5SDimitry Andric 
48fe6060f1SDimitry Andric namespace llvm {
49fe6060f1SDimitry Andric 
50fe6060f1SDimitry Andric // A temporary struct to spill SGPRs.
51fe6060f1SDimitry Andric // It is mostly needed for spilling SGPRs to memory; spilling SGPRs into VGPR
52fe6060f1SDimitry Andric // lanes only emits v_writelane and v_readlane.
53fe6060f1SDimitry Andric //
54fe6060f1SDimitry Andric // When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR
55fe6060f1SDimitry Andric // is saved to scratch (or the other way around for loads).
56fe6060f1SDimitry Andric // For this, a VGPR is required where the needed lanes can be clobbered. The
57fe6060f1SDimitry Andric // RegScavenger can provide a VGPR where currently active lanes can be
58fe6060f1SDimitry Andric // clobbered, but we still need to save inactive lanes.
59fe6060f1SDimitry Andric // The high-level steps are:
60fe6060f1SDimitry Andric // - Try to scavenge SGPR(s) to save exec
61fe6060f1SDimitry Andric // - Try to scavenge VGPR
62fe6060f1SDimitry Andric // - Save needed, all or inactive lanes of a TmpVGPR
63fe6060f1SDimitry Andric // - Spill/Restore SGPRs using TmpVGPR
64fe6060f1SDimitry Andric // - Restore TmpVGPR
65fe6060f1SDimitry Andric //
66fe6060f1SDimitry Andric // To save all lanes of TmpVGPR, exec needs to be saved and modified. If we
67fe6060f1SDimitry Andric // cannot scavenge temporary SGPRs to save exec, we use the following code:
68fe6060f1SDimitry Andric // buffer_store_dword TmpVGPR ; only if active lanes need to be saved
69fe6060f1SDimitry Andric // s_not exec, exec
70fe6060f1SDimitry Andric // buffer_store_dword TmpVGPR ; save inactive lanes
71fe6060f1SDimitry Andric // s_not exec, exec
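//
// If SGPRs can be scavenged, a single-SGPR spill to memory instead looks
// roughly like this (an illustrative sketch; register numbers are arbitrary):
// s_mov_b64 s[6:7], exec     ; save exec
// s_mov_b64 exec, 1          ; enable only the lane(s) that will be clobbered
// buffer_store_dword v0      ; save the clobbered lanes of TmpVGPR
// v_writelane_b32 v0, s5, 0  ; move the SGPR into a VGPR lane
// buffer_store_dword v0      ; write it out to the SGPR spill slot
// buffer_load_dword v0       ; restore the clobbered lanes of TmpVGPR
// s_mov_b64 exec, s[6:7]     ; restore exec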
72fe6060f1SDimitry Andric struct SGPRSpillBuilder {
73fe6060f1SDimitry Andric   struct PerVGPRData {
74fe6060f1SDimitry Andric     unsigned PerVGPR;
75fe6060f1SDimitry Andric     unsigned NumVGPRs;
76fe6060f1SDimitry Andric     int64_t VGPRLanes;
77fe6060f1SDimitry Andric   };
78fe6060f1SDimitry Andric 
79fe6060f1SDimitry Andric   // The SGPR to save
80fe6060f1SDimitry Andric   Register SuperReg;
81fe6060f1SDimitry Andric   MachineBasicBlock::iterator MI;
82fe6060f1SDimitry Andric   ArrayRef<int16_t> SplitParts;
83fe6060f1SDimitry Andric   unsigned NumSubRegs;
84fe6060f1SDimitry Andric   bool IsKill;
85fe6060f1SDimitry Andric   const DebugLoc &DL;
86fe6060f1SDimitry Andric 
87fe6060f1SDimitry Andric   /* When spilling to stack */
88fe6060f1SDimitry Andric   // The SGPRs are written into this VGPR, which is then written to scratch
89fe6060f1SDimitry Andric   // (or vice versa for loads).
90fe6060f1SDimitry Andric   Register TmpVGPR = AMDGPU::NoRegister;
91fe6060f1SDimitry Andric   // Temporary spill slot to save TmpVGPR to.
92fe6060f1SDimitry Andric   int TmpVGPRIndex = 0;
93fe6060f1SDimitry Andric   // True if TmpVGPR is live before the spill; false if it was scavenged dead.
94fe6060f1SDimitry Andric   bool TmpVGPRLive = false;
95fe6060f1SDimitry Andric   // Scavenged SGPR to save EXEC.
96fe6060f1SDimitry Andric   Register SavedExecReg = AMDGPU::NoRegister;
97fe6060f1SDimitry Andric   // Stack index to write the SGPRs to.
98fe6060f1SDimitry Andric   int Index;
99fe6060f1SDimitry Andric   unsigned EltSize = 4;
100fe6060f1SDimitry Andric 
101fe6060f1SDimitry Andric   RegScavenger *RS;
102349cc55cSDimitry Andric   MachineBasicBlock *MBB;
103fe6060f1SDimitry Andric   MachineFunction &MF;
104fe6060f1SDimitry Andric   SIMachineFunctionInfo &MFI;
105fe6060f1SDimitry Andric   const SIInstrInfo &TII;
106fe6060f1SDimitry Andric   const SIRegisterInfo &TRI;
107fe6060f1SDimitry Andric   bool IsWave32;
108fe6060f1SDimitry Andric   Register ExecReg;
109fe6060f1SDimitry Andric   unsigned MovOpc;
110fe6060f1SDimitry Andric   unsigned NotOpc;
111fe6060f1SDimitry Andric 
112fe6060f1SDimitry Andric   SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
113fe6060f1SDimitry Andric                    bool IsWave32, MachineBasicBlock::iterator MI, int Index,
114fe6060f1SDimitry Andric                    RegScavenger *RS)
115349cc55cSDimitry Andric       : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(),
116349cc55cSDimitry Andric                          MI->getOperand(0).isKill(), Index, RS) {}
117349cc55cSDimitry Andric 
118349cc55cSDimitry Andric   SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
119349cc55cSDimitry Andric                    bool IsWave32, MachineBasicBlock::iterator MI, Register Reg,
120349cc55cSDimitry Andric                    bool IsKill, int Index, RegScavenger *RS)
121349cc55cSDimitry Andric       : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc()),
122349cc55cSDimitry Andric         Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
123fe6060f1SDimitry Andric         MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
124fe6060f1SDimitry Andric         IsWave32(IsWave32) {
125bdd1243dSDimitry Andric     const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
126fe6060f1SDimitry Andric     SplitParts = TRI.getRegSplitParts(RC, EltSize);
127fe6060f1SDimitry Andric     NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
128fe6060f1SDimitry Andric 
129fe6060f1SDimitry Andric     if (IsWave32) {
130fe6060f1SDimitry Andric       ExecReg = AMDGPU::EXEC_LO;
131fe6060f1SDimitry Andric       MovOpc = AMDGPU::S_MOV_B32;
132fe6060f1SDimitry Andric       NotOpc = AMDGPU::S_NOT_B32;
133fe6060f1SDimitry Andric     } else {
134fe6060f1SDimitry Andric       ExecReg = AMDGPU::EXEC;
135fe6060f1SDimitry Andric       MovOpc = AMDGPU::S_MOV_B64;
136fe6060f1SDimitry Andric       NotOpc = AMDGPU::S_NOT_B64;
137fe6060f1SDimitry Andric     }
138fe6060f1SDimitry Andric 
139fe6060f1SDimitry Andric     assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
140fe6060f1SDimitry Andric     assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
141fe6060f1SDimitry Andric            SuperReg != AMDGPU::EXEC && "exec should never spill");
142fe6060f1SDimitry Andric   }
143fe6060f1SDimitry Andric 
144fe6060f1SDimitry Andric   PerVGPRData getPerVGPRData() {
145fe6060f1SDimitry Andric     PerVGPRData Data;
146fe6060f1SDimitry Andric     Data.PerVGPR = IsWave32 ? 32 : 64;
147fe6060f1SDimitry Andric     Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR;
148fe6060f1SDimitry Andric     Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL;
149fe6060f1SDimitry Andric     return Data;
150fe6060f1SDimitry Andric   }
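  // For example, spilling a 128-bit SGPR tuple (NumSubRegs == 4) in wave32
  // gives PerVGPR == 32, NumVGPRs == 1 and VGPRLanes == 0xf, i.e. only the low
  // four lanes of one temporary VGPR are needed.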
151fe6060f1SDimitry Andric 
152fe6060f1SDimitry Andric   // Tries to scavenge SGPRs to save EXEC, and a VGPR to spill through. Uses v0
153fe6060f1SDimitry Andric   // if no VGPR is free.
154fe6060f1SDimitry Andric   // Writes these instructions if an SGPR can be scavenged:
155fe6060f1SDimitry Andric   // s_mov_b64 s[6:7], exec   ; Save exec
156fe6060f1SDimitry Andric   // s_mov_b64 exec, 3        ; Wanted lanemask
157fe6060f1SDimitry Andric   // buffer_store_dword v1    ; Write scavenged VGPR to emergency slot
158fe6060f1SDimitry Andric   //
159fe6060f1SDimitry Andric   // Writes these instructions if no SGPR can be scavenged:
160fe6060f1SDimitry Andric   // buffer_store_dword v0    ; Only if no free VGPR was found
161fe6060f1SDimitry Andric   // s_not_b64 exec, exec
162fe6060f1SDimitry Andric   // buffer_store_dword v0    ; Save inactive lanes
163fe6060f1SDimitry Andric   //                          ; exec stays inverted, it is flipped back in
164fe6060f1SDimitry Andric   //                          ; restore.
165fe6060f1SDimitry Andric   void prepare() {
166fe6060f1SDimitry Andric     // Scavenged temporary VGPR to use. It must be scavenged once for any number
167fe6060f1SDimitry Andric     // of spilled subregs.
168fe6060f1SDimitry Andric     // FIXME: The liveness analysis is limited and does not tell if a register
169fe6060f1SDimitry Andric     // is in use in lanes that are currently inactive. We can never be sure if
170fe6060f1SDimitry Andric     // a register is actually in use in another lane, so we need to save all
171fe6060f1SDimitry Andric     // used lanes of the chosen VGPR.
172fe6060f1SDimitry Andric     assert(RS && "Cannot spill SGPR to memory without RegScavenger");
17306c3fb27SDimitry Andric     TmpVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false,
17406c3fb27SDimitry Andric                                             0, false);
175fe6060f1SDimitry Andric 
176fe6060f1SDimitry Andric     // Reserve temporary stack slot
177fe6060f1SDimitry Andric     TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI);
178fe6060f1SDimitry Andric     if (TmpVGPR) {
179fe6060f1SDimitry Andric       // Found a register that is dead in the currently active lanes; we only
180fe6060f1SDimitry Andric       // need to spill the inactive lanes.
181fe6060f1SDimitry Andric       TmpVGPRLive = false;
182fe6060f1SDimitry Andric     } else {
183fe6060f1SDimitry Andric       // Pick v0 because it doesn't make a difference.
184fe6060f1SDimitry Andric       TmpVGPR = AMDGPU::VGPR0;
185fe6060f1SDimitry Andric       TmpVGPRLive = true;
186fe6060f1SDimitry Andric     }
187fe6060f1SDimitry Andric 
18881ad6265SDimitry Andric     if (TmpVGPRLive) {
18981ad6265SDimitry Andric       // We need to inform the scavenger that this index is already in use until
19081ad6265SDimitry Andric       // we're done with the custom emergency spill.
19181ad6265SDimitry Andric       RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR);
19281ad6265SDimitry Andric     }
19381ad6265SDimitry Andric 
19481ad6265SDimitry Andric     // We may end up recursively calling the scavenger, and don't want to re-use
19581ad6265SDimitry Andric     // the same register.
19681ad6265SDimitry Andric     RS->setRegUsed(TmpVGPR);
19781ad6265SDimitry Andric 
198fe6060f1SDimitry Andric     // Try to scavenge SGPRs to save exec
199fe6060f1SDimitry Andric     assert(!SavedExecReg && "Exec is already saved, refuse to save again");
200fe6060f1SDimitry Andric     const TargetRegisterClass &RC =
201fe6060f1SDimitry Andric         IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
202fe6060f1SDimitry Andric     RS->setRegUsed(SuperReg);
20306c3fb27SDimitry Andric     SavedExecReg = RS->scavengeRegisterBackwards(RC, MI, false, 0, false);
204fe6060f1SDimitry Andric 
205fe6060f1SDimitry Andric     int64_t VGPRLanes = getPerVGPRData().VGPRLanes;
206fe6060f1SDimitry Andric 
207fe6060f1SDimitry Andric     if (SavedExecReg) {
208fe6060f1SDimitry Andric       RS->setRegUsed(SavedExecReg);
209fe6060f1SDimitry Andric       // Set exec to needed lanes
210349cc55cSDimitry Andric       BuildMI(*MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
211349cc55cSDimitry Andric       auto I =
212349cc55cSDimitry Andric           BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
213fe6060f1SDimitry Andric       if (!TmpVGPRLive)
214fe6060f1SDimitry Andric         I.addReg(TmpVGPR, RegState::ImplicitDefine);
215fe6060f1SDimitry Andric       // Spill needed lanes
216fe6060f1SDimitry Andric       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
217fe6060f1SDimitry Andric     } else {
21881ad6265SDimitry Andric       // Modifying and restoring exec clobbers SCC, which we would have to save
21981ad6265SDimitry Andric       // and restore. FIXME: We probably would need to reserve a register for
22081ad6265SDimitry Andric       // this.
22181ad6265SDimitry Andric       if (RS->isRegUsed(AMDGPU::SCC))
22281ad6265SDimitry Andric         MI->emitError("unhandled SGPR spill to memory");
22381ad6265SDimitry Andric 
224fe6060f1SDimitry Andric       // Spill active lanes
225fe6060f1SDimitry Andric       if (TmpVGPRLive)
226fe6060f1SDimitry Andric         TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
227fe6060f1SDimitry Andric                                     /*IsKill*/ false);
228fe6060f1SDimitry Andric       // Spill inactive lanes
229349cc55cSDimitry Andric       auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
230fe6060f1SDimitry Andric       if (!TmpVGPRLive)
231fe6060f1SDimitry Andric         I.addReg(TmpVGPR, RegState::ImplicitDefine);
232bdd1243dSDimitry Andric       I->getOperand(2).setIsDead(); // Mark SCC as dead.
233fe6060f1SDimitry Andric       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
234fe6060f1SDimitry Andric     }
235fe6060f1SDimitry Andric   }
236fe6060f1SDimitry Andric 
237fe6060f1SDimitry Andric   // Writes these instructions if an SGPR can be scavenged:
238fe6060f1SDimitry Andric   // buffer_load_dword v1     ; Reload scavenged VGPR from emergency slot
239fe6060f1SDimitry Andric   // s_waitcnt vmcnt(0)       ; If a free VGPR was found
240fe6060f1SDimitry Andric   // s_mov_b64 exec, s[6:7]   ; Restore exec
241fe6060f1SDimitry Andric   //
242fe6060f1SDimitry Andric   // Writes these instructions if no SGPR can be scavenged:
243fe6060f1SDimitry Andric   // buffer_load_dword v0     ; Restore inactive lanes
244fe6060f1SDimitry Andric   // s_waitcnt vmcnt(0)       ; If a free VGPR was found
245fe6060f1SDimitry Andric   // s_not_b64 exec, exec
246fe6060f1SDimitry Andric   // buffer_load_dword v0     ; Only if no free VGPR was found
247fe6060f1SDimitry Andric   void restore() {
248fe6060f1SDimitry Andric     if (SavedExecReg) {
249fe6060f1SDimitry Andric       // Restore used lanes
250fe6060f1SDimitry Andric       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
251fe6060f1SDimitry Andric                                   /*IsKill*/ false);
252fe6060f1SDimitry Andric       // Restore exec
253349cc55cSDimitry Andric       auto I = BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg)
254fe6060f1SDimitry Andric                    .addReg(SavedExecReg, RegState::Kill);
255fe6060f1SDimitry Andric       // Add an implicit use of the load so it is not dead.
256fe6060f1SDimitry Andric       // FIXME: This inserts an unnecessary waitcnt.
257fe6060f1SDimitry Andric       if (!TmpVGPRLive) {
258fe6060f1SDimitry Andric         I.addReg(TmpVGPR, RegState::ImplicitKill);
259fe6060f1SDimitry Andric       }
260fe6060f1SDimitry Andric     } else {
261fe6060f1SDimitry Andric       // Restore inactive lanes
262fe6060f1SDimitry Andric       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
263fe6060f1SDimitry Andric                                   /*IsKill*/ false);
264349cc55cSDimitry Andric       auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
2650eae32dcSDimitry Andric       if (!TmpVGPRLive)
266fe6060f1SDimitry Andric         I.addReg(TmpVGPR, RegState::ImplicitKill);
267bdd1243dSDimitry Andric       I->getOperand(2).setIsDead(); // Mark SCC as dead.
2680eae32dcSDimitry Andric 
269fe6060f1SDimitry Andric       // Restore active lanes
270fe6060f1SDimitry Andric       if (TmpVGPRLive)
271fe6060f1SDimitry Andric         TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
272fe6060f1SDimitry Andric     }
27381ad6265SDimitry Andric 
27481ad6265SDimitry Andric     // Inform the scavenger where we're releasing our custom scavenged register.
27581ad6265SDimitry Andric     if (TmpVGPRLive) {
27681ad6265SDimitry Andric       MachineBasicBlock::iterator RestorePt = std::prev(MI);
27781ad6265SDimitry Andric       RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR, &*RestorePt);
27881ad6265SDimitry Andric     }
279fe6060f1SDimitry Andric   }
280fe6060f1SDimitry Andric 
281fe6060f1SDimitry Andric   // Write TmpVGPR to memory or read TmpVGPR from memory.
282fe6060f1SDimitry Andric   // This uses a single buffer_load/store if exec is set to the needed mask, or
283fe6060f1SDimitry Andric   // otherwise the sequence:
284fe6060f1SDimitry Andric   // buffer_load
285fe6060f1SDimitry Andric   // s_not exec, exec
286fe6060f1SDimitry Andric   // buffer_load
287fe6060f1SDimitry Andric   // s_not exec, exec
288fe6060f1SDimitry Andric   void readWriteTmpVGPR(unsigned Offset, bool IsLoad) {
289fe6060f1SDimitry Andric     if (SavedExecReg) {
290fe6060f1SDimitry Andric       // Spill needed lanes
291fe6060f1SDimitry Andric       TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
292fe6060f1SDimitry Andric     } else {
29381ad6265SDimitry Andric       // Modifying and restoring exec clobbers SCC, which we would have to save
29481ad6265SDimitry Andric       // and restore. FIXME: We probably would need to reserve a register for
29581ad6265SDimitry Andric       // this.
29681ad6265SDimitry Andric       if (RS->isRegUsed(AMDGPU::SCC))
29781ad6265SDimitry Andric         MI->emitError("unhandled SGPR spill to memory");
29881ad6265SDimitry Andric 
299fe6060f1SDimitry Andric       // Spill active lanes
300fe6060f1SDimitry Andric       TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
301fe6060f1SDimitry Andric                                   /*IsKill*/ false);
302fe6060f1SDimitry Andric       // Spill inactive lanes
3030eae32dcSDimitry Andric       auto Not0 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
3040eae32dcSDimitry Andric       Not0->getOperand(2).setIsDead(); // Mark SCC as dead.
305fe6060f1SDimitry Andric       TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
3060eae32dcSDimitry Andric       auto Not1 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
3070eae32dcSDimitry Andric       Not1->getOperand(2).setIsDead(); // Mark SCC as dead.
308fe6060f1SDimitry Andric     }
309fe6060f1SDimitry Andric   }
310349cc55cSDimitry Andric 
311349cc55cSDimitry Andric   void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI) {
312349cc55cSDimitry Andric     assert(MBB->getParent() == &MF);
313349cc55cSDimitry Andric     MI = NewMI;
314349cc55cSDimitry Andric     MBB = NewMBB;
315349cc55cSDimitry Andric   }
316fe6060f1SDimitry Andric };
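
// A rough sketch of how this builder is typically driven when an SGPR has to
// go through memory (illustrative only; the actual users are the spillSGPR /
// restoreSGPR paths later in this file):
//   SGPRSpillBuilder SB(TRI, TII, IsWave32, MI, Index, RS);
//   SB.prepare();                            // scavenge TmpVGPR, save exec
//   // ... v_writelane the SGPR lanes into SB.TmpVGPR ...
//   SB.readWriteTmpVGPR(/*Offset=*/0, /*IsLoad=*/false);
//   SB.restore();                            // restore TmpVGPR and exec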
317fe6060f1SDimitry Andric 
318fe6060f1SDimitry Andric } // namespace llvm
319fe6060f1SDimitry Andric 
3205ffd83dbSDimitry Andric SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
321*0fca6ea1SDimitry Andric     : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour(),
322*0fca6ea1SDimitry Andric                             ST.getAMDGPUDwarfFlavour()),
323*0fca6ea1SDimitry Andric       ST(ST), SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {
3240b57cec5SDimitry Andric 
3255ffd83dbSDimitry Andric   assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
3265ffd83dbSDimitry Andric          getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
3275ffd83dbSDimitry Andric          (getSubRegIndexLaneMask(AMDGPU::lo16) |
3285ffd83dbSDimitry Andric           getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
3295ffd83dbSDimitry Andric            getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
3305ffd83dbSDimitry Andric          "getNumCoveredRegs() will not work with generated subreg masks!");
3310b57cec5SDimitry Andric 
3325ffd83dbSDimitry Andric   RegPressureIgnoredUnits.resize(getNumRegUnits());
33306c3fb27SDimitry Andric   RegPressureIgnoredUnits.set(*regunits(MCRegister::from(AMDGPU::M0)).begin());
334647cbc5dSDimitry Andric   for (auto Reg : AMDGPU::VGPR_16RegClass) {
335647cbc5dSDimitry Andric     if (AMDGPU::isHi(Reg, *this))
33606c3fb27SDimitry Andric       RegPressureIgnoredUnits.set(*regunits(Reg).begin());
337647cbc5dSDimitry Andric   }
3380b57cec5SDimitry Andric 
3395ffd83dbSDimitry Andric   // HACK: Until this is fully tablegen'd.
3405ffd83dbSDimitry Andric   static llvm::once_flag InitializeRegSplitPartsFlag;
3415ffd83dbSDimitry Andric 
3425ffd83dbSDimitry Andric   static auto InitializeRegSplitPartsOnce = [this]() {
3435ffd83dbSDimitry Andric     for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
3445ffd83dbSDimitry Andric       unsigned Size = getSubRegIdxSize(Idx);
3455ffd83dbSDimitry Andric       if (Size & 31)
3460b57cec5SDimitry Andric         continue;
3475ffd83dbSDimitry Andric       std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
3485ffd83dbSDimitry Andric       unsigned Pos = getSubRegIdxOffset(Idx);
3495ffd83dbSDimitry Andric       if (Pos % Size)
3500b57cec5SDimitry Andric         continue;
3515ffd83dbSDimitry Andric       Pos /= Size;
3525ffd83dbSDimitry Andric       if (Vec.empty()) {
3535ffd83dbSDimitry Andric         unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits.
3545ffd83dbSDimitry Andric         Vec.resize(MaxNumParts);
3555ffd83dbSDimitry Andric       }
3565ffd83dbSDimitry Andric       Vec[Pos] = Idx;
3575ffd83dbSDimitry Andric     }
3585ffd83dbSDimitry Andric   };
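
  // For example, the 64-bit subreg index sub2_sub3 has Size == 64 and offset
  // 64, so it lands in RegSplitParts[1][1]; getRegSplitParts() later returns
  // that row when a register class is split into 64-bit (EltSize == 8 bytes)
  // parts.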
3595ffd83dbSDimitry Andric 
360e8d8bef9SDimitry Andric   static llvm::once_flag InitializeSubRegFromChannelTableFlag;
361e8d8bef9SDimitry Andric 
362e8d8bef9SDimitry Andric   static auto InitializeSubRegFromChannelTableOnce = [this]() {
363e8d8bef9SDimitry Andric     for (auto &Row : SubRegFromChannelTable)
364e8d8bef9SDimitry Andric       Row.fill(AMDGPU::NoSubRegister);
36581ad6265SDimitry Andric     for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
366*0fca6ea1SDimitry Andric       unsigned Width = getSubRegIdxSize(Idx) / 32;
367*0fca6ea1SDimitry Andric       unsigned Offset = getSubRegIdxOffset(Idx) / 32;
368e8d8bef9SDimitry Andric       assert(Width < SubRegFromChannelTableWidthMap.size());
369e8d8bef9SDimitry Andric       Width = SubRegFromChannelTableWidthMap[Width];
370e8d8bef9SDimitry Andric       if (Width == 0)
371e8d8bef9SDimitry Andric         continue;
372e8d8bef9SDimitry Andric       unsigned TableIdx = Width - 1;
373e8d8bef9SDimitry Andric       assert(TableIdx < SubRegFromChannelTable.size());
374e8d8bef9SDimitry Andric       assert(Offset < SubRegFromChannelTable[TableIdx].size());
375e8d8bef9SDimitry Andric       SubRegFromChannelTable[TableIdx][Offset] = Idx;
376e8d8bef9SDimitry Andric     }
377e8d8bef9SDimitry Andric   };
3785ffd83dbSDimitry Andric 
3795ffd83dbSDimitry Andric   llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
380e8d8bef9SDimitry Andric   llvm::call_once(InitializeSubRegFromChannelTableFlag,
381e8d8bef9SDimitry Andric                   InitializeSubRegFromChannelTableOnce);
3825ffd83dbSDimitry Andric }
3835ffd83dbSDimitry Andric 
3845ffd83dbSDimitry Andric void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
3855ffd83dbSDimitry Andric                                            MCRegister Reg) const {
38606c3fb27SDimitry Andric   for (MCRegAliasIterator R(Reg, this, true); R.isValid(); ++R)
3875ffd83dbSDimitry Andric     Reserved.set(*R);
3885ffd83dbSDimitry Andric }
3895ffd83dbSDimitry Andric 
3905ffd83dbSDimitry Andric // Forced to be defined here by one of the generated .inc files.
3915ffd83dbSDimitry Andric const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
3925ffd83dbSDimitry Andric   const MachineFunction *MF) const {
3935ffd83dbSDimitry Andric   CallingConv::ID CC = MF->getFunction().getCallingConv();
3945ffd83dbSDimitry Andric   switch (CC) {
3955ffd83dbSDimitry Andric   case CallingConv::C:
3965ffd83dbSDimitry Andric   case CallingConv::Fast:
3975ffd83dbSDimitry Andric   case CallingConv::Cold:
39881ad6265SDimitry Andric     return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList
39981ad6265SDimitry Andric                                : CSR_AMDGPU_SaveList;
400349cc55cSDimitry Andric   case CallingConv::AMDGPU_Gfx:
40181ad6265SDimitry Andric     return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
402349cc55cSDimitry Andric                                : CSR_AMDGPU_SI_Gfx_SaveList;
4035f757f3fSDimitry Andric   case CallingConv::AMDGPU_CS_ChainPreserve:
4045f757f3fSDimitry Andric     return CSR_AMDGPU_CS_ChainPreserve_SaveList;
4055ffd83dbSDimitry Andric   default: {
4065ffd83dbSDimitry Andric     // Dummy to not crash RegisterClassInfo.
4075ffd83dbSDimitry Andric     static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
4085ffd83dbSDimitry Andric     return &NoCalleeSavedReg;
4095ffd83dbSDimitry Andric   }
4100b57cec5SDimitry Andric   }
4110b57cec5SDimitry Andric }
4120b57cec5SDimitry Andric 
4135ffd83dbSDimitry Andric const MCPhysReg *
4145ffd83dbSDimitry Andric SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
4155ffd83dbSDimitry Andric   return nullptr;
4160b57cec5SDimitry Andric }
4170b57cec5SDimitry Andric 
4185ffd83dbSDimitry Andric const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
4195ffd83dbSDimitry Andric                                                      CallingConv::ID CC) const {
4205ffd83dbSDimitry Andric   switch (CC) {
4215ffd83dbSDimitry Andric   case CallingConv::C:
4225ffd83dbSDimitry Andric   case CallingConv::Fast:
4235ffd83dbSDimitry Andric   case CallingConv::Cold:
42481ad6265SDimitry Andric     return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask
42581ad6265SDimitry Andric                                : CSR_AMDGPU_RegMask;
426349cc55cSDimitry Andric   case CallingConv::AMDGPU_Gfx:
42781ad6265SDimitry Andric     return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
428349cc55cSDimitry Andric                                : CSR_AMDGPU_SI_Gfx_RegMask;
4295f757f3fSDimitry Andric   case CallingConv::AMDGPU_CS_Chain:
4305f757f3fSDimitry Andric   case CallingConv::AMDGPU_CS_ChainPreserve:
4315f757f3fSDimitry Andric     // Calls to these functions never return, so we can pretend everything is
4325f757f3fSDimitry Andric     // preserved.
4335f757f3fSDimitry Andric     return AMDGPU_AllVGPRs_RegMask;
4345ffd83dbSDimitry Andric   default:
4355ffd83dbSDimitry Andric     return nullptr;
4365ffd83dbSDimitry Andric   }
4375ffd83dbSDimitry Andric }
4385ffd83dbSDimitry Andric 
439e8d8bef9SDimitry Andric const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
440e8d8bef9SDimitry Andric   return CSR_AMDGPU_NoRegs_RegMask;
441e8d8bef9SDimitry Andric }
442e8d8bef9SDimitry Andric 
4435f757f3fSDimitry Andric bool SIRegisterInfo::isChainScratchRegister(Register VGPR) {
4445f757f3fSDimitry Andric   return VGPR >= AMDGPU::VGPR0 && VGPR < AMDGPU::VGPR8;
4455f757f3fSDimitry Andric }
4465f757f3fSDimitry Andric 
4474824e7fdSDimitry Andric const TargetRegisterClass *
4484824e7fdSDimitry Andric SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
4494824e7fdSDimitry Andric                                           const MachineFunction &MF) const {
4504824e7fdSDimitry Andric   // FIXME: Should have a helper function like getEquivalentVGPRClass to get the
4514824e7fdSDimitry Andric   // equivalent AV class. If one were used, the verifier would crash after
4524824e7fdSDimitry Andric   // RegBankSelect in the GISel flow, because the aligned regclasses are not
4534824e7fdSDimitry Andric   // fully available until instruction selection.
45481ad6265SDimitry Andric   if (ST.hasMAIInsts() && (isVGPRClass(RC) || isAGPRClass(RC))) {
4554824e7fdSDimitry Andric     if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
4564824e7fdSDimitry Andric       return &AMDGPU::AV_32RegClass;
4574824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
4584824e7fdSDimitry Andric       return &AMDGPU::AV_64RegClass;
4594824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_64_Align2RegClass ||
4604824e7fdSDimitry Andric         RC == &AMDGPU::AReg_64_Align2RegClass)
4614824e7fdSDimitry Andric       return &AMDGPU::AV_64_Align2RegClass;
4624824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
4634824e7fdSDimitry Andric       return &AMDGPU::AV_96RegClass;
4644824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_96_Align2RegClass ||
4654824e7fdSDimitry Andric         RC == &AMDGPU::AReg_96_Align2RegClass)
4664824e7fdSDimitry Andric       return &AMDGPU::AV_96_Align2RegClass;
4674824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
4684824e7fdSDimitry Andric       return &AMDGPU::AV_128RegClass;
4694824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_128_Align2RegClass ||
4704824e7fdSDimitry Andric         RC == &AMDGPU::AReg_128_Align2RegClass)
4714824e7fdSDimitry Andric       return &AMDGPU::AV_128_Align2RegClass;
4724824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
4734824e7fdSDimitry Andric       return &AMDGPU::AV_160RegClass;
4744824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_160_Align2RegClass ||
4754824e7fdSDimitry Andric         RC == &AMDGPU::AReg_160_Align2RegClass)
4764824e7fdSDimitry Andric       return &AMDGPU::AV_160_Align2RegClass;
4774824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
4784824e7fdSDimitry Andric       return &AMDGPU::AV_192RegClass;
4794824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_192_Align2RegClass ||
4804824e7fdSDimitry Andric         RC == &AMDGPU::AReg_192_Align2RegClass)
4814824e7fdSDimitry Andric       return &AMDGPU::AV_192_Align2RegClass;
4824824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
4834824e7fdSDimitry Andric       return &AMDGPU::AV_256RegClass;
4844824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_256_Align2RegClass ||
4854824e7fdSDimitry Andric         RC == &AMDGPU::AReg_256_Align2RegClass)
4864824e7fdSDimitry Andric       return &AMDGPU::AV_256_Align2RegClass;
4874824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
4884824e7fdSDimitry Andric       return &AMDGPU::AV_512RegClass;
4894824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_512_Align2RegClass ||
4904824e7fdSDimitry Andric         RC == &AMDGPU::AReg_512_Align2RegClass)
4914824e7fdSDimitry Andric       return &AMDGPU::AV_512_Align2RegClass;
4924824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
4934824e7fdSDimitry Andric       return &AMDGPU::AV_1024RegClass;
4944824e7fdSDimitry Andric     if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
4954824e7fdSDimitry Andric         RC == &AMDGPU::AReg_1024_Align2RegClass)
4964824e7fdSDimitry Andric       return &AMDGPU::AV_1024_Align2RegClass;
4974824e7fdSDimitry Andric   }
4984824e7fdSDimitry Andric 
4994824e7fdSDimitry Andric   return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
5004824e7fdSDimitry Andric }
5014824e7fdSDimitry Andric 
5025ffd83dbSDimitry Andric Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
50381ad6265SDimitry Andric   const SIFrameLowering *TFI = ST.getFrameLowering();
5045ffd83dbSDimitry Andric   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
5055f757f3fSDimitry Andric   // During ISel lowering we always reserve the stack pointer in entry and chain
5065ffd83dbSDimitry Andric   // functions, but never actually want to reference it when accessing our own
5075ffd83dbSDimitry Andric   // frame. If we need a frame pointer we use it, but otherwise we can just use
5085ffd83dbSDimitry Andric   // an immediate "0" which we represent by returning NoRegister.
5095f757f3fSDimitry Andric   if (FuncInfo->isBottomOfStack()) {
5105ffd83dbSDimitry Andric     return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
5115ffd83dbSDimitry Andric   }
5125ffd83dbSDimitry Andric   return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
5135ffd83dbSDimitry Andric                         : FuncInfo->getStackPtrOffsetReg();
5145ffd83dbSDimitry Andric }
5155ffd83dbSDimitry Andric 
5165ffd83dbSDimitry Andric bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
5175ffd83dbSDimitry Andric   // When we need stack realignment, we can't reference off of the
5185ffd83dbSDimitry Andric   // stack pointer, so we reserve a base pointer.
5195ffd83dbSDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
520fe6060f1SDimitry Andric   return MFI.getNumFixedObjects() && shouldRealignStack(MF);
5215ffd83dbSDimitry Andric }
5225ffd83dbSDimitry Andric 
5235ffd83dbSDimitry Andric Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }
5245ffd83dbSDimitry Andric 
5255ffd83dbSDimitry Andric const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
52681ad6265SDimitry Andric   return AMDGPU_AllVGPRs_RegMask;
5275ffd83dbSDimitry Andric }
5285ffd83dbSDimitry Andric 
529fe6060f1SDimitry Andric const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const {
53081ad6265SDimitry Andric   return AMDGPU_AllAGPRs_RegMask;
531fe6060f1SDimitry Andric }
532fe6060f1SDimitry Andric 
533fe6060f1SDimitry Andric const uint32_t *SIRegisterInfo::getAllVectorRegMask() const {
53481ad6265SDimitry Andric   return AMDGPU_AllVectorRegs_RegMask;
535fe6060f1SDimitry Andric }
536fe6060f1SDimitry Andric 
5375ffd83dbSDimitry Andric const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
53881ad6265SDimitry Andric   return AMDGPU_AllAllocatableSRegs_RegMask;
5395ffd83dbSDimitry Andric }
5405ffd83dbSDimitry Andric 
5415ffd83dbSDimitry Andric unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
5425ffd83dbSDimitry Andric                                               unsigned NumRegs) {
543e8d8bef9SDimitry Andric   assert(NumRegs < SubRegFromChannelTableWidthMap.size());
544e8d8bef9SDimitry Andric   unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
545e8d8bef9SDimitry Andric   assert(NumRegIndex && "Not implemented");
546e8d8bef9SDimitry Andric   assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
547e8d8bef9SDimitry Andric   return SubRegFromChannelTable[NumRegIndex - 1][Channel];
5485ffd83dbSDimitry Andric }
5495ffd83dbSDimitry Andric 
55006c3fb27SDimitry Andric MCRegister
55106c3fb27SDimitry Andric SIRegisterInfo::getAlignedHighSGPRForRC(const MachineFunction &MF,
55206c3fb27SDimitry Andric                                         const unsigned Align,
55306c3fb27SDimitry Andric                                         const TargetRegisterClass *RC) const {
55406c3fb27SDimitry Andric   unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), Align) - Align;
55506c3fb27SDimitry Andric   MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
55606c3fb27SDimitry Andric   return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
55706c3fb27SDimitry Andric }
55806c3fb27SDimitry Andric 
5595ffd83dbSDimitry Andric MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
5600b57cec5SDimitry Andric   const MachineFunction &MF) const {
56106c3fb27SDimitry Andric   return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass);
5620b57cec5SDimitry Andric }
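// For example, if ST.getMaxNumSGPRs(MF) returns 102 for a function (an
// illustrative value; the real limit is subtarget- and occupancy-dependent),
// the base index is alignDown(102, 4) - 4 == 96, so the reserved private
// segment buffer ends up in s[96:99].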
5630b57cec5SDimitry Andric 
5640b57cec5SDimitry Andric BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
5650b57cec5SDimitry Andric   BitVector Reserved(getNumRegs());
5665ffd83dbSDimitry Andric   Reserved.set(AMDGPU::MODE);
5670b57cec5SDimitry Andric 
56881ad6265SDimitry Andric   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
56981ad6265SDimitry Andric 
57081ad6265SDimitry Andric   // Reserve special purpose registers.
57181ad6265SDimitry Andric   //
5720b57cec5SDimitry Andric   // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
5730b57cec5SDimitry Andric   // this seems likely to result in bugs, so I'm marking them as reserved.
5740b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::EXEC);
5750b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
5760b57cec5SDimitry Andric 
5770b57cec5SDimitry Andric   // M0 has to be reserved so that llvm accepts it as a live-in into a block.
5780b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::M0);
5790b57cec5SDimitry Andric 
5800b57cec5SDimitry Andric   // Reserve src_vccz, src_execz, src_scc.
5810b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
5820b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
5830b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
5840b57cec5SDimitry Andric 
585bdd1243dSDimitry Andric   // Reserve the memory aperture registers
5860b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
5870b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
5880b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
5890b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
5900b57cec5SDimitry Andric 
5910b57cec5SDimitry Andric   // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
5920b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
5930b57cec5SDimitry Andric 
5940b57cec5SDimitry Andric   // Reserve xnack_mask registers - support is not implemented in Codegen.
5950b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
5960b57cec5SDimitry Andric 
5970b57cec5SDimitry Andric   // Reserve lds_direct register - support is not implemented in Codegen.
5980b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
5990b57cec5SDimitry Andric 
6000b57cec5SDimitry Andric   // Reserve Trap Handler registers - support is not implemented in Codegen.
6010b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::TBA);
6020b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::TMA);
6030b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
6040b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
6050b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
6060b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
6070b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
6080b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
6090b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
6100b57cec5SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
6110b57cec5SDimitry Andric 
6120b57cec5SDimitry Andric   // Reserve null register - it shall never be allocated
61381ad6265SDimitry Andric   reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
6140b57cec5SDimitry Andric 
61581ad6265SDimitry Andric   // Reserve SGPRs.
61681ad6265SDimitry Andric   //
6170b57cec5SDimitry Andric   unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
6180b57cec5SDimitry Andric   unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
619*0fca6ea1SDimitry Andric   for (const TargetRegisterClass *RC : regclasses()) {
620*0fca6ea1SDimitry Andric     if (RC->isBaseClass() && isSGPRClass(RC)) {
621*0fca6ea1SDimitry Andric       unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
622*0fca6ea1SDimitry Andric       for (MCPhysReg Reg : *RC) {
623*0fca6ea1SDimitry Andric         unsigned Index = getHWRegIndex(Reg);
624*0fca6ea1SDimitry Andric         if (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs)
625*0fca6ea1SDimitry Andric           Reserved.set(Reg);
626*0fca6ea1SDimitry Andric       }
627*0fca6ea1SDimitry Andric     }
6280b57cec5SDimitry Andric   }
6290b57cec5SDimitry Andric 
63081ad6265SDimitry Andric   Register ScratchRSrcReg = MFI->getScratchRSrcReg();
63181ad6265SDimitry Andric   if (ScratchRSrcReg != AMDGPU::NoRegister) {
63281ad6265SDimitry Andric     // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
63381ad6265SDimitry Andric     // need to spill.
63481ad6265SDimitry Andric     // TODO: May need to reserve a VGPR if doing LDS spilling.
63581ad6265SDimitry Andric     reserveRegisterTuples(Reserved, ScratchRSrcReg);
63681ad6265SDimitry Andric   }
63781ad6265SDimitry Andric 
63806c3fb27SDimitry Andric   Register LongBranchReservedReg = MFI->getLongBranchReservedReg();
63906c3fb27SDimitry Andric   if (LongBranchReservedReg)
64006c3fb27SDimitry Andric     reserveRegisterTuples(Reserved, LongBranchReservedReg);
64106c3fb27SDimitry Andric 
64281ad6265SDimitry Andric   // We have to assume the SP is needed in case there are calls in the function,
64381ad6265SDimitry Andric   // which is detected after the function is lowered. If we aren't really going
64481ad6265SDimitry Andric   // to need SP, don't bother reserving it.
64581ad6265SDimitry Andric   MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
64681ad6265SDimitry Andric   if (StackPtrReg) {
64781ad6265SDimitry Andric     reserveRegisterTuples(Reserved, StackPtrReg);
64881ad6265SDimitry Andric     assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
64981ad6265SDimitry Andric   }
65081ad6265SDimitry Andric 
65181ad6265SDimitry Andric   MCRegister FrameReg = MFI->getFrameOffsetReg();
65281ad6265SDimitry Andric   if (FrameReg) {
65381ad6265SDimitry Andric     reserveRegisterTuples(Reserved, FrameReg);
65481ad6265SDimitry Andric     assert(!isSubRegister(ScratchRSrcReg, FrameReg));
65581ad6265SDimitry Andric   }
65681ad6265SDimitry Andric 
65781ad6265SDimitry Andric   if (hasBasePointer(MF)) {
65881ad6265SDimitry Andric     MCRegister BasePtrReg = getBaseRegister();
65981ad6265SDimitry Andric     reserveRegisterTuples(Reserved, BasePtrReg);
66081ad6265SDimitry Andric     assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
66181ad6265SDimitry Andric   }
66281ad6265SDimitry Andric 
66306c3fb27SDimitry Andric   // FIXME: Use same reserved register introduced in D149775
66406c3fb27SDimitry Andric   // SGPR used to preserve EXEC MASK around WWM spill/copy instructions.
66506c3fb27SDimitry Andric   Register ExecCopyReg = MFI->getSGPRForEXECCopy();
66606c3fb27SDimitry Andric   if (ExecCopyReg)
66706c3fb27SDimitry Andric     reserveRegisterTuples(Reserved, ExecCopyReg);
66806c3fb27SDimitry Andric 
66981ad6265SDimitry Andric   // Reserve VGPRs/AGPRs.
67081ad6265SDimitry Andric   //
6710b57cec5SDimitry Andric   unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
672349cc55cSDimitry Andric   unsigned MaxNumAGPRs = MaxNumVGPRs;
6730b57cec5SDimitry Andric   unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
674349cc55cSDimitry Andric 
67581ad6265SDimitry Andric   // On GFX90A, the numbers of VGPRs and AGPRs need not be equal. Theoretically,
67681ad6265SDimitry Andric   // a wave may have up to 512 total vector registers combining together both
67781ad6265SDimitry Andric   // VGPRs and AGPRs. Hence, in an entry function without calls and without
67881ad6265SDimitry Andric   // AGPRs used within it, it is possible to use the whole vector register
67981ad6265SDimitry Andric   // budget for VGPRs.
68081ad6265SDimitry Andric   //
68181ad6265SDimitry Andric   // TODO: it should be possible to estimate the maximum AGPR/VGPR pressure and
68281ad6265SDimitry Andric   //       split the register file accordingly.
68381ad6265SDimitry Andric   if (ST.hasGFX90AInsts()) {
684349cc55cSDimitry Andric     if (MFI->usesAGPRs(MF)) {
685349cc55cSDimitry Andric       MaxNumVGPRs /= 2;
686349cc55cSDimitry Andric       MaxNumAGPRs = MaxNumVGPRs;
687349cc55cSDimitry Andric     } else {
688349cc55cSDimitry Andric       if (MaxNumVGPRs > TotalNumVGPRs) {
689349cc55cSDimitry Andric         MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
690349cc55cSDimitry Andric         MaxNumVGPRs = TotalNumVGPRs;
691349cc55cSDimitry Andric       } else
692349cc55cSDimitry Andric         MaxNumAGPRs = 0;
693349cc55cSDimitry Andric     }
694349cc55cSDimitry Andric   }
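  // For example, with a combined budget of 128 vector registers per wave
  // (illustrative), a function that uses AGPRs ends up with 64 VGPRs and 64
  // AGPRs, while a function that does not use AGPRs keeps all 128 as VGPRs and
  // gets no AGPRs.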
695349cc55cSDimitry Andric 
696*0fca6ea1SDimitry Andric   for (const TargetRegisterClass *RC : regclasses()) {
697*0fca6ea1SDimitry Andric     if (RC->isBaseClass() && isVGPRClass(RC)) {
698*0fca6ea1SDimitry Andric       unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
699*0fca6ea1SDimitry Andric       for (MCPhysReg Reg : *RC) {
700*0fca6ea1SDimitry Andric         unsigned Index = getHWRegIndex(Reg);
701*0fca6ea1SDimitry Andric         if (Index + NumRegs > MaxNumVGPRs)
702*0fca6ea1SDimitry Andric           Reserved.set(Reg);
703*0fca6ea1SDimitry Andric       }
704*0fca6ea1SDimitry Andric     }
705349cc55cSDimitry Andric   }
706349cc55cSDimitry Andric 
70706c3fb27SDimitry Andric   // Reserve all the AGPRs if there are no instructions to use them.
708*0fca6ea1SDimitry Andric   if (!ST.hasMAIInsts())
709*0fca6ea1SDimitry Andric     MaxNumAGPRs = 0;
710*0fca6ea1SDimitry Andric   for (const TargetRegisterClass *RC : regclasses()) {
711*0fca6ea1SDimitry Andric     if (RC->isBaseClass() && isAGPRClass(RC)) {
712*0fca6ea1SDimitry Andric       unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
713*0fca6ea1SDimitry Andric       for (MCPhysReg Reg : *RC) {
714*0fca6ea1SDimitry Andric         unsigned Index = getHWRegIndex(Reg);
715*0fca6ea1SDimitry Andric         if (Index + NumRegs > MaxNumAGPRs)
716*0fca6ea1SDimitry Andric           Reserved.set(Reg);
717*0fca6ea1SDimitry Andric       }
718*0fca6ea1SDimitry Andric     }
71906c3fb27SDimitry Andric   }
7200b57cec5SDimitry Andric 
72181ad6265SDimitry Andric   // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
72281ad6265SDimitry Andric   // VGPR available at all times.
72381ad6265SDimitry Andric   if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
72481ad6265SDimitry Andric     reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
7255ffd83dbSDimitry Andric   }
7265ffd83dbSDimitry Andric 
727bdd1243dSDimitry Andric   for (Register Reg : MFI->getWWMReservedRegs())
7288bcb0991SDimitry Andric     reserveRegisterTuples(Reserved, Reg);
729fe6060f1SDimitry Andric 
7300b57cec5SDimitry Andric   // FIXME: Stop using reserved registers for this.
7310b57cec5SDimitry Andric   for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
7320b57cec5SDimitry Andric     reserveRegisterTuples(Reserved, Reg);
7330b57cec5SDimitry Andric 
7340b57cec5SDimitry Andric   for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
7350b57cec5SDimitry Andric     reserveRegisterTuples(Reserved, Reg);
7360b57cec5SDimitry Andric 
7370b57cec5SDimitry Andric   return Reserved;
7380b57cec5SDimitry Andric }
7390b57cec5SDimitry Andric 
74081ad6265SDimitry Andric bool SIRegisterInfo::isAsmClobberable(const MachineFunction &MF,
74181ad6265SDimitry Andric                                       MCRegister PhysReg) const {
74281ad6265SDimitry Andric   return !MF.getRegInfo().isReserved(PhysReg);
74381ad6265SDimitry Andric }
74481ad6265SDimitry Andric 
745fe6060f1SDimitry Andric bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
7460b57cec5SDimitry Andric   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
7475f757f3fSDimitry Andric   // In entry functions and chain functions, the base address is 0, so it can't
7485f757f3fSDimitry Andric   // possibly need any more alignment.
7490b57cec5SDimitry Andric 
7500b57cec5SDimitry Andric   // FIXME: Should be able to specify the entry frame alignment per calling
7510b57cec5SDimitry Andric   // convention instead.
7525f757f3fSDimitry Andric   if (Info->isBottomOfStack())
7530b57cec5SDimitry Andric     return false;
7540b57cec5SDimitry Andric 
755fe6060f1SDimitry Andric   return TargetRegisterInfo::shouldRealignStack(MF);
7560b57cec5SDimitry Andric }
7570b57cec5SDimitry Andric 
7580b57cec5SDimitry Andric bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
7590b57cec5SDimitry Andric   const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
7600b57cec5SDimitry Andric   if (Info->isEntryFunction()) {
7610b57cec5SDimitry Andric     const MachineFrameInfo &MFI = Fn.getFrameInfo();
7620b57cec5SDimitry Andric     return MFI.hasStackObjects() || MFI.hasCalls();
7630b57cec5SDimitry Andric   }
7640b57cec5SDimitry Andric 
7650b57cec5SDimitry Andric   // May need scavenger for dealing with callee saved registers.
7660b57cec5SDimitry Andric   return true;
7670b57cec5SDimitry Andric }
7680b57cec5SDimitry Andric 
7690b57cec5SDimitry Andric bool SIRegisterInfo::requiresFrameIndexScavenging(
7700b57cec5SDimitry Andric   const MachineFunction &MF) const {
7718bcb0991SDimitry Andric   // Do not use frame virtual registers. They used to be used for SGPRs, but
7728bcb0991SDimitry Andric   // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
7738bcb0991SDimitry Andric   // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
7748bcb0991SDimitry Andric   // spill.
7758bcb0991SDimitry Andric   return false;
7760b57cec5SDimitry Andric }
7770b57cec5SDimitry Andric 
7780b57cec5SDimitry Andric bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
7790b57cec5SDimitry Andric   const MachineFunction &MF) const {
7800b57cec5SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
7818bcb0991SDimitry Andric   return MFI.hasStackObjects();
7820b57cec5SDimitry Andric }
7830b57cec5SDimitry Andric 
7840b57cec5SDimitry Andric bool SIRegisterInfo::requiresVirtualBaseRegisters(
7850b57cec5SDimitry Andric   const MachineFunction &) const {
7860b57cec5SDimitry Andric   // There are no special dedicated stack or frame pointers.
7870b57cec5SDimitry Andric   return true;
7880b57cec5SDimitry Andric }
7890b57cec5SDimitry Andric 
790e8d8bef9SDimitry Andric int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
791e8d8bef9SDimitry Andric   assert(SIInstrInfo::isMUBUF(*MI) || SIInstrInfo::isFLATScratch(*MI));
7920b57cec5SDimitry Andric 
7930b57cec5SDimitry Andric   int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
7940b57cec5SDimitry Andric                                           AMDGPU::OpName::offset);
7950b57cec5SDimitry Andric   return MI->getOperand(OffIdx).getImm();
7960b57cec5SDimitry Andric }
7970b57cec5SDimitry Andric 
7980b57cec5SDimitry Andric int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
7990b57cec5SDimitry Andric                                                  int Idx) const {
800e8d8bef9SDimitry Andric   if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
8010b57cec5SDimitry Andric     return 0;
8020b57cec5SDimitry Andric 
803e8d8bef9SDimitry Andric   assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
804e8d8bef9SDimitry Andric                                             AMDGPU::OpName::vaddr) ||
805e8d8bef9SDimitry Andric          (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
806e8d8bef9SDimitry Andric                                             AMDGPU::OpName::saddr))) &&
8070b57cec5SDimitry Andric          "Should never see frame index on non-address operand");
8080b57cec5SDimitry Andric 
809e8d8bef9SDimitry Andric   return getScratchInstrOffset(MI);
8100b57cec5SDimitry Andric }
8110b57cec5SDimitry Andric 
8120b57cec5SDimitry Andric bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
813fe6060f1SDimitry Andric   if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
8140b57cec5SDimitry Andric     return false;
8150b57cec5SDimitry Andric 
816e8d8bef9SDimitry Andric   int64_t FullOffset = Offset + getScratchInstrOffset(MI);
8170b57cec5SDimitry Andric 
818e8d8bef9SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
8195f757f3fSDimitry Andric   if (SIInstrInfo::isMUBUF(*MI))
8205f757f3fSDimitry Andric     return !TII->isLegalMUBUFImmOffset(FullOffset);
8215f757f3fSDimitry Andric 
822fe6060f1SDimitry Andric   return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS,
823fe6060f1SDimitry Andric                                  SIInstrFlags::FlatScratch);
8240b57cec5SDimitry Andric }
8250b57cec5SDimitry Andric 
826e8d8bef9SDimitry Andric Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
8270b57cec5SDimitry Andric                                                       int FrameIdx,
8280b57cec5SDimitry Andric                                                       int64_t Offset) const {
8290b57cec5SDimitry Andric   MachineBasicBlock::iterator Ins = MBB->begin();
8300b57cec5SDimitry Andric   DebugLoc DL; // Defaults to "unknown"
8310b57cec5SDimitry Andric 
8320b57cec5SDimitry Andric   if (Ins != MBB->end())
8330b57cec5SDimitry Andric     DL = Ins->getDebugLoc();
8340b57cec5SDimitry Andric 
8350b57cec5SDimitry Andric   MachineFunction *MF = MBB->getParent();
8368bcb0991SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
837e8d8bef9SDimitry Andric   MachineRegisterInfo &MRI = MF->getRegInfo();
838e8d8bef9SDimitry Andric   unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
839e8d8bef9SDimitry Andric                                            : AMDGPU::V_MOV_B32_e32;
840e8d8bef9SDimitry Andric 
841e8d8bef9SDimitry Andric   Register BaseReg = MRI.createVirtualRegister(
842e8d8bef9SDimitry Andric       ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
843e8d8bef9SDimitry Andric                              : &AMDGPU::VGPR_32RegClass);
8440b57cec5SDimitry Andric 
8450b57cec5SDimitry Andric   if (Offset == 0) {
846e8d8bef9SDimitry Andric     BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg)
8470b57cec5SDimitry Andric       .addFrameIndex(FrameIdx);
848e8d8bef9SDimitry Andric     return BaseReg;
8490b57cec5SDimitry Andric   }
8500b57cec5SDimitry Andric 
8518bcb0991SDimitry Andric   Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8520b57cec5SDimitry Andric 
853e8d8bef9SDimitry Andric   Register FIReg = MRI.createVirtualRegister(
854e8d8bef9SDimitry Andric       ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
855e8d8bef9SDimitry Andric                              : &AMDGPU::VGPR_32RegClass);
8560b57cec5SDimitry Andric 
8570b57cec5SDimitry Andric   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
8580b57cec5SDimitry Andric     .addImm(Offset);
859e8d8bef9SDimitry Andric   BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg)
8600b57cec5SDimitry Andric     .addFrameIndex(FrameIdx);
8610b57cec5SDimitry Andric 
862e8d8bef9SDimitry Andric   if (ST.enableFlatScratch()) {
863fe6060f1SDimitry Andric     BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
864e8d8bef9SDimitry Andric         .addReg(OffsetReg, RegState::Kill)
865e8d8bef9SDimitry Andric         .addReg(FIReg);
866e8d8bef9SDimitry Andric     return BaseReg;
867e8d8bef9SDimitry Andric   }
868e8d8bef9SDimitry Andric 
8690b57cec5SDimitry Andric   TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
8700b57cec5SDimitry Andric     .addReg(OffsetReg, RegState::Kill)
8710b57cec5SDimitry Andric     .addReg(FIReg)
8720b57cec5SDimitry Andric     .addImm(0); // clamp bit
873e8d8bef9SDimitry Andric 
874e8d8bef9SDimitry Andric   return BaseReg;
8750b57cec5SDimitry Andric }
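
// For reference, the sequences built by materializeFrameBaseRegister look
// roughly like this (illustrative MIR, operands abridged):
//
//   ; Offset == 0:
//   %base = S_MOV_B32 %stack.N                 ; flat-scratch path
//   %base = V_MOV_B32_e32 %stack.N             ; MUBUF path
//
//   ; Offset != 0, flat scratch enabled:
//   %off  = S_MOV_B32 <Offset>
//   %fi   = S_MOV_B32 %stack.N
//   %base = S_ADD_I32 killed %off, %fi
//
//   ; Offset != 0, MUBUF:
//   %off  = S_MOV_B32 <Offset>
//   %fi   = V_MOV_B32_e32 %stack.N
//   %base = <no-carry VALU add, e.g. V_ADD_U32_e64> killed %off, %fi, 0 /*clamp*/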
8760b57cec5SDimitry Andric 
8775ffd83dbSDimitry Andric void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
8780b57cec5SDimitry Andric                                        int64_t Offset) const {
8798bcb0991SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
880e8d8bef9SDimitry Andric   bool IsFlat = TII->isFLATScratch(MI);
8810b57cec5SDimitry Andric 
8820b57cec5SDimitry Andric #ifndef NDEBUG
8830b57cec5SDimitry Andric   // FIXME: Is it possible to be storing a frame index to itself?
8840b57cec5SDimitry Andric   bool SeenFI = false;
8850b57cec5SDimitry Andric   for (const MachineOperand &MO: MI.operands()) {
8860b57cec5SDimitry Andric     if (MO.isFI()) {
8870b57cec5SDimitry Andric       if (SeenFI)
8880b57cec5SDimitry Andric         llvm_unreachable("should not see multiple frame indices");
8890b57cec5SDimitry Andric 
8900b57cec5SDimitry Andric       SeenFI = true;
8910b57cec5SDimitry Andric     }
8920b57cec5SDimitry Andric   }
8930b57cec5SDimitry Andric #endif
8940b57cec5SDimitry Andric 
895e8d8bef9SDimitry Andric   MachineOperand *FIOp =
896e8d8bef9SDimitry Andric       TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
897e8d8bef9SDimitry Andric                                       : AMDGPU::OpName::vaddr);
8980b57cec5SDimitry Andric 
8990b57cec5SDimitry Andric   MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
9000b57cec5SDimitry Andric   int64_t NewOffset = OffsetOp->getImm() + Offset;
901e8d8bef9SDimitry Andric 
902e8d8bef9SDimitry Andric   assert(FIOp && FIOp->isFI() && "frame index must be address operand");
903e8d8bef9SDimitry Andric   assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));
904e8d8bef9SDimitry Andric 
905e8d8bef9SDimitry Andric   if (IsFlat) {
906fe6060f1SDimitry Andric     assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
907fe6060f1SDimitry Andric                                   SIInstrFlags::FlatScratch) &&
908e8d8bef9SDimitry Andric            "offset should be legal");
909e8d8bef9SDimitry Andric     FIOp->ChangeToRegister(BaseReg, false);
910e8d8bef9SDimitry Andric     OffsetOp->setImm(NewOffset);
911e8d8bef9SDimitry Andric     return;
912e8d8bef9SDimitry Andric   }
913e8d8bef9SDimitry Andric 
914e8d8bef9SDimitry Andric #ifndef NDEBUG
915e8d8bef9SDimitry Andric   MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
916e8d8bef9SDimitry Andric   assert(SOffset->isImm() && SOffset->getImm() == 0);
917e8d8bef9SDimitry Andric #endif
918e8d8bef9SDimitry Andric 
9195f757f3fSDimitry Andric   assert(TII->isLegalMUBUFImmOffset(NewOffset) && "offset should be legal");
9200b57cec5SDimitry Andric 
9210b57cec5SDimitry Andric   FIOp->ChangeToRegister(BaseReg, false);
9220b57cec5SDimitry Andric   OffsetOp->setImm(NewOffset);
9230b57cec5SDimitry Andric }
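
// The net effect of resolveFrameIndex is to rewrite the address operand in
// place: the frame-index operand becomes the materialized base register and
// the folded displacement is added to the existing immediate, e.g.
// (illustrative, operand layout abridged):
//
//   before: SCRATCH_STORE_DWORD_SADDR %val, %stack.N, 16, ...
//   after:  SCRATCH_STORE_DWORD_SADDR %val, %base,    16 + Offset, ...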
9240b57cec5SDimitry Andric 
9250b57cec5SDimitry Andric bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
9265ffd83dbSDimitry Andric                                         Register BaseReg,
9270b57cec5SDimitry Andric                                         int64_t Offset) const {
928e8d8bef9SDimitry Andric   if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
9290b57cec5SDimitry Andric     return false;
9300b57cec5SDimitry Andric 
931e8d8bef9SDimitry Andric   int64_t NewOffset = Offset + getScratchInstrOffset(MI);
9320b57cec5SDimitry Andric 
933e8d8bef9SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
9345f757f3fSDimitry Andric   if (SIInstrInfo::isMUBUF(*MI))
9355f757f3fSDimitry Andric     return TII->isLegalMUBUFImmOffset(NewOffset);
9365f757f3fSDimitry Andric 
937fe6060f1SDimitry Andric   return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
938fe6060f1SDimitry Andric                                 SIInstrFlags::FlatScratch);
9390b57cec5SDimitry Andric }
9400b57cec5SDimitry Andric 
9410b57cec5SDimitry Andric const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
9420b57cec5SDimitry Andric   const MachineFunction &MF, unsigned Kind) const {
9430b57cec5SDimitry Andric   // This is inaccurate. It depends on the instruction and address space. The
9440b57cec5SDimitry Andric   // only place where we should hit this is for dealing with frame indexes /
9450b57cec5SDimitry Andric   // private accesses, so this is correct in that case.
9460b57cec5SDimitry Andric   return &AMDGPU::VGPR_32RegClass;
9470b57cec5SDimitry Andric }
9480b57cec5SDimitry Andric 
949349cc55cSDimitry Andric const TargetRegisterClass *
950349cc55cSDimitry Andric SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
951349cc55cSDimitry Andric   if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
952349cc55cSDimitry Andric     return getEquivalentVGPRClass(RC);
953bdd1243dSDimitry Andric   if (RC == &AMDGPU::SCC_CLASSRegClass)
954bdd1243dSDimitry Andric     return getWaveMaskRegClass();
955349cc55cSDimitry Andric 
956349cc55cSDimitry Andric   return RC;
957349cc55cSDimitry Andric }
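
// Examples of the mapping above: a cross-copy touching SCC is staged through
// the wave mask register class (SGPR pairs in wave64, single SGPRs in wave32),
// and AGPR classes on targets without the gfx90a instructions are staged
// through the equivalent VGPR class, since such targets cannot copy between
// AGPRs directly.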
958349cc55cSDimitry Andric 
9590b57cec5SDimitry Andric static unsigned getNumSubRegsForSpillOp(unsigned Op) {
9600b57cec5SDimitry Andric 
9610b57cec5SDimitry Andric   switch (Op) {
9620b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S1024_SAVE:
9630b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S1024_RESTORE:
9640b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V1024_SAVE:
9650b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V1024_RESTORE:
9660b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_A1024_SAVE:
9670b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_A1024_RESTORE:
9680eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV1024_SAVE:
9690eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV1024_RESTORE:
9700b57cec5SDimitry Andric     return 32;
9710b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S512_SAVE:
9720b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S512_RESTORE:
9730b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V512_SAVE:
9740b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V512_RESTORE:
9750b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_A512_SAVE:
9760b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_A512_RESTORE:
9770eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV512_SAVE:
9780eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV512_RESTORE:
9790b57cec5SDimitry Andric     return 16;
980bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S384_SAVE:
981bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S384_RESTORE:
982bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_V384_SAVE:
983bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_V384_RESTORE:
984bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_A384_SAVE:
985bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_A384_RESTORE:
986bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_AV384_SAVE:
987bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_AV384_RESTORE:
988bdd1243dSDimitry Andric     return 12;
989bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S352_SAVE:
990bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S352_RESTORE:
991bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_V352_SAVE:
992bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_V352_RESTORE:
993bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_A352_SAVE:
994bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_A352_RESTORE:
995bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_AV352_SAVE:
996bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_AV352_RESTORE:
997bdd1243dSDimitry Andric     return 11;
998bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S320_SAVE:
999bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S320_RESTORE:
1000bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_V320_SAVE:
1001bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_V320_RESTORE:
1002bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_A320_SAVE:
1003bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_A320_RESTORE:
1004bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_AV320_SAVE:
1005bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_AV320_RESTORE:
1006bdd1243dSDimitry Andric     return 10;
1007bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S288_SAVE:
1008bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S288_RESTORE:
1009bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_V288_SAVE:
1010bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_V288_RESTORE:
1011bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_A288_SAVE:
1012bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_A288_RESTORE:
1013bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_AV288_SAVE:
1014bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_AV288_RESTORE:
1015bdd1243dSDimitry Andric     return 9;
10160b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S256_SAVE:
10170b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S256_RESTORE:
10180b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V256_SAVE:
10190b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V256_RESTORE:
1020e8d8bef9SDimitry Andric   case AMDGPU::SI_SPILL_A256_SAVE:
1021e8d8bef9SDimitry Andric   case AMDGPU::SI_SPILL_A256_RESTORE:
10220eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV256_SAVE:
10230eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV256_RESTORE:
10240b57cec5SDimitry Andric     return 8;
1025fe6060f1SDimitry Andric   case AMDGPU::SI_SPILL_S224_SAVE:
1026fe6060f1SDimitry Andric   case AMDGPU::SI_SPILL_S224_RESTORE:
1027fe6060f1SDimitry Andric   case AMDGPU::SI_SPILL_V224_SAVE:
1028fe6060f1SDimitry Andric   case AMDGPU::SI_SPILL_V224_RESTORE:
1029fe6060f1SDimitry Andric   case AMDGPU::SI_SPILL_A224_SAVE:
1030fe6060f1SDimitry Andric   case AMDGPU::SI_SPILL_A224_RESTORE:
10310eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV224_SAVE:
10320eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV224_RESTORE:
1033fe6060f1SDimitry Andric     return 7;
10345ffd83dbSDimitry Andric   case AMDGPU::SI_SPILL_S192_SAVE:
10355ffd83dbSDimitry Andric   case AMDGPU::SI_SPILL_S192_RESTORE:
10365ffd83dbSDimitry Andric   case AMDGPU::SI_SPILL_V192_SAVE:
10375ffd83dbSDimitry Andric   case AMDGPU::SI_SPILL_V192_RESTORE:
1038e8d8bef9SDimitry Andric   case AMDGPU::SI_SPILL_A192_SAVE:
1039e8d8bef9SDimitry Andric   case AMDGPU::SI_SPILL_A192_RESTORE:
10400eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV192_SAVE:
10410eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV192_RESTORE:
10425ffd83dbSDimitry Andric     return 6;
10430b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S160_SAVE:
10440b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S160_RESTORE:
10450b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V160_SAVE:
10460b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V160_RESTORE:
1047e8d8bef9SDimitry Andric   case AMDGPU::SI_SPILL_A160_SAVE:
1048e8d8bef9SDimitry Andric   case AMDGPU::SI_SPILL_A160_RESTORE:
10490eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV160_SAVE:
10500eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV160_RESTORE:
10510b57cec5SDimitry Andric     return 5;
10520b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S128_SAVE:
10530b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S128_RESTORE:
10540b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V128_SAVE:
10550b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V128_RESTORE:
10560b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_A128_SAVE:
10570b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_A128_RESTORE:
10580eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV128_SAVE:
10590eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV128_RESTORE:
10600b57cec5SDimitry Andric     return 4;
10610b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S96_SAVE:
10620b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S96_RESTORE:
10630b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V96_SAVE:
10640b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V96_RESTORE:
1065e8d8bef9SDimitry Andric   case AMDGPU::SI_SPILL_A96_SAVE:
1066e8d8bef9SDimitry Andric   case AMDGPU::SI_SPILL_A96_RESTORE:
10670eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV96_SAVE:
10680eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV96_RESTORE:
10690b57cec5SDimitry Andric     return 3;
10700b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S64_SAVE:
10710b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S64_RESTORE:
10720b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V64_SAVE:
10730b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V64_RESTORE:
10740b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_A64_SAVE:
10750b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_A64_RESTORE:
10760eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV64_SAVE:
10770eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV64_RESTORE:
10780b57cec5SDimitry Andric     return 2;
10790b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S32_SAVE:
10800b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S32_RESTORE:
10810b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V32_SAVE:
10820b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_V32_RESTORE:
10830b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_A32_SAVE:
10840b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_A32_RESTORE:
10850eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV32_SAVE:
10860eae32dcSDimitry Andric   case AMDGPU::SI_SPILL_AV32_RESTORE:
108706c3fb27SDimitry Andric   case AMDGPU::SI_SPILL_WWM_V32_SAVE:
108806c3fb27SDimitry Andric   case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
10895f757f3fSDimitry Andric   case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
10905f757f3fSDimitry Andric   case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
10910b57cec5SDimitry Andric     return 1;
10920b57cec5SDimitry Andric   default: llvm_unreachable("Invalid spill opcode");
10930b57cec5SDimitry Andric   }
10940b57cec5SDimitry Andric }
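
// The switch above just recovers the spilled register's width in 32-bit lanes
// from the pseudo's name: an SI_SPILL_<Kind><Bits>_{SAVE,RESTORE} opcode covers
// <Bits> bits and therefore <Bits> / 32 sub-registers, e.g.
// SI_SPILL_V128_SAVE -> 4 and SI_SPILL_S1024_RESTORE -> 32.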
10950b57cec5SDimitry Andric 
10960b57cec5SDimitry Andric static int getOffsetMUBUFStore(unsigned Opc) {
10970b57cec5SDimitry Andric   switch (Opc) {
10980b57cec5SDimitry Andric   case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
10990b57cec5SDimitry Andric     return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
11000b57cec5SDimitry Andric   case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
11010b57cec5SDimitry Andric     return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
11020b57cec5SDimitry Andric   case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
11030b57cec5SDimitry Andric     return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
11040b57cec5SDimitry Andric   case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
11050b57cec5SDimitry Andric     return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
110681ad6265SDimitry Andric   case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
110781ad6265SDimitry Andric     return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
11080b57cec5SDimitry Andric   case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
11090b57cec5SDimitry Andric     return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
11100b57cec5SDimitry Andric   case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
11110b57cec5SDimitry Andric     return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
11120b57cec5SDimitry Andric   case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
11130b57cec5SDimitry Andric     return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
11140b57cec5SDimitry Andric   default:
11150b57cec5SDimitry Andric     return -1;
11160b57cec5SDimitry Andric   }
11170b57cec5SDimitry Andric }
11180b57cec5SDimitry Andric 
11190b57cec5SDimitry Andric static int getOffsetMUBUFLoad(unsigned Opc) {
11200b57cec5SDimitry Andric   switch (Opc) {
11210b57cec5SDimitry Andric   case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
11220b57cec5SDimitry Andric     return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
11230b57cec5SDimitry Andric   case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
11240b57cec5SDimitry Andric     return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
11250b57cec5SDimitry Andric   case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
11260b57cec5SDimitry Andric     return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
11270b57cec5SDimitry Andric   case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
11280b57cec5SDimitry Andric     return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
11290b57cec5SDimitry Andric   case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
11300b57cec5SDimitry Andric     return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
11310b57cec5SDimitry Andric   case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
11320b57cec5SDimitry Andric     return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
113381ad6265SDimitry Andric   case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
113481ad6265SDimitry Andric     return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
11350b57cec5SDimitry Andric   case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
11360b57cec5SDimitry Andric     return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
11370b57cec5SDimitry Andric   case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
11380b57cec5SDimitry Andric     return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
11390b57cec5SDimitry Andric   case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
11400b57cec5SDimitry Andric     return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
11410b57cec5SDimitry Andric   case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
11420b57cec5SDimitry Andric     return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
11430b57cec5SDimitry Andric   case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
11440b57cec5SDimitry Andric     return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
11450b57cec5SDimitry Andric   case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
11460b57cec5SDimitry Andric     return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
11470b57cec5SDimitry Andric   case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
11480b57cec5SDimitry Andric     return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
11490b57cec5SDimitry Andric   default:
11500b57cec5SDimitry Andric     return -1;
11510b57cec5SDimitry Andric   }
11520b57cec5SDimitry Andric }
11530b57cec5SDimitry Andric 
115481ad6265SDimitry Andric static int getOffenMUBUFStore(unsigned Opc) {
115581ad6265SDimitry Andric   switch (Opc) {
115681ad6265SDimitry Andric   case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
115781ad6265SDimitry Andric     return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
115881ad6265SDimitry Andric   case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
115981ad6265SDimitry Andric     return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
116081ad6265SDimitry Andric   case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
116181ad6265SDimitry Andric     return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
116281ad6265SDimitry Andric   case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
116381ad6265SDimitry Andric     return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
116481ad6265SDimitry Andric   case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
116581ad6265SDimitry Andric     return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
116681ad6265SDimitry Andric   case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
116781ad6265SDimitry Andric     return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
116881ad6265SDimitry Andric   case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
116981ad6265SDimitry Andric     return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
117081ad6265SDimitry Andric   case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
117181ad6265SDimitry Andric     return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
117281ad6265SDimitry Andric   default:
117381ad6265SDimitry Andric     return -1;
117481ad6265SDimitry Andric   }
117581ad6265SDimitry Andric }
117681ad6265SDimitry Andric 
117781ad6265SDimitry Andric static int getOffenMUBUFLoad(unsigned Opc) {
117881ad6265SDimitry Andric   switch (Opc) {
117981ad6265SDimitry Andric   case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
118081ad6265SDimitry Andric     return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
118181ad6265SDimitry Andric   case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
118281ad6265SDimitry Andric     return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
118381ad6265SDimitry Andric   case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
118481ad6265SDimitry Andric     return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
118581ad6265SDimitry Andric   case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
118681ad6265SDimitry Andric     return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
118781ad6265SDimitry Andric   case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
118881ad6265SDimitry Andric     return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
118981ad6265SDimitry Andric   case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
119081ad6265SDimitry Andric     return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
119181ad6265SDimitry Andric   case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
119281ad6265SDimitry Andric     return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
119381ad6265SDimitry Andric   case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
119481ad6265SDimitry Andric     return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
119581ad6265SDimitry Andric   case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
119681ad6265SDimitry Andric     return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
119781ad6265SDimitry Andric   case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
119881ad6265SDimitry Andric     return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
119981ad6265SDimitry Andric   case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
120081ad6265SDimitry Andric     return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
120181ad6265SDimitry Andric   case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
120281ad6265SDimitry Andric     return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
120381ad6265SDimitry Andric   case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
120481ad6265SDimitry Andric     return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
120581ad6265SDimitry Andric   case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
120681ad6265SDimitry Andric     return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
120781ad6265SDimitry Andric   default:
120881ad6265SDimitry Andric     return -1;
120981ad6265SDimitry Andric   }
121081ad6265SDimitry Andric }
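
// The four lookup tables above translate between the two MUBUF scratch
// addressing forms: *_OFFSET variants use only the SGPR soffset plus an
// immediate, while *_OFFEN variants additionally take a VGPR address.
// getOffsetMUBUF{Load,Store} fold an OFFEN access down to the OFFSET form when
// the VGPR address can be dropped; getOffenMUBUF{Load,Store} go the other way
// when a VGPR offset has to be materialized. Each returns -1 if the opcode has
// no counterpart.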
121181ad6265SDimitry Andric 
12128bcb0991SDimitry Andric static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
1213fe6060f1SDimitry Andric                                            MachineBasicBlock &MBB,
12148bcb0991SDimitry Andric                                            MachineBasicBlock::iterator MI,
1215fe6060f1SDimitry Andric                                            int Index, unsigned Lane,
1216fe6060f1SDimitry Andric                                            unsigned ValueReg, bool IsKill) {
1217fe6060f1SDimitry Andric   MachineFunction *MF = MBB.getParent();
12180b57cec5SDimitry Andric   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
12190b57cec5SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
12200b57cec5SDimitry Andric 
12210b57cec5SDimitry Andric   MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
12220b57cec5SDimitry Andric 
12230b57cec5SDimitry Andric   if (Reg == AMDGPU::NoRegister)
12240b57cec5SDimitry Andric     return MachineInstrBuilder();
12250b57cec5SDimitry Andric 
12260b57cec5SDimitry Andric   bool IsStore = MI->mayStore();
12270b57cec5SDimitry Andric   MachineRegisterInfo &MRI = MF->getRegInfo();
12280b57cec5SDimitry Andric   auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
12290b57cec5SDimitry Andric 
12300b57cec5SDimitry Andric   unsigned Dst = IsStore ? Reg : ValueReg;
12310b57cec5SDimitry Andric   unsigned Src = IsStore ? ValueReg : Reg;
12324824e7fdSDimitry Andric   bool IsVGPR = TRI->isVGPR(MRI, Reg);
12334824e7fdSDimitry Andric   DebugLoc DL = MI->getDebugLoc();
12344824e7fdSDimitry Andric   if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
12354824e7fdSDimitry Andric     // The spiller during regalloc may restore a spilled register to its
12364824e7fdSDimitry Andric     // superclass. This can result in AGPR spills being restored to VGPRs or
12374824e7fdSDimitry Andric     // the other way around, leaving src and dst with identical register
12384824e7fdSDimitry Andric     // classes at this point. A plain copy suffices in such cases.
12394824e7fdSDimitry Andric     auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst)
12404824e7fdSDimitry Andric                        .addReg(Src, getKillRegState(IsKill));
12414824e7fdSDimitry Andric     CopyMIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
12424824e7fdSDimitry Andric     return CopyMIB;
12434824e7fdSDimitry Andric   }
12444824e7fdSDimitry Andric   unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
1245e8d8bef9SDimitry Andric                                     : AMDGPU::V_ACCVGPR_READ_B32_e64;
12460b57cec5SDimitry Andric 
12474824e7fdSDimitry Andric   auto MIB = BuildMI(MBB, MI, DL, TII->get(Opc), Dst)
12480b57cec5SDimitry Andric                  .addReg(Src, getKillRegState(IsKill));
1249e8d8bef9SDimitry Andric   MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
1250e8d8bef9SDimitry Andric   return MIB;
12510b57cec5SDimitry Andric }
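
// Depending on the classes of Reg and ValueReg, spillVGPRtoAGPR emits one of
// the following (illustrative):
//
//   $agprN = V_ACCVGPR_WRITE_B32_e64 $vgprM   ; store: VGPR lane spilled to AGPR
//   $vgprM = V_ACCVGPR_READ_B32_e64  $agprN   ; reload: AGPR lane back to VGPR
//   $dst   = COPY $src                        ; both sides already in the same class
//
// and returns an empty MachineInstrBuilder when no AGPR lane was assigned for
// this frame index, letting the caller fall back to a real memory access.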
12520b57cec5SDimitry Andric 
12530b57cec5SDimitry Andric // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
12540b57cec5SDimitry Andric // need to handle the case where an SGPR may need to be spilled while spilling.
12558bcb0991SDimitry Andric static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
12560b57cec5SDimitry Andric                                       MachineFrameInfo &MFI,
12570b57cec5SDimitry Andric                                       MachineBasicBlock::iterator MI,
12580b57cec5SDimitry Andric                                       int Index,
12590b57cec5SDimitry Andric                                       int64_t Offset) {
12608bcb0991SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
12610b57cec5SDimitry Andric   MachineBasicBlock *MBB = MI->getParent();
12620b57cec5SDimitry Andric   const DebugLoc &DL = MI->getDebugLoc();
12630b57cec5SDimitry Andric   bool IsStore = MI->mayStore();
12640b57cec5SDimitry Andric 
12650b57cec5SDimitry Andric   unsigned Opc = MI->getOpcode();
12660b57cec5SDimitry Andric   int LoadStoreOp = IsStore ?
12670b57cec5SDimitry Andric     getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
12680b57cec5SDimitry Andric   if (LoadStoreOp == -1)
12690b57cec5SDimitry Andric     return false;
12700b57cec5SDimitry Andric 
12710b57cec5SDimitry Andric   const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
1272fe6060f1SDimitry Andric   if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
12730b57cec5SDimitry Andric     return true;
12740b57cec5SDimitry Andric 
12750b57cec5SDimitry Andric   MachineInstrBuilder NewMI =
12760b57cec5SDimitry Andric       BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
12770b57cec5SDimitry Andric           .add(*Reg)
12780b57cec5SDimitry Andric           .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
12790b57cec5SDimitry Andric           .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
12800b57cec5SDimitry Andric           .addImm(Offset)
1281fe6060f1SDimitry Andric           .addImm(0) // cpol
12828bcb0991SDimitry Andric           .addImm(0) // swz
12830b57cec5SDimitry Andric           .cloneMemRefs(*MI);
12840b57cec5SDimitry Andric 
12850b57cec5SDimitry Andric   const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
12860b57cec5SDimitry Andric                                                        AMDGPU::OpName::vdata_in);
12870b57cec5SDimitry Andric   if (VDataIn)
12880b57cec5SDimitry Andric     NewMI.add(*VDataIn);
12890b57cec5SDimitry Andric   return true;
12900b57cec5SDimitry Andric }
12910b57cec5SDimitry Andric 
1292e8d8bef9SDimitry Andric static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
1293e8d8bef9SDimitry Andric                                           unsigned LoadStoreOp,
1294e8d8bef9SDimitry Andric                                           unsigned EltSize) {
1295e8d8bef9SDimitry Andric   bool IsStore = TII->get(LoadStoreOp).mayStore();
1296bdd1243dSDimitry Andric   bool HasVAddr = AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::vaddr);
1297e8d8bef9SDimitry Andric   bool UseST =
1298bdd1243dSDimitry Andric       !HasVAddr && !AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::saddr);
1299e8d8bef9SDimitry Andric 
1300e8d8bef9SDimitry Andric   switch (EltSize) {
1301e8d8bef9SDimitry Andric   case 4:
1302e8d8bef9SDimitry Andric     LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1303e8d8bef9SDimitry Andric                           : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
1304e8d8bef9SDimitry Andric     break;
1305e8d8bef9SDimitry Andric   case 8:
1306e8d8bef9SDimitry Andric     LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1307e8d8bef9SDimitry Andric                           : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1308e8d8bef9SDimitry Andric     break;
1309e8d8bef9SDimitry Andric   case 12:
1310e8d8bef9SDimitry Andric     LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1311e8d8bef9SDimitry Andric                           : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1312e8d8bef9SDimitry Andric     break;
1313e8d8bef9SDimitry Andric   case 16:
1314e8d8bef9SDimitry Andric     LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1315e8d8bef9SDimitry Andric                           : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
1316e8d8bef9SDimitry Andric     break;
1317e8d8bef9SDimitry Andric   default:
1318e8d8bef9SDimitry Andric     llvm_unreachable("Unexpected spill load/store size!");
1319e8d8bef9SDimitry Andric   }
1320e8d8bef9SDimitry Andric 
132181ad6265SDimitry Andric   if (HasVAddr)
132281ad6265SDimitry Andric     LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
132381ad6265SDimitry Andric   else if (UseST)
1324e8d8bef9SDimitry Andric     LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1325e8d8bef9SDimitry Andric 
1326e8d8bef9SDimitry Andric   return LoadStoreOp;
1327e8d8bef9SDimitry Andric }
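
// Example (illustrative): for a 16-byte flat-scratch store with neither vaddr
// nor saddr, the switch first picks SCRATCH_STORE_DWORDX4_SADDR for
// EltSize == 16 and then rewrites it to the ST (no address operands) form via
// AMDGPU::getFlatScratchInstSTfromSS; if a vaddr were present it would instead
// be rewritten to the SV (VGPR address) form via getFlatScratchInstSVfromSS.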
1328e8d8bef9SDimitry Andric 
1329fe6060f1SDimitry Andric void SIRegisterInfo::buildSpillLoadStore(
1330349cc55cSDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
1331fe6060f1SDimitry Andric     unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
1332fe6060f1SDimitry Andric     MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
13335f757f3fSDimitry Andric     RegScavenger *RS, LiveRegUnits *LiveUnits) const {
13345f757f3fSDimitry Andric   assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");
1335fe6060f1SDimitry Andric 
1336fe6060f1SDimitry Andric   MachineFunction *MF = MBB.getParent();
13370b57cec5SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
13380b57cec5SDimitry Andric   const MachineFrameInfo &MFI = MF->getFrameInfo();
13395ffd83dbSDimitry Andric   const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
13400b57cec5SDimitry Andric 
1341e8d8bef9SDimitry Andric   const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
1342e8d8bef9SDimitry Andric   bool IsStore = Desc->mayStore();
1343e8d8bef9SDimitry Andric   bool IsFlat = TII->isFLATScratch(LoadStoreOp);
13440b57cec5SDimitry Andric 
134581ad6265SDimitry Andric   bool CanClobberSCC = false;
13460b57cec5SDimitry Andric   bool Scavenged = false;
13475ffd83dbSDimitry Andric   MCRegister SOffset = ScratchOffsetReg;
13480b57cec5SDimitry Andric 
13490b57cec5SDimitry Andric   const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
1350fe6060f1SDimitry Andric   // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
13514824e7fdSDimitry Andric   const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
135206c3fb27SDimitry Andric   const unsigned RegWidth = AMDGPU::getRegBitWidth(*RC) / 8;
1353e8d8bef9SDimitry Andric 
1354e8d8bef9SDimitry Andric   // Always use 4 byte operations for AGPRs because we need to scavenge
1355e8d8bef9SDimitry Andric   // a temporary VGPR.
1356e8d8bef9SDimitry Andric   unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
1357e8d8bef9SDimitry Andric   unsigned NumSubRegs = RegWidth / EltSize;
13580b57cec5SDimitry Andric   unsigned Size = NumSubRegs * EltSize;
1359e8d8bef9SDimitry Andric   unsigned RemSize = RegWidth - Size;
1360e8d8bef9SDimitry Andric   unsigned NumRemSubRegs = RemSize ? 1 : 0;
13610b57cec5SDimitry Andric   int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
136281ad6265SDimitry Andric   int64_t MaterializedOffset = Offset;
136381ad6265SDimitry Andric 
1364e8d8bef9SDimitry Andric   int64_t MaxOffset = Offset + Size + RemSize - EltSize;
13650b57cec5SDimitry Andric   int64_t ScratchOffsetRegDelta = 0;
13660b57cec5SDimitry Andric 
1367e8d8bef9SDimitry Andric   if (IsFlat && EltSize > 4) {
1368e8d8bef9SDimitry Andric     LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1369e8d8bef9SDimitry Andric     Desc = &TII->get(LoadStoreOp);
1370e8d8bef9SDimitry Andric   }
1371e8d8bef9SDimitry Andric 
13725ffd83dbSDimitry Andric   Align Alignment = MFI.getObjectAlign(Index);
13730b57cec5SDimitry Andric   const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
13740b57cec5SDimitry Andric 
1375e8d8bef9SDimitry Andric   assert((IsFlat || ((Offset % EltSize) == 0)) &&
1376e8d8bef9SDimitry Andric          "unexpected VGPR spill offset");
13770b57cec5SDimitry Andric 
137881ad6265SDimitry Andric   // Track a VGPR to use for a constant offset we need to materialize.
137981ad6265SDimitry Andric   Register TmpOffsetVGPR;
138081ad6265SDimitry Andric 
138181ad6265SDimitry Andric   // Track a VGPR to use as an intermediate value.
138281ad6265SDimitry Andric   Register TmpIntermediateVGPR;
138381ad6265SDimitry Andric   bool UseVGPROffset = false;
138481ad6265SDimitry Andric 
138581ad6265SDimitry Andric   // Materialize a VGPR offset required for the given SGPR/VGPR/Immediate
138681ad6265SDimitry Andric   // combination.
138781ad6265SDimitry Andric   auto MaterializeVOffset = [&](Register SGPRBase, Register TmpVGPR,
138881ad6265SDimitry Andric                                 int64_t VOffset) {
138981ad6265SDimitry Andric     // We are using a VGPR offset
139081ad6265SDimitry Andric     if (IsFlat && SGPRBase) {
139181ad6265SDimitry Andric       // We can use either a single VGPR offset or a single SGPR offset, and we
139281ad6265SDimitry Andric       // don't have a free SGPR here, so perform the add as a vector operation.
139381ad6265SDimitry Andric       // We don't need a base SGPR in the kernel.
139481ad6265SDimitry Andric 
139581ad6265SDimitry Andric       if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) {
139681ad6265SDimitry Andric         BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e64), TmpVGPR)
139781ad6265SDimitry Andric           .addReg(SGPRBase)
139881ad6265SDimitry Andric           .addImm(VOffset)
139981ad6265SDimitry Andric           .addImm(0); // clamp
140081ad6265SDimitry Andric       } else {
140181ad6265SDimitry Andric         BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
140281ad6265SDimitry Andric           .addReg(SGPRBase);
140381ad6265SDimitry Andric         BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32), TmpVGPR)
140481ad6265SDimitry Andric           .addImm(VOffset)
140581ad6265SDimitry Andric           .addReg(TmpOffsetVGPR);
140681ad6265SDimitry Andric       }
140781ad6265SDimitry Andric     } else {
140881ad6265SDimitry Andric       assert(TmpOffsetVGPR);
140981ad6265SDimitry Andric       BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
141081ad6265SDimitry Andric         .addImm(VOffset);
141181ad6265SDimitry Andric     }
141281ad6265SDimitry Andric   };
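
  // The lambda above produces, roughly, one of the following sequences
  // (illustrative):
  //   %tmp = V_ADD_U32_e64 %sgpr_base, <VOffset>, 0   ; constant bus allows both operands
  //   %tmp = V_MOV_B32_e32 %sgpr_base                 ; otherwise split into a move
  //   %tmp = V_ADD_U32_e32 <VOffset>, %tmp            ;   plus a VALU add
  //   %tmp = V_MOV_B32_e32 <VOffset>                  ; no SGPR base involved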
141381ad6265SDimitry Andric 
1414fe6060f1SDimitry Andric   bool IsOffsetLegal =
1415fe6060f1SDimitry Andric       IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1416fe6060f1SDimitry Andric                                       SIInstrFlags::FlatScratch)
14175f757f3fSDimitry Andric              : TII->isLegalMUBUFImmOffset(MaxOffset);
1418e8d8bef9SDimitry Andric   if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
14195ffd83dbSDimitry Andric     SOffset = MCRegister();
14200b57cec5SDimitry Andric 
14210b57cec5SDimitry Andric     // We don't have access to the register scavenger if this function is called
14225f757f3fSDimitry Andric     // during PEI::scavengeFrameVirtualRegs(), so use LiveUnits in this case.
142381ad6265SDimitry Andric     // TODO: Clobbering SCC is not necessary for scratch instructions in the
142481ad6265SDimitry Andric     // entry.
1425fe6060f1SDimitry Andric     if (RS) {
1426bdd1243dSDimitry Andric       SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false);
142781ad6265SDimitry Andric 
142881ad6265SDimitry Andric       // Piggy-back on the liveness scan we just did to see if SCC is dead.
142981ad6265SDimitry Andric       CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
14305f757f3fSDimitry Andric     } else if (LiveUnits) {
14315f757f3fSDimitry Andric       CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
1432fe6060f1SDimitry Andric       for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
14335f757f3fSDimitry Andric         if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
1434fe6060f1SDimitry Andric           SOffset = Reg;
1435fe6060f1SDimitry Andric           break;
1436fe6060f1SDimitry Andric         }
1437fe6060f1SDimitry Andric       }
1438fe6060f1SDimitry Andric     }
14390b57cec5SDimitry Andric 
144081ad6265SDimitry Andric     if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
144181ad6265SDimitry Andric       SOffset = Register();
144281ad6265SDimitry Andric 
14435ffd83dbSDimitry Andric     if (!SOffset) {
144481ad6265SDimitry Andric       UseVGPROffset = true;
144581ad6265SDimitry Andric 
144681ad6265SDimitry Andric       if (RS) {
1447bdd1243dSDimitry Andric         TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
144881ad6265SDimitry Andric       } else {
14495f757f3fSDimitry Andric         assert(LiveUnits);
145081ad6265SDimitry Andric         for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
14515f757f3fSDimitry Andric           if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
145281ad6265SDimitry Andric             TmpOffsetVGPR = Reg;
145381ad6265SDimitry Andric             break;
145481ad6265SDimitry Andric           }
145581ad6265SDimitry Andric         }
145681ad6265SDimitry Andric       }
145781ad6265SDimitry Andric 
145881ad6265SDimitry Andric       assert(TmpOffsetVGPR);
145981ad6265SDimitry Andric     } else if (!SOffset && CanClobberSCC) {
14600b57cec5SDimitry Andric       // There are no free SGPRs, and we are in the process of spilling
14610b57cec5SDimitry Andric       // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
14620b57cec5SDimitry Andric       // on SI/CI, and on VI it remains true until we implement spilling using
14630b57cec5SDimitry Andric       // scalar stores), we have no way to free up an SGPR. Our solution here
14645ffd83dbSDimitry Andric       // is to add the offset directly to the ScratchOffset or StackPtrOffset
14655ffd83dbSDimitry Andric       // register, and then subtract the offset after the spill to return the
14665ffd83dbSDimitry Andric       // register to its original value.
146781ad6265SDimitry Andric 
146881ad6265SDimitry Andric       // TODO: If we don't have to do an emergency stack slot spill, converting
146981ad6265SDimitry Andric       // to use the VGPR offset is fewer instructions.
14705ffd83dbSDimitry Andric       if (!ScratchOffsetReg)
14715ffd83dbSDimitry Andric         ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
14720b57cec5SDimitry Andric       SOffset = ScratchOffsetReg;
14730b57cec5SDimitry Andric       ScratchOffsetRegDelta = Offset;
14740b57cec5SDimitry Andric     } else {
14750b57cec5SDimitry Andric       Scavenged = true;
14760b57cec5SDimitry Andric     }
14770b57cec5SDimitry Andric 
147881ad6265SDimitry Andric     // We currently only support spilling VGPRs to EltSize boundaries, meaning
147981ad6265SDimitry Andric     // we can simplify the adjustment of Offset here to just scale with
148081ad6265SDimitry Andric     // WavefrontSize.
148181ad6265SDimitry Andric     if (!IsFlat && !UseVGPROffset)
148281ad6265SDimitry Andric       Offset *= ST.getWavefrontSize();
148381ad6265SDimitry Andric 
148481ad6265SDimitry Andric     if (!UseVGPROffset && !SOffset)
14855ffd83dbSDimitry Andric       report_fatal_error("could not scavenge SGPR to spill in entry function");
14865ffd83dbSDimitry Andric 
148781ad6265SDimitry Andric     if (UseVGPROffset) {
148881ad6265SDimitry Andric       // We are using a VGPR offset
148981ad6265SDimitry Andric       MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
149081ad6265SDimitry Andric     } else if (ScratchOffsetReg == AMDGPU::NoRegister) {
1491fe6060f1SDimitry Andric       BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
14925ffd83dbSDimitry Andric     } else {
149381ad6265SDimitry Andric       assert(Offset != 0);
14940eae32dcSDimitry Andric       auto Add = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
14950b57cec5SDimitry Andric           .addReg(ScratchOffsetReg)
14960b57cec5SDimitry Andric           .addImm(Offset);
14970eae32dcSDimitry Andric       Add->getOperand(3).setIsDead(); // Mark SCC as dead.
14985ffd83dbSDimitry Andric     }
14990b57cec5SDimitry Andric 
15000b57cec5SDimitry Andric     Offset = 0;
15010b57cec5SDimitry Andric   }
15020b57cec5SDimitry Andric 
1503e8d8bef9SDimitry Andric   if (IsFlat && SOffset == AMDGPU::NoRegister) {
1504e8d8bef9SDimitry Andric     assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
1505e8d8bef9SDimitry Andric            && "Unexpected vaddr for flat scratch with a FI operand");
1506e8d8bef9SDimitry Andric 
150781ad6265SDimitry Andric     if (UseVGPROffset) {
150881ad6265SDimitry Andric       LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
150981ad6265SDimitry Andric     } else {
1510e8d8bef9SDimitry Andric       assert(ST.hasFlatScratchSTMode());
1511e8d8bef9SDimitry Andric       LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1512e8d8bef9SDimitry Andric     }
1513e8d8bef9SDimitry Andric 
151481ad6265SDimitry Andric     Desc = &TII->get(LoadStoreOp);
151581ad6265SDimitry Andric   }
1516e8d8bef9SDimitry Andric 
1517e8d8bef9SDimitry Andric   for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
1518e8d8bef9SDimitry Andric        ++i, RegOffset += EltSize) {
1519e8d8bef9SDimitry Andric     if (i == NumSubRegs) {
1520e8d8bef9SDimitry Andric       EltSize = RemSize;
1521e8d8bef9SDimitry Andric       LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1522e8d8bef9SDimitry Andric     }
1523e8d8bef9SDimitry Andric     Desc = &TII->get(LoadStoreOp);
1524e8d8bef9SDimitry Andric 
152581ad6265SDimitry Andric     if (!IsFlat && UseVGPROffset) {
152681ad6265SDimitry Andric       int NewLoadStoreOp = IsStore ? getOffenMUBUFStore(LoadStoreOp)
152781ad6265SDimitry Andric                                    : getOffenMUBUFLoad(LoadStoreOp);
152881ad6265SDimitry Andric       Desc = &TII->get(NewLoadStoreOp);
152981ad6265SDimitry Andric     }
153081ad6265SDimitry Andric 
153181ad6265SDimitry Andric     if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
153281ad6265SDimitry Andric       // If we are spilling an AGPR beyond the range of the memory instruction
153381ad6265SDimitry Andric       // offset and need to use a VGPR offset, we ideally have at least 2
153481ad6265SDimitry Andric       // scratch VGPRs. If we don't have a second free VGPR without spilling,
153581ad6265SDimitry Andric       // recycle the VGPR used for the offset, which requires resetting it
153681ad6265SDimitry Andric       // after each subregister.
153781ad6265SDimitry Andric 
153881ad6265SDimitry Andric       MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
153981ad6265SDimitry Andric     }
154081ad6265SDimitry Andric 
1541e8d8bef9SDimitry Andric     unsigned NumRegs = EltSize / 4;
1542e8d8bef9SDimitry Andric     Register SubReg = e == 1
1543e8d8bef9SDimitry Andric             ? ValueReg
1544e8d8bef9SDimitry Andric             : Register(getSubReg(ValueReg,
1545e8d8bef9SDimitry Andric                                  getSubRegFromChannel(RegOffset / 4, NumRegs)));
15460b57cec5SDimitry Andric 
15470b57cec5SDimitry Andric     unsigned SOffsetRegState = 0;
15480b57cec5SDimitry Andric     unsigned SrcDstRegState = getDefRegState(!IsStore);
154981ad6265SDimitry Andric     const bool IsLastSubReg = i + 1 == e;
1550bdd1243dSDimitry Andric     const bool IsFirstSubReg = i == 0;
155181ad6265SDimitry Andric     if (IsLastSubReg) {
15520b57cec5SDimitry Andric       SOffsetRegState |= getKillRegState(Scavenged);
15530b57cec5SDimitry Andric       // The last implicit use carries the "Kill" flag.
15540b57cec5SDimitry Andric       SrcDstRegState |= getKillRegState(IsKill);
15550b57cec5SDimitry Andric     }
15560b57cec5SDimitry Andric 
1557e8d8bef9SDimitry Andric     // Make sure the whole register is defined if there are undef components by
1558e8d8bef9SDimitry Andric     // adding an implicit def of the super-reg on the first instruction.
1559bdd1243dSDimitry Andric     bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
1560e8d8bef9SDimitry Andric     bool NeedSuperRegImpOperand = e > 1;
15610b57cec5SDimitry Andric 
1562349cc55cSDimitry Andric     // Remaining element size to spill into memory after some parts of it
1563349cc55cSDimitry Andric     // spilled into either AGPRs or VGPRs.
1564349cc55cSDimitry Andric     unsigned RemEltSize = EltSize;
1565349cc55cSDimitry Andric 
1566349cc55cSDimitry Andric     // AGPRs used to spill VGPRs (and vice versa) are allocated in reverse
1567349cc55cSDimitry Andric     // order, starting from the last lane. If a register cannot be completely
1568349cc55cSDimitry Andric     // spilled into another register, this ensures its alignment does not
1569349cc55cSDimitry Andric     // change. For targets with a VGPR alignment requirement this is important
1570349cc55cSDimitry Andric     // when flat scratch is used, as we might otherwise get a scratch_load or
1571349cc55cSDimitry Andric     // scratch_store of an unaligned register.
1572349cc55cSDimitry Andric     for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
1573349cc55cSDimitry Andric              LaneE = RegOffset / 4;
1574349cc55cSDimitry Andric          Lane >= LaneE; --Lane) {
1575e8d8bef9SDimitry Andric       bool IsSubReg = e > 1 || EltSize > 4;
1576e8d8bef9SDimitry Andric       Register Sub = IsSubReg
1577e8d8bef9SDimitry Andric              ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
1578e8d8bef9SDimitry Andric              : ValueReg;
1579fe6060f1SDimitry Andric       auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
1580e8d8bef9SDimitry Andric       if (!MIB.getInstr())
1581e8d8bef9SDimitry Andric         break;
1582bdd1243dSDimitry Andric       if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
1583e8d8bef9SDimitry Andric         MIB.addReg(ValueReg, RegState::ImplicitDefine);
1584e8d8bef9SDimitry Andric         NeedSuperRegDef = false;
1585e8d8bef9SDimitry Andric       }
1586bdd1243dSDimitry Andric       if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
1587e8d8bef9SDimitry Andric         NeedSuperRegImpOperand = true;
1588e8d8bef9SDimitry Andric         unsigned State = SrcDstRegState;
1589bdd1243dSDimitry Andric         if (!IsLastSubReg || (Lane != LaneE))
1590e8d8bef9SDimitry Andric           State &= ~RegState::Kill;
1591bdd1243dSDimitry Andric         if (!IsFirstSubReg || (Lane != LaneS))
1592bdd1243dSDimitry Andric           State &= ~RegState::Define;
1593e8d8bef9SDimitry Andric         MIB.addReg(ValueReg, RegState::Implicit | State);
1594e8d8bef9SDimitry Andric       }
1595349cc55cSDimitry Andric       RemEltSize -= 4;
1596e8d8bef9SDimitry Andric     }
1597e8d8bef9SDimitry Andric 
1598349cc55cSDimitry Andric     if (!RemEltSize) // Fully spilled into AGPRs.
1599e8d8bef9SDimitry Andric       continue;
1600e8d8bef9SDimitry Andric 
1601e8d8bef9SDimitry Andric     if (RemEltSize != EltSize) { // Partially spilled to AGPRs
1602e8d8bef9SDimitry Andric       assert(IsFlat && EltSize > 4);
1603e8d8bef9SDimitry Andric 
1604e8d8bef9SDimitry Andric       unsigned NumRegs = RemEltSize / 4;
1605e8d8bef9SDimitry Andric       SubReg = Register(getSubReg(ValueReg,
1606349cc55cSDimitry Andric                         getSubRegFromChannel(RegOffset / 4, NumRegs)));
1607e8d8bef9SDimitry Andric       unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
1608e8d8bef9SDimitry Andric       Desc = &TII->get(Opc);
1609e8d8bef9SDimitry Andric     }
1610e8d8bef9SDimitry Andric 
16110b57cec5SDimitry Andric     unsigned FinalReg = SubReg;
1612e8d8bef9SDimitry Andric 
1613e8d8bef9SDimitry Andric     if (IsAGPR) {
1614e8d8bef9SDimitry Andric       assert(EltSize == 4);
1615e8d8bef9SDimitry Andric 
161681ad6265SDimitry Andric       if (!TmpIntermediateVGPR) {
161781ad6265SDimitry Andric         TmpIntermediateVGPR = FuncInfo->getVGPRForAGPRCopy();
161881ad6265SDimitry Andric         assert(MF->getRegInfo().isReserved(TmpIntermediateVGPR));
1619e8d8bef9SDimitry Andric       }
1620e8d8bef9SDimitry Andric       if (IsStore) {
1621fe6060f1SDimitry Andric         auto AccRead = BuildMI(MBB, MI, DL,
162281ad6265SDimitry Andric                                TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
162381ad6265SDimitry Andric                                TmpIntermediateVGPR)
16240b57cec5SDimitry Andric                            .addReg(SubReg, getKillRegState(IsKill));
1625e8d8bef9SDimitry Andric         if (NeedSuperRegDef)
1626e8d8bef9SDimitry Andric           AccRead.addReg(ValueReg, RegState::ImplicitDefine);
1627e8d8bef9SDimitry Andric         AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
1628e8d8bef9SDimitry Andric       }
162981ad6265SDimitry Andric       SubReg = TmpIntermediateVGPR;
163081ad6265SDimitry Andric     } else if (UseVGPROffset) {
163181ad6265SDimitry Andric       if (!TmpOffsetVGPR) {
163206c3fb27SDimitry Andric         TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
163306c3fb27SDimitry Andric                                                       MI, false, 0);
163481ad6265SDimitry Andric         RS->setRegUsed(TmpOffsetVGPR);
163581ad6265SDimitry Andric       }
16360b57cec5SDimitry Andric     }
16370b57cec5SDimitry Andric 
1638349cc55cSDimitry Andric     MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset);
16395ffd83dbSDimitry Andric     MachineMemOperand *NewMMO =
1640e8d8bef9SDimitry Andric         MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
1641349cc55cSDimitry Andric                                  commonAlignment(Alignment, RegOffset));
16420b57cec5SDimitry Andric 
1643fe6060f1SDimitry Andric     auto MIB =
1644fe6060f1SDimitry Andric         BuildMI(MBB, MI, DL, *Desc)
1645fe6060f1SDimitry Andric             .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
164681ad6265SDimitry Andric 
164781ad6265SDimitry Andric     if (UseVGPROffset) {
164881ad6265SDimitry Andric       // For an AGPR spill, we reuse the same temp VGPR for the offset and the
164981ad6265SDimitry Andric       // intermediate accvgpr_write.
165081ad6265SDimitry Andric       MIB.addReg(TmpOffsetVGPR, getKillRegState(IsLastSubReg && !IsAGPR));
165181ad6265SDimitry Andric     }
165281ad6265SDimitry Andric 
1653e8d8bef9SDimitry Andric     if (!IsFlat)
1654e8d8bef9SDimitry Andric       MIB.addReg(FuncInfo->getScratchRSrcReg());
1655e8d8bef9SDimitry Andric 
16565ffd83dbSDimitry Andric     if (SOffset == AMDGPU::NoRegister) {
165781ad6265SDimitry Andric       if (!IsFlat) {
165881ad6265SDimitry Andric         if (UseVGPROffset && ScratchOffsetReg) {
165981ad6265SDimitry Andric           MIB.addReg(ScratchOffsetReg);
166081ad6265SDimitry Andric         } else {
16615f757f3fSDimitry Andric           assert(FuncInfo->isBottomOfStack());
16625ffd83dbSDimitry Andric           MIB.addImm(0);
166381ad6265SDimitry Andric         }
166481ad6265SDimitry Andric       }
16655ffd83dbSDimitry Andric     } else {
16665ffd83dbSDimitry Andric       MIB.addReg(SOffset, SOffsetRegState);
16675ffd83dbSDimitry Andric     }
16687a6dacacSDimitry Andric 
16697a6dacacSDimitry Andric     MIB.addImm(Offset + RegOffset);
16707a6dacacSDimitry Andric 
16717a6dacacSDimitry Andric     bool LastUse = MMO->getFlags() & MOLastUse;
16727a6dacacSDimitry Andric     MIB.addImm(LastUse ? AMDGPU::CPol::TH_LU : 0); // cpol
16737a6dacacSDimitry Andric 
1674e8d8bef9SDimitry Andric     if (!IsFlat)
1675bdd1243dSDimitry Andric       MIB.addImm(0); // swz
1676e8d8bef9SDimitry Andric     MIB.addMemOperand(NewMMO);
16770b57cec5SDimitry Andric 
1678e8d8bef9SDimitry Andric     if (!IsAGPR && NeedSuperRegDef)
1679e8d8bef9SDimitry Andric       MIB.addReg(ValueReg, RegState::ImplicitDefine);
1680e8d8bef9SDimitry Andric 
168181ad6265SDimitry Andric     if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
1682fe6060f1SDimitry Andric       MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
16830b57cec5SDimitry Andric                     FinalReg)
168481ad6265SDimitry Andric                 .addReg(TmpIntermediateVGPR, RegState::Kill);
1685e8d8bef9SDimitry Andric       MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
16860b57cec5SDimitry Andric     }
16870b57cec5SDimitry Andric 
1688bdd1243dSDimitry Andric     if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
16890b57cec5SDimitry Andric       MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
169006c3fb27SDimitry Andric 
169106c3fb27SDimitry Andric     // The epilog restore of a wwm-scratch register can cause undesired
169206c3fb27SDimitry Andric     // optimization during machine-cp after PrologEpilogInserter if the same
169306c3fb27SDimitry Andric     // register was assigned for return-value ABI lowering with a COPY
169406c3fb27SDimitry Andric     // instruction. As shown below, with the epilog reload in place, the
169506c3fb27SDimitry Andric     // earlier COPY appears to be dead during machine-cp.
169606c3fb27SDimitry Andric     // ...
169706c3fb27SDimitry Andric     // v0 in WWM operation, needs the WWM spill at prolog/epilog.
169806c3fb27SDimitry Andric     // $vgpr0 = V_WRITELANE_B32 $sgpr20, 0, $vgpr0
169906c3fb27SDimitry Andric     // ...
170006c3fb27SDimitry Andric     // Epilog block:
170106c3fb27SDimitry Andric     // $vgpr0 = COPY $vgpr1 // outgoing value moved to v0
170206c3fb27SDimitry Andric     // ...
170306c3fb27SDimitry Andric     // WWM spill restore to preserve the inactive lanes of v0.
170406c3fb27SDimitry Andric     // $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1
170506c3fb27SDimitry Andric     // $vgpr0 = BUFFER_LOAD $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0
170606c3fb27SDimitry Andric     // $exec = S_MOV_B64 killed $sgpr4_sgpr5
170706c3fb27SDimitry Andric     // ...
170806c3fb27SDimitry Andric     // SI_RETURN implicit $vgpr0
170906c3fb27SDimitry Andric     // ...
171006c3fb27SDimitry Andric     // To fix it, mark the same reg as a tied op for such restore instructions
171106c3fb27SDimitry Andric     // so that it marks a usage for the preceding COPY.
171206c3fb27SDimitry Andric     if (!IsStore && MI != MBB.end() && MI->isReturn() &&
171306c3fb27SDimitry Andric         MI->readsRegister(SubReg, this)) {
171406c3fb27SDimitry Andric       MIB.addReg(SubReg, RegState::Implicit);
171506c3fb27SDimitry Andric       MIB->tieOperands(0, MIB->getNumOperands() - 1);
171606c3fb27SDimitry Andric     }
17170b57cec5SDimitry Andric   }
17180b57cec5SDimitry Andric 
17190b57cec5SDimitry Andric   if (ScratchOffsetRegDelta != 0) {
17200b57cec5SDimitry Andric     // Subtract the offset we added to the ScratchOffset register.
1721fe6060f1SDimitry Andric     BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
17225ffd83dbSDimitry Andric         .addReg(SOffset)
1723fe6060f1SDimitry Andric         .addImm(-ScratchOffsetRegDelta);
17240b57cec5SDimitry Andric   }
17250b57cec5SDimitry Andric }
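
// In outline, buildSpillLoadStore proceeds as follows (a summary of the code
// above, not a specification):
//  1. Choose the element size per access: up to 16 bytes for flat scratch,
//     otherwise 4 bytes, and always 4 bytes for AGPRs since they need a
//     temporary VGPR.
//  2. If the folded immediate offset is not encodable, or a flat-scratch
//     access has no soffset register and ST mode is unavailable, scavenge an
//     SGPR for soffset, fall back to a VGPR offset, or as a last resort add
//     the offset into the scratch/stack pointer register and subtract it
//     again afterwards.
//  3. For each covered sub-register, first try to spill to a free AGPR/VGPR
//     lane via spillVGPRtoAGPR; whatever remains goes through the selected
//     MUBUF or flat-scratch opcode, with implicit super-register operands
//     added to keep sub-register liveness correct.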
17260b57cec5SDimitry Andric 
1727fe6060f1SDimitry Andric void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
1728fe6060f1SDimitry Andric                                              int Offset, bool IsLoad,
1729fe6060f1SDimitry Andric                                              bool IsKill) const {
17305ffd83dbSDimitry Andric   // Load/store VGPR
1731fe6060f1SDimitry Andric   MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo();
17325ffd83dbSDimitry Andric   assert(FrameInfo.getStackID(Index) != TargetStackID::SGPRSpill);
17335ffd83dbSDimitry Andric 
1734fe6060f1SDimitry Andric   Register FrameReg =
1735fe6060f1SDimitry Andric       FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF)
17365ffd83dbSDimitry Andric           ? getBaseRegister()
1737fe6060f1SDimitry Andric           : getFrameRegister(SB.MF);
17385ffd83dbSDimitry Andric 
17395ffd83dbSDimitry Andric   Align Alignment = FrameInfo.getObjectAlign(Index);
1740fe6060f1SDimitry Andric   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index);
1741fe6060f1SDimitry Andric   MachineMemOperand *MMO = SB.MF.getMachineMemOperand(
17425ffd83dbSDimitry Andric       PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore,
1743fe6060f1SDimitry Andric       SB.EltSize, Alignment);
17445ffd83dbSDimitry Andric 
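  // Illustrative arithmetic (made-up values): with SB.EltSize == 4 and
  // Offset == 3, the access built below lands 12 bytes into the stack object
  // at Index, addressed relative to FrameReg.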
17455ffd83dbSDimitry Andric   if (IsLoad) {
1746e8d8bef9SDimitry Andric     unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1747e8d8bef9SDimitry Andric                                           : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1748349cc55cSDimitry Andric     buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, false,
1749*0fca6ea1SDimitry Andric                         FrameReg, (int64_t)Offset * SB.EltSize, MMO, SB.RS);
17505ffd83dbSDimitry Andric   } else {
1751e8d8bef9SDimitry Andric     unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1752e8d8bef9SDimitry Andric                                           : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1753349cc55cSDimitry Andric     buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, IsKill,
1754*0fca6ea1SDimitry Andric                         FrameReg, (int64_t)Offset * SB.EltSize, MMO, SB.RS);
17555ffd83dbSDimitry Andric     // This only ever adds one VGPR spill
1756fe6060f1SDimitry Andric     SB.MFI.addToSpilledVGPRs(1);
17575ffd83dbSDimitry Andric   }
17585ffd83dbSDimitry Andric }
17595ffd83dbSDimitry Andric 
1760bdd1243dSDimitry Andric bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
1761bdd1243dSDimitry Andric                                RegScavenger *RS, SlotIndexes *Indexes,
17625f757f3fSDimitry Andric                                LiveIntervals *LIS, bool OnlyToVGPR,
17635f757f3fSDimitry Andric                                bool SpillToPhysVGPRLane) const {
1764fe6060f1SDimitry Andric   SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
17650b57cec5SDimitry Andric 
17665f757f3fSDimitry Andric   ArrayRef<SpilledReg> VGPRSpills =
17675f757f3fSDimitry Andric       SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
17685f757f3fSDimitry Andric                           : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
17690b57cec5SDimitry Andric   bool SpillToVGPR = !VGPRSpills.empty();
17700b57cec5SDimitry Andric   if (OnlyToVGPR && !SpillToVGPR)
17710b57cec5SDimitry Andric     return false;
17720b57cec5SDimitry Andric 
1773fe6060f1SDimitry Andric   assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() &&
1774fe6060f1SDimitry Andric                          SB.SuperReg != SB.MFI.getFrameOffsetReg()));
17750b57cec5SDimitry Andric 
17765ffd83dbSDimitry Andric   if (SpillToVGPR) {
1777349cc55cSDimitry Andric 
1778349cc55cSDimitry Andric     assert(SB.NumSubRegs == VGPRSpills.size() &&
1779349cc55cSDimitry Andric            "Num of VGPR lanes should be equal to num of SGPRs spilled");
1780349cc55cSDimitry Andric 
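    // The loop below emits one lane write per 32-bit sub-register. A rough
    // sketch with hypothetical registers and lanes, spilling a 64-bit SGPR
    // pair into lanes 0-1 of a reserved VGPR:
    //   $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr63,
    //               implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
    //   $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 1, $vgpr63,
    //               implicit killed $sgpr4_sgpr5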
1781fe6060f1SDimitry Andric     for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1782fe6060f1SDimitry Andric       Register SubReg =
1783fe6060f1SDimitry Andric           SB.NumSubRegs == 1
1784fe6060f1SDimitry Andric               ? SB.SuperReg
1785fe6060f1SDimitry Andric               : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
178681ad6265SDimitry Andric       SpilledReg Spill = VGPRSpills[i];
17870b57cec5SDimitry Andric 
1788bdd1243dSDimitry Andric       bool IsFirstSubreg = i == 0;
1789bdd1243dSDimitry Andric       bool IsLastSubreg = i == SB.NumSubRegs - 1;
1790bdd1243dSDimitry Andric       bool UseKill = SB.IsKill && IsLastSubreg;
1791bdd1243dSDimitry Andric 
17930b57cec5SDimitry Andric       // Mark the "old value of vgpr" input undef only if this is the first sgpr
17940b57cec5SDimitry Andric       // spill to this specific vgpr in the first basic block.
1795349cc55cSDimitry Andric       auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
17965f757f3fSDimitry Andric                          SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)
1797e8d8bef9SDimitry Andric                      .addReg(SubReg, getKillRegState(UseKill))
17980b57cec5SDimitry Andric                      .addImm(Spill.Lane)
1799e8d8bef9SDimitry Andric                      .addReg(Spill.VGPR);
1800bdd1243dSDimitry Andric       if (Indexes) {
1801bdd1243dSDimitry Andric         if (IsFirstSubreg)
1802bdd1243dSDimitry Andric           Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1803fe6060f1SDimitry Andric         else
1804bdd1243dSDimitry Andric           Indexes->insertMachineInstrInMaps(*MIB);
1805e8d8bef9SDimitry Andric       }
1806e8d8bef9SDimitry Andric 
1807bdd1243dSDimitry Andric       if (IsFirstSubreg && SB.NumSubRegs > 1) {
1808fe6060f1SDimitry Andric         // We may be spilling a super-register which is only partially defined,
1809fe6060f1SDimitry Andric         // and need to ensure later spills think the value is defined.
1810fe6060f1SDimitry Andric         MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1811fe6060f1SDimitry Andric       }
1812fe6060f1SDimitry Andric 
1813bdd1243dSDimitry Andric       if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))
1814fe6060f1SDimitry Andric         MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit);
18150b57cec5SDimitry Andric 
18160b57cec5SDimitry Andric       // FIXME: Since this spills to another register instead of an actual
18170b57cec5SDimitry Andric       // frame index, we should delete the frame index when all references to
18180b57cec5SDimitry Andric       // it are fixed.
18195ffd83dbSDimitry Andric     }
18200b57cec5SDimitry Andric   } else {
1821fe6060f1SDimitry Andric     SB.prepare();
18220b57cec5SDimitry Andric 
1823fe6060f1SDimitry Andric     // SubReg carries the "Kill" flag when SubReg == SB.SuperReg.
1824fe6060f1SDimitry Andric     unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
18250b57cec5SDimitry Andric 
1826fe6060f1SDimitry Andric     // Per VGPR helper data
1827fe6060f1SDimitry Andric     auto PVD = SB.getPerVGPRData();
18285ffd83dbSDimitry Andric 
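    // Illustrative packing (assuming PVD.PerVGPR is the number of usable
    // lanes, i.e. the wave size): a 512-bit spill has 16 sub-registers, so
    // they occupy lanes 0-15 of a single temporary VGPR, PVD.NumVGPRs == 1,
    // and the outer loop below runs once before the VGPR is stored to scratch.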
1829fe6060f1SDimitry Andric     for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
18305ffd83dbSDimitry Andric       unsigned TmpVGPRFlags = RegState::Undef;
18315ffd83dbSDimitry Andric 
18325ffd83dbSDimitry Andric       // Write sub registers into the VGPR
1833fe6060f1SDimitry Andric       for (unsigned i = Offset * PVD.PerVGPR,
1834fe6060f1SDimitry Andric                     e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
18355ffd83dbSDimitry Andric            i < e; ++i) {
1836fe6060f1SDimitry Andric         Register SubReg =
1837fe6060f1SDimitry Andric             SB.NumSubRegs == 1
1838fe6060f1SDimitry Andric                 ? SB.SuperReg
1839fe6060f1SDimitry Andric                 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
18405ffd83dbSDimitry Andric 
18415ffd83dbSDimitry Andric         MachineInstrBuilder WriteLane =
18425f757f3fSDimitry Andric             BuildMI(*SB.MBB, MI, SB.DL,
18435f757f3fSDimitry Andric                     SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)
18445ffd83dbSDimitry Andric                 .addReg(SubReg, SubKillState)
1845fe6060f1SDimitry Andric                 .addImm(i % PVD.PerVGPR)
1846fe6060f1SDimitry Andric                 .addReg(SB.TmpVGPR, TmpVGPRFlags);
18475ffd83dbSDimitry Andric         TmpVGPRFlags = 0;
18480b57cec5SDimitry Andric 
1849bdd1243dSDimitry Andric         if (Indexes) {
1850fe6060f1SDimitry Andric           if (i == 0)
1851bdd1243dSDimitry Andric             Indexes->replaceMachineInstrInMaps(*MI, *WriteLane);
1852fe6060f1SDimitry Andric           else
1853bdd1243dSDimitry Andric             Indexes->insertMachineInstrInMaps(*WriteLane);
1854fe6060f1SDimitry Andric         }
1855fe6060f1SDimitry Andric 
18560b57cec5SDimitry Andric         // There could be undef components of a spilled super register.
18570b57cec5SDimitry Andric         // TODO: Can we detect this and skip the spill?
1858fe6060f1SDimitry Andric         if (SB.NumSubRegs > 1) {
1859fe6060f1SDimitry Andric           // The last implicit use of the SB.SuperReg carries the "Kill" flag.
18600b57cec5SDimitry Andric           unsigned SuperKillState = 0;
1861fe6060f1SDimitry Andric           if (i + 1 == SB.NumSubRegs)
1862fe6060f1SDimitry Andric             SuperKillState |= getKillRegState(SB.IsKill);
1863fe6060f1SDimitry Andric           WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
18645ffd83dbSDimitry Andric         }
18650b57cec5SDimitry Andric       }
18660b57cec5SDimitry Andric 
18675ffd83dbSDimitry Andric       // Write out VGPR
1868fe6060f1SDimitry Andric       SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false);
18690b57cec5SDimitry Andric     }
1870fe6060f1SDimitry Andric 
1871fe6060f1SDimitry Andric     SB.restore();
18720b57cec5SDimitry Andric   }
18730b57cec5SDimitry Andric 
18740b57cec5SDimitry Andric   MI->eraseFromParent();
1875fe6060f1SDimitry Andric   SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
1876fe6060f1SDimitry Andric 
1877fe6060f1SDimitry Andric   if (LIS)
1878fe6060f1SDimitry Andric     LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1879fe6060f1SDimitry Andric 
18800b57cec5SDimitry Andric   return true;
18810b57cec5SDimitry Andric }
18820b57cec5SDimitry Andric 
1883bdd1243dSDimitry Andric bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
1884bdd1243dSDimitry Andric                                  RegScavenger *RS, SlotIndexes *Indexes,
18855f757f3fSDimitry Andric                                  LiveIntervals *LIS, bool OnlyToVGPR,
18865f757f3fSDimitry Andric                                  bool SpillToPhysVGPRLane) const {
1887fe6060f1SDimitry Andric   SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
18880b57cec5SDimitry Andric 
18895f757f3fSDimitry Andric   ArrayRef<SpilledReg> VGPRSpills =
18905f757f3fSDimitry Andric       SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
18915f757f3fSDimitry Andric                           : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
18920b57cec5SDimitry Andric   bool SpillToVGPR = !VGPRSpills.empty();
18930b57cec5SDimitry Andric   if (OnlyToVGPR && !SpillToVGPR)
18940b57cec5SDimitry Andric     return false;
18950b57cec5SDimitry Andric 
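  // The restore mirrors spillSGPR: either one lane read per 32-bit
  // sub-register, or a scratch load of the temporary VGPR followed by lane
  // reads. A rough sketch of the lane reads with hypothetical registers:
  //   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 0, implicit-def $sgpr4_sgpr5
  //   $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 1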
18965ffd83dbSDimitry Andric   if (SpillToVGPR) {
1897fe6060f1SDimitry Andric     for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1898fe6060f1SDimitry Andric       Register SubReg =
1899fe6060f1SDimitry Andric           SB.NumSubRegs == 1
1900fe6060f1SDimitry Andric               ? SB.SuperReg
1901fe6060f1SDimitry Andric               : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
19020b57cec5SDimitry Andric 
190381ad6265SDimitry Andric       SpilledReg Spill = VGPRSpills[i];
19045f757f3fSDimitry Andric       auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
19055f757f3fSDimitry Andric                          SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
19060b57cec5SDimitry Andric                      .addReg(Spill.VGPR)
19070b57cec5SDimitry Andric                      .addImm(Spill.Lane);
1908fe6060f1SDimitry Andric       if (SB.NumSubRegs > 1 && i == 0)
1909fe6060f1SDimitry Andric         MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1910bdd1243dSDimitry Andric       if (Indexes) {
1911fe6060f1SDimitry Andric         if (i == e - 1)
1912bdd1243dSDimitry Andric           Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1913fe6060f1SDimitry Andric         else
1914bdd1243dSDimitry Andric           Indexes->insertMachineInstrInMaps(*MIB);
1915fe6060f1SDimitry Andric       }
19165ffd83dbSDimitry Andric     }
19170b57cec5SDimitry Andric   } else {
1918fe6060f1SDimitry Andric     SB.prepare();
19190b57cec5SDimitry Andric 
1920fe6060f1SDimitry Andric     // Per VGPR helper data
1921fe6060f1SDimitry Andric     auto PVD = SB.getPerVGPRData();
19220b57cec5SDimitry Andric 
1923fe6060f1SDimitry Andric     for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
19245ffd83dbSDimitry Andric       // Load in VGPR data
1925fe6060f1SDimitry Andric       SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true);
19260b57cec5SDimitry Andric 
19275ffd83dbSDimitry Andric       // Unpack lanes
1928fe6060f1SDimitry Andric       for (unsigned i = Offset * PVD.PerVGPR,
1929fe6060f1SDimitry Andric                     e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
19305ffd83dbSDimitry Andric            i < e; ++i) {
1931fe6060f1SDimitry Andric         Register SubReg =
1932fe6060f1SDimitry Andric             SB.NumSubRegs == 1
1933fe6060f1SDimitry Andric                 ? SB.SuperReg
1934fe6060f1SDimitry Andric                 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
19350b57cec5SDimitry Andric 
19365ffd83dbSDimitry Andric         bool LastSubReg = (i + 1 == e);
1937349cc55cSDimitry Andric         auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
19385f757f3fSDimitry Andric                            SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
1939fe6060f1SDimitry Andric                        .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
19405ffd83dbSDimitry Andric                        .addImm(i);
1941fe6060f1SDimitry Andric         if (SB.NumSubRegs > 1 && i == 0)
1942fe6060f1SDimitry Andric           MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1943bdd1243dSDimitry Andric         if (Indexes) {
1944fe6060f1SDimitry Andric           if (i == e - 1)
1945bdd1243dSDimitry Andric             Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1946fe6060f1SDimitry Andric           else
1947bdd1243dSDimitry Andric             Indexes->insertMachineInstrInMaps(*MIB);
19485ffd83dbSDimitry Andric         }
19490b57cec5SDimitry Andric       }
19500b57cec5SDimitry Andric     }
19510b57cec5SDimitry Andric 
1952fe6060f1SDimitry Andric     SB.restore();
1953fe6060f1SDimitry Andric   }
1954fe6060f1SDimitry Andric 
19550b57cec5SDimitry Andric   MI->eraseFromParent();
1956fe6060f1SDimitry Andric 
1957fe6060f1SDimitry Andric   if (LIS)
1958fe6060f1SDimitry Andric     LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1959fe6060f1SDimitry Andric 
19600b57cec5SDimitry Andric   return true;
19610b57cec5SDimitry Andric }
19620b57cec5SDimitry Andric 
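// Sketch of the flow below: the emergency spill keeps the SGPR entirely in
// lanes of the temporary VGPR set up by SGPRSpillBuilder; nothing is written
// to scratch memory. The value is written with V_WRITELANE_B32 here and read
// back with V_READLANE_B32 at the end of RestoreMBB.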
1963349cc55cSDimitry Andric bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
1964349cc55cSDimitry Andric                                         MachineBasicBlock &RestoreMBB,
1965349cc55cSDimitry Andric                                         Register SGPR, RegScavenger *RS) const {
1966349cc55cSDimitry Andric   SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0,
1967349cc55cSDimitry Andric                       RS);
1968349cc55cSDimitry Andric   SB.prepare();
1969349cc55cSDimitry Andric   // Generate the spill of SGPR to SB.TmpVGPR.
1970349cc55cSDimitry Andric   unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1971349cc55cSDimitry Andric   auto PVD = SB.getPerVGPRData();
1972349cc55cSDimitry Andric   for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1973349cc55cSDimitry Andric     unsigned TmpVGPRFlags = RegState::Undef;
1974349cc55cSDimitry Andric     // Write sub registers into the VGPR
1975349cc55cSDimitry Andric     for (unsigned i = Offset * PVD.PerVGPR,
1976349cc55cSDimitry Andric                   e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1977349cc55cSDimitry Andric          i < e; ++i) {
1978349cc55cSDimitry Andric       Register SubReg =
1979349cc55cSDimitry Andric           SB.NumSubRegs == 1
1980349cc55cSDimitry Andric               ? SB.SuperReg
1981349cc55cSDimitry Andric               : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1982349cc55cSDimitry Andric 
1983349cc55cSDimitry Andric       MachineInstrBuilder WriteLane =
1984349cc55cSDimitry Andric           BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1985349cc55cSDimitry Andric                   SB.TmpVGPR)
1986349cc55cSDimitry Andric               .addReg(SubReg, SubKillState)
1987349cc55cSDimitry Andric               .addImm(i % PVD.PerVGPR)
1988349cc55cSDimitry Andric               .addReg(SB.TmpVGPR, TmpVGPRFlags);
1989349cc55cSDimitry Andric       TmpVGPRFlags = 0;
1990349cc55cSDimitry Andric       // There could be undef components of a spilled super register.
1991349cc55cSDimitry Andric       // TODO: Can we detect this and skip the spill?
1992349cc55cSDimitry Andric       if (SB.NumSubRegs > 1) {
1993349cc55cSDimitry Andric         // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1994349cc55cSDimitry Andric         unsigned SuperKillState = 0;
1995349cc55cSDimitry Andric         if (i + 1 == SB.NumSubRegs)
1996349cc55cSDimitry Andric           SuperKillState |= getKillRegState(SB.IsKill);
1997349cc55cSDimitry Andric         WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
1998349cc55cSDimitry Andric       }
1999349cc55cSDimitry Andric     }
2000349cc55cSDimitry Andric     // Don't need to write VGPR out.
2001349cc55cSDimitry Andric   }
2002349cc55cSDimitry Andric 
2003349cc55cSDimitry Andric   // Restore clobbered registers in the specified restore block.
2004349cc55cSDimitry Andric   MI = RestoreMBB.end();
2005349cc55cSDimitry Andric   SB.setMI(&RestoreMBB, MI);
2006349cc55cSDimitry Andric   // Generate the restore of SGPR from SB.TmpVGPR.
2007349cc55cSDimitry Andric   for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
2008349cc55cSDimitry Andric     // Don't need to load VGPR in.
2009349cc55cSDimitry Andric     // Unpack lanes
2010349cc55cSDimitry Andric     for (unsigned i = Offset * PVD.PerVGPR,
2011349cc55cSDimitry Andric                   e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
2012349cc55cSDimitry Andric          i < e; ++i) {
2013349cc55cSDimitry Andric       Register SubReg =
2014349cc55cSDimitry Andric           SB.NumSubRegs == 1
2015349cc55cSDimitry Andric               ? SB.SuperReg
2016349cc55cSDimitry Andric               : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
2017349cc55cSDimitry Andric       bool LastSubReg = (i + 1 == e);
2018349cc55cSDimitry Andric       auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
2019349cc55cSDimitry Andric                          SubReg)
2020349cc55cSDimitry Andric                      .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
2021349cc55cSDimitry Andric                      .addImm(i);
2022349cc55cSDimitry Andric       if (SB.NumSubRegs > 1 && i == 0)
2023349cc55cSDimitry Andric         MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
2024349cc55cSDimitry Andric     }
2025349cc55cSDimitry Andric   }
2026349cc55cSDimitry Andric   SB.restore();
2027349cc55cSDimitry Andric 
2028349cc55cSDimitry Andric   SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
2029349cc55cSDimitry Andric   return false;
2030349cc55cSDimitry Andric }
2031349cc55cSDimitry Andric 
20320b57cec5SDimitry Andric /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
20330b57cec5SDimitry Andric /// a VGPR and the stack slot can be safely eliminated when all other users are
20340b57cec5SDimitry Andric /// handled.
20350b57cec5SDimitry Andric bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
2036bdd1243dSDimitry Andric     MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
20375f757f3fSDimitry Andric     SlotIndexes *Indexes, LiveIntervals *LIS, bool SpillToPhysVGPRLane) const {
20380b57cec5SDimitry Andric   switch (MI->getOpcode()) {
20390b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S1024_SAVE:
20400b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S512_SAVE:
2041bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S384_SAVE:
2042bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S352_SAVE:
2043bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S320_SAVE:
2044bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S288_SAVE:
20450b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S256_SAVE:
2046fe6060f1SDimitry Andric   case AMDGPU::SI_SPILL_S224_SAVE:
20475ffd83dbSDimitry Andric   case AMDGPU::SI_SPILL_S192_SAVE:
20480b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S160_SAVE:
20490b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S128_SAVE:
20500b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S96_SAVE:
20510b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S64_SAVE:
20520b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S32_SAVE:
20535f757f3fSDimitry Andric     return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
20540b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S1024_RESTORE:
20550b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S512_RESTORE:
2056bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S384_RESTORE:
2057bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S352_RESTORE:
2058bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S320_RESTORE:
2059bdd1243dSDimitry Andric   case AMDGPU::SI_SPILL_S288_RESTORE:
20600b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S256_RESTORE:
2061fe6060f1SDimitry Andric   case AMDGPU::SI_SPILL_S224_RESTORE:
20625ffd83dbSDimitry Andric   case AMDGPU::SI_SPILL_S192_RESTORE:
20630b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S160_RESTORE:
20640b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S128_RESTORE:
20650b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S96_RESTORE:
20660b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S64_RESTORE:
20670b57cec5SDimitry Andric   case AMDGPU::SI_SPILL_S32_RESTORE:
20685f757f3fSDimitry Andric     return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
20690b57cec5SDimitry Andric   default:
20700b57cec5SDimitry Andric     llvm_unreachable("not an SGPR spill instruction");
20710b57cec5SDimitry Andric   }
20720b57cec5SDimitry Andric }
20730b57cec5SDimitry Andric 
2074bdd1243dSDimitry Andric bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
20750b57cec5SDimitry Andric                                         int SPAdj, unsigned FIOperandNum,
20760b57cec5SDimitry Andric                                         RegScavenger *RS) const {
20770b57cec5SDimitry Andric   MachineFunction *MF = MI->getParent()->getParent();
20780b57cec5SDimitry Andric   MachineBasicBlock *MBB = MI->getParent();
20790b57cec5SDimitry Andric   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
20800b57cec5SDimitry Andric   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
20810b57cec5SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
20820b57cec5SDimitry Andric   DebugLoc DL = MI->getDebugLoc();
20830b57cec5SDimitry Andric 
20840b57cec5SDimitry Andric   assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
20850b57cec5SDimitry Andric 
2086*0fca6ea1SDimitry Andric   assert(MF->getRegInfo().isReserved(MFI->getScratchRSrcReg()) &&
2087*0fca6ea1SDimitry Andric          "unreserved scratch RSRC register");
2088*0fca6ea1SDimitry Andric 
20890b57cec5SDimitry Andric   MachineOperand &FIOp = MI->getOperand(FIOperandNum);
20900b57cec5SDimitry Andric   int Index = MI->getOperand(FIOperandNum).getIndex();
20910b57cec5SDimitry Andric 
20925ffd83dbSDimitry Andric   Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF)
20935ffd83dbSDimitry Andric                           ? getBaseRegister()
20945ffd83dbSDimitry Andric                           : getFrameRegister(*MF);
20950b57cec5SDimitry Andric 
20960b57cec5SDimitry Andric   switch (MI->getOpcode()) {
20970b57cec5SDimitry Andric     // SGPR register spill
20980b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S1024_SAVE:
20990b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S512_SAVE:
2100bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_S384_SAVE:
2101bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_S352_SAVE:
2102bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_S320_SAVE:
2103bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_S288_SAVE:
21040b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S256_SAVE:
2105fe6060f1SDimitry Andric     case AMDGPU::SI_SPILL_S224_SAVE:
21065ffd83dbSDimitry Andric     case AMDGPU::SI_SPILL_S192_SAVE:
21070b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S160_SAVE:
21080b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S128_SAVE:
21090b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S96_SAVE:
21100b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S64_SAVE:
21110b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S32_SAVE: {
2112bdd1243dSDimitry Andric       return spillSGPR(MI, Index, RS);
21130b57cec5SDimitry Andric     }
21140b57cec5SDimitry Andric 
21150b57cec5SDimitry Andric     // SGPR register restore
21160b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S1024_RESTORE:
21170b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S512_RESTORE:
2118bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_S384_RESTORE:
2119bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_S352_RESTORE:
2120bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_S320_RESTORE:
2121bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_S288_RESTORE:
21220b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S256_RESTORE:
2123fe6060f1SDimitry Andric     case AMDGPU::SI_SPILL_S224_RESTORE:
21245ffd83dbSDimitry Andric     case AMDGPU::SI_SPILL_S192_RESTORE:
21250b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S160_RESTORE:
21260b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S128_RESTORE:
21270b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S96_RESTORE:
21280b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S64_RESTORE:
21290b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_S32_RESTORE: {
2130bdd1243dSDimitry Andric       return restoreSGPR(MI, Index, RS);
21310b57cec5SDimitry Andric     }
21320b57cec5SDimitry Andric 
21330b57cec5SDimitry Andric     // VGPR register spill
21340b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V1024_SAVE:
21350b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V512_SAVE:
2136bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_V384_SAVE:
2137bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_V352_SAVE:
2138bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_V320_SAVE:
2139bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_V288_SAVE:
21400b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V256_SAVE:
2141fe6060f1SDimitry Andric     case AMDGPU::SI_SPILL_V224_SAVE:
2142e8d8bef9SDimitry Andric     case AMDGPU::SI_SPILL_V192_SAVE:
21430b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V160_SAVE:
21440b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V128_SAVE:
21450b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V96_SAVE:
21460b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V64_SAVE:
21470b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V32_SAVE:
21480b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_A1024_SAVE:
21490b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_A512_SAVE:
2150bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_A384_SAVE:
2151bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_A352_SAVE:
2152bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_A320_SAVE:
2153bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_A288_SAVE:
2154e8d8bef9SDimitry Andric     case AMDGPU::SI_SPILL_A256_SAVE:
2155fe6060f1SDimitry Andric     case AMDGPU::SI_SPILL_A224_SAVE:
2156e8d8bef9SDimitry Andric     case AMDGPU::SI_SPILL_A192_SAVE:
2157e8d8bef9SDimitry Andric     case AMDGPU::SI_SPILL_A160_SAVE:
21580b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_A128_SAVE:
2159e8d8bef9SDimitry Andric     case AMDGPU::SI_SPILL_A96_SAVE:
21600b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_A64_SAVE:
21610eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_A32_SAVE:
21620eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV1024_SAVE:
21630eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV512_SAVE:
2164bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_AV384_SAVE:
2165bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_AV352_SAVE:
2166bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_AV320_SAVE:
2167bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_AV288_SAVE:
21680eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV256_SAVE:
21690eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV224_SAVE:
21700eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV192_SAVE:
21710eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV160_SAVE:
21720eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV128_SAVE:
21730eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV96_SAVE:
21740eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV64_SAVE:
217506c3fb27SDimitry Andric     case AMDGPU::SI_SPILL_AV32_SAVE:
21765f757f3fSDimitry Andric     case AMDGPU::SI_SPILL_WWM_V32_SAVE:
21775f757f3fSDimitry Andric     case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
21780b57cec5SDimitry Andric       const MachineOperand *VData = TII->getNamedOperand(*MI,
21790b57cec5SDimitry Andric                                                          AMDGPU::OpName::vdata);
21800b57cec5SDimitry Andric       assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
21810b57cec5SDimitry Andric              MFI->getStackPtrOffsetReg());
21820b57cec5SDimitry Andric 
2183e8d8bef9SDimitry Andric       unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
2184e8d8bef9SDimitry Andric                                             : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2185fe6060f1SDimitry Andric       auto *MBB = MI->getParent();
218606c3fb27SDimitry Andric       bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
218706c3fb27SDimitry Andric       if (IsWWMRegSpill) {
218806c3fb27SDimitry Andric         TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
218906c3fb27SDimitry Andric                                   RS->isRegUsed(AMDGPU::SCC));
219006c3fb27SDimitry Andric       }
2191fe6060f1SDimitry Andric       buildSpillLoadStore(
2192349cc55cSDimitry Andric           *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
21930b57cec5SDimitry Andric           TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2194fe6060f1SDimitry Andric           *MI->memoperands_begin(), RS);
21950b57cec5SDimitry Andric       MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
219606c3fb27SDimitry Andric       if (IsWWMRegSpill)
219706c3fb27SDimitry Andric         TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
219806c3fb27SDimitry Andric 
21990b57cec5SDimitry Andric       MI->eraseFromParent();
2200bdd1243dSDimitry Andric       return true;
22010b57cec5SDimitry Andric     }
22020b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V32_RESTORE:
22030b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V64_RESTORE:
22040b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V96_RESTORE:
22050b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V128_RESTORE:
22060b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V160_RESTORE:
2207e8d8bef9SDimitry Andric     case AMDGPU::SI_SPILL_V192_RESTORE:
2208fe6060f1SDimitry Andric     case AMDGPU::SI_SPILL_V224_RESTORE:
22090b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V256_RESTORE:
2210bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_V288_RESTORE:
2211bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_V320_RESTORE:
2212bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_V352_RESTORE:
2213bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_V384_RESTORE:
22140b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V512_RESTORE:
22150b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_V1024_RESTORE:
22160b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_A32_RESTORE:
22170b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_A64_RESTORE:
2218e8d8bef9SDimitry Andric     case AMDGPU::SI_SPILL_A96_RESTORE:
22190b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_A128_RESTORE:
2220e8d8bef9SDimitry Andric     case AMDGPU::SI_SPILL_A160_RESTORE:
2221e8d8bef9SDimitry Andric     case AMDGPU::SI_SPILL_A192_RESTORE:
2222fe6060f1SDimitry Andric     case AMDGPU::SI_SPILL_A224_RESTORE:
2223e8d8bef9SDimitry Andric     case AMDGPU::SI_SPILL_A256_RESTORE:
2224bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_A288_RESTORE:
2225bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_A320_RESTORE:
2226bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_A352_RESTORE:
2227bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_A384_RESTORE:
22280b57cec5SDimitry Andric     case AMDGPU::SI_SPILL_A512_RESTORE:
22290eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_A1024_RESTORE:
22300eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV32_RESTORE:
22310eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV64_RESTORE:
22320eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV96_RESTORE:
22330eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV128_RESTORE:
22340eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV160_RESTORE:
22350eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV192_RESTORE:
22360eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV224_RESTORE:
22370eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV256_RESTORE:
2238bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_AV288_RESTORE:
2239bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_AV320_RESTORE:
2240bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_AV352_RESTORE:
2241bdd1243dSDimitry Andric     case AMDGPU::SI_SPILL_AV384_RESTORE:
22420eae32dcSDimitry Andric     case AMDGPU::SI_SPILL_AV512_RESTORE:
224306c3fb27SDimitry Andric     case AMDGPU::SI_SPILL_AV1024_RESTORE:
22445f757f3fSDimitry Andric     case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
22455f757f3fSDimitry Andric     case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
22460b57cec5SDimitry Andric       const MachineOperand *VData = TII->getNamedOperand(*MI,
22470b57cec5SDimitry Andric                                                          AMDGPU::OpName::vdata);
22480b57cec5SDimitry Andric       assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
22490b57cec5SDimitry Andric              MFI->getStackPtrOffsetReg());
22500b57cec5SDimitry Andric 
2251e8d8bef9SDimitry Andric       unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
2252e8d8bef9SDimitry Andric                                             : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2253fe6060f1SDimitry Andric       auto *MBB = MI->getParent();
225406c3fb27SDimitry Andric       bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
225506c3fb27SDimitry Andric       if (IsWWMRegSpill) {
225606c3fb27SDimitry Andric         TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
225706c3fb27SDimitry Andric                                   RS->isRegUsed(AMDGPU::SCC));
225806c3fb27SDimitry Andric       }
22597a6dacacSDimitry Andric 
2260fe6060f1SDimitry Andric       buildSpillLoadStore(
2261349cc55cSDimitry Andric           *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
22620b57cec5SDimitry Andric           TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2263fe6060f1SDimitry Andric           *MI->memoperands_begin(), RS);
226406c3fb27SDimitry Andric 
226506c3fb27SDimitry Andric       if (IsWWMRegSpill)
226606c3fb27SDimitry Andric         TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
226706c3fb27SDimitry Andric 
22680b57cec5SDimitry Andric       MI->eraseFromParent();
2269bdd1243dSDimitry Andric       return true;
22700b57cec5SDimitry Andric     }
22710b57cec5SDimitry Andric 
22720b57cec5SDimitry Andric     default: {
2273fe6060f1SDimitry Andric       // Other access to frame index
22740b57cec5SDimitry Andric       const DebugLoc &DL = MI->getDebugLoc();
2275e8d8bef9SDimitry Andric 
2276e8d8bef9SDimitry Andric       int64_t Offset = FrameInfo.getObjectOffset(Index);
2277e8d8bef9SDimitry Andric       if (ST.enableFlatScratch()) {
2278e8d8bef9SDimitry Andric         if (TII->isFLATScratch(*MI)) {
2279e8d8bef9SDimitry Andric           assert((int16_t)FIOperandNum ==
2280e8d8bef9SDimitry Andric                  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2281e8d8bef9SDimitry Andric                                             AMDGPU::OpName::saddr));
2282e8d8bef9SDimitry Andric 
2283e8d8bef9SDimitry Andric           // The offset is always swizzled; just replace it.
2284e8d8bef9SDimitry Andric           if (FrameReg)
2285e8d8bef9SDimitry Andric             FIOp.ChangeToRegister(FrameReg, false);
2286e8d8bef9SDimitry Andric 
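          // Illustrative folding (made-up numbers): an object at frame offset
          // 64 accessed with an existing immediate offset of 16 becomes a
          // single immediate of 80, provided 80 is a legal flat-scratch offset
          // for a private address.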
2287e8d8bef9SDimitry Andric           MachineOperand *OffsetOp =
2288e8d8bef9SDimitry Andric             TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
2289e8d8bef9SDimitry Andric           int64_t NewOffset = Offset + OffsetOp->getImm();
2290e8d8bef9SDimitry Andric           if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
2291fe6060f1SDimitry Andric                                      SIInstrFlags::FlatScratch)) {
2292e8d8bef9SDimitry Andric             OffsetOp->setImm(NewOffset);
2293e8d8bef9SDimitry Andric             if (FrameReg)
2294bdd1243dSDimitry Andric               return false;
2295e8d8bef9SDimitry Andric             Offset = 0;
2296e8d8bef9SDimitry Andric           }
2297e8d8bef9SDimitry Andric 
229881ad6265SDimitry Andric           if (!Offset) {
229981ad6265SDimitry Andric             unsigned Opc = MI->getOpcode();
230081ad6265SDimitry Andric             int NewOpc = -1;
2301bdd1243dSDimitry Andric             if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
230281ad6265SDimitry Andric               NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
230381ad6265SDimitry Andric             } else if (ST.hasFlatScratchSTMode()) {
2304e8d8bef9SDimitry Andric               // On GFX10 we have ST mode to use no registers for an address.
2305e8d8bef9SDimitry Andric               // Otherwise we need to materialize 0 into an SGPR.
230681ad6265SDimitry Andric               NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
230781ad6265SDimitry Andric             }
230881ad6265SDimitry Andric 
230981ad6265SDimitry Andric             if (NewOpc != -1) {
2310bdd1243dSDimitry Andric               // removeOperand doesn't fix up tied operand indexes as it goes,
2311bdd1243dSDimitry Andric               // so it asserts. Untie vdst_in for now and retie it afterwards.
2312bdd1243dSDimitry Andric               int VDstIn = AMDGPU::getNamedOperandIdx(Opc,
2313bdd1243dSDimitry Andric                                                      AMDGPU::OpName::vdst_in);
2314bdd1243dSDimitry Andric               bool TiedVDst = VDstIn != -1 &&
2315bdd1243dSDimitry Andric                               MI->getOperand(VDstIn).isReg() &&
2316bdd1243dSDimitry Andric                               MI->getOperand(VDstIn).isTied();
2317bdd1243dSDimitry Andric               if (TiedVDst)
2318bdd1243dSDimitry Andric                 MI->untieRegOperand(VDstIn);
2319bdd1243dSDimitry Andric 
232081ad6265SDimitry Andric               MI->removeOperand(
2321e8d8bef9SDimitry Andric                   AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
2322bdd1243dSDimitry Andric 
2323bdd1243dSDimitry Andric               if (TiedVDst) {
2324bdd1243dSDimitry Andric                 int NewVDst =
2325bdd1243dSDimitry Andric                     AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
2326bdd1243dSDimitry Andric                 int NewVDstIn =
2327bdd1243dSDimitry Andric                     AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
2328bdd1243dSDimitry Andric                 assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
2329bdd1243dSDimitry Andric                 MI->tieOperands(NewVDst, NewVDstIn);
2330bdd1243dSDimitry Andric               }
2331e8d8bef9SDimitry Andric               MI->setDesc(TII->get(NewOpc));
2332bdd1243dSDimitry Andric               return false;
2333e8d8bef9SDimitry Andric             }
2334e8d8bef9SDimitry Andric           }
233581ad6265SDimitry Andric         }
2336e8d8bef9SDimitry Andric 
2337e8d8bef9SDimitry Andric         if (!FrameReg) {
2338e8d8bef9SDimitry Andric           FIOp.ChangeToImmediate(Offset);
2339e8d8bef9SDimitry Andric           if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
2340bdd1243dSDimitry Andric             return false;
2341e8d8bef9SDimitry Andric         }
2342e8d8bef9SDimitry Andric 
2343e8d8bef9SDimitry Andric         // We need to use a register here. Check whether we can use an SGPR or
2344e8d8bef9SDimitry Andric         // need a VGPR.
2345e8d8bef9SDimitry Andric         FIOp.ChangeToRegister(AMDGPU::M0, false);
2346e8d8bef9SDimitry Andric         bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
2347e8d8bef9SDimitry Andric 
2348e8d8bef9SDimitry Andric         if (!Offset && FrameReg && UseSGPR) {
2349e8d8bef9SDimitry Andric           FIOp.setReg(FrameReg);
2350bdd1243dSDimitry Andric           return false;
2351e8d8bef9SDimitry Andric         }
2352e8d8bef9SDimitry Andric 
2353e8d8bef9SDimitry Andric         const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
2354e8d8bef9SDimitry Andric                                                 : &AMDGPU::VGPR_32RegClass;
2355e8d8bef9SDimitry Andric 
235606c3fb27SDimitry Andric         Register TmpReg =
235706c3fb27SDimitry Andric             RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
2358e8d8bef9SDimitry Andric         FIOp.setReg(TmpReg);
2359bdd1243dSDimitry Andric         FIOp.setIsKill();
2360e8d8bef9SDimitry Andric 
2361e8d8bef9SDimitry Andric         if ((!FrameReg || !Offset) && TmpReg) {
2362e8d8bef9SDimitry Andric           unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
2363e8d8bef9SDimitry Andric           auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
2364e8d8bef9SDimitry Andric           if (FrameReg)
2365e8d8bef9SDimitry Andric             MIB.addReg(FrameReg);
2366e8d8bef9SDimitry Andric           else
2367e8d8bef9SDimitry Andric             MIB.addImm(Offset);
2368e8d8bef9SDimitry Andric 
2369bdd1243dSDimitry Andric           return false;
2370e8d8bef9SDimitry Andric         }
2371e8d8bef9SDimitry Andric 
2372*0fca6ea1SDimitry Andric         bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) &&
2373*0fca6ea1SDimitry Andric                            !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
2374bdd1243dSDimitry Andric 
2375e8d8bef9SDimitry Andric         Register TmpSReg =
2376e8d8bef9SDimitry Andric             UseSGPR ? TmpReg
237706c3fb27SDimitry Andric                     : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
237806c3fb27SDimitry Andric                                                     MI, false, 0, !UseSGPR);
2379e8d8bef9SDimitry Andric 
2380e8d8bef9SDimitry Andric         // TODO: for flat scratch another attempt can be made with a VGPR index
2381e8d8bef9SDimitry Andric         //       if no SGPRs can be scavenged.
2382e8d8bef9SDimitry Andric         if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
2383e8d8bef9SDimitry Andric           report_fatal_error("Cannot scavenge register in FI elimination!");
2384e8d8bef9SDimitry Andric 
2385e8d8bef9SDimitry Andric         if (!TmpSReg) {
2386e8d8bef9SDimitry Andric           // Use frame register and restore it after.
2387e8d8bef9SDimitry Andric           TmpSReg = FrameReg;
2388e8d8bef9SDimitry Andric           FIOp.setReg(FrameReg);
2389e8d8bef9SDimitry Andric           FIOp.setIsKill(false);
2390e8d8bef9SDimitry Andric         }
2391e8d8bef9SDimitry Andric 
2392bdd1243dSDimitry Andric         if (NeedSaveSCC) {
2393bdd1243dSDimitry Andric           assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
2394bdd1243dSDimitry Andric           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
2395bdd1243dSDimitry Andric               .addReg(FrameReg)
2396bdd1243dSDimitry Andric               .addImm(Offset);
2397bdd1243dSDimitry Andric           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
2398bdd1243dSDimitry Andric               .addReg(TmpSReg)
2399bdd1243dSDimitry Andric               .addImm(0);
2400bdd1243dSDimitry Andric           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
2401bdd1243dSDimitry Andric               .addImm(0)
2402bdd1243dSDimitry Andric               .addReg(TmpSReg);
2403bdd1243dSDimitry Andric         } else {
2404fe6060f1SDimitry Andric           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
2405e8d8bef9SDimitry Andric               .addReg(FrameReg)
2406e8d8bef9SDimitry Andric               .addImm(Offset);
2407bdd1243dSDimitry Andric         }
2408e8d8bef9SDimitry Andric 
2409e8d8bef9SDimitry Andric         if (!UseSGPR)
2410e8d8bef9SDimitry Andric           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
2411e8d8bef9SDimitry Andric             .addReg(TmpSReg, RegState::Kill);
2412e8d8bef9SDimitry Andric 
2413e8d8bef9SDimitry Andric         if (TmpSReg == FrameReg) {
2414e8d8bef9SDimitry Andric           // Undo frame register modification.
2415*0fca6ea1SDimitry Andric           if (NeedSaveSCC &&
2416*0fca6ea1SDimitry Andric               !MI->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) {
2417bdd1243dSDimitry Andric             MachineBasicBlock::iterator I =
2418bdd1243dSDimitry Andric                 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
2419bdd1243dSDimitry Andric                         TmpSReg)
2420bdd1243dSDimitry Andric                     .addReg(FrameReg)
2421bdd1243dSDimitry Andric                     .addImm(-Offset);
2422bdd1243dSDimitry Andric             I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
2423bdd1243dSDimitry Andric                     .addReg(TmpSReg)
2424bdd1243dSDimitry Andric                     .addImm(0);
2425bdd1243dSDimitry Andric             BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
2426bdd1243dSDimitry Andric                     TmpSReg)
2427bdd1243dSDimitry Andric                 .addImm(0)
2428bdd1243dSDimitry Andric                 .addReg(TmpSReg);
2429bdd1243dSDimitry Andric           } else {
2430fe6060f1SDimitry Andric             BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
2431e8d8bef9SDimitry Andric                     FrameReg)
2432e8d8bef9SDimitry Andric                 .addReg(FrameReg)
2433fe6060f1SDimitry Andric                 .addImm(-Offset);
2434e8d8bef9SDimitry Andric           }
2435bdd1243dSDimitry Andric         }
2436e8d8bef9SDimitry Andric 
2437bdd1243dSDimitry Andric         return false;
2438e8d8bef9SDimitry Andric       }
2439e8d8bef9SDimitry Andric 
24400b57cec5SDimitry Andric       bool IsMUBUF = TII->isMUBUF(*MI);
24410b57cec5SDimitry Andric 
24425f757f3fSDimitry Andric       if (!IsMUBUF && !MFI->isBottomOfStack()) {
24435ffd83dbSDimitry Andric         // Convert to a swizzled stack address by scaling by the wave size.
24445ffd83dbSDimitry Andric         // In an entry function/kernel the offset is already swizzled.
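        // Purely illustrative (wave64): an unswizzled frame-register value of
        // 0x400 shifts right by getWavefrontSizeLog2() == 6 to a per-lane base
        // of 0x10, to which the object offset is then added.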
244581ad6265SDimitry Andric         bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
2446*0fca6ea1SDimitry Andric         bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
2447*0fca6ea1SDimitry Andric                        !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
244881ad6265SDimitry Andric         const TargetRegisterClass *RC = IsSALU && !LiveSCC
244981ad6265SDimitry Andric                                             ? &AMDGPU::SReg_32RegClass
245081ad6265SDimitry Andric                                             : &AMDGPU::VGPR_32RegClass;
245181ad6265SDimitry Andric         bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
245281ad6265SDimitry Andric                       MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
245306c3fb27SDimitry Andric         Register ResultReg =
245406c3fb27SDimitry Andric             IsCopy ? MI->getOperand(0).getReg()
245506c3fb27SDimitry Andric                    : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
24560b57cec5SDimitry Andric 
24570b57cec5SDimitry Andric         int64_t Offset = FrameInfo.getObjectOffset(Index);
24580b57cec5SDimitry Andric         if (Offset == 0) {
245981ad6265SDimitry Andric           unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
246081ad6265SDimitry Andric                                                : AMDGPU::V_LSHRREV_B32_e64;
24615f757f3fSDimitry Andric           auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg);
24625f757f3fSDimitry Andric           if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
24635f757f3fSDimitry Andric             // For V_LSHRREV, the operands are reversed (the shift count goes
24645f757f3fSDimitry Andric             // first).
24655f757f3fSDimitry Andric             Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
24665f757f3fSDimitry Andric           else
24675f757f3fSDimitry Andric             Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
246881ad6265SDimitry Andric           if (IsSALU && !LiveSCC)
2469bdd1243dSDimitry Andric             Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
247081ad6265SDimitry Andric           if (IsSALU && LiveSCC) {
247106c3fb27SDimitry Andric             Register NewDest = RS->scavengeRegisterBackwards(
247206c3fb27SDimitry Andric                 AMDGPU::SReg_32RegClass, Shift, false, 0);
247381ad6265SDimitry Andric             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
247481ad6265SDimitry Andric                     NewDest)
247581ad6265SDimitry Andric                 .addReg(ResultReg);
247681ad6265SDimitry Andric             ResultReg = NewDest;
247781ad6265SDimitry Andric           }
24780b57cec5SDimitry Andric         } else {
247981ad6265SDimitry Andric           MachineInstrBuilder MIB;
248081ad6265SDimitry Andric           if (!IsSALU) {
248181ad6265SDimitry Andric             if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
248281ad6265SDimitry Andric                 nullptr) {
24835ffd83dbSDimitry Andric               // Reuse ResultReg in intermediate step.
24845ffd83dbSDimitry Andric               Register ScaledReg = ResultReg;
24850b57cec5SDimitry Andric 
24868bcb0991SDimitry Andric               BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
24878bcb0991SDimitry Andric                       ScaledReg)
24888bcb0991SDimitry Andric                 .addImm(ST.getWavefrontSizeLog2())
24895ffd83dbSDimitry Andric                 .addReg(FrameReg);
24900b57cec5SDimitry Andric 
24918bcb0991SDimitry Andric               const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
24920b57cec5SDimitry Andric 
24938bcb0991SDimitry Andric               // TODO: Fold if use instruction is another add of a constant.
24948bcb0991SDimitry Andric               if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
24958bcb0991SDimitry Andric                 // FIXME: This can fail
24968bcb0991SDimitry Andric                 MIB.addImm(Offset);
24978bcb0991SDimitry Andric                 MIB.addReg(ScaledReg, RegState::Kill);
24988bcb0991SDimitry Andric                 if (!IsVOP2)
24998bcb0991SDimitry Andric                   MIB.addImm(0); // clamp bit
25008bcb0991SDimitry Andric               } else {
2501e8d8bef9SDimitry Andric                 assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
2502480093f4SDimitry Andric                        "Need to reuse carry out register");
25038bcb0991SDimitry Andric 
2504480093f4SDimitry Andric                 // Use scavenged unused carry out as offset register.
2505480093f4SDimitry Andric                 Register ConstOffsetReg;
2506480093f4SDimitry Andric                 if (!isWave32)
2507480093f4SDimitry Andric                   ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
2508480093f4SDimitry Andric                 else
2509480093f4SDimitry Andric                   ConstOffsetReg = MIB.getReg(1);
25108bcb0991SDimitry Andric 
25118bcb0991SDimitry Andric                 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
25120b57cec5SDimitry Andric                     .addImm(Offset);
25138bcb0991SDimitry Andric                 MIB.addReg(ConstOffsetReg, RegState::Kill);
25148bcb0991SDimitry Andric                 MIB.addReg(ScaledReg, RegState::Kill);
25158bcb0991SDimitry Andric                 MIB.addImm(0); // clamp bit
25160b57cec5SDimitry Andric               }
251781ad6265SDimitry Andric             }
251881ad6265SDimitry Andric           }
251981ad6265SDimitry Andric           if (!MIB || IsSALU) {
2520480093f4SDimitry Andric             // We have to produce a carry out, and there isn't a free SGPR pair
2521480093f4SDimitry Andric             // for it. We can keep the whole computation on the SALU to avoid
2522480093f4SDimitry Andric             // clobbering an additional register at the cost of an extra mov.
25238bcb0991SDimitry Andric 
25248bcb0991SDimitry Andric             // We may have 1 free scratch SGPR even though a carry out is
25258bcb0991SDimitry Andric             // unavailable. Only one additional mov is needed.
252606c3fb27SDimitry Andric             Register TmpScaledReg = RS->scavengeRegisterBackwards(
252706c3fb27SDimitry Andric                 AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false);
25285ffd83dbSDimitry Andric             Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
25298bcb0991SDimitry Andric 
25308bcb0991SDimitry Andric             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
25315ffd83dbSDimitry Andric               .addReg(FrameReg)
25328bcb0991SDimitry Andric               .addImm(ST.getWavefrontSizeLog2());
2533fe6060f1SDimitry Andric             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
25348bcb0991SDimitry Andric                 .addReg(ScaledReg, RegState::Kill)
25358bcb0991SDimitry Andric                 .addImm(Offset);
253681ad6265SDimitry Andric             if (!IsSALU)
25378bcb0991SDimitry Andric               BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
25388bcb0991SDimitry Andric                   .addReg(ScaledReg, RegState::Kill);
253981ad6265SDimitry Andric             else
254081ad6265SDimitry Andric               ResultReg = ScaledReg;
25418bcb0991SDimitry Andric 
25428bcb0991SDimitry Andric             // If there were truly no free SGPRs, we need to undo everything.
25438bcb0991SDimitry Andric             if (!TmpScaledReg.isValid()) {
2544fe6060f1SDimitry Andric               BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
25458bcb0991SDimitry Andric                 .addReg(ScaledReg, RegState::Kill)
2546fe6060f1SDimitry Andric                 .addImm(-Offset);
25478bcb0991SDimitry Andric               BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
25485ffd83dbSDimitry Andric                 .addReg(FrameReg)
25498bcb0991SDimitry Andric                 .addImm(ST.getWavefrontSizeLog2());
25508bcb0991SDimitry Andric             }
25518bcb0991SDimitry Andric           }
25528bcb0991SDimitry Andric         }
25538bcb0991SDimitry Andric 
25540b57cec5SDimitry Andric         // Don't introduce an extra copy if we're just materializing in a mov.
2555bdd1243dSDimitry Andric         if (IsCopy) {
25560b57cec5SDimitry Andric           MI->eraseFromParent();
2557bdd1243dSDimitry Andric           return true;
2558bdd1243dSDimitry Andric         }
25590b57cec5SDimitry Andric         FIOp.ChangeToRegister(ResultReg, false, false, true);
2560bdd1243dSDimitry Andric         return false;
25610b57cec5SDimitry Andric       }
25620b57cec5SDimitry Andric 
25630b57cec5SDimitry Andric       if (IsMUBUF) {
25640b57cec5SDimitry Andric         // Disable offen so we don't need a 0 vgpr base.
25650b57cec5SDimitry Andric         assert(static_cast<int>(FIOperandNum) ==
25660b57cec5SDimitry Andric                AMDGPU::getNamedOperandIdx(MI->getOpcode(),
25670b57cec5SDimitry Andric                                           AMDGPU::OpName::vaddr));
25680b57cec5SDimitry Andric 
25695ffd83dbSDimitry Andric         auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
2570e8d8bef9SDimitry Andric         assert((SOffset.isImm() && SOffset.getImm() == 0));
2571e8d8bef9SDimitry Andric 
2572e8d8bef9SDimitry Andric         if (FrameReg != AMDGPU::NoRegister)
2573e8d8bef9SDimitry Andric           SOffset.ChangeToRegister(FrameReg, false);
25740b57cec5SDimitry Andric 
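        // Illustrative folding (made-up numbers): an existing MUBUF immediate
        // of 8 plus an object at frame offset 48 becomes a single immediate of
        // 56 with soffset rewritten to the frame register, provided 56 is a
        // legal MUBUF offset.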
25750b57cec5SDimitry Andric         int64_t Offset = FrameInfo.getObjectOffset(Index);
25760b57cec5SDimitry Andric         int64_t OldImm
25770b57cec5SDimitry Andric           = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
25780b57cec5SDimitry Andric         int64_t NewOffset = OldImm + Offset;
25790b57cec5SDimitry Andric 
25805f757f3fSDimitry Andric         if (TII->isLegalMUBUFImmOffset(NewOffset) &&
25818bcb0991SDimitry Andric             buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
25820b57cec5SDimitry Andric           MI->eraseFromParent();
2583bdd1243dSDimitry Andric           return true;
25840b57cec5SDimitry Andric         }
25850b57cec5SDimitry Andric       }
25860b57cec5SDimitry Andric 
25870b57cec5SDimitry Andric       // If the offset is simply too big, don't convert to a scratch wave offset
25880b57cec5SDimitry Andric       // relative index.
25890b57cec5SDimitry Andric 
25900b57cec5SDimitry Andric       FIOp.ChangeToImmediate(Offset);
25910b57cec5SDimitry Andric       if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
259206c3fb27SDimitry Andric         Register TmpReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
259306c3fb27SDimitry Andric                                                         MI, false, 0);
25940b57cec5SDimitry Andric         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
25950b57cec5SDimitry Andric           .addImm(Offset);
25960b57cec5SDimitry Andric         FIOp.ChangeToRegister(TmpReg, false, false, true);
25970b57cec5SDimitry Andric       }
25980b57cec5SDimitry Andric     }
25990b57cec5SDimitry Andric   }
2600bdd1243dSDimitry Andric   return false;
26010b57cec5SDimitry Andric }
26020b57cec5SDimitry Andric 
26035ffd83dbSDimitry Andric StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
26048bcb0991SDimitry Andric   return AMDGPUInstPrinter::getRegisterName(Reg);
26050b57cec5SDimitry Andric }
26060b57cec5SDimitry Andric 
26075f757f3fSDimitry Andric unsigned AMDGPU::getRegBitWidth(const TargetRegisterClass &RC) {
26085f757f3fSDimitry Andric   return getRegBitWidth(RC.getID());
26095f757f3fSDimitry Andric }
26105f757f3fSDimitry Andric 
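// The *ForBitWidth helpers below come in pairs: an "Any" variant with no
// alignment constraint and an "Align2" variant restricted to even-aligned
// register tuples. The public getVGPR/AGPR/VectorSuperClassForBitWidth
// dispatchers choose between them based on ST.needsAlignedVGPRs(), which is
// set on subtargets (such as gfx90a) that require aligned VGPR tuples.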
2611fe6060f1SDimitry Andric static const TargetRegisterClass *
2612fe6060f1SDimitry Andric getAnyVGPRClassForBitWidth(unsigned BitWidth) {
261306c3fb27SDimitry Andric   if (BitWidth == 64)
26145ffd83dbSDimitry Andric     return &AMDGPU::VReg_64RegClass;
261506c3fb27SDimitry Andric   if (BitWidth == 96)
26165ffd83dbSDimitry Andric     return &AMDGPU::VReg_96RegClass;
261706c3fb27SDimitry Andric   if (BitWidth == 128)
26185ffd83dbSDimitry Andric     return &AMDGPU::VReg_128RegClass;
261906c3fb27SDimitry Andric   if (BitWidth == 160)
26205ffd83dbSDimitry Andric     return &AMDGPU::VReg_160RegClass;
262106c3fb27SDimitry Andric   if (BitWidth == 192)
26225ffd83dbSDimitry Andric     return &AMDGPU::VReg_192RegClass;
262306c3fb27SDimitry Andric   if (BitWidth == 224)
2624fe6060f1SDimitry Andric     return &AMDGPU::VReg_224RegClass;
262506c3fb27SDimitry Andric   if (BitWidth == 256)
26265ffd83dbSDimitry Andric     return &AMDGPU::VReg_256RegClass;
262706c3fb27SDimitry Andric   if (BitWidth == 288)
2628bdd1243dSDimitry Andric     return &AMDGPU::VReg_288RegClass;
262906c3fb27SDimitry Andric   if (BitWidth == 320)
2630bdd1243dSDimitry Andric     return &AMDGPU::VReg_320RegClass;
263106c3fb27SDimitry Andric   if (BitWidth == 352)
2632bdd1243dSDimitry Andric     return &AMDGPU::VReg_352RegClass;
263306c3fb27SDimitry Andric   if (BitWidth == 384)
2634bdd1243dSDimitry Andric     return &AMDGPU::VReg_384RegClass;
263506c3fb27SDimitry Andric   if (BitWidth == 512)
26365ffd83dbSDimitry Andric     return &AMDGPU::VReg_512RegClass;
263706c3fb27SDimitry Andric   if (BitWidth == 1024)
26385ffd83dbSDimitry Andric     return &AMDGPU::VReg_1024RegClass;
26395ffd83dbSDimitry Andric 
26405ffd83dbSDimitry Andric   return nullptr;
26415ffd83dbSDimitry Andric }
26425ffd83dbSDimitry Andric 
2643fe6060f1SDimitry Andric static const TargetRegisterClass *
2644fe6060f1SDimitry Andric getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
264506c3fb27SDimitry Andric   if (BitWidth == 64)
2646fe6060f1SDimitry Andric     return &AMDGPU::VReg_64_Align2RegClass;
264706c3fb27SDimitry Andric   if (BitWidth == 96)
2648fe6060f1SDimitry Andric     return &AMDGPU::VReg_96_Align2RegClass;
264906c3fb27SDimitry Andric   if (BitWidth == 128)
2650fe6060f1SDimitry Andric     return &AMDGPU::VReg_128_Align2RegClass;
265106c3fb27SDimitry Andric   if (BitWidth == 160)
2652fe6060f1SDimitry Andric     return &AMDGPU::VReg_160_Align2RegClass;
265306c3fb27SDimitry Andric   if (BitWidth == 192)
2654fe6060f1SDimitry Andric     return &AMDGPU::VReg_192_Align2RegClass;
265506c3fb27SDimitry Andric   if (BitWidth == 224)
2656fe6060f1SDimitry Andric     return &AMDGPU::VReg_224_Align2RegClass;
265706c3fb27SDimitry Andric   if (BitWidth == 256)
2658fe6060f1SDimitry Andric     return &AMDGPU::VReg_256_Align2RegClass;
265906c3fb27SDimitry Andric   if (BitWidth == 288)
2660bdd1243dSDimitry Andric     return &AMDGPU::VReg_288_Align2RegClass;
266106c3fb27SDimitry Andric   if (BitWidth == 320)
2662bdd1243dSDimitry Andric     return &AMDGPU::VReg_320_Align2RegClass;
266306c3fb27SDimitry Andric   if (BitWidth == 352)
2664bdd1243dSDimitry Andric     return &AMDGPU::VReg_352_Align2RegClass;
266506c3fb27SDimitry Andric   if (BitWidth == 384)
2666bdd1243dSDimitry Andric     return &AMDGPU::VReg_384_Align2RegClass;
266706c3fb27SDimitry Andric   if (BitWidth == 512)
2668fe6060f1SDimitry Andric     return &AMDGPU::VReg_512_Align2RegClass;
266906c3fb27SDimitry Andric   if (BitWidth == 1024)
2670fe6060f1SDimitry Andric     return &AMDGPU::VReg_1024_Align2RegClass;
2671fe6060f1SDimitry Andric 
2672fe6060f1SDimitry Andric   return nullptr;
2673fe6060f1SDimitry Andric }
2674fe6060f1SDimitry Andric 
26755ffd83dbSDimitry Andric const TargetRegisterClass *
2676fe6060f1SDimitry Andric SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
2677fe6060f1SDimitry Andric   if (BitWidth == 1)
2678fe6060f1SDimitry Andric     return &AMDGPU::VReg_1RegClass;
267906c3fb27SDimitry Andric   if (BitWidth == 16)
2680647cbc5dSDimitry Andric     return &AMDGPU::VGPR_16RegClass;
268106c3fb27SDimitry Andric   if (BitWidth == 32)
2682fe6060f1SDimitry Andric     return &AMDGPU::VGPR_32RegClass;
2683fe6060f1SDimitry Andric   return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth)
2684fe6060f1SDimitry Andric                                 : getAnyVGPRClassForBitWidth(BitWidth);
2685fe6060f1SDimitry Andric }
2686fe6060f1SDimitry Andric 
2687fe6060f1SDimitry Andric static const TargetRegisterClass *
2688fe6060f1SDimitry Andric getAnyAGPRClassForBitWidth(unsigned BitWidth) {
268906c3fb27SDimitry Andric   if (BitWidth == 64)
26905ffd83dbSDimitry Andric     return &AMDGPU::AReg_64RegClass;
269106c3fb27SDimitry Andric   if (BitWidth == 96)
26925ffd83dbSDimitry Andric     return &AMDGPU::AReg_96RegClass;
269306c3fb27SDimitry Andric   if (BitWidth == 128)
26945ffd83dbSDimitry Andric     return &AMDGPU::AReg_128RegClass;
269506c3fb27SDimitry Andric   if (BitWidth == 160)
26965ffd83dbSDimitry Andric     return &AMDGPU::AReg_160RegClass;
269706c3fb27SDimitry Andric   if (BitWidth == 192)
26985ffd83dbSDimitry Andric     return &AMDGPU::AReg_192RegClass;
269906c3fb27SDimitry Andric   if (BitWidth == 224)
2700fe6060f1SDimitry Andric     return &AMDGPU::AReg_224RegClass;
270106c3fb27SDimitry Andric   if (BitWidth == 256)
27025ffd83dbSDimitry Andric     return &AMDGPU::AReg_256RegClass;
270306c3fb27SDimitry Andric   if (BitWidth == 288)
2704bdd1243dSDimitry Andric     return &AMDGPU::AReg_288RegClass;
270506c3fb27SDimitry Andric   if (BitWidth == 320)
2706bdd1243dSDimitry Andric     return &AMDGPU::AReg_320RegClass;
270706c3fb27SDimitry Andric   if (BitWidth == 352)
2708bdd1243dSDimitry Andric     return &AMDGPU::AReg_352RegClass;
270906c3fb27SDimitry Andric   if (BitWidth == 384)
2710bdd1243dSDimitry Andric     return &AMDGPU::AReg_384RegClass;
271106c3fb27SDimitry Andric   if (BitWidth == 512)
27125ffd83dbSDimitry Andric     return &AMDGPU::AReg_512RegClass;
271306c3fb27SDimitry Andric   if (BitWidth == 1024)
27145ffd83dbSDimitry Andric     return &AMDGPU::AReg_1024RegClass;
27155ffd83dbSDimitry Andric 
27165ffd83dbSDimitry Andric   return nullptr;
27175ffd83dbSDimitry Andric }
27185ffd83dbSDimitry Andric 
2719fe6060f1SDimitry Andric static const TargetRegisterClass *
2720fe6060f1SDimitry Andric getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
272106c3fb27SDimitry Andric   if (BitWidth == 64)
2722fe6060f1SDimitry Andric     return &AMDGPU::AReg_64_Align2RegClass;
272306c3fb27SDimitry Andric   if (BitWidth == 96)
2724fe6060f1SDimitry Andric     return &AMDGPU::AReg_96_Align2RegClass;
272506c3fb27SDimitry Andric   if (BitWidth == 128)
2726fe6060f1SDimitry Andric     return &AMDGPU::AReg_128_Align2RegClass;
272706c3fb27SDimitry Andric   if (BitWidth == 160)
2728fe6060f1SDimitry Andric     return &AMDGPU::AReg_160_Align2RegClass;
272906c3fb27SDimitry Andric   if (BitWidth == 192)
2730fe6060f1SDimitry Andric     return &AMDGPU::AReg_192_Align2RegClass;
273106c3fb27SDimitry Andric   if (BitWidth == 224)
2732fe6060f1SDimitry Andric     return &AMDGPU::AReg_224_Align2RegClass;
273306c3fb27SDimitry Andric   if (BitWidth == 256)
2734fe6060f1SDimitry Andric     return &AMDGPU::AReg_256_Align2RegClass;
273506c3fb27SDimitry Andric   if (BitWidth == 288)
2736bdd1243dSDimitry Andric     return &AMDGPU::AReg_288_Align2RegClass;
273706c3fb27SDimitry Andric   if (BitWidth == 320)
2738bdd1243dSDimitry Andric     return &AMDGPU::AReg_320_Align2RegClass;
273906c3fb27SDimitry Andric   if (BitWidth == 352)
2740bdd1243dSDimitry Andric     return &AMDGPU::AReg_352_Align2RegClass;
274106c3fb27SDimitry Andric   if (BitWidth == 384)
2742bdd1243dSDimitry Andric     return &AMDGPU::AReg_384_Align2RegClass;
274306c3fb27SDimitry Andric   if (BitWidth == 512)
2744fe6060f1SDimitry Andric     return &AMDGPU::AReg_512_Align2RegClass;
274506c3fb27SDimitry Andric   if (BitWidth == 1024)
2746fe6060f1SDimitry Andric     return &AMDGPU::AReg_1024_Align2RegClass;
2747fe6060f1SDimitry Andric 
2748fe6060f1SDimitry Andric   return nullptr;
2749fe6060f1SDimitry Andric }
2750fe6060f1SDimitry Andric 
2751fe6060f1SDimitry Andric const TargetRegisterClass *
2752fe6060f1SDimitry Andric SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
275306c3fb27SDimitry Andric   if (BitWidth == 16)
2754fe6060f1SDimitry Andric     return &AMDGPU::AGPR_LO16RegClass;
275506c3fb27SDimitry Andric   if (BitWidth == 32)
2756fe6060f1SDimitry Andric     return &AMDGPU::AGPR_32RegClass;
2757fe6060f1SDimitry Andric   return ST.needsAlignedVGPRs() ? getAlignedAGPRClassForBitWidth(BitWidth)
2758fe6060f1SDimitry Andric                                 : getAnyAGPRClassForBitWidth(BitWidth);
2759fe6060f1SDimitry Andric }
2760fe6060f1SDimitry Andric 
27614824e7fdSDimitry Andric static const TargetRegisterClass *
27624824e7fdSDimitry Andric getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
276306c3fb27SDimitry Andric   if (BitWidth == 64)
27644824e7fdSDimitry Andric     return &AMDGPU::AV_64RegClass;
276506c3fb27SDimitry Andric   if (BitWidth == 96)
27664824e7fdSDimitry Andric     return &AMDGPU::AV_96RegClass;
276706c3fb27SDimitry Andric   if (BitWidth == 128)
27684824e7fdSDimitry Andric     return &AMDGPU::AV_128RegClass;
276906c3fb27SDimitry Andric   if (BitWidth == 160)
27704824e7fdSDimitry Andric     return &AMDGPU::AV_160RegClass;
277106c3fb27SDimitry Andric   if (BitWidth == 192)
27724824e7fdSDimitry Andric     return &AMDGPU::AV_192RegClass;
277306c3fb27SDimitry Andric   if (BitWidth == 224)
27744824e7fdSDimitry Andric     return &AMDGPU::AV_224RegClass;
277506c3fb27SDimitry Andric   if (BitWidth == 256)
27764824e7fdSDimitry Andric     return &AMDGPU::AV_256RegClass;
277706c3fb27SDimitry Andric   if (BitWidth == 288)
2778bdd1243dSDimitry Andric     return &AMDGPU::AV_288RegClass;
277906c3fb27SDimitry Andric   if (BitWidth == 320)
2780bdd1243dSDimitry Andric     return &AMDGPU::AV_320RegClass;
278106c3fb27SDimitry Andric   if (BitWidth == 352)
2782bdd1243dSDimitry Andric     return &AMDGPU::AV_352RegClass;
278306c3fb27SDimitry Andric   if (BitWidth == 384)
2784bdd1243dSDimitry Andric     return &AMDGPU::AV_384RegClass;
278506c3fb27SDimitry Andric   if (BitWidth == 512)
27864824e7fdSDimitry Andric     return &AMDGPU::AV_512RegClass;
278706c3fb27SDimitry Andric   if (BitWidth == 1024)
27884824e7fdSDimitry Andric     return &AMDGPU::AV_1024RegClass;
27894824e7fdSDimitry Andric 
27904824e7fdSDimitry Andric   return nullptr;
27914824e7fdSDimitry Andric }
27924824e7fdSDimitry Andric 
27934824e7fdSDimitry Andric static const TargetRegisterClass *
27944824e7fdSDimitry Andric getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
279506c3fb27SDimitry Andric   if (BitWidth == 64)
27964824e7fdSDimitry Andric     return &AMDGPU::AV_64_Align2RegClass;
279706c3fb27SDimitry Andric   if (BitWidth == 96)
27984824e7fdSDimitry Andric     return &AMDGPU::AV_96_Align2RegClass;
279906c3fb27SDimitry Andric   if (BitWidth == 128)
28004824e7fdSDimitry Andric     return &AMDGPU::AV_128_Align2RegClass;
280106c3fb27SDimitry Andric   if (BitWidth == 160)
28024824e7fdSDimitry Andric     return &AMDGPU::AV_160_Align2RegClass;
280306c3fb27SDimitry Andric   if (BitWidth == 192)
28044824e7fdSDimitry Andric     return &AMDGPU::AV_192_Align2RegClass;
280506c3fb27SDimitry Andric   if (BitWidth == 224)
28064824e7fdSDimitry Andric     return &AMDGPU::AV_224_Align2RegClass;
280706c3fb27SDimitry Andric   if (BitWidth == 256)
28084824e7fdSDimitry Andric     return &AMDGPU::AV_256_Align2RegClass;
280906c3fb27SDimitry Andric   if (BitWidth == 288)
2810bdd1243dSDimitry Andric     return &AMDGPU::AV_288_Align2RegClass;
281106c3fb27SDimitry Andric   if (BitWidth == 320)
2812bdd1243dSDimitry Andric     return &AMDGPU::AV_320_Align2RegClass;
281306c3fb27SDimitry Andric   if (BitWidth == 352)
2814bdd1243dSDimitry Andric     return &AMDGPU::AV_352_Align2RegClass;
281506c3fb27SDimitry Andric   if (BitWidth == 384)
2816bdd1243dSDimitry Andric     return &AMDGPU::AV_384_Align2RegClass;
281706c3fb27SDimitry Andric   if (BitWidth == 512)
28184824e7fdSDimitry Andric     return &AMDGPU::AV_512_Align2RegClass;
281906c3fb27SDimitry Andric   if (BitWidth == 1024)
28204824e7fdSDimitry Andric     return &AMDGPU::AV_1024_Align2RegClass;
28214824e7fdSDimitry Andric 
28224824e7fdSDimitry Andric   return nullptr;
28234824e7fdSDimitry Andric }
28244824e7fdSDimitry Andric 
28254824e7fdSDimitry Andric const TargetRegisterClass *
28264824e7fdSDimitry Andric SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
282706c3fb27SDimitry Andric   if (BitWidth == 32)
28284824e7fdSDimitry Andric     return &AMDGPU::AV_32RegClass;
28294824e7fdSDimitry Andric   return ST.needsAlignedVGPRs()
28304824e7fdSDimitry Andric              ? getAlignedVectorSuperClassForBitWidth(BitWidth)
28314824e7fdSDimitry Andric              : getAnyVectorSuperClassForBitWidth(BitWidth);
28324824e7fdSDimitry Andric }
28334824e7fdSDimitry Andric 
28345ffd83dbSDimitry Andric const TargetRegisterClass *
28355ffd83dbSDimitry Andric SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
283606c3fb27SDimitry Andric   if (BitWidth == 16)
28375ffd83dbSDimitry Andric     return &AMDGPU::SGPR_LO16RegClass;
283806c3fb27SDimitry Andric   if (BitWidth == 32)
28395ffd83dbSDimitry Andric     return &AMDGPU::SReg_32RegClass;
284006c3fb27SDimitry Andric   if (BitWidth == 64)
28415ffd83dbSDimitry Andric     return &AMDGPU::SReg_64RegClass;
284206c3fb27SDimitry Andric   if (BitWidth == 96)
28435ffd83dbSDimitry Andric     return &AMDGPU::SGPR_96RegClass;
284406c3fb27SDimitry Andric   if (BitWidth == 128)
28455ffd83dbSDimitry Andric     return &AMDGPU::SGPR_128RegClass;
284606c3fb27SDimitry Andric   if (BitWidth == 160)
28475ffd83dbSDimitry Andric     return &AMDGPU::SGPR_160RegClass;
284806c3fb27SDimitry Andric   if (BitWidth == 192)
28495ffd83dbSDimitry Andric     return &AMDGPU::SGPR_192RegClass;
285006c3fb27SDimitry Andric   if (BitWidth == 224)
2851fe6060f1SDimitry Andric     return &AMDGPU::SGPR_224RegClass;
285206c3fb27SDimitry Andric   if (BitWidth == 256)
28535ffd83dbSDimitry Andric     return &AMDGPU::SGPR_256RegClass;
285406c3fb27SDimitry Andric   if (BitWidth == 288)
2855bdd1243dSDimitry Andric     return &AMDGPU::SGPR_288RegClass;
285606c3fb27SDimitry Andric   if (BitWidth == 320)
2857bdd1243dSDimitry Andric     return &AMDGPU::SGPR_320RegClass;
285806c3fb27SDimitry Andric   if (BitWidth == 352)
2859bdd1243dSDimitry Andric     return &AMDGPU::SGPR_352RegClass;
286006c3fb27SDimitry Andric   if (BitWidth == 384)
2861bdd1243dSDimitry Andric     return &AMDGPU::SGPR_384RegClass;
286206c3fb27SDimitry Andric   if (BitWidth == 512)
28635ffd83dbSDimitry Andric     return &AMDGPU::SGPR_512RegClass;
286406c3fb27SDimitry Andric   if (BitWidth == 1024)
28655ffd83dbSDimitry Andric     return &AMDGPU::SGPR_1024RegClass;
28665ffd83dbSDimitry Andric 
28675ffd83dbSDimitry Andric   return nullptr;
28685ffd83dbSDimitry Andric }
28695ffd83dbSDimitry Andric 
2870e8d8bef9SDimitry Andric bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
2871e8d8bef9SDimitry Andric                                Register Reg) const {
2872e8d8bef9SDimitry Andric   const TargetRegisterClass *RC;
2873e8d8bef9SDimitry Andric   if (Reg.isVirtual())
2874e8d8bef9SDimitry Andric     RC = MRI.getRegClass(Reg);
2875e8d8bef9SDimitry Andric   else
2876bdd1243dSDimitry Andric     RC = getPhysRegBaseClass(Reg);
2877bdd1243dSDimitry Andric   return RC ? isSGPRClass(RC) : false;
2878e8d8bef9SDimitry Andric }
2879e8d8bef9SDimitry Andric 
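// The getEquivalent*Class helpers map a register class to the VGPR, AGPR, or
// SGPR class of the same bit width, asserting if no class of that size exists.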
28805ffd83dbSDimitry Andric const TargetRegisterClass *
28815ffd83dbSDimitry Andric SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
28825ffd83dbSDimitry Andric   unsigned Size = getRegSizeInBits(*SRC);
28835ffd83dbSDimitry Andric   const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
28845ffd83dbSDimitry Andric   assert(VRC && "Invalid register class size");
28855ffd83dbSDimitry Andric   return VRC;
28860b57cec5SDimitry Andric }
28870b57cec5SDimitry Andric 
28885ffd83dbSDimitry Andric const TargetRegisterClass *
28895ffd83dbSDimitry Andric SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
28905ffd83dbSDimitry Andric   unsigned Size = getRegSizeInBits(*SRC);
28915ffd83dbSDimitry Andric   const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
28925ffd83dbSDimitry Andric   assert(ARC && "Invalid register class size");
28935ffd83dbSDimitry Andric   return ARC;
28940b57cec5SDimitry Andric }
28950b57cec5SDimitry Andric 
28965ffd83dbSDimitry Andric const TargetRegisterClass *
28975ffd83dbSDimitry Andric SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
28985ffd83dbSDimitry Andric   unsigned Size = getRegSizeInBits(*VRC);
28995ffd83dbSDimitry Andric   if (Size == 32)
29000b57cec5SDimitry Andric     return &AMDGPU::SGPR_32RegClass;
29015ffd83dbSDimitry Andric   const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size);
29025ffd83dbSDimitry Andric   assert(SRC && "Invalid register class size");
29035ffd83dbSDimitry Andric   return SRC;
29040b57cec5SDimitry Andric }
29050b57cec5SDimitry Andric 
2906fe6060f1SDimitry Andric const TargetRegisterClass *
2907fe6060f1SDimitry Andric SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
2908fe6060f1SDimitry Andric                                          const TargetRegisterClass *SubRC,
2909fe6060f1SDimitry Andric                                          unsigned SubIdx) const {
2910fe6060f1SDimitry Andric   // Ensure this subregister index is aligned in the super register.
2911fe6060f1SDimitry Andric   const TargetRegisterClass *MatchRC =
2912fe6060f1SDimitry Andric       getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
2913fe6060f1SDimitry Andric   return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
2914fe6060f1SDimitry Andric }
2915fe6060f1SDimitry Andric 
29168bcb0991SDimitry Andric bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
29178bcb0991SDimitry Andric   if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
29188bcb0991SDimitry Andric       OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
29198bcb0991SDimitry Andric     return !ST.hasMFMAInlineLiteralBug();
29208bcb0991SDimitry Andric 
29218bcb0991SDimitry Andric   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
29228bcb0991SDimitry Andric          OpType <= AMDGPU::OPERAND_SRC_LAST;
29238bcb0991SDimitry Andric }
29248bcb0991SDimitry Andric 
29250b57cec5SDimitry Andric bool SIRegisterInfo::shouldRewriteCopySrc(
29260b57cec5SDimitry Andric   const TargetRegisterClass *DefRC,
29270b57cec5SDimitry Andric   unsigned DefSubReg,
29280b57cec5SDimitry Andric   const TargetRegisterClass *SrcRC,
29290b57cec5SDimitry Andric   unsigned SrcSubReg) const {
29300b57cec5SDimitry Andric   // We want to prefer the smallest register class possible, so we don't want to
29310b57cec5SDimitry Andric   // stop and rewrite on anything that looks like a subregister
29320b57cec5SDimitry Andric   // extract. Operations mostly don't care about the super register class, so we
29330b57cec5SDimitry Andric   // only want to stop on the most basic of copies between the same register
29340b57cec5SDimitry Andric   // class.
29350b57cec5SDimitry Andric   //
29360b57cec5SDimitry Andric   // e.g. if we have something like
29370b57cec5SDimitry Andric   // %0 = ...
29380b57cec5SDimitry Andric   // %1 = ...
29390b57cec5SDimitry Andric   // %2 = REG_SEQUENCE %0, sub0, %1, sub1
29400b57cec5SDimitry Andric   // %3 = COPY %2, sub0
29410b57cec5SDimitry Andric   //
29420b57cec5SDimitry Andric   // We want to look through the COPY to find:
29430b57cec5SDimitry Andric   //  => %3 = COPY %0
29440b57cec5SDimitry Andric 
29450b57cec5SDimitry Andric   // Plain copy.
29460b57cec5SDimitry Andric   return getCommonSubClass(DefRC, SrcRC) != nullptr;
29470b57cec5SDimitry Andric }
29480b57cec5SDimitry Andric 
2949e8d8bef9SDimitry Andric bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
2950e8d8bef9SDimitry Andric   // TODO: 64-bit operands have extending behavior from 32-bit literal.
2951e8d8bef9SDimitry Andric   return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
2952e8d8bef9SDimitry Andric          OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
2953e8d8bef9SDimitry Andric }
2954e8d8bef9SDimitry Andric 
29555ffd83dbSDimitry Andric /// Returns the lowest register that is not used at any point in the function.
29560b57cec5SDimitry Andric /// If all registers are used, then this function will return
295706c3fb27SDimitry Andric /// AMDGPU::NoRegister. If \p ReserveHighestRegister = true, then return the
29585ffd83dbSDimitry Andric /// highest unused register.
295906c3fb27SDimitry Andric MCRegister SIRegisterInfo::findUnusedRegister(
296006c3fb27SDimitry Andric     const MachineRegisterInfo &MRI, const TargetRegisterClass *RC,
296106c3fb27SDimitry Andric     const MachineFunction &MF, bool ReserveHighestRegister) const {
296206c3fb27SDimitry Andric   if (ReserveHighestRegister) {
29635ffd83dbSDimitry Andric     for (MCRegister Reg : reverse(*RC))
29640b57cec5SDimitry Andric       if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
29650b57cec5SDimitry Andric         return Reg;
29665ffd83dbSDimitry Andric   } else {
29675ffd83dbSDimitry Andric     for (MCRegister Reg : *RC)
29685ffd83dbSDimitry Andric       if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
29695ffd83dbSDimitry Andric         return Reg;
29705ffd83dbSDimitry Andric   }
29715ffd83dbSDimitry Andric   return MCRegister();
29720b57cec5SDimitry Andric }
29730b57cec5SDimitry Andric 
297406c3fb27SDimitry Andric bool SIRegisterInfo::isUniformReg(const MachineRegisterInfo &MRI,
297506c3fb27SDimitry Andric                                   const RegisterBankInfo &RBI,
297606c3fb27SDimitry Andric                                   Register Reg) const {
297706c3fb27SDimitry Andric   auto *RB = RBI.getRegBank(Reg, MRI, *MRI.getTargetRegisterInfo());
297806c3fb27SDimitry Andric   if (!RB)
297906c3fb27SDimitry Andric     return false;
298006c3fb27SDimitry Andric 
298106c3fb27SDimitry Andric   return !RBI.isDivergentRegBank(RB);
298206c3fb27SDimitry Andric }
298306c3fb27SDimitry Andric 
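// Returns the subregister indices that split a register of class RC into
// EltSize-byte pieces. The split table is indexed by the element size in
// DWORDs; e.g. a 128-bit class with EltSize == 8 (two DWORDs per piece) yields
// two 64-bit parts.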
29840b57cec5SDimitry Andric ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
29850b57cec5SDimitry Andric                                                    unsigned EltSize) const {
298606c3fb27SDimitry Andric   const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC);
29875ffd83dbSDimitry Andric   assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
29880b57cec5SDimitry Andric 
29895ffd83dbSDimitry Andric   const unsigned RegDWORDs = RegBitWidth / 32;
29905ffd83dbSDimitry Andric   const unsigned EltDWORDs = EltSize / 4;
29915ffd83dbSDimitry Andric   assert(RegSplitParts.size() + 1 >= EltDWORDs);
29920b57cec5SDimitry Andric 
29935ffd83dbSDimitry Andric   const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
29945ffd83dbSDimitry Andric   const unsigned NumParts = RegDWORDs / EltDWORDs;
29950b57cec5SDimitry Andric 
2996bdd1243dSDimitry Andric   return ArrayRef(Parts.data(), NumParts);
29970b57cec5SDimitry Andric }
29980b57cec5SDimitry Andric 
29990b57cec5SDimitry Andric const TargetRegisterClass*
30000b57cec5SDimitry Andric SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
30015ffd83dbSDimitry Andric                                   Register Reg) const {
3002bdd1243dSDimitry Andric   return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
3003bdd1243dSDimitry Andric }
3004bdd1243dSDimitry Andric 
3005bdd1243dSDimitry Andric const TargetRegisterClass *
3006bdd1243dSDimitry Andric SIRegisterInfo::getRegClassForOperandReg(const MachineRegisterInfo &MRI,
3007bdd1243dSDimitry Andric                                          const MachineOperand &MO) const {
3008bdd1243dSDimitry Andric   const TargetRegisterClass *SrcRC = getRegClassForReg(MRI, MO.getReg());
3009bdd1243dSDimitry Andric   return getSubRegisterClass(SrcRC, MO.getSubReg());
30100b57cec5SDimitry Andric }
30110b57cec5SDimitry Andric 
30120b57cec5SDimitry Andric bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
30135ffd83dbSDimitry Andric                             Register Reg) const {
30140b57cec5SDimitry Andric   const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
30155ffd83dbSDimitry Andric   // Registers without classes are unaddressable, SGPR-like registers.
3016349cc55cSDimitry Andric   return RC && isVGPRClass(RC);
30170b57cec5SDimitry Andric }
30180b57cec5SDimitry Andric 
30190b57cec5SDimitry Andric bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
30205ffd83dbSDimitry Andric                             Register Reg) const {
30210b57cec5SDimitry Andric   const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
30225ffd83dbSDimitry Andric 
30235ffd83dbSDimitry Andric   // Registers without classes are unaddressable, SGPR-like registers.
3024349cc55cSDimitry Andric   return RC && isAGPRClass(RC);
30250b57cec5SDimitry Andric }
30260b57cec5SDimitry Andric 
30270b57cec5SDimitry Andric bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
30280b57cec5SDimitry Andric                                     const TargetRegisterClass *SrcRC,
30290b57cec5SDimitry Andric                                     unsigned SubReg,
30300b57cec5SDimitry Andric                                     const TargetRegisterClass *DstRC,
30310b57cec5SDimitry Andric                                     unsigned DstSubReg,
30320b57cec5SDimitry Andric                                     const TargetRegisterClass *NewRC,
30330b57cec5SDimitry Andric                                     LiveIntervals &LIS) const {
30340b57cec5SDimitry Andric   unsigned SrcSize = getRegSizeInBits(*SrcRC);
30350b57cec5SDimitry Andric   unsigned DstSize = getRegSizeInBits(*DstRC);
30360b57cec5SDimitry Andric   unsigned NewSize = getRegSizeInBits(*NewRC);
30370b57cec5SDimitry Andric 
30380b57cec5SDimitry Andric   // Do not increase the size of registers beyond a dword; we would need to
30390b57cec5SDimitry Andric   // allocate adjacent registers and constrain regalloc more than needed.
30400b57cec5SDimitry Andric 
30410b57cec5SDimitry Andric   // Always allow dword coalescing.
30420b57cec5SDimitry Andric   if (SrcSize <= 32 || DstSize <= 32)
30430b57cec5SDimitry Andric     return true;
30440b57cec5SDimitry Andric 
30450b57cec5SDimitry Andric   return NewSize <= DstSize || NewSize <= SrcSize;
30460b57cec5SDimitry Andric }
30470b57cec5SDimitry Andric 
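// The pressure limits below are derived from the occupancy achievable with the
// function's LDS usage, clamped by the per-function VGPR/SGPR maximums.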
30480b57cec5SDimitry Andric unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
30490b57cec5SDimitry Andric                                              MachineFunction &MF) const {
30500b57cec5SDimitry Andric   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
30510b57cec5SDimitry Andric 
30520b57cec5SDimitry Andric   unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
30530b57cec5SDimitry Andric                                                        MF.getFunction());
30540b57cec5SDimitry Andric   switch (RC->getID()) {
30550b57cec5SDimitry Andric   default:
30565ffd83dbSDimitry Andric     return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
30570b57cec5SDimitry Andric   case AMDGPU::VGPR_32RegClassID:
30580b57cec5SDimitry Andric     return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
30590b57cec5SDimitry Andric   case AMDGPU::SGPR_32RegClassID:
30605ffd83dbSDimitry Andric   case AMDGPU::SGPR_LO16RegClassID:
30610b57cec5SDimitry Andric     return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
30620b57cec5SDimitry Andric   }
30630b57cec5SDimitry Andric }
30640b57cec5SDimitry Andric 
30650b57cec5SDimitry Andric unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
30660b57cec5SDimitry Andric                                                 unsigned Idx) const {
30675ffd83dbSDimitry Andric   if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
30685ffd83dbSDimitry Andric       Idx == AMDGPU::RegisterPressureSets::AGPR_32)
30690b57cec5SDimitry Andric     return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
30700b57cec5SDimitry Andric                                const_cast<MachineFunction &>(MF));
30710b57cec5SDimitry Andric 
30725ffd83dbSDimitry Andric   if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
30730b57cec5SDimitry Andric     return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
30740b57cec5SDimitry Andric                                const_cast<MachineFunction &>(MF));
30750b57cec5SDimitry Andric 
30765ffd83dbSDimitry Andric   llvm_unreachable("Unexpected register pressure set!");
30770b57cec5SDimitry Andric }
30780b57cec5SDimitry Andric 
30790b57cec5SDimitry Andric const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
30800b57cec5SDimitry Andric   static const int Empty[] = { -1 };
30810b57cec5SDimitry Andric 
30825ffd83dbSDimitry Andric   if (RegPressureIgnoredUnits[RegUnit])
30830b57cec5SDimitry Andric     return Empty;
30845ffd83dbSDimitry Andric 
30855ffd83dbSDimitry Andric   return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
30860b57cec5SDimitry Andric }
30870b57cec5SDimitry Andric 
30885ffd83dbSDimitry Andric MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
30890b57cec5SDimitry Andric   // Not a callee-saved register.
30900b57cec5SDimitry Andric   return AMDGPU::SGPR30_SGPR31;
30910b57cec5SDimitry Andric }
30920b57cec5SDimitry Andric 
30930b57cec5SDimitry Andric const TargetRegisterClass *
30940b57cec5SDimitry Andric SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
309581ad6265SDimitry Andric                                          const RegisterBank &RB) const {
30960b57cec5SDimitry Andric   switch (RB.getID()) {
30970b57cec5SDimitry Andric   case AMDGPU::VGPRRegBankID:
30985f757f3fSDimitry Andric     return getVGPRClassForBitWidth(
30995f757f3fSDimitry Andric         std::max(ST.useRealTrue16Insts() ? 16u : 32u, Size));
31000b57cec5SDimitry Andric   case AMDGPU::VCCRegBankID:
31015ffd83dbSDimitry Andric     assert(Size == 1);
31025ffd83dbSDimitry Andric     return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
31035ffd83dbSDimitry Andric                     : &AMDGPU::SReg_64_XEXECRegClass;
31040b57cec5SDimitry Andric   case AMDGPU::SGPRRegBankID:
31055ffd83dbSDimitry Andric     return getSGPRClassForBitWidth(std::max(32u, Size));
31065ffd83dbSDimitry Andric   case AMDGPU::AGPRRegBankID:
31075ffd83dbSDimitry Andric     return getAGPRClassForBitWidth(std::max(32u, Size));
31080b57cec5SDimitry Andric   default:
31090b57cec5SDimitry Andric     llvm_unreachable("unknown register bank");
31100b57cec5SDimitry Andric   }
31110b57cec5SDimitry Andric }
31120b57cec5SDimitry Andric 
31130b57cec5SDimitry Andric const TargetRegisterClass *
31140b57cec5SDimitry Andric SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
31150b57cec5SDimitry Andric                                          const MachineRegisterInfo &MRI) const {
31168bcb0991SDimitry Andric   const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
31178bcb0991SDimitry Andric   if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
311881ad6265SDimitry Andric     return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB);
31198bcb0991SDimitry Andric 
3120349cc55cSDimitry Andric   if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
31218bcb0991SDimitry Andric     return getAllocatableClass(RC);
3122349cc55cSDimitry Andric 
3123349cc55cSDimitry Andric   return nullptr;
31240b57cec5SDimitry Andric }
31250b57cec5SDimitry Andric 
31265ffd83dbSDimitry Andric MCRegister SIRegisterInfo::getVCC() const {
31270b57cec5SDimitry Andric   return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
31280b57cec5SDimitry Andric }
31290b57cec5SDimitry Andric 
3130753f127fSDimitry Andric MCRegister SIRegisterInfo::getExec() const {
3131753f127fSDimitry Andric   return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
3132753f127fSDimitry Andric }
3133753f127fSDimitry Andric 
3134fe6060f1SDimitry Andric const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
3135fe6060f1SDimitry Andric   // VGPR tuples have an alignment requirement on gfx90a variants.
3136fe6060f1SDimitry Andric   return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
3137fe6060f1SDimitry Andric                                 : &AMDGPU::VReg_64RegClass;
3138fe6060f1SDimitry Andric }
3139fe6060f1SDimitry Andric 
31400b57cec5SDimitry Andric const TargetRegisterClass *
31410b57cec5SDimitry Andric SIRegisterInfo::getRegClass(unsigned RCID) const {
31420b57cec5SDimitry Andric   switch ((int)RCID) {
31430b57cec5SDimitry Andric   case AMDGPU::SReg_1RegClassID:
31440b57cec5SDimitry Andric     return getBoolRC();
31450b57cec5SDimitry Andric   case AMDGPU::SReg_1_XEXECRegClassID:
31460b57cec5SDimitry Andric     return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
31470b57cec5SDimitry Andric       : &AMDGPU::SReg_64_XEXECRegClass;
31480b57cec5SDimitry Andric   case -1:
31490b57cec5SDimitry Andric     return nullptr;
31500b57cec5SDimitry Andric   default:
31515ffd83dbSDimitry Andric     return AMDGPUGenRegisterInfo::getRegClass(RCID);
31520b57cec5SDimitry Andric   }
31530b57cec5SDimitry Andric }
31540b57cec5SDimitry Andric 
31550b57cec5SDimitry Andric // Find reaching register definition
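// For a virtual register, the defining value number live at the use's slot
// index is inspected (honoring subranges when a subregister index is given).
// For a physical register, each register unit's live range is checked and the
// dominating def is taken. Returns nullptr if no single dominating def exists.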
31565ffd83dbSDimitry Andric MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
31570b57cec5SDimitry Andric                                               MachineInstr &Use,
31580b57cec5SDimitry Andric                                               MachineRegisterInfo &MRI,
31590b57cec5SDimitry Andric                                               LiveIntervals *LIS) const {
3160*0fca6ea1SDimitry Andric   auto &MDT = LIS->getDomTree();
31610b57cec5SDimitry Andric   SlotIndex UseIdx = LIS->getInstructionIndex(Use);
31620b57cec5SDimitry Andric   SlotIndex DefIdx;
31630b57cec5SDimitry Andric 
31645ffd83dbSDimitry Andric   if (Reg.isVirtual()) {
31650b57cec5SDimitry Andric     if (!LIS->hasInterval(Reg))
31660b57cec5SDimitry Andric       return nullptr;
31670b57cec5SDimitry Andric     LiveInterval &LI = LIS->getInterval(Reg);
31680b57cec5SDimitry Andric     LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
31690b57cec5SDimitry Andric                                   : MRI.getMaxLaneMaskForVReg(Reg);
31700b57cec5SDimitry Andric     VNInfo *V = nullptr;
31710b57cec5SDimitry Andric     if (LI.hasSubRanges()) {
31720b57cec5SDimitry Andric       for (auto &S : LI.subranges()) {
31730b57cec5SDimitry Andric         if ((S.LaneMask & SubLanes) == SubLanes) {
31740b57cec5SDimitry Andric           V = S.getVNInfoAt(UseIdx);
31750b57cec5SDimitry Andric           break;
31760b57cec5SDimitry Andric         }
31770b57cec5SDimitry Andric       }
31780b57cec5SDimitry Andric     } else {
31790b57cec5SDimitry Andric       V = LI.getVNInfoAt(UseIdx);
31800b57cec5SDimitry Andric     }
31810b57cec5SDimitry Andric     if (!V)
31820b57cec5SDimitry Andric       return nullptr;
31830b57cec5SDimitry Andric     DefIdx = V->def;
31840b57cec5SDimitry Andric   } else {
31850b57cec5SDimitry Andric     // Find last def.
318606c3fb27SDimitry Andric     for (MCRegUnit Unit : regunits(Reg.asMCReg())) {
318706c3fb27SDimitry Andric       LiveRange &LR = LIS->getRegUnit(Unit);
31880b57cec5SDimitry Andric       if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
31890b57cec5SDimitry Andric         if (!DefIdx.isValid() ||
31900b57cec5SDimitry Andric             MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
31910b57cec5SDimitry Andric                           LIS->getInstructionFromIndex(V->def)))
31920b57cec5SDimitry Andric           DefIdx = V->def;
31930b57cec5SDimitry Andric       } else {
31940b57cec5SDimitry Andric         return nullptr;
31950b57cec5SDimitry Andric       }
31960b57cec5SDimitry Andric     }
31970b57cec5SDimitry Andric   }
31980b57cec5SDimitry Andric 
31990b57cec5SDimitry Andric   MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
32000b57cec5SDimitry Andric 
32010b57cec5SDimitry Andric   if (!Def || !MDT.dominates(Def, &Use))
32020b57cec5SDimitry Andric     return nullptr;
32030b57cec5SDimitry Andric 
32040b57cec5SDimitry Andric   assert(Def->modifiesRegister(Reg, this));
32050b57cec5SDimitry Andric 
32060b57cec5SDimitry Andric   return Def;
32070b57cec5SDimitry Andric }
32085ffd83dbSDimitry Andric 
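// Maps a 16-bit physical register to the 32-bit VGPR, SGPR, or AGPR that
// contains it as its lo16 subregister (or hi16 for VGPRs); returns
// AMDGPU::NoRegister if no such super-register exists.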
32095ffd83dbSDimitry Andric MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
3210bdd1243dSDimitry Andric   assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
32115ffd83dbSDimitry Andric 
32125ffd83dbSDimitry Andric   for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
32135ffd83dbSDimitry Andric                                          AMDGPU::SReg_32RegClass,
32145ffd83dbSDimitry Andric                                          AMDGPU::AGPR_32RegClass } ) {
32155ffd83dbSDimitry Andric     if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
32165ffd83dbSDimitry Andric       return Super;
32175ffd83dbSDimitry Andric   }
32185ffd83dbSDimitry Andric   if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
32195ffd83dbSDimitry Andric                                             &AMDGPU::VGPR_32RegClass)) {
32205ffd83dbSDimitry Andric       return Super;
32215ffd83dbSDimitry Andric   }
32225ffd83dbSDimitry Andric 
32235ffd83dbSDimitry Andric   return AMDGPU::NoRegister;
32245ffd83dbSDimitry Andric }
32255ffd83dbSDimitry Andric 
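// On subtargets that require aligned VGPR tuples, a vector register class
// counts as properly aligned only if it is (a subclass of) the Align2 class
// for its size; getProperlyAlignedRC below returns that aligned equivalent for
// a given class.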
3226fe6060f1SDimitry Andric bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
3227fe6060f1SDimitry Andric   if (!ST.needsAlignedVGPRs())
3228fe6060f1SDimitry Andric     return true;
3229fe6060f1SDimitry Andric 
32304824e7fdSDimitry Andric   if (isVGPRClass(&RC))
3231fe6060f1SDimitry Andric     return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
32324824e7fdSDimitry Andric   if (isAGPRClass(&RC))
3233fe6060f1SDimitry Andric     return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
32344824e7fdSDimitry Andric   if (isVectorSuperClass(&RC))
32354824e7fdSDimitry Andric     return RC.hasSuperClassEq(
32364824e7fdSDimitry Andric         getVectorSuperClassForBitWidth(getRegSizeInBits(RC)));
3237fe6060f1SDimitry Andric 
3238fe6060f1SDimitry Andric   return true;
3239fe6060f1SDimitry Andric }
3240fe6060f1SDimitry Andric 
324181ad6265SDimitry Andric const TargetRegisterClass *
324281ad6265SDimitry Andric SIRegisterInfo::getProperlyAlignedRC(const TargetRegisterClass *RC) const {
324381ad6265SDimitry Andric   if (!RC || !ST.needsAlignedVGPRs())
324481ad6265SDimitry Andric     return RC;
324581ad6265SDimitry Andric 
324681ad6265SDimitry Andric   unsigned Size = getRegSizeInBits(*RC);
324781ad6265SDimitry Andric   if (Size <= 32)
324881ad6265SDimitry Andric     return RC;
324981ad6265SDimitry Andric 
325081ad6265SDimitry Andric   if (isVGPRClass(RC))
325181ad6265SDimitry Andric     return getAlignedVGPRClassForBitWidth(Size);
325281ad6265SDimitry Andric   if (isAGPRClass(RC))
325381ad6265SDimitry Andric     return getAlignedAGPRClassForBitWidth(Size);
325481ad6265SDimitry Andric   if (isVectorSuperClass(RC))
325581ad6265SDimitry Andric     return getAlignedVectorSuperClassForBitWidth(Size);
325681ad6265SDimitry Andric 
325781ad6265SDimitry Andric   return RC;
325881ad6265SDimitry Andric }
325981ad6265SDimitry Andric 
32605ffd83dbSDimitry Andric ArrayRef<MCPhysReg>
32615ffd83dbSDimitry Andric SIRegisterInfo::getAllSGPR128(const MachineFunction &MF) const {
3262bdd1243dSDimitry Andric   return ArrayRef(AMDGPU::SGPR_128RegClass.begin(), ST.getMaxNumSGPRs(MF) / 4);
32635ffd83dbSDimitry Andric }
32645ffd83dbSDimitry Andric 
32655ffd83dbSDimitry Andric ArrayRef<MCPhysReg>
3266e8d8bef9SDimitry Andric SIRegisterInfo::getAllSGPR64(const MachineFunction &MF) const {
3267bdd1243dSDimitry Andric   return ArrayRef(AMDGPU::SGPR_64RegClass.begin(), ST.getMaxNumSGPRs(MF) / 2);
32685ffd83dbSDimitry Andric }
32695ffd83dbSDimitry Andric 
32705ffd83dbSDimitry Andric ArrayRef<MCPhysReg>
3271e8d8bef9SDimitry Andric SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const {
3272bdd1243dSDimitry Andric   return ArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
32735ffd83dbSDimitry Andric }
327406c3fb27SDimitry Andric 
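// Returns the alignment, in bits, assumed for SubReg within RC: the
// subregister's size capped at 128 bits for SGPR classes and at 32 bits for
// VGPR/AGPR classes, or 0 for unrecognized register kinds.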
327506c3fb27SDimitry Andric unsigned
327606c3fb27SDimitry Andric SIRegisterInfo::getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
327706c3fb27SDimitry Andric                                           unsigned SubReg) const {
327806c3fb27SDimitry Andric   switch (RC->TSFlags & SIRCFlags::RegKindMask) {
327906c3fb27SDimitry Andric   case SIRCFlags::HasSGPR:
328006c3fb27SDimitry Andric     return std::min(128u, getSubRegIdxSize(SubReg));
328106c3fb27SDimitry Andric   case SIRCFlags::HasAGPR:
328206c3fb27SDimitry Andric   case SIRCFlags::HasVGPR:
328306c3fb27SDimitry Andric   case SIRCFlags::HasVGPR | SIRCFlags::HasAGPR:
328406c3fb27SDimitry Andric     return std::min(32u, getSubRegIdxSize(SubReg));
328506c3fb27SDimitry Andric   default:
328606c3fb27SDimitry Andric     break;
328706c3fb27SDimitry Andric   }
328806c3fb27SDimitry Andric   return 0;
328906c3fb27SDimitry Andric }
3290