1f4a2713aSLionel Sambuc //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2f4a2713aSLionel Sambuc //
3f4a2713aSLionel Sambuc // The LLVM Compiler Infrastructure
4f4a2713aSLionel Sambuc //
5f4a2713aSLionel Sambuc // This file is distributed under the University of Illinois Open Source
6f4a2713aSLionel Sambuc // License. See LICENSE.TXT for details.
7f4a2713aSLionel Sambuc //
8f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
9f4a2713aSLionel Sambuc //
10f4a2713aSLionel Sambuc /// \file
11f4a2713aSLionel Sambuc /// \brief SI implementation of the TargetRegisterInfo class.
12f4a2713aSLionel Sambuc //
13f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
14f4a2713aSLionel Sambuc
15f4a2713aSLionel Sambuc
16f4a2713aSLionel Sambuc #include "SIRegisterInfo.h"
17f4a2713aSLionel Sambuc #include "SIInstrInfo.h"
18*0a6a1f1dSLionel Sambuc #include "SIMachineFunctionInfo.h"
19*0a6a1f1dSLionel Sambuc #include "llvm/CodeGen/MachineFrameInfo.h"
20*0a6a1f1dSLionel Sambuc #include "llvm/CodeGen/MachineInstrBuilder.h"
21*0a6a1f1dSLionel Sambuc #include "llvm/CodeGen/RegisterScavenging.h"
22*0a6a1f1dSLionel Sambuc #include "llvm/IR/Function.h"
23*0a6a1f1dSLionel Sambuc #include "llvm/IR/LLVMContext.h"
24f4a2713aSLionel Sambuc
25f4a2713aSLionel Sambuc using namespace llvm;
26f4a2713aSLionel Sambuc
SIRegisterInfo(const AMDGPUSubtarget & st)27*0a6a1f1dSLionel Sambuc SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
28*0a6a1f1dSLionel Sambuc : AMDGPURegisterInfo(st)
29f4a2713aSLionel Sambuc { }
30f4a2713aSLionel Sambuc
getReservedRegs(const MachineFunction & MF) const31f4a2713aSLionel Sambuc BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
32f4a2713aSLionel Sambuc BitVector Reserved(getNumRegs());
33f4a2713aSLionel Sambuc Reserved.set(AMDGPU::EXEC);
34*0a6a1f1dSLionel Sambuc
35*0a6a1f1dSLionel Sambuc // EXEC_LO and EXEC_HI could be allocated and used as regular register,
36*0a6a1f1dSLionel Sambuc // but this seems likely to result in bugs, so I'm marking them as reserved.
37*0a6a1f1dSLionel Sambuc Reserved.set(AMDGPU::EXEC_LO);
38*0a6a1f1dSLionel Sambuc Reserved.set(AMDGPU::EXEC_HI);
39*0a6a1f1dSLionel Sambuc
40f4a2713aSLionel Sambuc Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
41*0a6a1f1dSLionel Sambuc Reserved.set(AMDGPU::FLAT_SCR);
42*0a6a1f1dSLionel Sambuc Reserved.set(AMDGPU::FLAT_SCR_LO);
43*0a6a1f1dSLionel Sambuc Reserved.set(AMDGPU::FLAT_SCR_HI);
44*0a6a1f1dSLionel Sambuc
45*0a6a1f1dSLionel Sambuc // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
46*0a6a1f1dSLionel Sambuc Reserved.set(AMDGPU::VGPR255);
47*0a6a1f1dSLionel Sambuc Reserved.set(AMDGPU::VGPR254);
48*0a6a1f1dSLionel Sambuc
49*0a6a1f1dSLionel Sambuc // Tonga and Iceland can only allocate a fixed number of SGPRs due
50*0a6a1f1dSLionel Sambuc // to a hw bug.
51*0a6a1f1dSLionel Sambuc if (ST.hasSGPRInitBug()) {
52*0a6a1f1dSLionel Sambuc unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
53*0a6a1f1dSLionel Sambuc // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
54*0a6a1f1dSLionel Sambuc // Assume XNACK_MASK is unused.
55*0a6a1f1dSLionel Sambuc unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
56*0a6a1f1dSLionel Sambuc
57*0a6a1f1dSLionel Sambuc for (unsigned i = Limit; i < NumSGPRs; ++i) {
58*0a6a1f1dSLionel Sambuc unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
59*0a6a1f1dSLionel Sambuc MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
60*0a6a1f1dSLionel Sambuc
61*0a6a1f1dSLionel Sambuc for (; R.isValid(); ++R)
62*0a6a1f1dSLionel Sambuc Reserved.set(*R);
63*0a6a1f1dSLionel Sambuc }
64*0a6a1f1dSLionel Sambuc }
65*0a6a1f1dSLionel Sambuc
66f4a2713aSLionel Sambuc return Reserved;
67f4a2713aSLionel Sambuc }
68f4a2713aSLionel Sambuc
getRegPressureSetLimit(unsigned Idx) const69*0a6a1f1dSLionel Sambuc unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
70*0a6a1f1dSLionel Sambuc
71*0a6a1f1dSLionel Sambuc // FIXME: We should adjust the max number of waves based on LDS size.
72*0a6a1f1dSLionel Sambuc unsigned SGPRLimit = getNumSGPRsAllowed(ST.getGeneration(),
73*0a6a1f1dSLionel Sambuc ST.getMaxWavesPerCU());
74*0a6a1f1dSLionel Sambuc unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
75*0a6a1f1dSLionel Sambuc
76*0a6a1f1dSLionel Sambuc for (regclass_iterator I = regclass_begin(), E = regclass_end();
77*0a6a1f1dSLionel Sambuc I != E; ++I) {
78*0a6a1f1dSLionel Sambuc
79*0a6a1f1dSLionel Sambuc unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
80*0a6a1f1dSLionel Sambuc unsigned Limit;
81*0a6a1f1dSLionel Sambuc
82*0a6a1f1dSLionel Sambuc if (isSGPRClass(*I)) {
83*0a6a1f1dSLionel Sambuc Limit = SGPRLimit / NumSubRegs;
84*0a6a1f1dSLionel Sambuc } else {
85*0a6a1f1dSLionel Sambuc Limit = VGPRLimit / NumSubRegs;
86f4a2713aSLionel Sambuc }
87f4a2713aSLionel Sambuc
88*0a6a1f1dSLionel Sambuc const int *Sets = getRegClassPressureSets(*I);
89*0a6a1f1dSLionel Sambuc assert(Sets);
90*0a6a1f1dSLionel Sambuc for (unsigned i = 0; Sets[i] != -1; ++i) {
91*0a6a1f1dSLionel Sambuc if (Sets[i] == (int)Idx)
92*0a6a1f1dSLionel Sambuc return Limit;
93*0a6a1f1dSLionel Sambuc }
94*0a6a1f1dSLionel Sambuc }
95*0a6a1f1dSLionel Sambuc return 256;
96*0a6a1f1dSLionel Sambuc }
97*0a6a1f1dSLionel Sambuc
requiresRegisterScavenging(const MachineFunction & Fn) const98*0a6a1f1dSLionel Sambuc bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
99*0a6a1f1dSLionel Sambuc return Fn.getFrameInfo()->hasStackObjects();
100*0a6a1f1dSLionel Sambuc }
101*0a6a1f1dSLionel Sambuc
getNumSubRegsForSpillOp(unsigned Op)102*0a6a1f1dSLionel Sambuc static unsigned getNumSubRegsForSpillOp(unsigned Op) {
103*0a6a1f1dSLionel Sambuc
104*0a6a1f1dSLionel Sambuc switch (Op) {
105*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S512_SAVE:
106*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S512_RESTORE:
107*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V512_SAVE:
108*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V512_RESTORE:
109*0a6a1f1dSLionel Sambuc return 16;
110*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S256_SAVE:
111*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S256_RESTORE:
112*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V256_SAVE:
113*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V256_RESTORE:
114*0a6a1f1dSLionel Sambuc return 8;
115*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S128_SAVE:
116*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S128_RESTORE:
117*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V128_SAVE:
118*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V128_RESTORE:
119*0a6a1f1dSLionel Sambuc return 4;
120*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V96_SAVE:
121*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V96_RESTORE:
122*0a6a1f1dSLionel Sambuc return 3;
123*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S64_SAVE:
124*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S64_RESTORE:
125*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V64_SAVE:
126*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V64_RESTORE:
127*0a6a1f1dSLionel Sambuc return 2;
128*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S32_SAVE:
129*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S32_RESTORE:
130*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V32_SAVE:
131*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V32_RESTORE:
132*0a6a1f1dSLionel Sambuc return 1;
133*0a6a1f1dSLionel Sambuc default: llvm_unreachable("Invalid spill opcode");
134*0a6a1f1dSLionel Sambuc }
135*0a6a1f1dSLionel Sambuc }
136*0a6a1f1dSLionel Sambuc
buildScratchLoadStore(MachineBasicBlock::iterator MI,unsigned LoadStoreOp,unsigned Value,unsigned ScratchRsrcReg,unsigned ScratchOffset,int64_t Offset,RegScavenger * RS) const137*0a6a1f1dSLionel Sambuc void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
138*0a6a1f1dSLionel Sambuc unsigned LoadStoreOp,
139*0a6a1f1dSLionel Sambuc unsigned Value,
140*0a6a1f1dSLionel Sambuc unsigned ScratchRsrcReg,
141*0a6a1f1dSLionel Sambuc unsigned ScratchOffset,
142*0a6a1f1dSLionel Sambuc int64_t Offset,
143*0a6a1f1dSLionel Sambuc RegScavenger *RS) const {
144*0a6a1f1dSLionel Sambuc
145*0a6a1f1dSLionel Sambuc const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
146*0a6a1f1dSLionel Sambuc MachineBasicBlock *MBB = MI->getParent();
147*0a6a1f1dSLionel Sambuc const MachineFunction *MF = MI->getParent()->getParent();
148*0a6a1f1dSLionel Sambuc LLVMContext &Ctx = MF->getFunction()->getContext();
149*0a6a1f1dSLionel Sambuc DebugLoc DL = MI->getDebugLoc();
150*0a6a1f1dSLionel Sambuc bool IsLoad = TII->get(LoadStoreOp).mayLoad();
151*0a6a1f1dSLionel Sambuc
152*0a6a1f1dSLionel Sambuc bool RanOutOfSGPRs = false;
153*0a6a1f1dSLionel Sambuc unsigned SOffset = ScratchOffset;
154*0a6a1f1dSLionel Sambuc
155*0a6a1f1dSLionel Sambuc unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
156*0a6a1f1dSLionel Sambuc unsigned Size = NumSubRegs * 4;
157*0a6a1f1dSLionel Sambuc
158*0a6a1f1dSLionel Sambuc if (!isUInt<12>(Offset + Size)) {
159*0a6a1f1dSLionel Sambuc SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
160*0a6a1f1dSLionel Sambuc if (SOffset == AMDGPU::NoRegister) {
161*0a6a1f1dSLionel Sambuc RanOutOfSGPRs = true;
162*0a6a1f1dSLionel Sambuc SOffset = AMDGPU::SGPR0;
163*0a6a1f1dSLionel Sambuc }
164*0a6a1f1dSLionel Sambuc BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
165*0a6a1f1dSLionel Sambuc .addReg(ScratchOffset)
166*0a6a1f1dSLionel Sambuc .addImm(Offset);
167*0a6a1f1dSLionel Sambuc Offset = 0;
168*0a6a1f1dSLionel Sambuc }
169*0a6a1f1dSLionel Sambuc
170*0a6a1f1dSLionel Sambuc if (RanOutOfSGPRs)
171*0a6a1f1dSLionel Sambuc Ctx.emitError("Ran out of SGPRs for spilling VGPRS");
172*0a6a1f1dSLionel Sambuc
173*0a6a1f1dSLionel Sambuc for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
174*0a6a1f1dSLionel Sambuc unsigned SubReg = NumSubRegs > 1 ?
175*0a6a1f1dSLionel Sambuc getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
176*0a6a1f1dSLionel Sambuc Value;
177*0a6a1f1dSLionel Sambuc bool IsKill = (i == e - 1);
178*0a6a1f1dSLionel Sambuc
179*0a6a1f1dSLionel Sambuc BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
180*0a6a1f1dSLionel Sambuc .addReg(SubReg, getDefRegState(IsLoad))
181*0a6a1f1dSLionel Sambuc .addReg(ScratchRsrcReg, getKillRegState(IsKill))
182*0a6a1f1dSLionel Sambuc .addImm(Offset)
183*0a6a1f1dSLionel Sambuc .addReg(SOffset)
184*0a6a1f1dSLionel Sambuc .addImm(0) // glc
185*0a6a1f1dSLionel Sambuc .addImm(0) // slc
186*0a6a1f1dSLionel Sambuc .addImm(0) // tfe
187*0a6a1f1dSLionel Sambuc .addReg(Value, RegState::Implicit | getDefRegState(IsLoad));
188*0a6a1f1dSLionel Sambuc }
189*0a6a1f1dSLionel Sambuc }
190*0a6a1f1dSLionel Sambuc
eliminateFrameIndex(MachineBasicBlock::iterator MI,int SPAdj,unsigned FIOperandNum,RegScavenger * RS) const191*0a6a1f1dSLionel Sambuc void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
192*0a6a1f1dSLionel Sambuc int SPAdj, unsigned FIOperandNum,
193*0a6a1f1dSLionel Sambuc RegScavenger *RS) const {
194*0a6a1f1dSLionel Sambuc MachineFunction *MF = MI->getParent()->getParent();
195*0a6a1f1dSLionel Sambuc MachineBasicBlock *MBB = MI->getParent();
196*0a6a1f1dSLionel Sambuc SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
197*0a6a1f1dSLionel Sambuc MachineFrameInfo *FrameInfo = MF->getFrameInfo();
198*0a6a1f1dSLionel Sambuc const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
199*0a6a1f1dSLionel Sambuc DebugLoc DL = MI->getDebugLoc();
200*0a6a1f1dSLionel Sambuc
201*0a6a1f1dSLionel Sambuc MachineOperand &FIOp = MI->getOperand(FIOperandNum);
202*0a6a1f1dSLionel Sambuc int Index = MI->getOperand(FIOperandNum).getIndex();
203*0a6a1f1dSLionel Sambuc
204*0a6a1f1dSLionel Sambuc switch (MI->getOpcode()) {
205*0a6a1f1dSLionel Sambuc // SGPR register spill
206*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S512_SAVE:
207*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S256_SAVE:
208*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S128_SAVE:
209*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S64_SAVE:
210*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S32_SAVE: {
211*0a6a1f1dSLionel Sambuc unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
212*0a6a1f1dSLionel Sambuc
213*0a6a1f1dSLionel Sambuc for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
214*0a6a1f1dSLionel Sambuc unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
215*0a6a1f1dSLionel Sambuc &AMDGPU::SGPR_32RegClass, i);
216*0a6a1f1dSLionel Sambuc struct SIMachineFunctionInfo::SpilledReg Spill =
217*0a6a1f1dSLionel Sambuc MFI->getSpilledReg(MF, Index, i);
218*0a6a1f1dSLionel Sambuc
219*0a6a1f1dSLionel Sambuc if (Spill.VGPR == AMDGPU::NoRegister) {
220*0a6a1f1dSLionel Sambuc LLVMContext &Ctx = MF->getFunction()->getContext();
221*0a6a1f1dSLionel Sambuc Ctx.emitError("Ran out of VGPRs for spilling SGPR");
222*0a6a1f1dSLionel Sambuc }
223*0a6a1f1dSLionel Sambuc
224*0a6a1f1dSLionel Sambuc BuildMI(*MBB, MI, DL,
225*0a6a1f1dSLionel Sambuc TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
226*0a6a1f1dSLionel Sambuc Spill.VGPR)
227*0a6a1f1dSLionel Sambuc .addReg(SubReg)
228*0a6a1f1dSLionel Sambuc .addImm(Spill.Lane);
229*0a6a1f1dSLionel Sambuc
230*0a6a1f1dSLionel Sambuc }
231*0a6a1f1dSLionel Sambuc MI->eraseFromParent();
232*0a6a1f1dSLionel Sambuc break;
233*0a6a1f1dSLionel Sambuc }
234*0a6a1f1dSLionel Sambuc
235*0a6a1f1dSLionel Sambuc // SGPR register restore
236*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S512_RESTORE:
237*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S256_RESTORE:
238*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S128_RESTORE:
239*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S64_RESTORE:
240*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_S32_RESTORE: {
241*0a6a1f1dSLionel Sambuc unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
242*0a6a1f1dSLionel Sambuc
243*0a6a1f1dSLionel Sambuc for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
244*0a6a1f1dSLionel Sambuc unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
245*0a6a1f1dSLionel Sambuc &AMDGPU::SGPR_32RegClass, i);
246*0a6a1f1dSLionel Sambuc bool isM0 = SubReg == AMDGPU::M0;
247*0a6a1f1dSLionel Sambuc struct SIMachineFunctionInfo::SpilledReg Spill =
248*0a6a1f1dSLionel Sambuc MFI->getSpilledReg(MF, Index, i);
249*0a6a1f1dSLionel Sambuc
250*0a6a1f1dSLionel Sambuc if (Spill.VGPR == AMDGPU::NoRegister) {
251*0a6a1f1dSLionel Sambuc LLVMContext &Ctx = MF->getFunction()->getContext();
252*0a6a1f1dSLionel Sambuc Ctx.emitError("Ran out of VGPRs for spilling SGPR");
253*0a6a1f1dSLionel Sambuc }
254*0a6a1f1dSLionel Sambuc
255*0a6a1f1dSLionel Sambuc if (isM0)
256*0a6a1f1dSLionel Sambuc SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
257*0a6a1f1dSLionel Sambuc
258*0a6a1f1dSLionel Sambuc BuildMI(*MBB, MI, DL,
259*0a6a1f1dSLionel Sambuc TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
260*0a6a1f1dSLionel Sambuc SubReg)
261*0a6a1f1dSLionel Sambuc .addReg(Spill.VGPR)
262*0a6a1f1dSLionel Sambuc .addImm(Spill.Lane)
263*0a6a1f1dSLionel Sambuc .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
264*0a6a1f1dSLionel Sambuc if (isM0) {
265*0a6a1f1dSLionel Sambuc BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
266*0a6a1f1dSLionel Sambuc .addReg(SubReg);
267*0a6a1f1dSLionel Sambuc }
268*0a6a1f1dSLionel Sambuc }
269*0a6a1f1dSLionel Sambuc
270*0a6a1f1dSLionel Sambuc // TODO: only do this when it is needed
271*0a6a1f1dSLionel Sambuc switch (ST.getGeneration()) {
272*0a6a1f1dSLionel Sambuc case AMDGPUSubtarget::SOUTHERN_ISLANDS:
273*0a6a1f1dSLionel Sambuc // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
274*0a6a1f1dSLionel Sambuc TII->insertNOPs(MI, 3);
275*0a6a1f1dSLionel Sambuc break;
276*0a6a1f1dSLionel Sambuc case AMDGPUSubtarget::SEA_ISLANDS:
277*0a6a1f1dSLionel Sambuc break;
278*0a6a1f1dSLionel Sambuc default: // VOLCANIC_ISLANDS and later
279*0a6a1f1dSLionel Sambuc // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
280*0a6a1f1dSLionel Sambuc // and later. This also applies to VALUs which write VCC, but we're
281*0a6a1f1dSLionel Sambuc // unlikely to see VMEM use VCC.
282*0a6a1f1dSLionel Sambuc TII->insertNOPs(MI, 4);
283*0a6a1f1dSLionel Sambuc }
284*0a6a1f1dSLionel Sambuc
285*0a6a1f1dSLionel Sambuc MI->eraseFromParent();
286*0a6a1f1dSLionel Sambuc break;
287*0a6a1f1dSLionel Sambuc }
288*0a6a1f1dSLionel Sambuc
289*0a6a1f1dSLionel Sambuc // VGPR register spill
290*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V512_SAVE:
291*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V256_SAVE:
292*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V128_SAVE:
293*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V96_SAVE:
294*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V64_SAVE:
295*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V32_SAVE:
296*0a6a1f1dSLionel Sambuc buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
297*0a6a1f1dSLionel Sambuc TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
298*0a6a1f1dSLionel Sambuc TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
299*0a6a1f1dSLionel Sambuc TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
300*0a6a1f1dSLionel Sambuc FrameInfo->getObjectOffset(Index), RS);
301*0a6a1f1dSLionel Sambuc MI->eraseFromParent();
302*0a6a1f1dSLionel Sambuc break;
303*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V32_RESTORE:
304*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V64_RESTORE:
305*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V96_RESTORE:
306*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V128_RESTORE:
307*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V256_RESTORE:
308*0a6a1f1dSLionel Sambuc case AMDGPU::SI_SPILL_V512_RESTORE: {
309*0a6a1f1dSLionel Sambuc buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
310*0a6a1f1dSLionel Sambuc TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
311*0a6a1f1dSLionel Sambuc TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
312*0a6a1f1dSLionel Sambuc TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
313*0a6a1f1dSLionel Sambuc FrameInfo->getObjectOffset(Index), RS);
314*0a6a1f1dSLionel Sambuc MI->eraseFromParent();
315*0a6a1f1dSLionel Sambuc break;
316*0a6a1f1dSLionel Sambuc }
317*0a6a1f1dSLionel Sambuc
318*0a6a1f1dSLionel Sambuc default: {
319*0a6a1f1dSLionel Sambuc int64_t Offset = FrameInfo->getObjectOffset(Index);
320*0a6a1f1dSLionel Sambuc FIOp.ChangeToImmediate(Offset);
321*0a6a1f1dSLionel Sambuc if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
322*0a6a1f1dSLionel Sambuc unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, SPAdj);
323*0a6a1f1dSLionel Sambuc BuildMI(*MBB, MI, MI->getDebugLoc(),
324*0a6a1f1dSLionel Sambuc TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
325*0a6a1f1dSLionel Sambuc .addImm(Offset);
326*0a6a1f1dSLionel Sambuc FIOp.ChangeToRegister(TmpReg, false, false, true);
327*0a6a1f1dSLionel Sambuc }
328*0a6a1f1dSLionel Sambuc }
329f4a2713aSLionel Sambuc }
330f4a2713aSLionel Sambuc }
331f4a2713aSLionel Sambuc
getCFGStructurizerRegClass(MVT VT) const332f4a2713aSLionel Sambuc const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
333f4a2713aSLionel Sambuc MVT VT) const {
334f4a2713aSLionel Sambuc switch(VT.SimpleTy) {
335f4a2713aSLionel Sambuc default:
336*0a6a1f1dSLionel Sambuc case MVT::i32: return &AMDGPU::VGPR_32RegClass;
337f4a2713aSLionel Sambuc }
338f4a2713aSLionel Sambuc }
339f4a2713aSLionel Sambuc
getHWRegIndex(unsigned Reg) const340f4a2713aSLionel Sambuc unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
341*0a6a1f1dSLionel Sambuc return getEncodingValue(Reg) & 0xff;
342f4a2713aSLionel Sambuc }
343f4a2713aSLionel Sambuc
getPhysRegClass(unsigned Reg) const344f4a2713aSLionel Sambuc const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
345f4a2713aSLionel Sambuc assert(!TargetRegisterInfo::isVirtualRegister(Reg));
346f4a2713aSLionel Sambuc
347*0a6a1f1dSLionel Sambuc static const TargetRegisterClass *BaseClasses[] = {
348*0a6a1f1dSLionel Sambuc &AMDGPU::VGPR_32RegClass,
349f4a2713aSLionel Sambuc &AMDGPU::SReg_32RegClass,
350f4a2713aSLionel Sambuc &AMDGPU::VReg_64RegClass,
351f4a2713aSLionel Sambuc &AMDGPU::SReg_64RegClass,
352*0a6a1f1dSLionel Sambuc &AMDGPU::VReg_96RegClass,
353*0a6a1f1dSLionel Sambuc &AMDGPU::VReg_128RegClass,
354f4a2713aSLionel Sambuc &AMDGPU::SReg_128RegClass,
355*0a6a1f1dSLionel Sambuc &AMDGPU::VReg_256RegClass,
356*0a6a1f1dSLionel Sambuc &AMDGPU::SReg_256RegClass,
357*0a6a1f1dSLionel Sambuc &AMDGPU::VReg_512RegClass
358f4a2713aSLionel Sambuc };
359f4a2713aSLionel Sambuc
360*0a6a1f1dSLionel Sambuc for (const TargetRegisterClass *BaseClass : BaseClasses) {
361*0a6a1f1dSLionel Sambuc if (BaseClass->contains(Reg)) {
362*0a6a1f1dSLionel Sambuc return BaseClass;
363f4a2713aSLionel Sambuc }
364f4a2713aSLionel Sambuc }
365*0a6a1f1dSLionel Sambuc return nullptr;
366f4a2713aSLionel Sambuc }
367f4a2713aSLionel Sambuc
hasVGPRs(const TargetRegisterClass * RC) const368f4a2713aSLionel Sambuc bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
369*0a6a1f1dSLionel Sambuc return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) ||
370f4a2713aSLionel Sambuc getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
371f4a2713aSLionel Sambuc getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) ||
372f4a2713aSLionel Sambuc getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
373f4a2713aSLionel Sambuc getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) ||
374f4a2713aSLionel Sambuc getCommonSubClass(&AMDGPU::VReg_512RegClass, RC);
375f4a2713aSLionel Sambuc }
376f4a2713aSLionel Sambuc
/// \brief Map \p SRC to the vector register class of matching width.
///
/// A class that already has VGPRs is returned unchanged, and SCCReg maps to
/// VCCReg. Otherwise the scalar classes are probed from narrowest to widest
/// via getCommonSubClass.
///
/// \returns the equivalent class, or nullptr when none of the probes match.
const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
                                         const TargetRegisterClass *SRC) const {
  if (hasVGPRs(SRC))
    return SRC;

  if (SRC == &AMDGPU::SCCRegRegClass)
    return &AMDGPU::VCCRegRegClass;

  if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass))
    return &AMDGPU::VGPR_32RegClass;

  if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass))
    return &AMDGPU::VReg_64RegClass;

  if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass))
    return &AMDGPU::VReg_128RegClass;

  if (getCommonSubClass(SRC, &AMDGPU::SReg_256RegClass))
    return &AMDGPU::VReg_256RegClass;

  if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass))
    return &AMDGPU::VReg_512RegClass;

  return nullptr;
}
396f4a2713aSLionel Sambuc
/// \brief Return the register class of sub-register \p SubIdx of a register
/// in \p RC; with NoSubRegister this is \p RC itself.
const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
                         const TargetRegisterClass *RC, unsigned SubIdx) const {
  if (SubIdx == AMDGPU::NoSubRegister)
    return RC;

  // All of SI's sub-registers are 32-bit, so any real sub-register index
  // selects a 32-bit register of the same (scalar or vector) kind.
  return isSGPRClass(RC) ? &AMDGPU::SGPR_32RegClass
                         : &AMDGPU::VGPR_32RegClass;
}
410*0a6a1f1dSLionel Sambuc
getPhysRegSubReg(unsigned Reg,const TargetRegisterClass * SubRC,unsigned Channel) const411*0a6a1f1dSLionel Sambuc unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
412*0a6a1f1dSLionel Sambuc const TargetRegisterClass *SubRC,
413*0a6a1f1dSLionel Sambuc unsigned Channel) const {
414*0a6a1f1dSLionel Sambuc
415*0a6a1f1dSLionel Sambuc switch (Reg) {
416*0a6a1f1dSLionel Sambuc case AMDGPU::VCC:
417*0a6a1f1dSLionel Sambuc switch(Channel) {
418*0a6a1f1dSLionel Sambuc case 0: return AMDGPU::VCC_LO;
419*0a6a1f1dSLionel Sambuc case 1: return AMDGPU::VCC_HI;
420*0a6a1f1dSLionel Sambuc default: llvm_unreachable("Invalid SubIdx for VCC");
421*0a6a1f1dSLionel Sambuc }
422*0a6a1f1dSLionel Sambuc
423*0a6a1f1dSLionel Sambuc case AMDGPU::FLAT_SCR:
424*0a6a1f1dSLionel Sambuc switch (Channel) {
425*0a6a1f1dSLionel Sambuc case 0:
426*0a6a1f1dSLionel Sambuc return AMDGPU::FLAT_SCR_LO;
427*0a6a1f1dSLionel Sambuc case 1:
428*0a6a1f1dSLionel Sambuc return AMDGPU::FLAT_SCR_HI;
429*0a6a1f1dSLionel Sambuc default:
430*0a6a1f1dSLionel Sambuc llvm_unreachable("Invalid SubIdx for FLAT_SCR");
431*0a6a1f1dSLionel Sambuc }
432*0a6a1f1dSLionel Sambuc break;
433*0a6a1f1dSLionel Sambuc
434*0a6a1f1dSLionel Sambuc case AMDGPU::EXEC:
435*0a6a1f1dSLionel Sambuc switch (Channel) {
436*0a6a1f1dSLionel Sambuc case 0:
437*0a6a1f1dSLionel Sambuc return AMDGPU::EXEC_LO;
438*0a6a1f1dSLionel Sambuc case 1:
439*0a6a1f1dSLionel Sambuc return AMDGPU::EXEC_HI;
440*0a6a1f1dSLionel Sambuc default:
441*0a6a1f1dSLionel Sambuc llvm_unreachable("Invalid SubIdx for EXEC");
442*0a6a1f1dSLionel Sambuc }
443*0a6a1f1dSLionel Sambuc break;
444*0a6a1f1dSLionel Sambuc }
445*0a6a1f1dSLionel Sambuc
446*0a6a1f1dSLionel Sambuc const TargetRegisterClass *RC = getPhysRegClass(Reg);
447*0a6a1f1dSLionel Sambuc // 32-bit registers don't have sub-registers, so we can just return the
448*0a6a1f1dSLionel Sambuc // Reg. We need to have this check here, because the calculation below
449*0a6a1f1dSLionel Sambuc // using getHWRegIndex() will fail with special 32-bit registers like
450*0a6a1f1dSLionel Sambuc // VCC_LO, VCC_HI, EXEC_LO, EXEC_HI and M0.
451*0a6a1f1dSLionel Sambuc if (RC->getSize() == 4) {
452*0a6a1f1dSLionel Sambuc assert(Channel == 0);
453*0a6a1f1dSLionel Sambuc return Reg;
454*0a6a1f1dSLionel Sambuc }
455*0a6a1f1dSLionel Sambuc
456*0a6a1f1dSLionel Sambuc unsigned Index = getHWRegIndex(Reg);
457*0a6a1f1dSLionel Sambuc return SubRC->getRegister(Index + Channel);
458*0a6a1f1dSLionel Sambuc }
459*0a6a1f1dSLionel Sambuc
opCanUseLiteralConstant(unsigned OpType) const460*0a6a1f1dSLionel Sambuc bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
461*0a6a1f1dSLionel Sambuc return OpType == AMDGPU::OPERAND_REG_IMM32;
462*0a6a1f1dSLionel Sambuc }
463*0a6a1f1dSLionel Sambuc
opCanUseInlineConstant(unsigned OpType) const464*0a6a1f1dSLionel Sambuc bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
465*0a6a1f1dSLionel Sambuc if (opCanUseLiteralConstant(OpType))
466*0a6a1f1dSLionel Sambuc return true;
467*0a6a1f1dSLionel Sambuc
468*0a6a1f1dSLionel Sambuc return OpType == AMDGPU::OPERAND_REG_INLINE_C;
469*0a6a1f1dSLionel Sambuc }
470*0a6a1f1dSLionel Sambuc
getPreloadedValue(const MachineFunction & MF,enum PreloadedValue Value) const471*0a6a1f1dSLionel Sambuc unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
472*0a6a1f1dSLionel Sambuc enum PreloadedValue Value) const {
473*0a6a1f1dSLionel Sambuc
474*0a6a1f1dSLionel Sambuc const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
475*0a6a1f1dSLionel Sambuc switch (Value) {
476*0a6a1f1dSLionel Sambuc case SIRegisterInfo::TGID_X:
477*0a6a1f1dSLionel Sambuc return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 0);
478*0a6a1f1dSLionel Sambuc case SIRegisterInfo::TGID_Y:
479*0a6a1f1dSLionel Sambuc return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 1);
480*0a6a1f1dSLionel Sambuc case SIRegisterInfo::TGID_Z:
481*0a6a1f1dSLionel Sambuc return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
482*0a6a1f1dSLionel Sambuc case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
483*0a6a1f1dSLionel Sambuc if (MFI->getShaderType() != ShaderType::COMPUTE)
484*0a6a1f1dSLionel Sambuc return MFI->ScratchOffsetReg;
485*0a6a1f1dSLionel Sambuc return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
486*0a6a1f1dSLionel Sambuc case SIRegisterInfo::SCRATCH_PTR:
487*0a6a1f1dSLionel Sambuc return AMDGPU::SGPR2_SGPR3;
488*0a6a1f1dSLionel Sambuc case SIRegisterInfo::INPUT_PTR:
489*0a6a1f1dSLionel Sambuc return AMDGPU::SGPR0_SGPR1;
490*0a6a1f1dSLionel Sambuc case SIRegisterInfo::TIDIG_X:
491*0a6a1f1dSLionel Sambuc return AMDGPU::VGPR0;
492*0a6a1f1dSLionel Sambuc case SIRegisterInfo::TIDIG_Y:
493*0a6a1f1dSLionel Sambuc return AMDGPU::VGPR1;
494*0a6a1f1dSLionel Sambuc case SIRegisterInfo::TIDIG_Z:
495*0a6a1f1dSLionel Sambuc return AMDGPU::VGPR2;
496*0a6a1f1dSLionel Sambuc }
497*0a6a1f1dSLionel Sambuc llvm_unreachable("unexpected preloaded value type");
498*0a6a1f1dSLionel Sambuc }
499*0a6a1f1dSLionel Sambuc
500*0a6a1f1dSLionel Sambuc /// \brief Returns a register that is not used at any point in the function.
501*0a6a1f1dSLionel Sambuc /// If all registers are used, then this function will return
502*0a6a1f1dSLionel Sambuc // AMDGPU::NoRegister.
findUnusedRegister(const MachineRegisterInfo & MRI,const TargetRegisterClass * RC) const503*0a6a1f1dSLionel Sambuc unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
504*0a6a1f1dSLionel Sambuc const TargetRegisterClass *RC) const {
505*0a6a1f1dSLionel Sambuc
506*0a6a1f1dSLionel Sambuc for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
507*0a6a1f1dSLionel Sambuc I != E; ++I) {
508*0a6a1f1dSLionel Sambuc if (!MRI.isPhysRegUsed(*I))
509*0a6a1f1dSLionel Sambuc return *I;
510*0a6a1f1dSLionel Sambuc }
511*0a6a1f1dSLionel Sambuc return AMDGPU::NoRegister;
512*0a6a1f1dSLionel Sambuc }
513*0a6a1f1dSLionel Sambuc
getNumVGPRsAllowed(unsigned WaveCount) const514*0a6a1f1dSLionel Sambuc unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
515*0a6a1f1dSLionel Sambuc switch(WaveCount) {
516*0a6a1f1dSLionel Sambuc case 10: return 24;
517*0a6a1f1dSLionel Sambuc case 9: return 28;
518*0a6a1f1dSLionel Sambuc case 8: return 32;
519*0a6a1f1dSLionel Sambuc case 7: return 36;
520*0a6a1f1dSLionel Sambuc case 6: return 40;
521*0a6a1f1dSLionel Sambuc case 5: return 48;
522*0a6a1f1dSLionel Sambuc case 4: return 64;
523*0a6a1f1dSLionel Sambuc case 3: return 84;
524*0a6a1f1dSLionel Sambuc case 2: return 128;
525*0a6a1f1dSLionel Sambuc default: return 256;
526*0a6a1f1dSLionel Sambuc }
527*0a6a1f1dSLionel Sambuc }
528*0a6a1f1dSLionel Sambuc
getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,unsigned WaveCount) const529*0a6a1f1dSLionel Sambuc unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
530*0a6a1f1dSLionel Sambuc unsigned WaveCount) const {
531*0a6a1f1dSLionel Sambuc if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
532*0a6a1f1dSLionel Sambuc switch (WaveCount) {
533*0a6a1f1dSLionel Sambuc case 10: return 80;
534*0a6a1f1dSLionel Sambuc case 9: return 80;
535*0a6a1f1dSLionel Sambuc case 8: return 96;
536*0a6a1f1dSLionel Sambuc default: return 102;
537*0a6a1f1dSLionel Sambuc }
538*0a6a1f1dSLionel Sambuc } else {
539*0a6a1f1dSLionel Sambuc switch(WaveCount) {
540*0a6a1f1dSLionel Sambuc case 10: return 48;
541*0a6a1f1dSLionel Sambuc case 9: return 56;
542*0a6a1f1dSLionel Sambuc case 8: return 64;
543*0a6a1f1dSLionel Sambuc case 7: return 72;
544*0a6a1f1dSLionel Sambuc case 6: return 80;
545*0a6a1f1dSLionel Sambuc case 5: return 96;
546*0a6a1f1dSLionel Sambuc default: return 103;
547*0a6a1f1dSLionel Sambuc }
548*0a6a1f1dSLionel Sambuc }
549*0a6a1f1dSLionel Sambuc }
550