1 //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// Pass to pre-allocate WWM registers.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPU.h"
15 #include "GCNSubtarget.h"
16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17 #include "SIMachineFunctionInfo.h"
18 #include "llvm/ADT/PostOrderIterator.h"
19 #include "llvm/CodeGen/LiveIntervals.h"
20 #include "llvm/CodeGen/LiveRegMatrix.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/InitializePasses.h"
23
24 using namespace llvm;
25
26 #define DEBUG_TYPE "si-pre-allocate-wwm-regs"
27
28 namespace {
29
30 class SIPreAllocateWWMRegs : public MachineFunctionPass {
31 private:
32 const SIInstrInfo *TII;
33 const SIRegisterInfo *TRI;
34 MachineRegisterInfo *MRI;
35 LiveIntervals *LIS;
36 LiveRegMatrix *Matrix;
37 VirtRegMap *VRM;
38 RegisterClassInfo RegClassInfo;
39
40 std::vector<unsigned> RegsToRewrite;
41 #ifndef NDEBUG
42 void printWWMInfo(const MachineInstr &MI);
43 #endif
44
45 public:
46 static char ID;
47
SIPreAllocateWWMRegs()48 SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
49 initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
50 }
51
52 bool runOnMachineFunction(MachineFunction &MF) override;
53
getAnalysisUsage(AnalysisUsage & AU) const54 void getAnalysisUsage(AnalysisUsage &AU) const override {
55 AU.addRequired<LiveIntervals>();
56 AU.addPreserved<LiveIntervals>();
57 AU.addRequired<VirtRegMap>();
58 AU.addRequired<LiveRegMatrix>();
59 AU.addPreserved<SlotIndexes>();
60 AU.setPreservesCFG();
61 MachineFunctionPass::getAnalysisUsage(AU);
62 }
63
64 private:
65 bool processDef(MachineOperand &MO);
66 void rewriteRegs(MachineFunction &MF);
67 };
68
69 } // End anonymous namespace.
70
71 INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
72 "SI Pre-allocate WWM Registers", false, false)
73 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
74 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
75 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
76 INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
77 "SI Pre-allocate WWM Registers", false, false)
78
79 char SIPreAllocateWWMRegs::ID = 0;
80
81 char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
82
createSIPreAllocateWWMRegsPass()83 FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
84 return new SIPreAllocateWWMRegs();
85 }
86
processDef(MachineOperand & MO)87 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
88 if (!MO.isReg())
89 return false;
90
91 Register Reg = MO.getReg();
92 if (Reg.isPhysical())
93 return false;
94
95 if (!TRI->isVGPR(*MRI, Reg))
96 return false;
97
98 if (VRM->hasPhys(Reg))
99 return false;
100
101 LiveInterval &LI = LIS->getInterval(Reg);
102
103 for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
104 if (!MRI->isPhysRegUsed(PhysReg) &&
105 Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
106 Matrix->assign(LI, PhysReg);
107 assert(PhysReg != 0);
108 RegsToRewrite.push_back(Reg);
109 return true;
110 }
111 }
112
113 llvm_unreachable("physreg not found for WWM expression");
114 return false;
115 }
116
rewriteRegs(MachineFunction & MF)117 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
118 for (MachineBasicBlock &MBB : MF) {
119 for (MachineInstr &MI : MBB) {
120 for (MachineOperand &MO : MI.operands()) {
121 if (!MO.isReg())
122 continue;
123
124 const Register VirtReg = MO.getReg();
125 if (VirtReg.isPhysical())
126 continue;
127
128 if (!VRM->hasPhys(VirtReg))
129 continue;
130
131 Register PhysReg = VRM->getPhys(VirtReg);
132 const unsigned SubReg = MO.getSubReg();
133 if (SubReg != 0) {
134 PhysReg = TRI->getSubReg(PhysReg, SubReg);
135 MO.setSubReg(0);
136 }
137
138 MO.setReg(PhysReg);
139 MO.setIsRenamable(false);
140 }
141 }
142 }
143
144 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
145 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
146
147 for (unsigned Reg : RegsToRewrite) {
148 LIS->removeInterval(Reg);
149
150 const Register PhysReg = VRM->getPhys(Reg);
151 assert(PhysReg != 0);
152
153 // Check if PhysReg is already reserved
154 if (!MFI->WWMReservedRegs.count(PhysReg)) {
155 Optional<int> FI;
156 if (!MFI->isEntryFunction()) {
157 // Create a stack object for a possible spill in the function prologue.
158 // Note: Non-CSR VGPR also need this as we may overwrite inactive lanes.
159 const TargetRegisterClass *RC = TRI->getPhysRegClass(PhysReg);
160 FI = FrameInfo.CreateSpillStackObject(TRI->getSpillSize(*RC),
161 TRI->getSpillAlign(*RC));
162 }
163 MFI->reserveWWMRegister(PhysReg, FI);
164 }
165 }
166
167 RegsToRewrite.clear();
168
169 // Update the set of reserved registers to include WWM ones.
170 MRI->freezeReservedRegs(MF);
171 }
172
173 #ifndef NDEBUG
174 LLVM_DUMP_METHOD void
printWWMInfo(const MachineInstr & MI)175 SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
176
177 unsigned Opc = MI.getOpcode();
178
179 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) {
180 dbgs() << "Entering ";
181 } else {
182 assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM);
183 dbgs() << "Exiting ";
184 }
185
186 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) {
187 dbgs() << "Strict WWM ";
188 } else {
189 assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM);
190 dbgs() << "Strict WQM ";
191 }
192
193 dbgs() << "region: " << MI;
194 }
195
196 #endif
197
runOnMachineFunction(MachineFunction & MF)198 bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
199 LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
200
201 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
202
203 TII = ST.getInstrInfo();
204 TRI = &TII->getRegisterInfo();
205 MRI = &MF.getRegInfo();
206
207 LIS = &getAnalysis<LiveIntervals>();
208 Matrix = &getAnalysis<LiveRegMatrix>();
209 VRM = &getAnalysis<VirtRegMap>();
210
211 RegClassInfo.runOnMachineFunction(MF);
212
213 bool RegsAssigned = false;
214
215 // We use a reverse post-order traversal of the control-flow graph to
216 // guarantee that we visit definitions in dominance order. Since WWM
217 // expressions are guaranteed to never involve phi nodes, and we can only
218 // escape WWM through the special WWM instruction, this means that this is a
219 // perfect elimination order, so we can never do any better.
220 ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
221
222 for (MachineBasicBlock *MBB : RPOT) {
223 bool InWWM = false;
224 for (MachineInstr &MI : *MBB) {
225 if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
226 MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
227 RegsAssigned |= processDef(MI.getOperand(0));
228
229 if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
230 MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) {
231 LLVM_DEBUG(printWWMInfo(MI));
232 InWWM = true;
233 continue;
234 }
235
236 if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM ||
237 MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) {
238 LLVM_DEBUG(printWWMInfo(MI));
239 InWWM = false;
240 }
241
242 if (!InWWM)
243 continue;
244
245 LLVM_DEBUG(dbgs() << "Processing " << MI);
246
247 for (MachineOperand &DefOpnd : MI.defs()) {
248 RegsAssigned |= processDef(DefOpnd);
249 }
250 }
251 }
252
253 if (!RegsAssigned)
254 return false;
255
256 rewriteRegs(MF);
257 return true;
258 }
259