xref: /llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp (revision be187369a03bf2df8bdbc76ecd381377b3bb6074)
1 //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Pass to pre-allocate WWM registers
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SIPreAllocateWWMRegs.h"
15 #include "AMDGPU.h"
16 #include "GCNSubtarget.h"
17 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "llvm/ADT/PostOrderIterator.h"
20 #include "llvm/CodeGen/LiveIntervals.h"
21 #include "llvm/CodeGen/LiveRegMatrix.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/RegisterClassInfo.h"
25 #include "llvm/CodeGen/VirtRegMap.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "si-pre-allocate-wwm-regs"
30 
31 static cl::opt<bool>
32     EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs",
33                                     cl::init(false), cl::Hidden);
34 
35 namespace {
36 
37 class SIPreAllocateWWMRegs {
38 private:
39   const SIInstrInfo *TII;
40   const SIRegisterInfo *TRI;
41   MachineRegisterInfo *MRI;
42   LiveIntervals *LIS;
43   LiveRegMatrix *Matrix;
44   VirtRegMap *VRM;
45   RegisterClassInfo RegClassInfo;
46 
47   std::vector<unsigned> RegsToRewrite;
48 #ifndef NDEBUG
49   void printWWMInfo(const MachineInstr &MI);
50 #endif
51   bool processDef(MachineOperand &MO);
52   void rewriteRegs(MachineFunction &MF);
53 
54 public:
55   SIPreAllocateWWMRegs(LiveIntervals *LIS, LiveRegMatrix *Matrix,
56                        VirtRegMap *VRM)
57       : LIS(LIS), Matrix(Matrix), VRM(VRM) {}
58   bool run(MachineFunction &MF);
59 };
60 
61 class SIPreAllocateWWMRegsLegacy : public MachineFunctionPass {
62 public:
63   static char ID;
64 
65   SIPreAllocateWWMRegsLegacy() : MachineFunctionPass(ID) {}
66 
67   bool runOnMachineFunction(MachineFunction &MF) override;
68 
69   void getAnalysisUsage(AnalysisUsage &AU) const override {
70     AU.addRequired<LiveIntervalsWrapperPass>();
71     AU.addRequired<VirtRegMapWrapperLegacy>();
72     AU.addRequired<LiveRegMatrixWrapperLegacy>();
73     AU.setPreservesAll();
74     MachineFunctionPass::getAnalysisUsage(AU);
75   }
76 };
77 
78 } // End anonymous namespace.
79 
80 INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
81                       "SI Pre-allocate WWM Registers", false, false)
82 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
83 INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
84 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
85 INITIALIZE_PASS_END(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
86                     "SI Pre-allocate WWM Registers", false, false)
87 
88 char SIPreAllocateWWMRegsLegacy::ID = 0;
89 
90 char &llvm::SIPreAllocateWWMRegsLegacyID = SIPreAllocateWWMRegsLegacy::ID;
91 
92 FunctionPass *llvm::createSIPreAllocateWWMRegsLegacyPass() {
93   return new SIPreAllocateWWMRegsLegacy();
94 }
95 
96 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
97   Register Reg = MO.getReg();
98   if (Reg.isPhysical())
99     return false;
100 
101   if (!TRI->isVGPR(*MRI, Reg))
102     return false;
103 
104   if (VRM->hasPhys(Reg))
105     return false;
106 
107   LiveInterval &LI = LIS->getInterval(Reg);
108 
109   for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
110     if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) &&
111         Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
112       Matrix->assign(LI, PhysReg);
113       assert(PhysReg != 0);
114       RegsToRewrite.push_back(Reg);
115       return true;
116     }
117   }
118 
119   llvm_unreachable("physreg not found for WWM expression");
120 }
121 
122 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
123   for (MachineBasicBlock &MBB : MF) {
124     for (MachineInstr &MI : MBB) {
125       for (MachineOperand &MO : MI.operands()) {
126         if (!MO.isReg())
127           continue;
128 
129         const Register VirtReg = MO.getReg();
130         if (VirtReg.isPhysical())
131           continue;
132 
133         if (!VRM->hasPhys(VirtReg))
134           continue;
135 
136         Register PhysReg = VRM->getPhys(VirtReg);
137         const unsigned SubReg = MO.getSubReg();
138         if (SubReg != 0) {
139           PhysReg = TRI->getSubReg(PhysReg, SubReg);
140           MO.setSubReg(0);
141         }
142 
143         MO.setReg(PhysReg);
144         MO.setIsRenamable(false);
145       }
146     }
147   }
148 
149   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
150 
151   for (unsigned Reg : RegsToRewrite) {
152     LIS->removeInterval(Reg);
153 
154     const Register PhysReg = VRM->getPhys(Reg);
155     assert(PhysReg != 0);
156 
157     MFI->reserveWWMRegister(PhysReg);
158   }
159 
160   RegsToRewrite.clear();
161 
162   // Update the set of reserved registers to include WWM ones.
163   MRI->freezeReservedRegs();
164 }
165 
166 #ifndef NDEBUG
167 LLVM_DUMP_METHOD void
168 SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
169 
170   unsigned Opc = MI.getOpcode();
171 
172   if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) {
173     dbgs() << "Entering ";
174   } else {
175     assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM);
176     dbgs() << "Exiting ";
177   }
178 
179   if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) {
180     dbgs() << "Strict WWM ";
181   } else {
182     assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM);
183     dbgs() << "Strict WQM ";
184   }
185 
186   dbgs() << "region: " << MI;
187 }
188 
189 #endif
190 
191 bool SIPreAllocateWWMRegsLegacy::runOnMachineFunction(MachineFunction &MF) {
192   auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
193   auto *Matrix = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
194   auto *VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
195   return SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
196 }
197 
198 bool SIPreAllocateWWMRegs::run(MachineFunction &MF) {
199   LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
200 
201   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
202 
203   TII = ST.getInstrInfo();
204   TRI = &TII->getRegisterInfo();
205   MRI = &MF.getRegInfo();
206 
207   RegClassInfo.runOnMachineFunction(MF);
208 
209   bool PreallocateSGPRSpillVGPRs =
210       EnablePreallocateSGPRSpillVGPRs ||
211       MF.getFunction().hasFnAttribute("amdgpu-prealloc-sgpr-spill-vgprs");
212 
213   bool RegsAssigned = false;
214 
215   // We use a reverse post-order traversal of the control-flow graph to
216   // guarantee that we visit definitions in dominance order. Since WWM
217   // expressions are guaranteed to never involve phi nodes, and we can only
218   // escape WWM through the special WWM instruction, this means that this is a
219   // perfect elimination order, so we can never do any better.
220   ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
221 
222   for (MachineBasicBlock *MBB : RPOT) {
223     bool InWWM = false;
224     for (MachineInstr &MI : *MBB) {
225       if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) {
226         if (PreallocateSGPRSpillVGPRs)
227           RegsAssigned |= processDef(MI.getOperand(0));
228         continue;
229       }
230 
231       if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
232           MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) {
233         LLVM_DEBUG(printWWMInfo(MI));
234         InWWM = true;
235         continue;
236       }
237 
238       if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM ||
239           MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) {
240         LLVM_DEBUG(printWWMInfo(MI));
241         InWWM = false;
242       }
243 
244       if (!InWWM)
245         continue;
246 
247       LLVM_DEBUG(dbgs() << "Processing " << MI);
248 
249       for (MachineOperand &DefOpnd : MI.defs()) {
250         RegsAssigned |= processDef(DefOpnd);
251       }
252     }
253   }
254 
255   if (!RegsAssigned)
256     return false;
257 
258   rewriteRegs(MF);
259   return true;
260 }
261 
262 PreservedAnalyses
263 SIPreAllocateWWMRegsPass::run(MachineFunction &MF,
264                               MachineFunctionAnalysisManager &MFAM) {
265   auto *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF);
266   auto *Matrix = &MFAM.getResult<LiveRegMatrixAnalysis>(MF);
267   auto *VRM = &MFAM.getResult<VirtRegMapAnalysis>(MF);
268   SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
269   return PreservedAnalyses::all();
270 }
271