//===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Pass to pre-allocate WWM registers.
//
//===----------------------------------------------------------------------===//

#include "SIPreAllocateWWMRegs.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"

using namespace llvm;

#define DEBUG_TYPE "si-pre-allocate-wwm-regs"

static cl::opt<bool>
    EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs",
                                    cl::init(false), cl::Hidden);

namespace {

class SIPreAllocateWWMRegs {
private:
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;
  LiveRegMatrix *Matrix;
  VirtRegMap *VRM;
  RegisterClassInfo RegClassInfo;

  std::vector<unsigned> RegsToRewrite;
#ifndef NDEBUG
  void printWWMInfo(const MachineInstr &MI);
#endif
  bool processDef(MachineOperand &MO);
  void rewriteRegs(MachineFunction &MF);

public:
  SIPreAllocateWWMRegs(LiveIntervals *LIS, LiveRegMatrix *Matrix,
                       VirtRegMap *VRM)
      : LIS(LIS), Matrix(Matrix), VRM(VRM) {}
  bool run(MachineFunction &MF);
};

class SIPreAllocateWWMRegsLegacy : public MachineFunctionPass {
public:
  static char ID;

  SIPreAllocateWWMRegsLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervalsWrapperPass>();
    AU.addRequired<VirtRegMapWrapperLegacy>();
    AU.addRequired<LiveRegMatrixWrapperLegacy>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
                      "SI Pre-allocate WWM Registers", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
INITIALIZE_PASS_END(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
                    "SI Pre-allocate WWM Registers", false, false)

char SIPreAllocateWWMRegsLegacy::ID = 0;

char &llvm::SIPreAllocateWWMRegsLegacyID = SIPreAllocateWWMRegsLegacy::ID;

FunctionPass *llvm::createSIPreAllocateWWMRegsLegacyPass() {
  return new SIPreAllocateWWMRegsLegacy();
}

bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
  Register Reg = MO.getReg();

  // Only virtual VGPRs that have not already been assigned a physical
  // register are candidates for pre-allocation.
  if (Reg.isPhysical())
    return false;

  if (!TRI->isVGPR(*MRI, Reg))
    return false;

  if (VRM->hasPhys(Reg))
    return false;

  LiveInterval &LI = LIS->getInterval(Reg);

  // Assign the first register in allocation order that is both unused and
  // free of interference with this live interval.
  for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
    if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) &&
        Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
      Matrix->assign(LI, PhysReg);
      assert(PhysReg != 0);
      RegsToRewrite.push_back(Reg);
      return true;
    }
  }

  llvm_unreachable("physreg not found for WWM expression");
}

void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
  // Rewrite all uses of the pre-allocated virtual registers to their assigned
  // physical registers, resolving any subregister indices.
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      for (MachineOperand &MO : MI.operands()) {
        if (!MO.isReg())
          continue;

        const Register VirtReg = MO.getReg();
        if (VirtReg.isPhysical())
          continue;

        if (!VRM->hasPhys(VirtReg))
          continue;

        Register PhysReg = VRM->getPhys(VirtReg);
        const unsigned SubReg = MO.getSubReg();
        if (SubReg != 0) {
          PhysReg = TRI->getSubReg(PhysReg, SubReg);
          MO.setSubReg(0);
        }

        MO.setReg(PhysReg);
        MO.setIsRenamable(false);
      }
    }
  }

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // Drop the live intervals of the rewritten registers and reserve their
  // physical registers so the main register allocator cannot reuse them.
  for (unsigned Reg : RegsToRewrite) {
    LIS->removeInterval(Reg);

    const Register PhysReg = VRM->getPhys(Reg);
    assert(PhysReg != 0);

    MFI->reserveWWMRegister(PhysReg);
  }

  RegsToRewrite.clear();

  // Update the set of reserved registers to include WWM ones.
  MRI->freezeReservedRegs();
}

#ifndef NDEBUG
LLVM_DUMP_METHOD void
SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {

  unsigned Opc = MI.getOpcode();

  if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) {
    dbgs() << "Entering ";
  } else {
    assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM);
    dbgs() << "Exiting ";
  }

  if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) {
    dbgs() << "Strict WWM ";
  } else {
    assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM);
    dbgs() << "Strict WQM ";
  }

  dbgs() << "region: " << MI;
}

#endif

bool SIPreAllocateWWMRegsLegacy::runOnMachineFunction(MachineFunction &MF) {
  auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  auto *Matrix = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
  auto *VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
  return SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
}

bool SIPreAllocateWWMRegs::run(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName()
                    << "\n");

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();
  MRI = &MF.getRegInfo();

  RegClassInfo.runOnMachineFunction(MF);

  bool PreallocateSGPRSpillVGPRs =
      EnablePreallocateSGPRSpillVGPRs ||
      MF.getFunction().hasFnAttribute("amdgpu-prealloc-sgpr-spill-vgprs");

  bool RegsAssigned = false;

  // We use a reverse post-order traversal of the control-flow graph to
  // guarantee that we visit definitions in dominance order. Since WWM
  // expressions are guaranteed to never involve phi nodes, and we can only
  // escape WWM through the special WWM instruction, this means that this is a
  // perfect elimination order, so we can never do any better.
  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);

  for (MachineBasicBlock *MBB : RPOT) {
    bool InWWM = false;
    for (MachineInstr &MI : *MBB) {
      if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) {
        if (PreallocateSGPRSpillVGPRs)
          RegsAssigned |= processDef(MI.getOperand(0));
        continue;
      }

      if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
          MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) {
        LLVM_DEBUG(printWWMInfo(MI));
        InWWM = true;
        continue;
      }

      if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM ||
          MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) {
        LLVM_DEBUG(printWWMInfo(MI));
        InWWM = false;
      }

      if (!InWWM)
        continue;

      LLVM_DEBUG(dbgs() << "Processing " << MI);

      for (MachineOperand &DefOpnd : MI.defs()) {
        RegsAssigned |= processDef(DefOpnd);
      }
    }
  }

  if (!RegsAssigned)
    return false;

  rewriteRegs(MF);
  return true;
}

PreservedAnalyses
SIPreAllocateWWMRegsPass::run(MachineFunction &MF,
                              MachineFunctionAnalysisManager &MFAM) {
  auto *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF);
  auto *Matrix = &MFAM.getResult<LiveRegMatrixAnalysis>(MF);
  auto *VRM = &MFAM.getResult<VirtRegMapAnalysis>(MF);
  SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
  return PreservedAnalyses::all();
}
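
// A minimal usage sketch, not part of the pass itself: because the legacy pass
// is registered under DEBUG_TYPE ("si-pre-allocate-wwm-regs"), it can be run
// in isolation on MIR input through llc's -run-pass machinery. The triple,
// input file name, and extra flags below are illustrative assumptions only:
//
//   llc -mtriple=amdgcn-amd-amdhsa -run-pass=si-pre-allocate-wwm-regs \
//       -verify-machineinstrs input.mir -o -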