10b57cec5SDimitry Andric //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric /// \file 100b57cec5SDimitry Andric /// Pass to pre-allocated WWM registers 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "AMDGPU.h" 15e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 160b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 17480093f4SDimitry Andric #include "SIMachineFunctionInfo.h" 180b57cec5SDimitry Andric #include "llvm/ADT/PostOrderIterator.h" 190b57cec5SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h" 200b57cec5SDimitry Andric #include "llvm/CodeGen/LiveRegMatrix.h" 2181ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 220b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 2381ad6265SDimitry Andric #include "llvm/CodeGen/RegisterClassInfo.h" 2481ad6265SDimitry Andric #include "llvm/CodeGen/VirtRegMap.h" 25480093f4SDimitry Andric #include "llvm/InitializePasses.h" 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric using namespace llvm; 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric #define DEBUG_TYPE "si-pre-allocate-wwm-regs" 300b57cec5SDimitry Andric 315f757f3fSDimitry Andric static cl::opt<bool> 325f757f3fSDimitry Andric EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs", 335f757f3fSDimitry Andric cl::init(false), cl::Hidden); 345f757f3fSDimitry Andric 350b57cec5SDimitry Andric namespace { 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric class SIPreAllocateWWMRegs : public MachineFunctionPass { 380b57cec5SDimitry Andric private: 390b57cec5SDimitry Andric const SIInstrInfo *TII; 400b57cec5SDimitry Andric const SIRegisterInfo *TRI; 410b57cec5SDimitry Andric MachineRegisterInfo *MRI; 420b57cec5SDimitry Andric LiveIntervals *LIS; 430b57cec5SDimitry Andric LiveRegMatrix *Matrix; 440b57cec5SDimitry Andric VirtRegMap *VRM; 450b57cec5SDimitry Andric RegisterClassInfo RegClassInfo; 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric std::vector<unsigned> RegsToRewrite; 48fe6060f1SDimitry Andric #ifndef NDEBUG 49fe6060f1SDimitry Andric void printWWMInfo(const MachineInstr &MI); 50fe6060f1SDimitry Andric #endif 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric public: 530b57cec5SDimitry Andric static char ID; 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric SIPreAllocateWWMRegs() : MachineFunctionPass(ID) { 560b57cec5SDimitry Andric initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry()); 570b57cec5SDimitry Andric } 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 62*0fca6ea1SDimitry Andric AU.addRequired<LiveIntervalsWrapperPass>(); 630b57cec5SDimitry Andric AU.addRequired<VirtRegMap>(); 640b57cec5SDimitry Andric AU.addRequired<LiveRegMatrix>(); 655f757f3fSDimitry Andric AU.setPreservesAll(); 660b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 670b57cec5SDimitry Andric } 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric private: 700b57cec5SDimitry Andric bool processDef(MachineOperand &MO); 710b57cec5SDimitry Andric void rewriteRegs(MachineFunction &MF); 720b57cec5SDimitry Andric }; 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric } // End anonymous namespace. 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE, 770b57cec5SDimitry Andric "SI Pre-allocate WWM Registers", false, false) 78*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) 790b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 800b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) 810b57cec5SDimitry Andric INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE, 820b57cec5SDimitry Andric "SI Pre-allocate WWM Registers", false, false) 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric char SIPreAllocateWWMRegs::ID = 0; 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID; 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric FunctionPass *llvm::createSIPreAllocateWWMRegsPass() { 890b57cec5SDimitry Andric return new SIPreAllocateWWMRegs(); 900b57cec5SDimitry Andric } 910b57cec5SDimitry Andric 920b57cec5SDimitry Andric bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) { 938bcb0991SDimitry Andric Register Reg = MO.getReg(); 94e8d8bef9SDimitry Andric if (Reg.isPhysical()) 950b57cec5SDimitry Andric return false; 960b57cec5SDimitry Andric 97e8d8bef9SDimitry Andric if (!TRI->isVGPR(*MRI, Reg)) 980b57cec5SDimitry Andric return false; 990b57cec5SDimitry Andric 1000b57cec5SDimitry Andric if (VRM->hasPhys(Reg)) 1010b57cec5SDimitry Andric return false; 1020b57cec5SDimitry Andric 1030b57cec5SDimitry Andric LiveInterval &LI = LIS->getInterval(Reg); 1040b57cec5SDimitry Andric 105e8d8bef9SDimitry Andric for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) { 1065f757f3fSDimitry Andric if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) && 1070b57cec5SDimitry Andric Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) { 1080b57cec5SDimitry Andric Matrix->assign(LI, PhysReg); 1090b57cec5SDimitry Andric assert(PhysReg != 0); 1100b57cec5SDimitry Andric RegsToRewrite.push_back(Reg); 1110b57cec5SDimitry Andric return true; 1120b57cec5SDimitry Andric } 1130b57cec5SDimitry Andric } 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric llvm_unreachable("physreg not found for WWM expression"); 1160b57cec5SDimitry Andric } 1170b57cec5SDimitry Andric 1180b57cec5SDimitry Andric void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { 1190b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF) { 1200b57cec5SDimitry Andric for (MachineInstr &MI : MBB) { 1210b57cec5SDimitry Andric for (MachineOperand &MO : MI.operands()) { 1220b57cec5SDimitry Andric if (!MO.isReg()) 1230b57cec5SDimitry Andric continue; 1240b57cec5SDimitry Andric 1258bcb0991SDimitry Andric const Register VirtReg = MO.getReg(); 126e8d8bef9SDimitry Andric if (VirtReg.isPhysical()) 1270b57cec5SDimitry Andric continue; 1280b57cec5SDimitry Andric 1290b57cec5SDimitry Andric if (!VRM->hasPhys(VirtReg)) 1300b57cec5SDimitry Andric continue; 1310b57cec5SDimitry Andric 1328bcb0991SDimitry Andric Register PhysReg = VRM->getPhys(VirtReg); 1330b57cec5SDimitry Andric const unsigned SubReg = MO.getSubReg(); 1340b57cec5SDimitry Andric if (SubReg != 0) { 1350b57cec5SDimitry Andric PhysReg = TRI->getSubReg(PhysReg, SubReg); 1360b57cec5SDimitry Andric MO.setSubReg(0); 1370b57cec5SDimitry Andric } 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric MO.setReg(PhysReg); 1400b57cec5SDimitry Andric MO.setIsRenamable(false); 1410b57cec5SDimitry Andric } 1420b57cec5SDimitry Andric } 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric for (unsigned Reg : RegsToRewrite) { 1480b57cec5SDimitry Andric LIS->removeInterval(Reg); 1490b57cec5SDimitry Andric 1508bcb0991SDimitry Andric const Register PhysReg = VRM->getPhys(Reg); 1510b57cec5SDimitry Andric assert(PhysReg != 0); 152fe6060f1SDimitry Andric 15381ad6265SDimitry Andric MFI->reserveWWMRegister(PhysReg); 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric 1560b57cec5SDimitry Andric RegsToRewrite.clear(); 1570b57cec5SDimitry Andric 1580b57cec5SDimitry Andric // Update the set of reserved registers to include WWM ones. 159*0fca6ea1SDimitry Andric MRI->freezeReservedRegs(); 1600b57cec5SDimitry Andric } 1610b57cec5SDimitry Andric 162fe6060f1SDimitry Andric #ifndef NDEBUG 163fe6060f1SDimitry Andric LLVM_DUMP_METHOD void 164fe6060f1SDimitry Andric SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) { 165fe6060f1SDimitry Andric 166fe6060f1SDimitry Andric unsigned Opc = MI.getOpcode(); 167fe6060f1SDimitry Andric 168*0fca6ea1SDimitry Andric if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) { 169fe6060f1SDimitry Andric dbgs() << "Entering "; 170fe6060f1SDimitry Andric } else { 171*0fca6ea1SDimitry Andric assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM); 172fe6060f1SDimitry Andric dbgs() << "Exiting "; 173fe6060f1SDimitry Andric } 174fe6060f1SDimitry Andric 175fe6060f1SDimitry Andric if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) { 176fe6060f1SDimitry Andric dbgs() << "Strict WWM "; 177fe6060f1SDimitry Andric } else { 178fe6060f1SDimitry Andric assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM); 179fe6060f1SDimitry Andric dbgs() << "Strict WQM "; 180fe6060f1SDimitry Andric } 181fe6060f1SDimitry Andric 182fe6060f1SDimitry Andric dbgs() << "region: " << MI; 183fe6060f1SDimitry Andric } 184fe6060f1SDimitry Andric 185fe6060f1SDimitry Andric #endif 186fe6060f1SDimitry Andric 1870b57cec5SDimitry Andric bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) { 1880b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n"); 1890b57cec5SDimitry Andric 1900b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1910b57cec5SDimitry Andric 1920b57cec5SDimitry Andric TII = ST.getInstrInfo(); 1930b57cec5SDimitry Andric TRI = &TII->getRegisterInfo(); 1940b57cec5SDimitry Andric MRI = &MF.getRegInfo(); 1950b57cec5SDimitry Andric 196*0fca6ea1SDimitry Andric LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); 1970b57cec5SDimitry Andric Matrix = &getAnalysis<LiveRegMatrix>(); 1980b57cec5SDimitry Andric VRM = &getAnalysis<VirtRegMap>(); 1990b57cec5SDimitry Andric 2000b57cec5SDimitry Andric RegClassInfo.runOnMachineFunction(MF); 2010b57cec5SDimitry Andric 2025f757f3fSDimitry Andric bool PreallocateSGPRSpillVGPRs = 2035f757f3fSDimitry Andric EnablePreallocateSGPRSpillVGPRs || 2045f757f3fSDimitry Andric MF.getFunction().hasFnAttribute("amdgpu-prealloc-sgpr-spill-vgprs"); 2055f757f3fSDimitry Andric 2060b57cec5SDimitry Andric bool RegsAssigned = false; 2070b57cec5SDimitry Andric 2080b57cec5SDimitry Andric // We use a reverse post-order traversal of the control-flow graph to 2090b57cec5SDimitry Andric // guarantee that we visit definitions in dominance order. Since WWM 2100b57cec5SDimitry Andric // expressions are guaranteed to never involve phi nodes, and we can only 2110b57cec5SDimitry Andric // escape WWM through the special WWM instruction, this means that this is a 2120b57cec5SDimitry Andric // perfect elimination order, so we can never do any better. 2130b57cec5SDimitry Andric ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); 2140b57cec5SDimitry Andric 2150b57cec5SDimitry Andric for (MachineBasicBlock *MBB : RPOT) { 2160b57cec5SDimitry Andric bool InWWM = false; 2170b57cec5SDimitry Andric for (MachineInstr &MI : *MBB) { 2180b57cec5SDimitry Andric if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 || 2190b57cec5SDimitry Andric MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64) 2200b57cec5SDimitry Andric RegsAssigned |= processDef(MI.getOperand(0)); 2210b57cec5SDimitry Andric 2225f757f3fSDimitry Andric if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) { 2235f757f3fSDimitry Andric if (!PreallocateSGPRSpillVGPRs) 2245f757f3fSDimitry Andric continue; 2255f757f3fSDimitry Andric RegsAssigned |= processDef(MI.getOperand(0)); 2265f757f3fSDimitry Andric } 2275f757f3fSDimitry Andric 228fe6060f1SDimitry Andric if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM || 229*0fca6ea1SDimitry Andric MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) { 230fe6060f1SDimitry Andric LLVM_DEBUG(printWWMInfo(MI)); 2310b57cec5SDimitry Andric InWWM = true; 2320b57cec5SDimitry Andric continue; 2330b57cec5SDimitry Andric } 2340b57cec5SDimitry Andric 235fe6060f1SDimitry Andric if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM || 236*0fca6ea1SDimitry Andric MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) { 237fe6060f1SDimitry Andric LLVM_DEBUG(printWWMInfo(MI)); 2380b57cec5SDimitry Andric InWWM = false; 2390b57cec5SDimitry Andric } 2400b57cec5SDimitry Andric 2410b57cec5SDimitry Andric if (!InWWM) 2420b57cec5SDimitry Andric continue; 2430b57cec5SDimitry Andric 244fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Processing " << MI); 2450b57cec5SDimitry Andric 2460b57cec5SDimitry Andric for (MachineOperand &DefOpnd : MI.defs()) { 2470b57cec5SDimitry Andric RegsAssigned |= processDef(DefOpnd); 2480b57cec5SDimitry Andric } 2490b57cec5SDimitry Andric } 2500b57cec5SDimitry Andric } 2510b57cec5SDimitry Andric 2520b57cec5SDimitry Andric if (!RegsAssigned) 2530b57cec5SDimitry Andric return false; 2540b57cec5SDimitry Andric 2550b57cec5SDimitry Andric rewriteRegs(MF); 2560b57cec5SDimitry Andric return true; 2570b57cec5SDimitry Andric } 258