xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
/// Pass to pre-allocate WWM registers.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "AMDGPU.h"
15e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
160b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17480093f4SDimitry Andric #include "SIMachineFunctionInfo.h"
180b57cec5SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
190b57cec5SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
200b57cec5SDimitry Andric #include "llvm/CodeGen/LiveRegMatrix.h"
2181ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
220b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
2381ad6265SDimitry Andric #include "llvm/CodeGen/RegisterClassInfo.h"
2481ad6265SDimitry Andric #include "llvm/CodeGen/VirtRegMap.h"
25480093f4SDimitry Andric #include "llvm/InitializePasses.h"
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric using namespace llvm;
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric #define DEBUG_TYPE "si-pre-allocate-wwm-regs"
300b57cec5SDimitry Andric 
315f757f3fSDimitry Andric static cl::opt<bool>
325f757f3fSDimitry Andric     EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs",
335f757f3fSDimitry Andric                                     cl::init(false), cl::Hidden);
345f757f3fSDimitry Andric 
350b57cec5SDimitry Andric namespace {
360b57cec5SDimitry Andric 
370b57cec5SDimitry Andric class SIPreAllocateWWMRegs : public MachineFunctionPass {
380b57cec5SDimitry Andric private:
390b57cec5SDimitry Andric   const SIInstrInfo *TII;
400b57cec5SDimitry Andric   const SIRegisterInfo *TRI;
410b57cec5SDimitry Andric   MachineRegisterInfo *MRI;
420b57cec5SDimitry Andric   LiveIntervals *LIS;
430b57cec5SDimitry Andric   LiveRegMatrix *Matrix;
440b57cec5SDimitry Andric   VirtRegMap *VRM;
450b57cec5SDimitry Andric   RegisterClassInfo RegClassInfo;
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric   std::vector<unsigned> RegsToRewrite;
48fe6060f1SDimitry Andric #ifndef NDEBUG
49fe6060f1SDimitry Andric   void printWWMInfo(const MachineInstr &MI);
50fe6060f1SDimitry Andric #endif
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric public:
530b57cec5SDimitry Andric   static char ID;
540b57cec5SDimitry Andric 
550b57cec5SDimitry Andric   SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
560b57cec5SDimitry Andric     initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
570b57cec5SDimitry Andric   }
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
62*0fca6ea1SDimitry Andric     AU.addRequired<LiveIntervalsWrapperPass>();
630b57cec5SDimitry Andric     AU.addRequired<VirtRegMap>();
640b57cec5SDimitry Andric     AU.addRequired<LiveRegMatrix>();
655f757f3fSDimitry Andric     AU.setPreservesAll();
660b57cec5SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
670b57cec5SDimitry Andric   }
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric private:
700b57cec5SDimitry Andric   bool processDef(MachineOperand &MO);
710b57cec5SDimitry Andric   void rewriteRegs(MachineFunction &MF);
720b57cec5SDimitry Andric };
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric } // End anonymous namespace.
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
770b57cec5SDimitry Andric                 "SI Pre-allocate WWM Registers", false, false)
78*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
790b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
800b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
810b57cec5SDimitry Andric INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
820b57cec5SDimitry Andric                 "SI Pre-allocate WWM Registers", false, false)
830b57cec5SDimitry Andric 
840b57cec5SDimitry Andric char SIPreAllocateWWMRegs::ID = 0;
850b57cec5SDimitry Andric 
860b57cec5SDimitry Andric char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
870b57cec5SDimitry Andric 
880b57cec5SDimitry Andric FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
890b57cec5SDimitry Andric   return new SIPreAllocateWWMRegs();
900b57cec5SDimitry Andric }
910b57cec5SDimitry Andric 
920b57cec5SDimitry Andric bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
938bcb0991SDimitry Andric   Register Reg = MO.getReg();
94e8d8bef9SDimitry Andric   if (Reg.isPhysical())
950b57cec5SDimitry Andric     return false;
960b57cec5SDimitry Andric 
97e8d8bef9SDimitry Andric   if (!TRI->isVGPR(*MRI, Reg))
980b57cec5SDimitry Andric     return false;
990b57cec5SDimitry Andric 
1000b57cec5SDimitry Andric   if (VRM->hasPhys(Reg))
1010b57cec5SDimitry Andric     return false;
1020b57cec5SDimitry Andric 
1030b57cec5SDimitry Andric   LiveInterval &LI = LIS->getInterval(Reg);
1040b57cec5SDimitry Andric 
105e8d8bef9SDimitry Andric   for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
1065f757f3fSDimitry Andric     if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) &&
1070b57cec5SDimitry Andric         Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
1080b57cec5SDimitry Andric       Matrix->assign(LI, PhysReg);
1090b57cec5SDimitry Andric       assert(PhysReg != 0);
1100b57cec5SDimitry Andric       RegsToRewrite.push_back(Reg);
1110b57cec5SDimitry Andric       return true;
1120b57cec5SDimitry Andric     }
1130b57cec5SDimitry Andric   }
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric   llvm_unreachable("physreg not found for WWM expression");
1160b57cec5SDimitry Andric }
1170b57cec5SDimitry Andric 
1180b57cec5SDimitry Andric void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
1190b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
1200b57cec5SDimitry Andric     for (MachineInstr &MI : MBB) {
1210b57cec5SDimitry Andric       for (MachineOperand &MO : MI.operands()) {
1220b57cec5SDimitry Andric         if (!MO.isReg())
1230b57cec5SDimitry Andric           continue;
1240b57cec5SDimitry Andric 
1258bcb0991SDimitry Andric         const Register VirtReg = MO.getReg();
126e8d8bef9SDimitry Andric         if (VirtReg.isPhysical())
1270b57cec5SDimitry Andric           continue;
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric         if (!VRM->hasPhys(VirtReg))
1300b57cec5SDimitry Andric           continue;
1310b57cec5SDimitry Andric 
1328bcb0991SDimitry Andric         Register PhysReg = VRM->getPhys(VirtReg);
1330b57cec5SDimitry Andric         const unsigned SubReg = MO.getSubReg();
1340b57cec5SDimitry Andric         if (SubReg != 0) {
1350b57cec5SDimitry Andric           PhysReg = TRI->getSubReg(PhysReg, SubReg);
1360b57cec5SDimitry Andric           MO.setSubReg(0);
1370b57cec5SDimitry Andric         }
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric         MO.setReg(PhysReg);
1400b57cec5SDimitry Andric         MO.setIsRenamable(false);
1410b57cec5SDimitry Andric       }
1420b57cec5SDimitry Andric     }
1430b57cec5SDimitry Andric   }
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1460b57cec5SDimitry Andric 
1470b57cec5SDimitry Andric   for (unsigned Reg : RegsToRewrite) {
1480b57cec5SDimitry Andric     LIS->removeInterval(Reg);
1490b57cec5SDimitry Andric 
1508bcb0991SDimitry Andric     const Register PhysReg = VRM->getPhys(Reg);
1510b57cec5SDimitry Andric     assert(PhysReg != 0);
152fe6060f1SDimitry Andric 
15381ad6265SDimitry Andric     MFI->reserveWWMRegister(PhysReg);
1540b57cec5SDimitry Andric   }
1550b57cec5SDimitry Andric 
1560b57cec5SDimitry Andric   RegsToRewrite.clear();
1570b57cec5SDimitry Andric 
1580b57cec5SDimitry Andric   // Update the set of reserved registers to include WWM ones.
159*0fca6ea1SDimitry Andric   MRI->freezeReservedRegs();
1600b57cec5SDimitry Andric }
1610b57cec5SDimitry Andric 
162fe6060f1SDimitry Andric #ifndef NDEBUG
163fe6060f1SDimitry Andric LLVM_DUMP_METHOD void
164fe6060f1SDimitry Andric SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
165fe6060f1SDimitry Andric 
166fe6060f1SDimitry Andric   unsigned Opc = MI.getOpcode();
167fe6060f1SDimitry Andric 
168*0fca6ea1SDimitry Andric   if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) {
169fe6060f1SDimitry Andric     dbgs() << "Entering ";
170fe6060f1SDimitry Andric   } else {
171*0fca6ea1SDimitry Andric     assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM);
172fe6060f1SDimitry Andric     dbgs() << "Exiting ";
173fe6060f1SDimitry Andric   }
174fe6060f1SDimitry Andric 
175fe6060f1SDimitry Andric   if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) {
176fe6060f1SDimitry Andric     dbgs() << "Strict WWM ";
177fe6060f1SDimitry Andric   } else {
178fe6060f1SDimitry Andric     assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM);
179fe6060f1SDimitry Andric     dbgs() << "Strict WQM ";
180fe6060f1SDimitry Andric   }
181fe6060f1SDimitry Andric 
182fe6060f1SDimitry Andric   dbgs() << "region: " << MI;
183fe6060f1SDimitry Andric }
184fe6060f1SDimitry Andric 
185fe6060f1SDimitry Andric #endif
186fe6060f1SDimitry Andric 
1870b57cec5SDimitry Andric bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
1880b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
1890b57cec5SDimitry Andric 
1900b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1910b57cec5SDimitry Andric 
1920b57cec5SDimitry Andric   TII = ST.getInstrInfo();
1930b57cec5SDimitry Andric   TRI = &TII->getRegisterInfo();
1940b57cec5SDimitry Andric   MRI = &MF.getRegInfo();
1950b57cec5SDimitry Andric 
196*0fca6ea1SDimitry Andric   LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
1970b57cec5SDimitry Andric   Matrix = &getAnalysis<LiveRegMatrix>();
1980b57cec5SDimitry Andric   VRM = &getAnalysis<VirtRegMap>();
1990b57cec5SDimitry Andric 
2000b57cec5SDimitry Andric   RegClassInfo.runOnMachineFunction(MF);
2010b57cec5SDimitry Andric 
2025f757f3fSDimitry Andric   bool PreallocateSGPRSpillVGPRs =
2035f757f3fSDimitry Andric       EnablePreallocateSGPRSpillVGPRs ||
2045f757f3fSDimitry Andric       MF.getFunction().hasFnAttribute("amdgpu-prealloc-sgpr-spill-vgprs");
2055f757f3fSDimitry Andric 
2060b57cec5SDimitry Andric   bool RegsAssigned = false;
2070b57cec5SDimitry Andric 
2080b57cec5SDimitry Andric   // We use a reverse post-order traversal of the control-flow graph to
2090b57cec5SDimitry Andric   // guarantee that we visit definitions in dominance order. Since WWM
2100b57cec5SDimitry Andric   // expressions are guaranteed to never involve phi nodes, and we can only
2110b57cec5SDimitry Andric   // escape WWM through the special WWM instruction, this means that this is a
2120b57cec5SDimitry Andric   // perfect elimination order, so we can never do any better.
2130b57cec5SDimitry Andric   ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
2140b57cec5SDimitry Andric 
2150b57cec5SDimitry Andric   for (MachineBasicBlock *MBB : RPOT) {
2160b57cec5SDimitry Andric     bool InWWM = false;
2170b57cec5SDimitry Andric     for (MachineInstr &MI : *MBB) {
2180b57cec5SDimitry Andric       if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
2190b57cec5SDimitry Andric           MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
2200b57cec5SDimitry Andric         RegsAssigned |= processDef(MI.getOperand(0));
2210b57cec5SDimitry Andric 
2225f757f3fSDimitry Andric       if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) {
2235f757f3fSDimitry Andric         if (!PreallocateSGPRSpillVGPRs)
2245f757f3fSDimitry Andric           continue;
2255f757f3fSDimitry Andric         RegsAssigned |= processDef(MI.getOperand(0));
2265f757f3fSDimitry Andric       }
2275f757f3fSDimitry Andric 
228fe6060f1SDimitry Andric       if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
229*0fca6ea1SDimitry Andric           MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) {
230fe6060f1SDimitry Andric         LLVM_DEBUG(printWWMInfo(MI));
2310b57cec5SDimitry Andric         InWWM = true;
2320b57cec5SDimitry Andric         continue;
2330b57cec5SDimitry Andric       }
2340b57cec5SDimitry Andric 
235fe6060f1SDimitry Andric       if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM ||
236*0fca6ea1SDimitry Andric           MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) {
237fe6060f1SDimitry Andric         LLVM_DEBUG(printWWMInfo(MI));
2380b57cec5SDimitry Andric         InWWM = false;
2390b57cec5SDimitry Andric       }
2400b57cec5SDimitry Andric 
2410b57cec5SDimitry Andric       if (!InWWM)
2420b57cec5SDimitry Andric         continue;
2430b57cec5SDimitry Andric 
244fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Processing " << MI);
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric       for (MachineOperand &DefOpnd : MI.defs()) {
2470b57cec5SDimitry Andric         RegsAssigned |= processDef(DefOpnd);
2480b57cec5SDimitry Andric       }
2490b57cec5SDimitry Andric     }
2500b57cec5SDimitry Andric   }
2510b57cec5SDimitry Andric 
2520b57cec5SDimitry Andric   if (!RegsAssigned)
2530b57cec5SDimitry Andric     return false;
2540b57cec5SDimitry Andric 
2550b57cec5SDimitry Andric   rewriteRegs(MF);
2560b57cec5SDimitry Andric   return true;
2570b57cec5SDimitry Andric }
258