1 //===-- SILowerWWMCopies.cpp - Lower Copies after regalloc ---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Lowering the WWM_COPY instructions for various register classes. 11 /// AMDGPU target generates WWM_COPY instruction to differentiate WWM 12 /// copy from COPY. This pass generates the necessary exec mask manipulation 13 /// instructions to replicate 'Whole Wave Mode' and lowers WWM_COPY back to 14 /// COPY. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "SILowerWWMCopies.h" 19 #include "AMDGPU.h" 20 #include "GCNSubtarget.h" 21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 22 #include "SIMachineFunctionInfo.h" 23 #include "llvm/CodeGen/LiveIntervals.h" 24 #include "llvm/CodeGen/MachineFunctionPass.h" 25 #include "llvm/CodeGen/VirtRegMap.h" 26 #include "llvm/InitializePasses.h" 27 28 using namespace llvm; 29 30 #define DEBUG_TYPE "si-lower-wwm-copies" 31 32 namespace { 33 34 class SILowerWWMCopies { 35 public: 36 SILowerWWMCopies(LiveIntervals *LIS, SlotIndexes *SI, VirtRegMap *VRM) 37 : LIS(LIS), Indexes(SI), VRM(VRM) {} 38 bool run(MachineFunction &MF); 39 40 private: 41 bool isSCCLiveAtMI(const MachineInstr &MI); 42 void addToWWMSpills(MachineFunction &MF, Register Reg); 43 44 LiveIntervals *LIS; 45 SlotIndexes *Indexes; 46 VirtRegMap *VRM; 47 const SIRegisterInfo *TRI; 48 const MachineRegisterInfo *MRI; 49 SIMachineFunctionInfo *MFI; 50 }; 51 52 class SILowerWWMCopiesLegacy : public MachineFunctionPass { 53 public: 54 static char ID; 55 56 SILowerWWMCopiesLegacy() : MachineFunctionPass(ID) { 57 initializeSILowerWWMCopiesLegacyPass(*PassRegistry::getPassRegistry()); 58 } 59 60 bool runOnMachineFunction(MachineFunction &MF) override; 61 62 StringRef getPassName() const override { return "SI Lower WWM Copies"; } 63 64 void getAnalysisUsage(AnalysisUsage &AU) const override { 65 AU.addUsedIfAvailable<LiveIntervalsWrapperPass>(); 66 AU.addUsedIfAvailable<SlotIndexesWrapperPass>(); 67 AU.addUsedIfAvailable<VirtRegMapWrapperLegacy>(); 68 AU.setPreservesAll(); 69 MachineFunctionPass::getAnalysisUsage(AU); 70 } 71 }; 72 73 } // End anonymous namespace. 74 75 INITIALIZE_PASS_BEGIN(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies", 76 false, false) 77 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) 78 INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy) 79 INITIALIZE_PASS_END(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies", 80 false, false) 81 82 char SILowerWWMCopiesLegacy::ID = 0; 83 84 char &llvm::SILowerWWMCopiesLegacyID = SILowerWWMCopiesLegacy::ID; 85 86 bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) { 87 // We can't determine the liveness info if LIS isn't available. Early return 88 // in that case and always assume SCC is live. 89 if (!LIS) 90 return true; 91 92 LiveRange &LR = 93 LIS->getRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI)); 94 SlotIndex Idx = LIS->getInstructionIndex(MI); 95 return LR.liveAt(Idx); 96 } 97 98 // If \p Reg is assigned with a physical VGPR, add the latter into wwm-spills 99 // for preserving its entire lanes at function prolog/epilog. 100 void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) { 101 if (Reg.isPhysical()) 102 return; 103 104 // FIXME: VRM may be null here. 105 MCRegister PhysReg = VRM->getPhys(Reg); 106 assert(PhysReg && "should have allocated a physical register"); 107 108 MFI->allocateWWMSpill(MF, PhysReg); 109 } 110 111 bool SILowerWWMCopiesLegacy::runOnMachineFunction(MachineFunction &MF) { 112 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); 113 auto *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; 114 115 auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>(); 116 auto *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr; 117 118 auto *VRMWrapper = getAnalysisIfAvailable<VirtRegMapWrapperLegacy>(); 119 auto *VRM = VRMWrapper ? &VRMWrapper->getVRM() : nullptr; 120 121 SILowerWWMCopies Impl(LIS, Indexes, VRM); 122 return Impl.run(MF); 123 } 124 125 PreservedAnalyses 126 SILowerWWMCopiesPass::run(MachineFunction &MF, 127 MachineFunctionAnalysisManager &MFAM) { 128 auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF); 129 auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(MF); 130 auto *VRM = MFAM.getCachedResult<VirtRegMapAnalysis>(MF); 131 132 SILowerWWMCopies Impl(LIS, Indexes, VRM); 133 Impl.run(MF); 134 return PreservedAnalyses::all(); 135 } 136 137 bool SILowerWWMCopies::run(MachineFunction &MF) { 138 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 139 const SIInstrInfo *TII = ST.getInstrInfo(); 140 141 MFI = MF.getInfo<SIMachineFunctionInfo>(); 142 TRI = ST.getRegisterInfo(); 143 MRI = &MF.getRegInfo(); 144 145 if (!MFI->hasVRegFlags()) 146 return false; 147 148 bool Changed = false; 149 for (MachineBasicBlock &MBB : MF) { 150 for (MachineInstr &MI : MBB) { 151 if (MI.getOpcode() != AMDGPU::WWM_COPY) 152 continue; 153 154 // TODO: Club adjacent WWM ops between same exec save/restore 155 assert(TII->isVGPRCopy(MI)); 156 157 // For WWM vector copies, manipulate the exec mask around the copy 158 // instruction. 159 const DebugLoc &DL = MI.getDebugLoc(); 160 MachineBasicBlock::iterator InsertPt = MI.getIterator(); 161 Register RegForExecCopy = MFI->getSGPRForEXECCopy(); 162 TII->insertScratchExecCopy(MF, MBB, InsertPt, DL, RegForExecCopy, 163 isSCCLiveAtMI(MI), Indexes); 164 TII->restoreExec(MF, MBB, ++InsertPt, DL, RegForExecCopy, Indexes); 165 addToWWMSpills(MF, MI.getOperand(0).getReg()); 166 LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI); 167 168 // Lower WWM_COPY back to COPY 169 MI.setDesc(TII->get(AMDGPU::COPY)); 170 Changed |= true; 171 } 172 } 173 174 return Changed; 175 } 176