xref: /llvm-project/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp (revision a343b8e595d56bde91800aeaa7826cbed4e0a18d)
1 //===-- SILowerWWMCopies.cpp - Lower Copies after regalloc ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Lowering the WWM_COPY instructions for various register classes.
11 /// AMDGPU target generates WWM_COPY instruction to differentiate WWM
12 /// copy from COPY. This pass generates the necessary exec mask manipulation
13 /// instructions to replicate 'Whole Wave Mode' and lowers WWM_COPY back to
14 /// COPY.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "SILowerWWMCopies.h"
19 #include "AMDGPU.h"
20 #include "GCNSubtarget.h"
21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22 #include "SIMachineFunctionInfo.h"
23 #include "llvm/CodeGen/LiveIntervals.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/VirtRegMap.h"
26 #include "llvm/InitializePasses.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "si-lower-wwm-copies"
31 
32 namespace {
33 
34 class SILowerWWMCopies {
35 public:
36   SILowerWWMCopies(LiveIntervals *LIS, SlotIndexes *SI, VirtRegMap *VRM)
37       : LIS(LIS), Indexes(SI), VRM(VRM) {}
38   bool run(MachineFunction &MF);
39 
40 private:
41   bool isSCCLiveAtMI(const MachineInstr &MI);
42   void addToWWMSpills(MachineFunction &MF, Register Reg);
43 
44   LiveIntervals *LIS;
45   SlotIndexes *Indexes;
46   VirtRegMap *VRM;
47   const SIRegisterInfo *TRI;
48   const MachineRegisterInfo *MRI;
49   SIMachineFunctionInfo *MFI;
50 };
51 
52 class SILowerWWMCopiesLegacy : public MachineFunctionPass {
53 public:
54   static char ID;
55 
56   SILowerWWMCopiesLegacy() : MachineFunctionPass(ID) {
57     initializeSILowerWWMCopiesLegacyPass(*PassRegistry::getPassRegistry());
58   }
59 
60   bool runOnMachineFunction(MachineFunction &MF) override;
61 
62   StringRef getPassName() const override { return "SI Lower WWM Copies"; }
63 
64   void getAnalysisUsage(AnalysisUsage &AU) const override {
65     AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
66     AU.addUsedIfAvailable<SlotIndexesWrapperPass>();
67     AU.addUsedIfAvailable<VirtRegMapWrapperLegacy>();
68     AU.setPreservesAll();
69     MachineFunctionPass::getAnalysisUsage(AU);
70   }
71 };
72 
73 } // End anonymous namespace.
74 
75 INITIALIZE_PASS_BEGIN(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies",
76                       false, false)
77 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
78 INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
79 INITIALIZE_PASS_END(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies",
80                     false, false)
81 
82 char SILowerWWMCopiesLegacy::ID = 0;
83 
84 char &llvm::SILowerWWMCopiesLegacyID = SILowerWWMCopiesLegacy::ID;
85 
86 bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) {
87   // We can't determine the liveness info if LIS isn't available. Early return
88   // in that case and always assume SCC is live.
89   if (!LIS)
90     return true;
91 
92   LiveRange &LR =
93       LIS->getRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI));
94   SlotIndex Idx = LIS->getInstructionIndex(MI);
95   return LR.liveAt(Idx);
96 }
97 
98 // If \p Reg is assigned with a physical VGPR, add the latter into wwm-spills
99 // for preserving its entire lanes at function prolog/epilog.
100 void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) {
101   if (Reg.isPhysical())
102     return;
103 
104   // FIXME: VRM may be null here.
105   MCRegister PhysReg = VRM->getPhys(Reg);
106   assert(PhysReg && "should have allocated a physical register");
107 
108   MFI->allocateWWMSpill(MF, PhysReg);
109 }
110 
111 bool SILowerWWMCopiesLegacy::runOnMachineFunction(MachineFunction &MF) {
112   auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
113   auto *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
114 
115   auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
116   auto *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
117 
118   auto *VRMWrapper = getAnalysisIfAvailable<VirtRegMapWrapperLegacy>();
119   auto *VRM = VRMWrapper ? &VRMWrapper->getVRM() : nullptr;
120 
121   SILowerWWMCopies Impl(LIS, Indexes, VRM);
122   return Impl.run(MF);
123 }
124 
125 PreservedAnalyses
126 SILowerWWMCopiesPass::run(MachineFunction &MF,
127                           MachineFunctionAnalysisManager &MFAM) {
128   auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
129   auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(MF);
130   auto *VRM = MFAM.getCachedResult<VirtRegMapAnalysis>(MF);
131 
132   SILowerWWMCopies Impl(LIS, Indexes, VRM);
133   Impl.run(MF);
134   return PreservedAnalyses::all();
135 }
136 
137 bool SILowerWWMCopies::run(MachineFunction &MF) {
138   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
139   const SIInstrInfo *TII = ST.getInstrInfo();
140 
141   MFI = MF.getInfo<SIMachineFunctionInfo>();
142   TRI = ST.getRegisterInfo();
143   MRI = &MF.getRegInfo();
144 
145   if (!MFI->hasVRegFlags())
146     return false;
147 
148   bool Changed = false;
149   for (MachineBasicBlock &MBB : MF) {
150     for (MachineInstr &MI : MBB) {
151       if (MI.getOpcode() != AMDGPU::WWM_COPY)
152         continue;
153 
154       // TODO: Club adjacent WWM ops between same exec save/restore
155       assert(TII->isVGPRCopy(MI));
156 
157       // For WWM vector copies, manipulate the exec mask around the copy
158       // instruction.
159       const DebugLoc &DL = MI.getDebugLoc();
160       MachineBasicBlock::iterator InsertPt = MI.getIterator();
161       Register RegForExecCopy = MFI->getSGPRForEXECCopy();
162       TII->insertScratchExecCopy(MF, MBB, InsertPt, DL, RegForExecCopy,
163                                  isSCCLiveAtMI(MI), Indexes);
164       TII->restoreExec(MF, MBB, ++InsertPt, DL, RegForExecCopy, Indexes);
165       addToWWMSpills(MF, MI.getOperand(0).getReg());
166       LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI);
167 
168       // Lower WWM_COPY back to COPY
169       MI.setDesc(TII->get(AMDGPU::COPY));
170       Changed |= true;
171     }
172   }
173 
174   return Changed;
175 }
176