xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
15f757f3fSDimitry Andric //===-- AMDGPUGlobalISelDivergenceLowering.cpp ----------------------------===//
25f757f3fSDimitry Andric //
35f757f3fSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45f757f3fSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55f757f3fSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65f757f3fSDimitry Andric //
75f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
85f757f3fSDimitry Andric //
95f757f3fSDimitry Andric /// \file
/// GlobalISel pass that selects divergent i1 phis as lane mask phis.
/// Lane mask merging uses the same algorithm as SDAG in SILowerI1Copies.
/// Handles all cases of temporal divergence.
/// For divergent non-phi i1 and uniform i1 uses outside of the cycle, this
/// pass currently depends on LCSSA to insert phis with one incoming value.
155f757f3fSDimitry Andric //
165f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
175f757f3fSDimitry Andric 
185f757f3fSDimitry Andric #include "AMDGPU.h"
19*0fca6ea1SDimitry Andric #include "SILowerI1Copies.h"
20*0fca6ea1SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
215f757f3fSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
22*0fca6ea1SDimitry Andric #include "llvm/CodeGen/MachineUniformityAnalysis.h"
23*0fca6ea1SDimitry Andric #include "llvm/InitializePasses.h"
245f757f3fSDimitry Andric 
255f757f3fSDimitry Andric #define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
265f757f3fSDimitry Andric 
275f757f3fSDimitry Andric using namespace llvm;
285f757f3fSDimitry Andric 
295f757f3fSDimitry Andric namespace {
305f757f3fSDimitry Andric 
315f757f3fSDimitry Andric class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
325f757f3fSDimitry Andric public:
335f757f3fSDimitry Andric   static char ID;
345f757f3fSDimitry Andric 
355f757f3fSDimitry Andric public:
365f757f3fSDimitry Andric   AMDGPUGlobalISelDivergenceLowering() : MachineFunctionPass(ID) {
375f757f3fSDimitry Andric     initializeAMDGPUGlobalISelDivergenceLoweringPass(
385f757f3fSDimitry Andric         *PassRegistry::getPassRegistry());
395f757f3fSDimitry Andric   }
405f757f3fSDimitry Andric 
415f757f3fSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
425f757f3fSDimitry Andric 
435f757f3fSDimitry Andric   StringRef getPassName() const override {
445f757f3fSDimitry Andric     return "AMDGPU GlobalISel divergence lowering";
455f757f3fSDimitry Andric   }
465f757f3fSDimitry Andric 
475f757f3fSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
485f757f3fSDimitry Andric     AU.setPreservesCFG();
49*0fca6ea1SDimitry Andric     AU.addRequired<MachineDominatorTreeWrapperPass>();
50*0fca6ea1SDimitry Andric     AU.addRequired<MachinePostDominatorTreeWrapperPass>();
51*0fca6ea1SDimitry Andric     AU.addRequired<MachineUniformityAnalysisPass>();
525f757f3fSDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
535f757f3fSDimitry Andric   }
545f757f3fSDimitry Andric };
555f757f3fSDimitry Andric 
56*0fca6ea1SDimitry Andric class DivergenceLoweringHelper : public PhiLoweringHelper {
57*0fca6ea1SDimitry Andric public:
58*0fca6ea1SDimitry Andric   DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
59*0fca6ea1SDimitry Andric                            MachinePostDominatorTree *PDT,
60*0fca6ea1SDimitry Andric                            MachineUniformityInfo *MUI);
61*0fca6ea1SDimitry Andric 
62*0fca6ea1SDimitry Andric private:
63*0fca6ea1SDimitry Andric   MachineUniformityInfo *MUI = nullptr;
64*0fca6ea1SDimitry Andric   MachineIRBuilder B;
65*0fca6ea1SDimitry Andric   Register buildRegCopyToLaneMask(Register Reg);
66*0fca6ea1SDimitry Andric 
67*0fca6ea1SDimitry Andric public:
68*0fca6ea1SDimitry Andric   void markAsLaneMask(Register DstReg) const override;
69*0fca6ea1SDimitry Andric   void getCandidatesForLowering(
70*0fca6ea1SDimitry Andric       SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
71*0fca6ea1SDimitry Andric   void collectIncomingValuesFromPhi(
72*0fca6ea1SDimitry Andric       const MachineInstr *MI,
73*0fca6ea1SDimitry Andric       SmallVectorImpl<Incoming> &Incomings) const override;
74*0fca6ea1SDimitry Andric   void replaceDstReg(Register NewReg, Register OldReg,
75*0fca6ea1SDimitry Andric                      MachineBasicBlock *MBB) override;
76*0fca6ea1SDimitry Andric   void buildMergeLaneMasks(MachineBasicBlock &MBB,
77*0fca6ea1SDimitry Andric                            MachineBasicBlock::iterator I, const DebugLoc &DL,
78*0fca6ea1SDimitry Andric                            Register DstReg, Register PrevReg,
79*0fca6ea1SDimitry Andric                            Register CurReg) override;
80*0fca6ea1SDimitry Andric   void constrainAsLaneMask(Incoming &In) override;
81*0fca6ea1SDimitry Andric };
82*0fca6ea1SDimitry Andric 
83*0fca6ea1SDimitry Andric DivergenceLoweringHelper::DivergenceLoweringHelper(
84*0fca6ea1SDimitry Andric     MachineFunction *MF, MachineDominatorTree *DT,
85*0fca6ea1SDimitry Andric     MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
86*0fca6ea1SDimitry Andric     : PhiLoweringHelper(MF, DT, PDT), MUI(MUI), B(*MF) {}
87*0fca6ea1SDimitry Andric 
88*0fca6ea1SDimitry Andric // _(s1) -> SReg_32/64(s1)
89*0fca6ea1SDimitry Andric void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
90*0fca6ea1SDimitry Andric   assert(MRI->getType(DstReg) == LLT::scalar(1));
91*0fca6ea1SDimitry Andric 
92*0fca6ea1SDimitry Andric   if (MRI->getRegClassOrNull(DstReg)) {
93*0fca6ea1SDimitry Andric     if (MRI->constrainRegClass(DstReg, ST->getBoolRC()))
94*0fca6ea1SDimitry Andric       return;
95*0fca6ea1SDimitry Andric     llvm_unreachable("Failed to constrain register class");
96*0fca6ea1SDimitry Andric   }
97*0fca6ea1SDimitry Andric 
98*0fca6ea1SDimitry Andric   MRI->setRegClass(DstReg, ST->getBoolRC());
99*0fca6ea1SDimitry Andric }
100*0fca6ea1SDimitry Andric 
101*0fca6ea1SDimitry Andric void DivergenceLoweringHelper::getCandidatesForLowering(
102*0fca6ea1SDimitry Andric     SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
103*0fca6ea1SDimitry Andric   LLT S1 = LLT::scalar(1);
104*0fca6ea1SDimitry Andric 
105*0fca6ea1SDimitry Andric   // Add divergent i1 phis to the list
106*0fca6ea1SDimitry Andric   for (MachineBasicBlock &MBB : *MF) {
107*0fca6ea1SDimitry Andric     for (MachineInstr &MI : MBB.phis()) {
108*0fca6ea1SDimitry Andric       Register Dst = MI.getOperand(0).getReg();
109*0fca6ea1SDimitry Andric       if (MRI->getType(Dst) == S1 && MUI->isDivergent(Dst))
110*0fca6ea1SDimitry Andric         Vreg1Phis.push_back(&MI);
111*0fca6ea1SDimitry Andric     }
112*0fca6ea1SDimitry Andric   }
113*0fca6ea1SDimitry Andric }
114*0fca6ea1SDimitry Andric 
115*0fca6ea1SDimitry Andric void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
116*0fca6ea1SDimitry Andric     const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
117*0fca6ea1SDimitry Andric   for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
118*0fca6ea1SDimitry Andric     Incomings.emplace_back(MI->getOperand(i).getReg(),
119*0fca6ea1SDimitry Andric                            MI->getOperand(i + 1).getMBB(), Register());
120*0fca6ea1SDimitry Andric   }
121*0fca6ea1SDimitry Andric }
122*0fca6ea1SDimitry Andric 
123*0fca6ea1SDimitry Andric void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
124*0fca6ea1SDimitry Andric                                              MachineBasicBlock *MBB) {
125*0fca6ea1SDimitry Andric   BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg)
126*0fca6ea1SDimitry Andric       .addReg(NewReg);
127*0fca6ea1SDimitry Andric }
128*0fca6ea1SDimitry Andric 
129*0fca6ea1SDimitry Andric // Copy Reg to new lane mask register, insert a copy after instruction that
130*0fca6ea1SDimitry Andric // defines Reg while skipping phis if needed.
131*0fca6ea1SDimitry Andric Register DivergenceLoweringHelper::buildRegCopyToLaneMask(Register Reg) {
132*0fca6ea1SDimitry Andric   Register LaneMask = createLaneMaskReg(MRI, LaneMaskRegAttrs);
133*0fca6ea1SDimitry Andric   MachineInstr *Instr = MRI->getVRegDef(Reg);
134*0fca6ea1SDimitry Andric   MachineBasicBlock *MBB = Instr->getParent();
135*0fca6ea1SDimitry Andric   B.setInsertPt(*MBB, MBB->SkipPHIsAndLabels(std::next(Instr->getIterator())));
136*0fca6ea1SDimitry Andric   B.buildCopy(LaneMask, Reg);
137*0fca6ea1SDimitry Andric   return LaneMask;
138*0fca6ea1SDimitry Andric }
139*0fca6ea1SDimitry Andric 
140*0fca6ea1SDimitry Andric // bb.previous
141*0fca6ea1SDimitry Andric //   %PrevReg = ...
142*0fca6ea1SDimitry Andric //
143*0fca6ea1SDimitry Andric // bb.current
144*0fca6ea1SDimitry Andric //   %CurReg = ...
145*0fca6ea1SDimitry Andric //
146*0fca6ea1SDimitry Andric //   %DstReg - not defined
147*0fca6ea1SDimitry Andric //
148*0fca6ea1SDimitry Andric // -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
149*0fca6ea1SDimitry Andric //
150*0fca6ea1SDimitry Andric // bb.previous
151*0fca6ea1SDimitry Andric //   %PrevReg = ...
152*0fca6ea1SDimitry Andric //   %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
153*0fca6ea1SDimitry Andric //
154*0fca6ea1SDimitry Andric // bb.current
155*0fca6ea1SDimitry Andric //   %CurReg = ...
156*0fca6ea1SDimitry Andric //   %CurRegCopy:sreg_32(s1) = COPY %CurReg
157*0fca6ea1SDimitry Andric //   ...
158*0fca6ea1SDimitry Andric //   %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes 0
159*0fca6ea1SDimitry Andric //   %CurMaskedReg:sreg_32(s1)  = AND %ExecReg, CurRegCopy - inactive lanes to 0
160*0fca6ea1SDimitry Andric //   %DstReg:sreg_32(s1)        = OR %PrevMaskedReg, CurMaskedReg
161*0fca6ea1SDimitry Andric //
162*0fca6ea1SDimitry Andric // DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
163*0fca6ea1SDimitry Andric void DivergenceLoweringHelper::buildMergeLaneMasks(
164*0fca6ea1SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
165*0fca6ea1SDimitry Andric     Register DstReg, Register PrevReg, Register CurReg) {
166*0fca6ea1SDimitry Andric   // DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
167*0fca6ea1SDimitry Andric   // TODO: check if inputs are constants or results of a compare.
168*0fca6ea1SDimitry Andric 
169*0fca6ea1SDimitry Andric   Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg);
170*0fca6ea1SDimitry Andric   Register CurRegCopy = buildRegCopyToLaneMask(CurReg);
171*0fca6ea1SDimitry Andric   Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
172*0fca6ea1SDimitry Andric   Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
173*0fca6ea1SDimitry Andric 
174*0fca6ea1SDimitry Andric   B.setInsertPt(MBB, I);
175*0fca6ea1SDimitry Andric   B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
176*0fca6ea1SDimitry Andric   B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
177*0fca6ea1SDimitry Andric   B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
178*0fca6ea1SDimitry Andric }
179*0fca6ea1SDimitry Andric 
180*0fca6ea1SDimitry Andric // GlobalISel has to constrain S1 incoming taken as-is with lane mask register
181*0fca6ea1SDimitry Andric // class. Insert a copy of Incoming.Reg to new lane mask inside Incoming.Block,
182*0fca6ea1SDimitry Andric // Incoming.Reg becomes that new lane mask.
183*0fca6ea1SDimitry Andric void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) {
184*0fca6ea1SDimitry Andric   B.setInsertPt(*In.Block, In.Block->getFirstTerminator());
185*0fca6ea1SDimitry Andric 
186*0fca6ea1SDimitry Andric   auto Copy = B.buildCopy(LLT::scalar(1), In.Reg);
187*0fca6ea1SDimitry Andric   MRI->setRegClass(Copy.getReg(0), ST->getBoolRC());
188*0fca6ea1SDimitry Andric   In.Reg = Copy.getReg(0);
189*0fca6ea1SDimitry Andric }
190*0fca6ea1SDimitry Andric 
1915f757f3fSDimitry Andric } // End anonymous namespace.
1925f757f3fSDimitry Andric 
1935f757f3fSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
1945f757f3fSDimitry Andric                       "AMDGPU GlobalISel divergence lowering", false, false)
195*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
196*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
197*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
1985f757f3fSDimitry Andric INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
1995f757f3fSDimitry Andric                     "AMDGPU GlobalISel divergence lowering", false, false)
2005f757f3fSDimitry Andric 
2015f757f3fSDimitry Andric char AMDGPUGlobalISelDivergenceLowering::ID = 0;
2025f757f3fSDimitry Andric 
2035f757f3fSDimitry Andric char &llvm::AMDGPUGlobalISelDivergenceLoweringID =
2045f757f3fSDimitry Andric     AMDGPUGlobalISelDivergenceLowering::ID;
2055f757f3fSDimitry Andric 
2065f757f3fSDimitry Andric FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
2075f757f3fSDimitry Andric   return new AMDGPUGlobalISelDivergenceLowering();
2085f757f3fSDimitry Andric }
2095f757f3fSDimitry Andric 
2105f757f3fSDimitry Andric bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
2115f757f3fSDimitry Andric     MachineFunction &MF) {
212*0fca6ea1SDimitry Andric   MachineDominatorTree &DT =
213*0fca6ea1SDimitry Andric       getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
214*0fca6ea1SDimitry Andric   MachinePostDominatorTree &PDT =
215*0fca6ea1SDimitry Andric       getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
216*0fca6ea1SDimitry Andric   MachineUniformityInfo &MUI =
217*0fca6ea1SDimitry Andric       getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
218*0fca6ea1SDimitry Andric 
219*0fca6ea1SDimitry Andric   DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);
220*0fca6ea1SDimitry Andric 
221*0fca6ea1SDimitry Andric   return Helper.lowerPhis();
2225f757f3fSDimitry Andric }
223