//===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file implements rewriting of undef incoming operands for certain PHIs
// in a structurized CFG. The pass only works on IR that has gone through the
// StructurizeCFG pass, and it has additional limitations that mean it can only
// run after SIAnnotateControlFlow.
//
// To achieve optimal code generation for AMDGPU, we assume that divergence
// analysis reports a PHI in the join block of a divergent branch as uniform if
// it has one unique uniform incoming value plus additional undefined/poisoned
// incoming values. That is to say, the later compiler pipeline will ensure that
// such a PHI always returns a uniform value and is handled correctly. Let's
// take a look at two typical patterns in a structurized CFG that need to be
// taken care of. (In both patterns, block %if terminates with a divergent
// branch.)
//
// Pattern A: Block with undefined incoming value dominates defined predecessor
//  %if
//  | \
//  | %then
//  | /
//  %endif: %phi = phi [%undef, %if], [%uniform, %then]
//
// Pattern B: Block with defined incoming value dominates undefined predecessor
//  %if
//  | \
//  | %then
//  | /
//  %endif: %phi = phi [%uniform, %if], [%undef, %then]
//
// For pattern A, by reporting %phi as uniform, the later pipeline needs to
// make sure it is handled correctly. The backend usually allocates a scalar
// register, and if any thread in a wave takes the %then path, the scalar
// register will get the %uniform value.
//
// For pattern B, this pass replaces the undef operand with the other, defined
// value, so the scalar register allocated for such a PHI gets correct
// liveness. Without this transformation, the scalar register may be
// overwritten in the %then block.
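//
// As a minimal sketch of pattern B (value, block, and type names here are
// illustrative), a PHI such as
//   %endif: %phi = phi i32 [ %uniform, %if ], [ undef, %then ]
// is handled by replacing every use of %phi with %uniform and erasing the PHI,
// which has the same effect as rewriting the undef operand to %uniform.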
//
// Limitation note:
// If the join block of divergent threads is a loop header, the pass cannot
// handle it correctly right now. In the case below, the undef in %phi should
// also be rewritten. Currently we depend on SIAnnotateControlFlow to split the
// %header block to get a separate join block, and then we can rewrite the
// undef correctly.
//     %if
//     | \
//     | %then
//     | /
// -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header]
// |   |
// \---
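//
// As an illustrative sketch (block names are hypothetical), after such a split
// the divergent join is a separate block that is not the loop header, so the
// PHI there matches pattern B and its undef can be rewritten:
//     %if
//     | \
//     | %then
//     | /
//     %join: %phi = phi [%uniform, %if], [%undef, %then]
//     |
// -> %header: ... (loop-carried values keep their own PHIs here)
// |   |
// \---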

#include "AMDGPU.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi"

namespace {

class AMDGPURewriteUndefForPHI : public FunctionPass {
public:
  static char ID;
  AMDGPURewriteUndefForPHI() : FunctionPass(ID) {
    initializeAMDGPURewriteUndefForPHIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnFunction(Function &F) override;
  StringRef getPassName() const override {
    return "AMDGPU Rewrite Undef for PHI";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LegacyDivergenceAnalysis>();
    AU.addRequired<DominatorTreeWrapperPass>();

    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<LegacyDivergenceAnalysis>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace
char AMDGPURewriteUndefForPHI::ID = 0;

INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
                      "Rewrite undef for PHI", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
                    "Rewrite undef for PHI", false, false)

bool rewritePHIs(Function &F, LegacyDivergenceAnalysis *DA, DominatorTree *DT) {
  bool Changed = false;
  SmallVector<PHINode *> ToBeDeleted;
  for (auto &BB : F) {
    for (auto &PHI : BB.phis()) {
      if (DA->isDivergent(&PHI))
        continue;

      // The unique incoming value of the PHI node, excluding undef/poison.
      Value *UniqueDefinedIncoming = nullptr;
      // The divergent block with a defined incoming value that dominates all
      // other blocks with the same incoming value.
      BasicBlock *DominateBB = nullptr;
      // Predecessors with an undefined incoming value (excluding loop
      // backedges).
      SmallVector<BasicBlock *> Undefs;

      for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
        Value *Incoming = PHI.getIncomingValue(i);
        BasicBlock *IncomingBB = PHI.getIncomingBlock(i);

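        // An incoming value that is the PHI itself (typically via a loop
        // backedge) neither supplies a defined value nor is an undef we could
        // rewrite, so skip it.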
        if (Incoming == &PHI)
          continue;

        if (isa<UndefValue>(Incoming)) {
          // Undef from loop backedge will not be replaced.
          if (!DT->dominates(&BB, IncomingBB))
            Undefs.push_back(IncomingBB);
          continue;
        }

        if (!UniqueDefinedIncoming) {
          UniqueDefinedIncoming = Incoming;
          DominateBB = IncomingBB;
        } else if (Incoming == UniqueDefinedIncoming) {
          // Update DominateBB if necessary.
          if (DT->dominates(IncomingBB, DominateBB))
            DominateBB = IncomingBB;
        } else {
          UniqueDefinedIncoming = nullptr;
          break;
        }
      }
      // We only need to replace the undef for a PHI which is merging
      // defined/undefined values from divergent threads.
      // TODO: We should still be able to replace the undef value if the
      // unique value is a Constant.
      if (!UniqueDefinedIncoming || Undefs.empty() ||
          !DA->isDivergent(DominateBB->getTerminator()))
        continue;

      // We only replace the undef when DominateBB truly dominates all the
      // other predecessors with an undefined incoming value. Make sure
      // DominateBB dominates BB so that UniqueDefinedIncoming is available in
      // BB and afterwards.
      if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) {
            return DT->dominates(DominateBB, UD);
          })) {
        PHI.replaceAllUsesWith(UniqueDefinedIncoming);
        ToBeDeleted.push_back(&PHI);
        Changed = true;
      }
    }
  }

  for (auto *PHI : ToBeDeleted)
    PHI->eraseFromParent();

  return Changed;
}

bool AMDGPURewriteUndefForPHI::runOnFunction(Function &F) {
  LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>();
  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  return rewritePHIs(F, DA, DT);
}

FunctionPass *llvm::createAMDGPURewriteUndefForPHIPass() {
  return new AMDGPURewriteUndefForPHI();
}