1bdd1243dSDimitry Andric //===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===//
2bdd1243dSDimitry Andric //
3bdd1243dSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4bdd1243dSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5bdd1243dSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6bdd1243dSDimitry Andric //
7bdd1243dSDimitry Andric //===----------------------------------------------------------------------===//
8bdd1243dSDimitry Andric // This file implements the idea to rewrite undef incoming operand for certain
9bdd1243dSDimitry Andric // PHIs in structurized CFG. This pass only works on IR that has gone through
10bdd1243dSDimitry Andric // StructurizedCFG pass, and this pass has some additional limitation that make
11bdd1243dSDimitry Andric // it can only run after SIAnnotateControlFlow.
12bdd1243dSDimitry Andric //
1306c3fb27SDimitry Andric // To achieve optimal code generation for AMDGPU, we assume that uniformity
14bdd1243dSDimitry Andric // analysis reports the PHI in join block of divergent branch as uniform if
15bdd1243dSDimitry Andric // it has one unique uniform value plus additional undefined/poisoned incoming
16bdd1243dSDimitry Andric // value. That is to say the later compiler pipeline will ensure such PHI always
17bdd1243dSDimitry Andric // return uniform value and ensure it work correctly. Let's take a look at two
18bdd1243dSDimitry Andric // typical patterns in structured CFG that need to be taken care: (In both
19bdd1243dSDimitry Andric // patterns, block %if terminate with divergent branch.)
20bdd1243dSDimitry Andric //
21bdd1243dSDimitry Andric // Pattern A: Block with undefined incoming value dominates defined predecessor
22bdd1243dSDimitry Andric // %if
23bdd1243dSDimitry Andric // | \
24bdd1243dSDimitry Andric // | %then
25bdd1243dSDimitry Andric // | /
26bdd1243dSDimitry Andric // %endif: %phi = phi [%undef, %if], [%uniform, %then]
27bdd1243dSDimitry Andric //
28bdd1243dSDimitry Andric // Pattern B: Block with defined incoming value dominates undefined predecessor
29bdd1243dSDimitry Andric // %if
30bdd1243dSDimitry Andric // | \
31bdd1243dSDimitry Andric // | %then
32bdd1243dSDimitry Andric // | /
33bdd1243dSDimitry Andric // %endif: %phi = phi [%uniform, %if], [%undef, %then]
34bdd1243dSDimitry Andric //
35bdd1243dSDimitry Andric // For pattern A, by reporting %phi as uniform, the later pipeline need to make
36bdd1243dSDimitry Andric // sure it be handled correctly. The backend usually allocates a scalar register
37bdd1243dSDimitry Andric // and if any thread in a wave takes %then path, the scalar register will get
38bdd1243dSDimitry Andric // the %uniform value.
39bdd1243dSDimitry Andric //
40bdd1243dSDimitry Andric // For pattern B, we will replace the undef operand with the other defined value
41bdd1243dSDimitry Andric // in this pass. So the scalar register allocated for such PHI will get correct
42bdd1243dSDimitry Andric // liveness. Without this transformation, the scalar register may be overwritten
43bdd1243dSDimitry Andric // in the %then block.
44bdd1243dSDimitry Andric //
45bdd1243dSDimitry Andric // Limitation note:
46bdd1243dSDimitry Andric // If the join block of divergent threads is a loop header, the pass cannot
47bdd1243dSDimitry Andric // handle it correctly right now. For below case, the undef in %phi should also
48bdd1243dSDimitry Andric // be rewritten. Currently we depend on SIAnnotateControlFlow to split %header
49bdd1243dSDimitry Andric // block to get a separate join block, then we can rewrite the undef correctly.
50bdd1243dSDimitry Andric // %if
51bdd1243dSDimitry Andric // | \
52bdd1243dSDimitry Andric // | %then
53bdd1243dSDimitry Andric // | /
54bdd1243dSDimitry Andric // -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header]
55bdd1243dSDimitry Andric // | |
56bdd1243dSDimitry Andric // \---
57bdd1243dSDimitry Andric
58bdd1243dSDimitry Andric #include "AMDGPU.h"
5906c3fb27SDimitry Andric #include "llvm/Analysis/UniformityAnalysis.h"
60bdd1243dSDimitry Andric #include "llvm/IR/BasicBlock.h"
61bdd1243dSDimitry Andric #include "llvm/IR/Constants.h"
62bdd1243dSDimitry Andric #include "llvm/IR/Dominators.h"
63bdd1243dSDimitry Andric #include "llvm/IR/Instructions.h"
64bdd1243dSDimitry Andric #include "llvm/InitializePasses.h"
65bdd1243dSDimitry Andric
66bdd1243dSDimitry Andric using namespace llvm;
67bdd1243dSDimitry Andric
68bdd1243dSDimitry Andric #define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi"
69bdd1243dSDimitry Andric
70bdd1243dSDimitry Andric namespace {
71bdd1243dSDimitry Andric
72*5f757f3fSDimitry Andric class AMDGPURewriteUndefForPHILegacy : public FunctionPass {
73bdd1243dSDimitry Andric public:
74bdd1243dSDimitry Andric static char ID;
AMDGPURewriteUndefForPHILegacy()75*5f757f3fSDimitry Andric AMDGPURewriteUndefForPHILegacy() : FunctionPass(ID) {
76*5f757f3fSDimitry Andric initializeAMDGPURewriteUndefForPHILegacyPass(*PassRegistry::getPassRegistry());
77bdd1243dSDimitry Andric }
78bdd1243dSDimitry Andric bool runOnFunction(Function &F) override;
getPassName() const79bdd1243dSDimitry Andric StringRef getPassName() const override {
80bdd1243dSDimitry Andric return "AMDGPU Rewrite Undef for PHI";
81bdd1243dSDimitry Andric }
82bdd1243dSDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const83bdd1243dSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
8406c3fb27SDimitry Andric AU.addRequired<UniformityInfoWrapperPass>();
85bdd1243dSDimitry Andric AU.addRequired<DominatorTreeWrapperPass>();
86bdd1243dSDimitry Andric
87bdd1243dSDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>();
88bdd1243dSDimitry Andric AU.setPreservesCFG();
89bdd1243dSDimitry Andric }
90bdd1243dSDimitry Andric };
91bdd1243dSDimitry Andric
92bdd1243dSDimitry Andric } // end anonymous namespace
93*5f757f3fSDimitry Andric char AMDGPURewriteUndefForPHILegacy::ID = 0;
94bdd1243dSDimitry Andric
95*5f757f3fSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
96bdd1243dSDimitry Andric "Rewrite undef for PHI", false, false)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)9706c3fb27SDimitry Andric INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
98bdd1243dSDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
99*5f757f3fSDimitry Andric INITIALIZE_PASS_END(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
100bdd1243dSDimitry Andric "Rewrite undef for PHI", false, false)
101bdd1243dSDimitry Andric
10206c3fb27SDimitry Andric bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT) {
103bdd1243dSDimitry Andric bool Changed = false;
104bdd1243dSDimitry Andric SmallVector<PHINode *> ToBeDeleted;
105bdd1243dSDimitry Andric for (auto &BB : F) {
106bdd1243dSDimitry Andric for (auto &PHI : BB.phis()) {
10706c3fb27SDimitry Andric if (UA.isDivergent(&PHI))
108bdd1243dSDimitry Andric continue;
109bdd1243dSDimitry Andric
110bdd1243dSDimitry Andric // The unique incoming value except undef/poison for the PHI node.
111bdd1243dSDimitry Andric Value *UniqueDefinedIncoming = nullptr;
112bdd1243dSDimitry Andric // The divergent block with defined incoming value that dominates all
113bdd1243dSDimitry Andric // other block with the same incoming value.
114bdd1243dSDimitry Andric BasicBlock *DominateBB = nullptr;
115bdd1243dSDimitry Andric // Predecessors with undefined incoming value (excluding loop backedge).
116bdd1243dSDimitry Andric SmallVector<BasicBlock *> Undefs;
117bdd1243dSDimitry Andric
118bdd1243dSDimitry Andric for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
119bdd1243dSDimitry Andric Value *Incoming = PHI.getIncomingValue(i);
120bdd1243dSDimitry Andric BasicBlock *IncomingBB = PHI.getIncomingBlock(i);
121bdd1243dSDimitry Andric
122bdd1243dSDimitry Andric if (Incoming == &PHI)
123bdd1243dSDimitry Andric continue;
124bdd1243dSDimitry Andric
125bdd1243dSDimitry Andric if (isa<UndefValue>(Incoming)) {
126bdd1243dSDimitry Andric // Undef from loop backedge will not be replaced.
127bdd1243dSDimitry Andric if (!DT->dominates(&BB, IncomingBB))
128bdd1243dSDimitry Andric Undefs.push_back(IncomingBB);
129bdd1243dSDimitry Andric continue;
130bdd1243dSDimitry Andric }
131bdd1243dSDimitry Andric
132bdd1243dSDimitry Andric if (!UniqueDefinedIncoming) {
133bdd1243dSDimitry Andric UniqueDefinedIncoming = Incoming;
134bdd1243dSDimitry Andric DominateBB = IncomingBB;
135bdd1243dSDimitry Andric } else if (Incoming == UniqueDefinedIncoming) {
136bdd1243dSDimitry Andric // Update DominateBB if necessary.
137bdd1243dSDimitry Andric if (DT->dominates(IncomingBB, DominateBB))
138bdd1243dSDimitry Andric DominateBB = IncomingBB;
139bdd1243dSDimitry Andric } else {
140bdd1243dSDimitry Andric UniqueDefinedIncoming = nullptr;
141bdd1243dSDimitry Andric break;
142bdd1243dSDimitry Andric }
143bdd1243dSDimitry Andric }
144bdd1243dSDimitry Andric // We only need to replace the undef for the PHI which is merging
145bdd1243dSDimitry Andric // defined/undefined values from divergent threads.
146bdd1243dSDimitry Andric // TODO: We should still be able to replace undef value if the unique
147bdd1243dSDimitry Andric // value is a Constant.
148bdd1243dSDimitry Andric if (!UniqueDefinedIncoming || Undefs.empty() ||
14906c3fb27SDimitry Andric !UA.isDivergent(DominateBB->getTerminator()))
150bdd1243dSDimitry Andric continue;
151bdd1243dSDimitry Andric
152bdd1243dSDimitry Andric // We only replace the undef when DominateBB truly dominates all the
153bdd1243dSDimitry Andric // other predecessors with undefined incoming value. Make sure DominateBB
154bdd1243dSDimitry Andric // dominates BB so that UniqueDefinedIncoming is available in BB and
155bdd1243dSDimitry Andric // afterwards.
156bdd1243dSDimitry Andric if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) {
157bdd1243dSDimitry Andric return DT->dominates(DominateBB, UD);
158bdd1243dSDimitry Andric })) {
159bdd1243dSDimitry Andric PHI.replaceAllUsesWith(UniqueDefinedIncoming);
160bdd1243dSDimitry Andric ToBeDeleted.push_back(&PHI);
161bdd1243dSDimitry Andric Changed = true;
162bdd1243dSDimitry Andric }
163bdd1243dSDimitry Andric }
164bdd1243dSDimitry Andric }
165bdd1243dSDimitry Andric
166bdd1243dSDimitry Andric for (auto *PHI : ToBeDeleted)
167bdd1243dSDimitry Andric PHI->eraseFromParent();
168bdd1243dSDimitry Andric
169bdd1243dSDimitry Andric return Changed;
170bdd1243dSDimitry Andric }
171bdd1243dSDimitry Andric
runOnFunction(Function & F)172*5f757f3fSDimitry Andric bool AMDGPURewriteUndefForPHILegacy::runOnFunction(Function &F) {
17306c3fb27SDimitry Andric UniformityInfo &UA =
17406c3fb27SDimitry Andric getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
175bdd1243dSDimitry Andric DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
17606c3fb27SDimitry Andric return rewritePHIs(F, UA, DT);
177bdd1243dSDimitry Andric }
178bdd1243dSDimitry Andric
179*5f757f3fSDimitry Andric PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)180*5f757f3fSDimitry Andric AMDGPURewriteUndefForPHIPass::run(Function &F, FunctionAnalysisManager &AM) {
181*5f757f3fSDimitry Andric UniformityInfo &UA = AM.getResult<UniformityInfoAnalysis>(F);
182*5f757f3fSDimitry Andric DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
183*5f757f3fSDimitry Andric bool Changed = rewritePHIs(F, UA, DT);
184*5f757f3fSDimitry Andric if (Changed) {
185*5f757f3fSDimitry Andric PreservedAnalyses PA;
186*5f757f3fSDimitry Andric PA.preserveSet<CFGAnalyses>();
187*5f757f3fSDimitry Andric return PA;
188*5f757f3fSDimitry Andric }
189*5f757f3fSDimitry Andric
190*5f757f3fSDimitry Andric return PreservedAnalyses::all();
191*5f757f3fSDimitry Andric }
192*5f757f3fSDimitry Andric
createAMDGPURewriteUndefForPHILegacyPass()193*5f757f3fSDimitry Andric FunctionPass *llvm::createAMDGPURewriteUndefForPHILegacyPass() {
194*5f757f3fSDimitry Andric return new AMDGPURewriteUndefForPHILegacy();
195bdd1243dSDimitry Andric }
196