1 //===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// This file implements the idea of rewriting undef incoming operands for
// certain PHIs in a structurized CFG. This pass only works on IR that has gone
// through the StructurizeCFG pass, and it has an additional limitation that
// means it can only run after SIAnnotateControlFlow.
12 //
// To achieve optimal code generation for AMDGPU, we assume that divergence
// analysis reports a PHI in the join block of a divergent branch as uniform if
// it has one unique uniform value plus additional undefined/poisoned incoming
// values. That is to say, the later compiler pipeline must ensure that such a
// PHI always returns a uniform value and that it works correctly. Let's take a
// look at two typical patterns in a structured CFG that need to be taken care
// of. (In both patterns, block %if terminates with a divergent branch.)
20 //
21 // Pattern A: Block with undefined incoming value dominates defined predecessor
22 // %if
23 // | \
24 // | %then
25 // | /
26 // %endif: %phi = phi [%undef, %if], [%uniform, %then]
27 //
28 // Pattern B: Block with defined incoming value dominates undefined predecessor
29 // %if
30 // | \
31 // | %then
32 // | /
33 // %endif: %phi = phi [%uniform, %if], [%undef, %then]
34 //
// For pattern A, by reporting %phi as uniform, the later pipeline needs to
// make sure it is handled correctly. The backend usually allocates a scalar
// register and, if any thread in a wave takes the %then path, the scalar
// register will get the %uniform value.
//
// For pattern B, we will replace the undef operand with the other defined value
// in this pass, so the scalar register allocated for such a PHI will get the
// correct liveness. Without this transformation, the scalar register may be
// overwritten in the %then block.
44 //
// Limitation note:
// If the join block of divergent threads is a loop header, the pass cannot
// handle it correctly right now. In the case below, the undef in %phi should
// also be rewritten. Currently we depend on SIAnnotateControlFlow to split the
// %header block to get a separate join block; then we can rewrite the undef
// correctly.
//     %if
//     | \
//     | %then
//     | /
// -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header]
// |   |
// \---
57
58 #include "AMDGPU.h"
59 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
60 #include "llvm/IR/BasicBlock.h"
61 #include "llvm/IR/Constants.h"
62 #include "llvm/IR/Dominators.h"
63 #include "llvm/IR/Instructions.h"
64 #include "llvm/InitializePasses.h"
65
66 using namespace llvm;
67
68 #define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi"
69
70 namespace {
71
72 class AMDGPURewriteUndefForPHI : public FunctionPass {
73 public:
74 static char ID;
AMDGPURewriteUndefForPHI()75 AMDGPURewriteUndefForPHI() : FunctionPass(ID) {
76 initializeAMDGPURewriteUndefForPHIPass(*PassRegistry::getPassRegistry());
77 }
78 bool runOnFunction(Function &F) override;
getPassName() const79 StringRef getPassName() const override {
80 return "AMDGPU Rewrite Undef for PHI";
81 }
82
getAnalysisUsage(AnalysisUsage & AU) const83 void getAnalysisUsage(AnalysisUsage &AU) const override {
84 AU.addRequired<LegacyDivergenceAnalysis>();
85 AU.addRequired<DominatorTreeWrapperPass>();
86
87 AU.addPreserved<DominatorTreeWrapperPass>();
88 AU.addPreserved<LegacyDivergenceAnalysis>();
89 AU.setPreservesCFG();
90 }
91 };
92
93 } // end anonymous namespace
94 char AMDGPURewriteUndefForPHI::ID = 0;
95
96 INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
97 "Rewrite undef for PHI", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)98 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
99 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
100 INITIALIZE_PASS_END(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
101 "Rewrite undef for PHI", false, false)
102
103 bool rewritePHIs(Function &F, LegacyDivergenceAnalysis *DA, DominatorTree *DT) {
104 bool Changed = false;
105 SmallVector<PHINode *> ToBeDeleted;
106 for (auto &BB : F) {
107 for (auto &PHI : BB.phis()) {
108 if (DA->isDivergent(&PHI))
109 continue;
110
111 // The unique incoming value except undef/poison for the PHI node.
112 Value *UniqueDefinedIncoming = nullptr;
113 // The divergent block with defined incoming value that dominates all
114 // other block with the same incoming value.
115 BasicBlock *DominateBB = nullptr;
116 // Predecessors with undefined incoming value (excluding loop backedge).
117 SmallVector<BasicBlock *> Undefs;
118
119 for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
120 Value *Incoming = PHI.getIncomingValue(i);
121 BasicBlock *IncomingBB = PHI.getIncomingBlock(i);
122
123 if (Incoming == &PHI)
124 continue;
125
126 if (isa<UndefValue>(Incoming)) {
127 // Undef from loop backedge will not be replaced.
128 if (!DT->dominates(&BB, IncomingBB))
129 Undefs.push_back(IncomingBB);
130 continue;
131 }
132
133 if (!UniqueDefinedIncoming) {
134 UniqueDefinedIncoming = Incoming;
135 DominateBB = IncomingBB;
136 } else if (Incoming == UniqueDefinedIncoming) {
137 // Update DominateBB if necessary.
138 if (DT->dominates(IncomingBB, DominateBB))
139 DominateBB = IncomingBB;
140 } else {
141 UniqueDefinedIncoming = nullptr;
142 break;
143 }
144 }
145 // We only need to replace the undef for the PHI which is merging
146 // defined/undefined values from divergent threads.
147 // TODO: We should still be able to replace undef value if the unique
148 // value is a Constant.
149 if (!UniqueDefinedIncoming || Undefs.empty() ||
150 !DA->isDivergent(DominateBB->getTerminator()))
151 continue;
152
153 // We only replace the undef when DominateBB truly dominates all the
154 // other predecessors with undefined incoming value. Make sure DominateBB
155 // dominates BB so that UniqueDefinedIncoming is available in BB and
156 // afterwards.
157 if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) {
158 return DT->dominates(DominateBB, UD);
159 })) {
160 PHI.replaceAllUsesWith(UniqueDefinedIncoming);
161 ToBeDeleted.push_back(&PHI);
162 Changed = true;
163 }
164 }
165 }
166
167 for (auto *PHI : ToBeDeleted)
168 PHI->eraseFromParent();
169
170 return Changed;
171 }
172
runOnFunction(Function & F)173 bool AMDGPURewriteUndefForPHI::runOnFunction(Function &F) {
174 LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>();
175 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
176 return rewritePHIs(F, DA, DT);
177 }
178
createAMDGPURewriteUndefForPHIPass()179 FunctionPass *llvm::createAMDGPURewriteUndefForPHIPass() {
180 return new AMDGPURewriteUndefForPHI();
181 }
182