//===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file implements the idea of rewriting undef incoming operands for
// certain PHIs in a structurized CFG. This pass only works on IR that has gone
// through the StructurizeCFG pass, and it has an additional limitation that
// means it can only run after SIAnnotateControlFlow.
//
// To achieve optimal code generation for AMDGPU, we assume that divergence
// analysis reports the PHI in the join block of a divergent branch as uniform
// if it has one unique uniform value plus additional undefined/poisoned
// incoming values. That is to say, the later compiler pipeline will ensure
// that such a PHI always returns a uniform value and works correctly. Let's
// take a look at two typical patterns in a structured CFG that need to be
// taken care of: (In both patterns, block %if terminates with a divergent
// branch.)
//
// Pattern A: Block with undefined incoming value dominates defined predecessor
//     %if
//     | \
//     | %then
//     | /
// %endif: %phi = phi [%undef, %if], [%uniform, %then]
//
// Pattern B: Block with defined incoming value dominates undefined predecessor
//     %if
//     | \
//     | %then
//     | /
// %endif: %phi = phi [%uniform, %if], [%undef, %then]
//
// For pattern A, by reporting %phi as uniform, the later pipeline needs to
// make sure it is handled correctly. The backend usually allocates a scalar
// register and, if any thread in a wave takes the %then path, the scalar
// register will get the %uniform value.
//
// For pattern B, we will replace the undef operand with the other defined
// value in this pass. So the scalar register allocated for such a PHI will get
// correct liveness. Without this transformation, the scalar register may be
// overwritten in the %then block.
//
// Limitation note:
// If the join block of divergent threads is a loop header, the pass cannot
// handle it correctly right now. For the case below, the undef in %phi should
// also be rewritten.
Currently we depend on SIAnnotateControlFlow to split %header 49*bdd1243dSDimitry Andric // block to get a separate join block, then we can rewrite the undef correctly. 50*bdd1243dSDimitry Andric // %if 51*bdd1243dSDimitry Andric // | \ 52*bdd1243dSDimitry Andric // | %then 53*bdd1243dSDimitry Andric // | / 54*bdd1243dSDimitry Andric // -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header] 55*bdd1243dSDimitry Andric // | | 56*bdd1243dSDimitry Andric // \--- 57*bdd1243dSDimitry Andric 58*bdd1243dSDimitry Andric #include "AMDGPU.h" 59*bdd1243dSDimitry Andric #include "llvm/Analysis/LegacyDivergenceAnalysis.h" 60*bdd1243dSDimitry Andric #include "llvm/IR/BasicBlock.h" 61*bdd1243dSDimitry Andric #include "llvm/IR/Constants.h" 62*bdd1243dSDimitry Andric #include "llvm/IR/Dominators.h" 63*bdd1243dSDimitry Andric #include "llvm/IR/Instructions.h" 64*bdd1243dSDimitry Andric #include "llvm/InitializePasses.h" 65*bdd1243dSDimitry Andric 66*bdd1243dSDimitry Andric using namespace llvm; 67*bdd1243dSDimitry Andric 68*bdd1243dSDimitry Andric #define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi" 69*bdd1243dSDimitry Andric 70*bdd1243dSDimitry Andric namespace { 71*bdd1243dSDimitry Andric 72*bdd1243dSDimitry Andric class AMDGPURewriteUndefForPHI : public FunctionPass { 73*bdd1243dSDimitry Andric public: 74*bdd1243dSDimitry Andric static char ID; 75*bdd1243dSDimitry Andric AMDGPURewriteUndefForPHI() : FunctionPass(ID) { 76*bdd1243dSDimitry Andric initializeAMDGPURewriteUndefForPHIPass(*PassRegistry::getPassRegistry()); 77*bdd1243dSDimitry Andric } 78*bdd1243dSDimitry Andric bool runOnFunction(Function &F) override; 79*bdd1243dSDimitry Andric StringRef getPassName() const override { 80*bdd1243dSDimitry Andric return "AMDGPU Rewrite Undef for PHI"; 81*bdd1243dSDimitry Andric } 82*bdd1243dSDimitry Andric 83*bdd1243dSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 84*bdd1243dSDimitry Andric AU.addRequired<LegacyDivergenceAnalysis>(); 
85*bdd1243dSDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 86*bdd1243dSDimitry Andric 87*bdd1243dSDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>(); 88*bdd1243dSDimitry Andric AU.addPreserved<LegacyDivergenceAnalysis>(); 89*bdd1243dSDimitry Andric AU.setPreservesCFG(); 90*bdd1243dSDimitry Andric } 91*bdd1243dSDimitry Andric }; 92*bdd1243dSDimitry Andric 93*bdd1243dSDimitry Andric } // end anonymous namespace 94*bdd1243dSDimitry Andric char AMDGPURewriteUndefForPHI::ID = 0; 95*bdd1243dSDimitry Andric 96*bdd1243dSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHI, DEBUG_TYPE, 97*bdd1243dSDimitry Andric "Rewrite undef for PHI", false, false) 98*bdd1243dSDimitry Andric INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) 99*bdd1243dSDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 100*bdd1243dSDimitry Andric INITIALIZE_PASS_END(AMDGPURewriteUndefForPHI, DEBUG_TYPE, 101*bdd1243dSDimitry Andric "Rewrite undef for PHI", false, false) 102*bdd1243dSDimitry Andric 103*bdd1243dSDimitry Andric bool rewritePHIs(Function &F, LegacyDivergenceAnalysis *DA, DominatorTree *DT) { 104*bdd1243dSDimitry Andric bool Changed = false; 105*bdd1243dSDimitry Andric SmallVector<PHINode *> ToBeDeleted; 106*bdd1243dSDimitry Andric for (auto &BB : F) { 107*bdd1243dSDimitry Andric for (auto &PHI : BB.phis()) { 108*bdd1243dSDimitry Andric if (DA->isDivergent(&PHI)) 109*bdd1243dSDimitry Andric continue; 110*bdd1243dSDimitry Andric 111*bdd1243dSDimitry Andric // The unique incoming value except undef/poison for the PHI node. 112*bdd1243dSDimitry Andric Value *UniqueDefinedIncoming = nullptr; 113*bdd1243dSDimitry Andric // The divergent block with defined incoming value that dominates all 114*bdd1243dSDimitry Andric // other block with the same incoming value. 115*bdd1243dSDimitry Andric BasicBlock *DominateBB = nullptr; 116*bdd1243dSDimitry Andric // Predecessors with undefined incoming value (excluding loop backedge). 
117*bdd1243dSDimitry Andric SmallVector<BasicBlock *> Undefs; 118*bdd1243dSDimitry Andric 119*bdd1243dSDimitry Andric for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) { 120*bdd1243dSDimitry Andric Value *Incoming = PHI.getIncomingValue(i); 121*bdd1243dSDimitry Andric BasicBlock *IncomingBB = PHI.getIncomingBlock(i); 122*bdd1243dSDimitry Andric 123*bdd1243dSDimitry Andric if (Incoming == &PHI) 124*bdd1243dSDimitry Andric continue; 125*bdd1243dSDimitry Andric 126*bdd1243dSDimitry Andric if (isa<UndefValue>(Incoming)) { 127*bdd1243dSDimitry Andric // Undef from loop backedge will not be replaced. 128*bdd1243dSDimitry Andric if (!DT->dominates(&BB, IncomingBB)) 129*bdd1243dSDimitry Andric Undefs.push_back(IncomingBB); 130*bdd1243dSDimitry Andric continue; 131*bdd1243dSDimitry Andric } 132*bdd1243dSDimitry Andric 133*bdd1243dSDimitry Andric if (!UniqueDefinedIncoming) { 134*bdd1243dSDimitry Andric UniqueDefinedIncoming = Incoming; 135*bdd1243dSDimitry Andric DominateBB = IncomingBB; 136*bdd1243dSDimitry Andric } else if (Incoming == UniqueDefinedIncoming) { 137*bdd1243dSDimitry Andric // Update DominateBB if necessary. 138*bdd1243dSDimitry Andric if (DT->dominates(IncomingBB, DominateBB)) 139*bdd1243dSDimitry Andric DominateBB = IncomingBB; 140*bdd1243dSDimitry Andric } else { 141*bdd1243dSDimitry Andric UniqueDefinedIncoming = nullptr; 142*bdd1243dSDimitry Andric break; 143*bdd1243dSDimitry Andric } 144*bdd1243dSDimitry Andric } 145*bdd1243dSDimitry Andric // We only need to replace the undef for the PHI which is merging 146*bdd1243dSDimitry Andric // defined/undefined values from divergent threads. 147*bdd1243dSDimitry Andric // TODO: We should still be able to replace undef value if the unique 148*bdd1243dSDimitry Andric // value is a Constant. 
149*bdd1243dSDimitry Andric if (!UniqueDefinedIncoming || Undefs.empty() || 150*bdd1243dSDimitry Andric !DA->isDivergent(DominateBB->getTerminator())) 151*bdd1243dSDimitry Andric continue; 152*bdd1243dSDimitry Andric 153*bdd1243dSDimitry Andric // We only replace the undef when DominateBB truly dominates all the 154*bdd1243dSDimitry Andric // other predecessors with undefined incoming value. Make sure DominateBB 155*bdd1243dSDimitry Andric // dominates BB so that UniqueDefinedIncoming is available in BB and 156*bdd1243dSDimitry Andric // afterwards. 157*bdd1243dSDimitry Andric if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) { 158*bdd1243dSDimitry Andric return DT->dominates(DominateBB, UD); 159*bdd1243dSDimitry Andric })) { 160*bdd1243dSDimitry Andric PHI.replaceAllUsesWith(UniqueDefinedIncoming); 161*bdd1243dSDimitry Andric ToBeDeleted.push_back(&PHI); 162*bdd1243dSDimitry Andric Changed = true; 163*bdd1243dSDimitry Andric } 164*bdd1243dSDimitry Andric } 165*bdd1243dSDimitry Andric } 166*bdd1243dSDimitry Andric 167*bdd1243dSDimitry Andric for (auto *PHI : ToBeDeleted) 168*bdd1243dSDimitry Andric PHI->eraseFromParent(); 169*bdd1243dSDimitry Andric 170*bdd1243dSDimitry Andric return Changed; 171*bdd1243dSDimitry Andric } 172*bdd1243dSDimitry Andric 173*bdd1243dSDimitry Andric bool AMDGPURewriteUndefForPHI::runOnFunction(Function &F) { 174*bdd1243dSDimitry Andric LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>(); 175*bdd1243dSDimitry Andric DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 176*bdd1243dSDimitry Andric return rewritePHIs(F, DA, DT); 177*bdd1243dSDimitry Andric } 178*bdd1243dSDimitry Andric 179*bdd1243dSDimitry Andric FunctionPass *llvm::createAMDGPURewriteUndefForPHIPass() { 180*bdd1243dSDimitry Andric return new AMDGPURewriteUndefForPHI(); 181*bdd1243dSDimitry Andric } 182