1bdd1243dSDimitry Andric //===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===// 2bdd1243dSDimitry Andric // 3bdd1243dSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4bdd1243dSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5bdd1243dSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6bdd1243dSDimitry Andric // 7bdd1243dSDimitry Andric //===----------------------------------------------------------------------===// 8bdd1243dSDimitry Andric // This file implements the idea to rewrite undef incoming operand for certain 9bdd1243dSDimitry Andric // PHIs in structurized CFG. This pass only works on IR that has gone through 10bdd1243dSDimitry Andric // StructurizedCFG pass, and this pass has some additional limitation that make 11bdd1243dSDimitry Andric // it can only run after SIAnnotateControlFlow. 12bdd1243dSDimitry Andric // 1306c3fb27SDimitry Andric // To achieve optimal code generation for AMDGPU, we assume that uniformity 14bdd1243dSDimitry Andric // analysis reports the PHI in join block of divergent branch as uniform if 15bdd1243dSDimitry Andric // it has one unique uniform value plus additional undefined/poisoned incoming 16bdd1243dSDimitry Andric // value. That is to say the later compiler pipeline will ensure such PHI always 17bdd1243dSDimitry Andric // return uniform value and ensure it work correctly. Let's take a look at two 18bdd1243dSDimitry Andric // typical patterns in structured CFG that need to be taken care: (In both 19bdd1243dSDimitry Andric // patterns, block %if terminate with divergent branch.) 20bdd1243dSDimitry Andric // 21bdd1243dSDimitry Andric // Pattern A: Block with undefined incoming value dominates defined predecessor 22bdd1243dSDimitry Andric // %if 23bdd1243dSDimitry Andric // | \ 24bdd1243dSDimitry Andric // | %then 25bdd1243dSDimitry Andric // | / 26bdd1243dSDimitry Andric // %endif: %phi = phi [%undef, %if], [%uniform, %then] 27bdd1243dSDimitry Andric // 28bdd1243dSDimitry Andric // Pattern B: Block with defined incoming value dominates undefined predecessor 29bdd1243dSDimitry Andric // %if 30bdd1243dSDimitry Andric // | \ 31bdd1243dSDimitry Andric // | %then 32bdd1243dSDimitry Andric // | / 33bdd1243dSDimitry Andric // %endif: %phi = phi [%uniform, %if], [%undef, %then] 34bdd1243dSDimitry Andric // 35bdd1243dSDimitry Andric // For pattern A, by reporting %phi as uniform, the later pipeline need to make 36bdd1243dSDimitry Andric // sure it be handled correctly. The backend usually allocates a scalar register 37bdd1243dSDimitry Andric // and if any thread in a wave takes %then path, the scalar register will get 38bdd1243dSDimitry Andric // the %uniform value. 39bdd1243dSDimitry Andric // 40bdd1243dSDimitry Andric // For pattern B, we will replace the undef operand with the other defined value 41bdd1243dSDimitry Andric // in this pass. So the scalar register allocated for such PHI will get correct 42bdd1243dSDimitry Andric // liveness. Without this transformation, the scalar register may be overwritten 43bdd1243dSDimitry Andric // in the %then block. 44bdd1243dSDimitry Andric // 45bdd1243dSDimitry Andric // Limitation note: 46bdd1243dSDimitry Andric // If the join block of divergent threads is a loop header, the pass cannot 47bdd1243dSDimitry Andric // handle it correctly right now. For below case, the undef in %phi should also 48bdd1243dSDimitry Andric // be rewritten. Currently we depend on SIAnnotateControlFlow to split %header 49bdd1243dSDimitry Andric // block to get a separate join block, then we can rewrite the undef correctly. 50bdd1243dSDimitry Andric // %if 51bdd1243dSDimitry Andric // | \ 52bdd1243dSDimitry Andric // | %then 53bdd1243dSDimitry Andric // | / 54bdd1243dSDimitry Andric // -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header] 55bdd1243dSDimitry Andric // | | 56bdd1243dSDimitry Andric // \--- 57bdd1243dSDimitry Andric 58bdd1243dSDimitry Andric #include "AMDGPU.h" 5906c3fb27SDimitry Andric #include "llvm/Analysis/UniformityAnalysis.h" 60bdd1243dSDimitry Andric #include "llvm/IR/BasicBlock.h" 61bdd1243dSDimitry Andric #include "llvm/IR/Constants.h" 62bdd1243dSDimitry Andric #include "llvm/IR/Dominators.h" 63bdd1243dSDimitry Andric #include "llvm/IR/Instructions.h" 64bdd1243dSDimitry Andric #include "llvm/InitializePasses.h" 65bdd1243dSDimitry Andric 66bdd1243dSDimitry Andric using namespace llvm; 67bdd1243dSDimitry Andric 68bdd1243dSDimitry Andric #define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi" 69bdd1243dSDimitry Andric 70bdd1243dSDimitry Andric namespace { 71bdd1243dSDimitry Andric 72*5f757f3fSDimitry Andric class AMDGPURewriteUndefForPHILegacy : public FunctionPass { 73bdd1243dSDimitry Andric public: 74bdd1243dSDimitry Andric static char ID; 75*5f757f3fSDimitry Andric AMDGPURewriteUndefForPHILegacy() : FunctionPass(ID) { 76*5f757f3fSDimitry Andric initializeAMDGPURewriteUndefForPHILegacyPass(*PassRegistry::getPassRegistry()); 77bdd1243dSDimitry Andric } 78bdd1243dSDimitry Andric bool runOnFunction(Function &F) override; 79bdd1243dSDimitry Andric StringRef getPassName() const override { 80bdd1243dSDimitry Andric return "AMDGPU Rewrite Undef for PHI"; 81bdd1243dSDimitry Andric } 82bdd1243dSDimitry Andric 83bdd1243dSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 8406c3fb27SDimitry Andric AU.addRequired<UniformityInfoWrapperPass>(); 85bdd1243dSDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 86bdd1243dSDimitry Andric 87bdd1243dSDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>(); 8806c3fb27SDimitry Andric AU.addPreserved<UniformityInfoWrapperPass>(); 89bdd1243dSDimitry Andric AU.setPreservesCFG(); 90bdd1243dSDimitry Andric } 91bdd1243dSDimitry Andric }; 92bdd1243dSDimitry Andric 93bdd1243dSDimitry Andric } // end anonymous namespace 94*5f757f3fSDimitry Andric char AMDGPURewriteUndefForPHILegacy::ID = 0; 95bdd1243dSDimitry Andric 96*5f757f3fSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE, 97bdd1243dSDimitry Andric "Rewrite undef for PHI", false, false) 9806c3fb27SDimitry Andric INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) 99bdd1243dSDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 100*5f757f3fSDimitry Andric INITIALIZE_PASS_END(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE, 101bdd1243dSDimitry Andric "Rewrite undef for PHI", false, false) 102bdd1243dSDimitry Andric 10306c3fb27SDimitry Andric bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT) { 104bdd1243dSDimitry Andric bool Changed = false; 105bdd1243dSDimitry Andric SmallVector<PHINode *> ToBeDeleted; 106bdd1243dSDimitry Andric for (auto &BB : F) { 107bdd1243dSDimitry Andric for (auto &PHI : BB.phis()) { 10806c3fb27SDimitry Andric if (UA.isDivergent(&PHI)) 109bdd1243dSDimitry Andric continue; 110bdd1243dSDimitry Andric 111bdd1243dSDimitry Andric // The unique incoming value except undef/poison for the PHI node. 112bdd1243dSDimitry Andric Value *UniqueDefinedIncoming = nullptr; 113bdd1243dSDimitry Andric // The divergent block with defined incoming value that dominates all 114bdd1243dSDimitry Andric // other block with the same incoming value. 115bdd1243dSDimitry Andric BasicBlock *DominateBB = nullptr; 116bdd1243dSDimitry Andric // Predecessors with undefined incoming value (excluding loop backedge). 117bdd1243dSDimitry Andric SmallVector<BasicBlock *> Undefs; 118bdd1243dSDimitry Andric 119bdd1243dSDimitry Andric for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) { 120bdd1243dSDimitry Andric Value *Incoming = PHI.getIncomingValue(i); 121bdd1243dSDimitry Andric BasicBlock *IncomingBB = PHI.getIncomingBlock(i); 122bdd1243dSDimitry Andric 123bdd1243dSDimitry Andric if (Incoming == &PHI) 124bdd1243dSDimitry Andric continue; 125bdd1243dSDimitry Andric 126bdd1243dSDimitry Andric if (isa<UndefValue>(Incoming)) { 127bdd1243dSDimitry Andric // Undef from loop backedge will not be replaced. 128bdd1243dSDimitry Andric if (!DT->dominates(&BB, IncomingBB)) 129bdd1243dSDimitry Andric Undefs.push_back(IncomingBB); 130bdd1243dSDimitry Andric continue; 131bdd1243dSDimitry Andric } 132bdd1243dSDimitry Andric 133bdd1243dSDimitry Andric if (!UniqueDefinedIncoming) { 134bdd1243dSDimitry Andric UniqueDefinedIncoming = Incoming; 135bdd1243dSDimitry Andric DominateBB = IncomingBB; 136bdd1243dSDimitry Andric } else if (Incoming == UniqueDefinedIncoming) { 137bdd1243dSDimitry Andric // Update DominateBB if necessary. 138bdd1243dSDimitry Andric if (DT->dominates(IncomingBB, DominateBB)) 139bdd1243dSDimitry Andric DominateBB = IncomingBB; 140bdd1243dSDimitry Andric } else { 141bdd1243dSDimitry Andric UniqueDefinedIncoming = nullptr; 142bdd1243dSDimitry Andric break; 143bdd1243dSDimitry Andric } 144bdd1243dSDimitry Andric } 145bdd1243dSDimitry Andric // We only need to replace the undef for the PHI which is merging 146bdd1243dSDimitry Andric // defined/undefined values from divergent threads. 147bdd1243dSDimitry Andric // TODO: We should still be able to replace undef value if the unique 148bdd1243dSDimitry Andric // value is a Constant. 149bdd1243dSDimitry Andric if (!UniqueDefinedIncoming || Undefs.empty() || 15006c3fb27SDimitry Andric !UA.isDivergent(DominateBB->getTerminator())) 151bdd1243dSDimitry Andric continue; 152bdd1243dSDimitry Andric 153bdd1243dSDimitry Andric // We only replace the undef when DominateBB truly dominates all the 154bdd1243dSDimitry Andric // other predecessors with undefined incoming value. Make sure DominateBB 155bdd1243dSDimitry Andric // dominates BB so that UniqueDefinedIncoming is available in BB and 156bdd1243dSDimitry Andric // afterwards. 157bdd1243dSDimitry Andric if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) { 158bdd1243dSDimitry Andric return DT->dominates(DominateBB, UD); 159bdd1243dSDimitry Andric })) { 160bdd1243dSDimitry Andric PHI.replaceAllUsesWith(UniqueDefinedIncoming); 161bdd1243dSDimitry Andric ToBeDeleted.push_back(&PHI); 162bdd1243dSDimitry Andric Changed = true; 163bdd1243dSDimitry Andric } 164bdd1243dSDimitry Andric } 165bdd1243dSDimitry Andric } 166bdd1243dSDimitry Andric 167bdd1243dSDimitry Andric for (auto *PHI : ToBeDeleted) 168bdd1243dSDimitry Andric PHI->eraseFromParent(); 169bdd1243dSDimitry Andric 170bdd1243dSDimitry Andric return Changed; 171bdd1243dSDimitry Andric } 172bdd1243dSDimitry Andric 173*5f757f3fSDimitry Andric bool AMDGPURewriteUndefForPHILegacy::runOnFunction(Function &F) { 17406c3fb27SDimitry Andric UniformityInfo &UA = 17506c3fb27SDimitry Andric getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo(); 176bdd1243dSDimitry Andric DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 17706c3fb27SDimitry Andric return rewritePHIs(F, UA, DT); 178bdd1243dSDimitry Andric } 179bdd1243dSDimitry Andric 180*5f757f3fSDimitry Andric PreservedAnalyses 181*5f757f3fSDimitry Andric AMDGPURewriteUndefForPHIPass::run(Function &F, FunctionAnalysisManager &AM) { 182*5f757f3fSDimitry Andric UniformityInfo &UA = AM.getResult<UniformityInfoAnalysis>(F); 183*5f757f3fSDimitry Andric DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 184*5f757f3fSDimitry Andric bool Changed = rewritePHIs(F, UA, DT); 185*5f757f3fSDimitry Andric if (Changed) { 186*5f757f3fSDimitry Andric PreservedAnalyses PA; 187*5f757f3fSDimitry Andric PA.preserveSet<CFGAnalyses>(); 188*5f757f3fSDimitry Andric return PA; 189*5f757f3fSDimitry Andric } 190*5f757f3fSDimitry Andric 191*5f757f3fSDimitry Andric return PreservedAnalyses::all(); 192*5f757f3fSDimitry Andric } 193*5f757f3fSDimitry Andric 194*5f757f3fSDimitry Andric FunctionPass *llvm::createAMDGPURewriteUndefForPHILegacyPass() { 195*5f757f3fSDimitry Andric return new AMDGPURewriteUndefForPHILegacy(); 196bdd1243dSDimitry Andric } 197