102077da7SAlexey Zhikhartsev //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===// 202077da7SAlexey Zhikhartsev // 3c874dd53SChristopher Di Bella // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4c874dd53SChristopher Di Bella // See https://llvm.org/LICENSE.txt for license information. 5c874dd53SChristopher Di Bella // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 602077da7SAlexey Zhikhartsev // 702077da7SAlexey Zhikhartsev //===----------------------------------------------------------------------===// 802077da7SAlexey Zhikhartsev // 902077da7SAlexey Zhikhartsev // Transform each threading path to effectively jump thread the DFA. For 1002077da7SAlexey Zhikhartsev // example, the CFG below could be transformed as follows, where the cloned 1102077da7SAlexey Zhikhartsev // blocks unconditionally branch to the next correct case based on what is 1202077da7SAlexey Zhikhartsev // identified in the analysis. 1302077da7SAlexey Zhikhartsev // 1402077da7SAlexey Zhikhartsev // sw.bb sw.bb 1502077da7SAlexey Zhikhartsev // / | \ / | \ 1602077da7SAlexey Zhikhartsev // case1 case2 case3 case1 case2 case3 1702077da7SAlexey Zhikhartsev // \ | / | | | 1802077da7SAlexey Zhikhartsev // determinator det.2 det.3 det.1 1902077da7SAlexey Zhikhartsev // br sw.bb / | \ 2002077da7SAlexey Zhikhartsev // sw.bb.2 sw.bb.3 sw.bb.1 2102077da7SAlexey Zhikhartsev // br case2 br case3 br case1§ 2202077da7SAlexey Zhikhartsev // 2302077da7SAlexey Zhikhartsev // Definitions and Terminology: 2402077da7SAlexey Zhikhartsev // 2502077da7SAlexey Zhikhartsev // * Threading path: 2602077da7SAlexey Zhikhartsev // a list of basic blocks, the exit state, and the block that determines 2702077da7SAlexey Zhikhartsev // the next state, for which the following notation will be used: 2802077da7SAlexey Zhikhartsev // < path of BBs that form a cycle > [ state, determinator ] 2902077da7SAlexey Zhikhartsev // 3002077da7SAlexey Zhikhartsev // * Predictable switch: 3102077da7SAlexey Zhikhartsev // The switch variable is always a known constant so that all conditional 3202077da7SAlexey Zhikhartsev // jumps based on switch variable can be converted to unconditional jump. 3302077da7SAlexey Zhikhartsev // 3402077da7SAlexey Zhikhartsev // * Determinator: 3502077da7SAlexey Zhikhartsev // The basic block that determines the next state of the DFA. 3602077da7SAlexey Zhikhartsev // 3702077da7SAlexey Zhikhartsev // Representing the optimization in C-like pseudocode: the code pattern on the 3802077da7SAlexey Zhikhartsev // left could functionally be transformed to the right pattern if the switch 3902077da7SAlexey Zhikhartsev // condition is predictable. 4002077da7SAlexey Zhikhartsev // 4102077da7SAlexey Zhikhartsev // X = A goto A 4202077da7SAlexey Zhikhartsev // for (...) A: 4302077da7SAlexey Zhikhartsev // switch (X) ... 4402077da7SAlexey Zhikhartsev // case A goto B 4502077da7SAlexey Zhikhartsev // X = B B: 4602077da7SAlexey Zhikhartsev // case B ... 4702077da7SAlexey Zhikhartsev // X = C goto C 4802077da7SAlexey Zhikhartsev // 4902077da7SAlexey Zhikhartsev // The pass first checks that switch variable X is decided by the control flow 5002077da7SAlexey Zhikhartsev // path taken in the loop; for example, in case B, the next value of X is 5102077da7SAlexey Zhikhartsev // decided to be C. It then enumerates through all paths in the loop and labels 5202077da7SAlexey Zhikhartsev // the basic blocks where the next state is decided. 5302077da7SAlexey Zhikhartsev // 5402077da7SAlexey Zhikhartsev // Using this information it creates new paths that unconditionally branch to 5502077da7SAlexey Zhikhartsev // the next case. This involves cloning code, so it only gets triggered if the 5602077da7SAlexey Zhikhartsev // amount of code duplicated is below a threshold. 5702077da7SAlexey Zhikhartsev // 5802077da7SAlexey Zhikhartsev //===----------------------------------------------------------------------===// 5902077da7SAlexey Zhikhartsev 6002077da7SAlexey Zhikhartsev #include "llvm/Transforms/Scalar/DFAJumpThreading.h" 6102077da7SAlexey Zhikhartsev #include "llvm/ADT/APInt.h" 6202077da7SAlexey Zhikhartsev #include "llvm/ADT/DenseMap.h" 6302077da7SAlexey Zhikhartsev #include "llvm/ADT/SmallSet.h" 6402077da7SAlexey Zhikhartsev #include "llvm/ADT/Statistic.h" 6502077da7SAlexey Zhikhartsev #include "llvm/Analysis/AssumptionCache.h" 6602077da7SAlexey Zhikhartsev #include "llvm/Analysis/CodeMetrics.h" 67a494ae43Sserge-sans-paille #include "llvm/Analysis/DomTreeUpdater.h" 686b53ada6SXChy #include "llvm/Analysis/LoopInfo.h" 6902077da7SAlexey Zhikhartsev #include "llvm/Analysis/OptimizationRemarkEmitter.h" 7002077da7SAlexey Zhikhartsev #include "llvm/Analysis/TargetTransformInfo.h" 7102077da7SAlexey Zhikhartsev #include "llvm/IR/CFG.h" 7202077da7SAlexey Zhikhartsev #include "llvm/IR/Constants.h" 7302077da7SAlexey Zhikhartsev #include "llvm/IR/IntrinsicInst.h" 7402077da7SAlexey Zhikhartsev #include "llvm/Support/CommandLine.h" 7502077da7SAlexey Zhikhartsev #include "llvm/Support/Debug.h" 7602077da7SAlexey Zhikhartsev #include "llvm/Transforms/Utils/Cloning.h" 7702077da7SAlexey Zhikhartsev #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" 7802077da7SAlexey Zhikhartsev #include "llvm/Transforms/Utils/ValueMapper.h" 7902077da7SAlexey Zhikhartsev #include <algorithm> 8002077da7SAlexey Zhikhartsev #include <deque> 8102077da7SAlexey Zhikhartsev 82f90a66a5Sserge-sans-paille #ifdef EXPENSIVE_CHECKS 83f90a66a5Sserge-sans-paille #include "llvm/IR/Verifier.h" 84f90a66a5Sserge-sans-paille #endif 85f90a66a5Sserge-sans-paille 8602077da7SAlexey Zhikhartsev using namespace llvm; 8702077da7SAlexey Zhikhartsev 8802077da7SAlexey Zhikhartsev #define DEBUG_TYPE "dfa-jump-threading" 8902077da7SAlexey Zhikhartsev 9002077da7SAlexey Zhikhartsev STATISTIC(NumTransforms, "Number of transformations done"); 9102077da7SAlexey Zhikhartsev STATISTIC(NumCloned, "Number of blocks cloned"); 9202077da7SAlexey Zhikhartsev STATISTIC(NumPaths, "Number of individual paths threaded"); 9302077da7SAlexey Zhikhartsev 9402077da7SAlexey Zhikhartsev static cl::opt<bool> 9502077da7SAlexey Zhikhartsev ClViewCfgBefore("dfa-jump-view-cfg-before", 9602077da7SAlexey Zhikhartsev cl::desc("View the CFG before DFA Jump Threading"), 9702077da7SAlexey Zhikhartsev cl::Hidden, cl::init(false)); 9802077da7SAlexey Zhikhartsev 99c9325f8aSUsman Nadeem static cl::opt<bool> EarlyExitHeuristic( 100c9325f8aSUsman Nadeem "dfa-early-exit-heuristic", 101c9325f8aSUsman Nadeem cl::desc("Exit early if an unpredictable value come from the same loop"), 102c9325f8aSUsman Nadeem cl::Hidden, cl::init(true)); 103c9325f8aSUsman Nadeem 10402077da7SAlexey Zhikhartsev static cl::opt<unsigned> MaxPathLength( 10502077da7SAlexey Zhikhartsev "dfa-max-path-length", 10602077da7SAlexey Zhikhartsev cl::desc("Max number of blocks searched to find a threading path"), 10702077da7SAlexey Zhikhartsev cl::Hidden, cl::init(20)); 10802077da7SAlexey Zhikhartsev 109b167ada8SUsman Nadeem static cl::opt<unsigned> MaxNumVisitiedPaths( 110b167ada8SUsman Nadeem "dfa-max-num-visited-paths", 111b167ada8SUsman Nadeem cl::desc( 112b167ada8SUsman Nadeem "Max number of blocks visited while enumerating paths around a switch"), 113b167ada8SUsman Nadeem cl::Hidden, cl::init(2000)); 114b167ada8SUsman Nadeem 1157fa41d8aSXChy static cl::opt<unsigned> 1167fa41d8aSXChy MaxNumPaths("dfa-max-num-paths", 1178b0d7634SAlex Zhikhartsev cl::desc("Max number of paths enumerated around a switch"), 1188b0d7634SAlex Zhikhartsev cl::Hidden, cl::init(200)); 1198b0d7634SAlex Zhikhartsev 12002077da7SAlexey Zhikhartsev static cl::opt<unsigned> 12102077da7SAlexey Zhikhartsev CostThreshold("dfa-cost-threshold", 12202077da7SAlexey Zhikhartsev cl::desc("Maximum cost accepted for the transformation"), 12302077da7SAlexey Zhikhartsev cl::Hidden, cl::init(50)); 12402077da7SAlexey Zhikhartsev 12502077da7SAlexey Zhikhartsev namespace { 12602077da7SAlexey Zhikhartsev 12702077da7SAlexey Zhikhartsev class SelectInstToUnfold { 12802077da7SAlexey Zhikhartsev SelectInst *SI; 12902077da7SAlexey Zhikhartsev PHINode *SIUse; 13002077da7SAlexey Zhikhartsev 13102077da7SAlexey Zhikhartsev public: 13202077da7SAlexey Zhikhartsev SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {} 13302077da7SAlexey Zhikhartsev 13402077da7SAlexey Zhikhartsev SelectInst *getInst() { return SI; } 13502077da7SAlexey Zhikhartsev PHINode *getUse() { return SIUse; } 13602077da7SAlexey Zhikhartsev 13702077da7SAlexey Zhikhartsev explicit operator bool() const { return SI && SIUse; } 13802077da7SAlexey Zhikhartsev }; 13902077da7SAlexey Zhikhartsev 140c229f767SXChy void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, 14102077da7SAlexey Zhikhartsev std::vector<SelectInstToUnfold> *NewSIsToUnfold, 14202077da7SAlexey Zhikhartsev std::vector<BasicBlock *> *NewBBs); 14302077da7SAlexey Zhikhartsev 14402077da7SAlexey Zhikhartsev class DFAJumpThreading { 14502077da7SAlexey Zhikhartsev public: 1466b53ada6SXChy DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI, 14702077da7SAlexey Zhikhartsev TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE) 1486b53ada6SXChy : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {} 14902077da7SAlexey Zhikhartsev 15002077da7SAlexey Zhikhartsev bool run(Function &F); 151c229f767SXChy bool LoopInfoBroken; 15202077da7SAlexey Zhikhartsev 15302077da7SAlexey Zhikhartsev private: 15402077da7SAlexey Zhikhartsev void 15502077da7SAlexey Zhikhartsev unfoldSelectInstrs(DominatorTree *DT, 15602077da7SAlexey Zhikhartsev const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { 15702077da7SAlexey Zhikhartsev DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 15834d48279SKazu Hirata SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts); 15902077da7SAlexey Zhikhartsev 16002077da7SAlexey Zhikhartsev while (!Stack.empty()) { 16184b07c9bSKazu Hirata SelectInstToUnfold SIToUnfold = Stack.pop_back_val(); 16202077da7SAlexey Zhikhartsev 16302077da7SAlexey Zhikhartsev std::vector<SelectInstToUnfold> NewSIsToUnfold; 16402077da7SAlexey Zhikhartsev std::vector<BasicBlock *> NewBBs; 165c229f767SXChy unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs); 16602077da7SAlexey Zhikhartsev 16702077da7SAlexey Zhikhartsev // Put newly discovered select instructions into the work list. 16802077da7SAlexey Zhikhartsev for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold) 16902077da7SAlexey Zhikhartsev Stack.push_back(NewSIToUnfold); 17002077da7SAlexey Zhikhartsev } 17102077da7SAlexey Zhikhartsev } 17202077da7SAlexey Zhikhartsev 17302077da7SAlexey Zhikhartsev AssumptionCache *AC; 17402077da7SAlexey Zhikhartsev DominatorTree *DT; 1756b53ada6SXChy LoopInfo *LI; 17602077da7SAlexey Zhikhartsev TargetTransformInfo *TTI; 17702077da7SAlexey Zhikhartsev OptimizationRemarkEmitter *ORE; 17802077da7SAlexey Zhikhartsev }; 17902077da7SAlexey Zhikhartsev 18002077da7SAlexey Zhikhartsev } // end anonymous namespace 18102077da7SAlexey Zhikhartsev 18202077da7SAlexey Zhikhartsev namespace { 18302077da7SAlexey Zhikhartsev 18402077da7SAlexey Zhikhartsev /// Unfold the select instruction held in \p SIToUnfold by replacing it with 18502077da7SAlexey Zhikhartsev /// control flow. 18602077da7SAlexey Zhikhartsev /// 18702077da7SAlexey Zhikhartsev /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly 18802077da7SAlexey Zhikhartsev /// created basic blocks into \p NewBBs. 18902077da7SAlexey Zhikhartsev /// 19002077da7SAlexey Zhikhartsev /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. 191c229f767SXChy void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, 19202077da7SAlexey Zhikhartsev std::vector<SelectInstToUnfold> *NewSIsToUnfold, 19302077da7SAlexey Zhikhartsev std::vector<BasicBlock *> *NewBBs) { 19402077da7SAlexey Zhikhartsev SelectInst *SI = SIToUnfold.getInst(); 19502077da7SAlexey Zhikhartsev PHINode *SIUse = SIToUnfold.getUse(); 19602077da7SAlexey Zhikhartsev BasicBlock *StartBlock = SI->getParent(); 19702077da7SAlexey Zhikhartsev BranchInst *StartBlockTerm = 19802077da7SAlexey Zhikhartsev dyn_cast<BranchInst>(StartBlock->getTerminator()); 19902077da7SAlexey Zhikhartsev 200b167ada8SUsman Nadeem assert(StartBlockTerm); 20102077da7SAlexey Zhikhartsev assert(SI->hasOneUse()); 20202077da7SAlexey Zhikhartsev 203b167ada8SUsman Nadeem if (StartBlockTerm->isUnconditional()) { 204*d4a38c8fSUsman Nadeem BasicBlock *EndBlock = StartBlock->getUniqueSuccessor(); 205b167ada8SUsman Nadeem // Arbitrarily choose the 'false' side for a new input value to the PHI. 206b167ada8SUsman Nadeem BasicBlock *NewBlock = BasicBlock::Create( 207b167ada8SUsman Nadeem SI->getContext(), Twine(SI->getName(), ".si.unfold.false"), 20802077da7SAlexey Zhikhartsev EndBlock->getParent(), EndBlock); 209b167ada8SUsman Nadeem NewBBs->push_back(NewBlock); 210b167ada8SUsman Nadeem BranchInst::Create(EndBlock, NewBlock); 211b167ada8SUsman Nadeem DTU->applyUpdates({{DominatorTree::Insert, NewBlock, EndBlock}}); 21202077da7SAlexey Zhikhartsev 213b167ada8SUsman Nadeem // StartBlock 214b167ada8SUsman Nadeem // | \ 215b167ada8SUsman Nadeem // | NewBlock 216b167ada8SUsman Nadeem // | / 217b167ada8SUsman Nadeem // EndBlock 21802077da7SAlexey Zhikhartsev Value *SIOp1 = SI->getTrueValue(); 21902077da7SAlexey Zhikhartsev Value *SIOp2 = SI->getFalseValue(); 22002077da7SAlexey Zhikhartsev 221b167ada8SUsman Nadeem PHINode *NewPhi = PHINode::Create(SIUse->getType(), 1, 222b167ada8SUsman Nadeem Twine(SIOp2->getName(), ".si.unfold.phi"), 223b167ada8SUsman Nadeem NewBlock->getFirstInsertionPt()); 224b167ada8SUsman Nadeem NewPhi->addIncoming(SIOp2, StartBlock); 225b167ada8SUsman Nadeem 226*d4a38c8fSUsman Nadeem // Update any other PHI nodes in EndBlock. 227*d4a38c8fSUsman Nadeem for (PHINode &Phi : EndBlock->phis()) { 228*d4a38c8fSUsman Nadeem if (SIUse == &Phi) 229*d4a38c8fSUsman Nadeem continue; 230*d4a38c8fSUsman Nadeem Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlock); 231*d4a38c8fSUsman Nadeem } 232*d4a38c8fSUsman Nadeem 233*d4a38c8fSUsman Nadeem // Update the phi node of SI, which is its only use. 234*d4a38c8fSUsman Nadeem if (EndBlock == SIUse->getParent()) { 235*d4a38c8fSUsman Nadeem SIUse->addIncoming(NewPhi, NewBlock); 236*d4a38c8fSUsman Nadeem SIUse->replaceUsesOfWith(SI, SIOp1); 237*d4a38c8fSUsman Nadeem } else { 238*d4a38c8fSUsman Nadeem PHINode *EndPhi = PHINode::Create(SIUse->getType(), pred_size(EndBlock), 239*d4a38c8fSUsman Nadeem Twine(SI->getName(), ".si.unfold.phi"), 240*d4a38c8fSUsman Nadeem EndBlock->getFirstInsertionPt()); 241*d4a38c8fSUsman Nadeem for (BasicBlock *Pred : predecessors(EndBlock)) { 242*d4a38c8fSUsman Nadeem if (Pred != StartBlock && Pred != NewBlock) 243*d4a38c8fSUsman Nadeem EndPhi->addIncoming(EndPhi, Pred); 244*d4a38c8fSUsman Nadeem } 245*d4a38c8fSUsman Nadeem 246*d4a38c8fSUsman Nadeem EndPhi->addIncoming(SIOp1, StartBlock); 247*d4a38c8fSUsman Nadeem EndPhi->addIncoming(NewPhi, NewBlock); 248*d4a38c8fSUsman Nadeem SIUse->replaceUsesOfWith(SI, EndPhi); 249*d4a38c8fSUsman Nadeem SIUse = EndPhi; 250*d4a38c8fSUsman Nadeem } 251*d4a38c8fSUsman Nadeem 252b167ada8SUsman Nadeem if (auto *OpSi = dyn_cast<SelectInst>(SIOp1)) 253b167ada8SUsman Nadeem NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, SIUse)); 254b167ada8SUsman Nadeem if (auto *OpSi = dyn_cast<SelectInst>(SIOp2)) 255b167ada8SUsman Nadeem NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, NewPhi)); 25602077da7SAlexey Zhikhartsev 257b167ada8SUsman Nadeem // Insert the real conditional branch based on the original condition. 258*d4a38c8fSUsman Nadeem StartBlockTerm->eraseFromParent(); 259b167ada8SUsman Nadeem BranchInst::Create(EndBlock, NewBlock, SI->getCondition(), StartBlock); 260b167ada8SUsman Nadeem DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}, 261b167ada8SUsman Nadeem {DominatorTree::Insert, StartBlock, NewBlock}}); 262b167ada8SUsman Nadeem } else { 263*d4a38c8fSUsman Nadeem BasicBlock *EndBlock = SIUse->getParent(); 264b167ada8SUsman Nadeem BasicBlock *NewBlockT = BasicBlock::Create( 265b167ada8SUsman Nadeem SI->getContext(), Twine(SI->getName(), ".si.unfold.true"), 266b167ada8SUsman Nadeem EndBlock->getParent(), EndBlock); 267b167ada8SUsman Nadeem BasicBlock *NewBlockF = BasicBlock::Create( 268b167ada8SUsman Nadeem SI->getContext(), Twine(SI->getName(), ".si.unfold.false"), 269b167ada8SUsman Nadeem EndBlock->getParent(), EndBlock); 270b167ada8SUsman Nadeem 271b167ada8SUsman Nadeem NewBBs->push_back(NewBlockT); 272b167ada8SUsman Nadeem NewBBs->push_back(NewBlockF); 273b167ada8SUsman Nadeem 274b167ada8SUsman Nadeem // Def only has one use in EndBlock. 275b167ada8SUsman Nadeem // Before transformation: 276b167ada8SUsman Nadeem // StartBlock(Def) 277b167ada8SUsman Nadeem // | \ 278b167ada8SUsman Nadeem // EndBlock OtherBlock 279b167ada8SUsman Nadeem // (Use) 280b167ada8SUsman Nadeem // 281b167ada8SUsman Nadeem // After transformation: 282b167ada8SUsman Nadeem // StartBlock(Def) 283b167ada8SUsman Nadeem // | \ 284b167ada8SUsman Nadeem // | OtherBlock 285b167ada8SUsman Nadeem // NewBlockT 286b167ada8SUsman Nadeem // | \ 287b167ada8SUsman Nadeem // | NewBlockF 288b167ada8SUsman Nadeem // | / 289b167ada8SUsman Nadeem // | / 290b167ada8SUsman Nadeem // EndBlock 291b167ada8SUsman Nadeem // (Use) 292b167ada8SUsman Nadeem BranchInst::Create(EndBlock, NewBlockF); 293b167ada8SUsman Nadeem // Insert the real conditional branch based on the original condition. 294b167ada8SUsman Nadeem BranchInst::Create(EndBlock, NewBlockF, SI->getCondition(), NewBlockT); 295b167ada8SUsman Nadeem DTU->applyUpdates({{DominatorTree::Insert, NewBlockT, NewBlockF}, 296b167ada8SUsman Nadeem {DominatorTree::Insert, NewBlockT, EndBlock}, 297b167ada8SUsman Nadeem {DominatorTree::Insert, NewBlockF, EndBlock}}); 298b167ada8SUsman Nadeem 299b167ada8SUsman Nadeem Value *TrueVal = SI->getTrueValue(); 300b167ada8SUsman Nadeem Value *FalseVal = SI->getFalseValue(); 301b167ada8SUsman Nadeem 302b167ada8SUsman Nadeem PHINode *NewPhiT = PHINode::Create( 303b167ada8SUsman Nadeem SIUse->getType(), 1, Twine(TrueVal->getName(), ".si.unfold.phi"), 304b167ada8SUsman Nadeem NewBlockT->getFirstInsertionPt()); 305b167ada8SUsman Nadeem PHINode *NewPhiF = PHINode::Create( 306b167ada8SUsman Nadeem SIUse->getType(), 1, Twine(FalseVal->getName(), ".si.unfold.phi"), 307b167ada8SUsman Nadeem NewBlockF->getFirstInsertionPt()); 308b167ada8SUsman Nadeem NewPhiT->addIncoming(TrueVal, StartBlock); 309b167ada8SUsman Nadeem NewPhiF->addIncoming(FalseVal, NewBlockT); 310b167ada8SUsman Nadeem 311b167ada8SUsman Nadeem if (auto *TrueSI = dyn_cast<SelectInst>(TrueVal)) 312b167ada8SUsman Nadeem NewSIsToUnfold->push_back(SelectInstToUnfold(TrueSI, NewPhiT)); 313b167ada8SUsman Nadeem if (auto *FalseSi = dyn_cast<SelectInst>(FalseVal)) 314b167ada8SUsman Nadeem NewSIsToUnfold->push_back(SelectInstToUnfold(FalseSi, NewPhiF)); 315b167ada8SUsman Nadeem 316b167ada8SUsman Nadeem SIUse->addIncoming(NewPhiT, NewBlockT); 317b167ada8SUsman Nadeem SIUse->addIncoming(NewPhiF, NewBlockF); 318b167ada8SUsman Nadeem SIUse->removeIncomingValue(StartBlock); 319b167ada8SUsman Nadeem 320b167ada8SUsman Nadeem // Update any other PHI nodes in EndBlock. 321b167ada8SUsman Nadeem for (PHINode &Phi : EndBlock->phis()) { 322b167ada8SUsman Nadeem if (SIUse == &Phi) 323b167ada8SUsman Nadeem continue; 324b167ada8SUsman Nadeem Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlockT); 325b167ada8SUsman Nadeem Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlockF); 326b167ada8SUsman Nadeem Phi.removeIncomingValue(StartBlock); 327b167ada8SUsman Nadeem } 328b167ada8SUsman Nadeem 329b167ada8SUsman Nadeem // Update the appropriate successor of the start block to point to the new 330b167ada8SUsman Nadeem // unfolded block. 331b167ada8SUsman Nadeem unsigned SuccNum = StartBlockTerm->getSuccessor(1) == EndBlock ? 1 : 0; 332b167ada8SUsman Nadeem StartBlockTerm->setSuccessor(SuccNum, NewBlockT); 333b167ada8SUsman Nadeem DTU->applyUpdates({{DominatorTree::Delete, StartBlock, EndBlock}, 334b167ada8SUsman Nadeem {DominatorTree::Insert, StartBlock, NewBlockT}}); 335b167ada8SUsman Nadeem } 33602077da7SAlexey Zhikhartsev 337c229f767SXChy // Preserve loop info 338c229f767SXChy if (Loop *L = LI->getLoopFor(SI->getParent())) { 339c229f767SXChy for (BasicBlock *NewBB : *NewBBs) 340c229f767SXChy L->addBasicBlockToLoop(NewBB, *LI); 341c229f767SXChy } 342c229f767SXChy 34302077da7SAlexey Zhikhartsev // The select is now dead. 3447fa41d8aSXChy assert(SI->use_empty() && "Select must be dead now"); 34502077da7SAlexey Zhikhartsev SI->eraseFromParent(); 34602077da7SAlexey Zhikhartsev } 34702077da7SAlexey Zhikhartsev 34802077da7SAlexey Zhikhartsev struct ClonedBlock { 34902077da7SAlexey Zhikhartsev BasicBlock *BB; 350019ffbf3SXChy APInt State; ///< \p State corresponds to the next value of a switch stmnt. 35102077da7SAlexey Zhikhartsev }; 35202077da7SAlexey Zhikhartsev 35302077da7SAlexey Zhikhartsev typedef std::deque<BasicBlock *> PathType; 35402077da7SAlexey Zhikhartsev typedef std::vector<PathType> PathsType; 355380b8a60SNikita Popov typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks; 35602077da7SAlexey Zhikhartsev typedef std::vector<ClonedBlock> CloneList; 35702077da7SAlexey Zhikhartsev 35802077da7SAlexey Zhikhartsev // This data structure keeps track of all blocks that have been cloned. If two 35902077da7SAlexey Zhikhartsev // different ThreadingPaths clone the same block for a certain state it should 36002077da7SAlexey Zhikhartsev // be reused, and it can be looked up in this map. 36102077da7SAlexey Zhikhartsev typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap; 36202077da7SAlexey Zhikhartsev 36302077da7SAlexey Zhikhartsev // This map keeps track of all the new definitions for an instruction. This 36402077da7SAlexey Zhikhartsev // information is needed when restoring SSA form after cloning blocks. 3659d555b4aSOlle Fredriksson typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap; 36602077da7SAlexey Zhikhartsev 36702077da7SAlexey Zhikhartsev inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { 36802077da7SAlexey Zhikhartsev OS << "< "; 36902077da7SAlexey Zhikhartsev for (const BasicBlock *BB : Path) { 37002077da7SAlexey Zhikhartsev std::string BBName; 37102077da7SAlexey Zhikhartsev if (BB->hasName()) 37202077da7SAlexey Zhikhartsev raw_string_ostream(BBName) << BB->getName(); 37302077da7SAlexey Zhikhartsev else 37402077da7SAlexey Zhikhartsev raw_string_ostream(BBName) << BB; 37502077da7SAlexey Zhikhartsev OS << BBName << " "; 37602077da7SAlexey Zhikhartsev } 37702077da7SAlexey Zhikhartsev OS << ">"; 37802077da7SAlexey Zhikhartsev return OS; 37902077da7SAlexey Zhikhartsev } 38002077da7SAlexey Zhikhartsev 38102077da7SAlexey Zhikhartsev /// ThreadingPath is a path in the control flow of a loop that can be threaded 38202077da7SAlexey Zhikhartsev /// by cloning necessary basic blocks and replacing conditional branches with 38302077da7SAlexey Zhikhartsev /// unconditional ones. A threading path includes a list of basic blocks, the 38402077da7SAlexey Zhikhartsev /// exit state, and the block that determines the next state. 38502077da7SAlexey Zhikhartsev struct ThreadingPath { 38602077da7SAlexey Zhikhartsev /// Exit value is DFA's exit state for the given path. 387019ffbf3SXChy APInt getExitValue() const { return ExitVal; } 38802077da7SAlexey Zhikhartsev void setExitValue(const ConstantInt *V) { 389019ffbf3SXChy ExitVal = V->getValue(); 39002077da7SAlexey Zhikhartsev IsExitValSet = true; 39102077da7SAlexey Zhikhartsev } 39202077da7SAlexey Zhikhartsev bool isExitValueSet() const { return IsExitValSet; } 39302077da7SAlexey Zhikhartsev 39402077da7SAlexey Zhikhartsev /// Determinator is the basic block that determines the next state of the DFA. 39502077da7SAlexey Zhikhartsev const BasicBlock *getDeterminatorBB() const { return DBB; } 39602077da7SAlexey Zhikhartsev void setDeterminator(const BasicBlock *BB) { DBB = BB; } 39702077da7SAlexey Zhikhartsev 39802077da7SAlexey Zhikhartsev /// Path is a list of basic blocks. 39902077da7SAlexey Zhikhartsev const PathType &getPath() const { return Path; } 40002077da7SAlexey Zhikhartsev void setPath(const PathType &NewPath) { Path = NewPath; } 401b167ada8SUsman Nadeem void push_back(BasicBlock *BB) { Path.push_back(BB); } 402b167ada8SUsman Nadeem void push_front(BasicBlock *BB) { Path.push_front(BB); } 403b167ada8SUsman Nadeem void appendExcludingFirst(const PathType &OtherPath) { 404b167ada8SUsman Nadeem Path.insert(Path.end(), OtherPath.begin() + 1, OtherPath.end()); 405b167ada8SUsman Nadeem } 40602077da7SAlexey Zhikhartsev 40702077da7SAlexey Zhikhartsev void print(raw_ostream &OS) const { 40802077da7SAlexey Zhikhartsev OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]"; 40902077da7SAlexey Zhikhartsev } 41002077da7SAlexey Zhikhartsev 41102077da7SAlexey Zhikhartsev private: 41202077da7SAlexey Zhikhartsev PathType Path; 413019ffbf3SXChy APInt ExitVal; 41402077da7SAlexey Zhikhartsev const BasicBlock *DBB = nullptr; 41502077da7SAlexey Zhikhartsev bool IsExitValSet = false; 41602077da7SAlexey Zhikhartsev }; 41702077da7SAlexey Zhikhartsev 41802077da7SAlexey Zhikhartsev #ifndef NDEBUG 41902077da7SAlexey Zhikhartsev inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) { 42002077da7SAlexey Zhikhartsev TPath.print(OS); 42102077da7SAlexey Zhikhartsev return OS; 42202077da7SAlexey Zhikhartsev } 42302077da7SAlexey Zhikhartsev #endif 42402077da7SAlexey Zhikhartsev 42502077da7SAlexey Zhikhartsev struct MainSwitch { 4266b53ada6SXChy MainSwitch(SwitchInst *SI, LoopInfo *LI, OptimizationRemarkEmitter *ORE) 4276b53ada6SXChy : LI(LI) { 4288b0d7634SAlex Zhikhartsev if (isCandidate(SI)) { 42902077da7SAlexey Zhikhartsev Instr = SI; 43002077da7SAlexey Zhikhartsev } else { 43102077da7SAlexey Zhikhartsev ORE->emit([&]() { 43202077da7SAlexey Zhikhartsev return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI) 43302077da7SAlexey Zhikhartsev << "Switch instruction is not predictable."; 43402077da7SAlexey Zhikhartsev }); 43502077da7SAlexey Zhikhartsev } 43602077da7SAlexey Zhikhartsev } 43702077da7SAlexey Zhikhartsev 43802077da7SAlexey Zhikhartsev virtual ~MainSwitch() = default; 43902077da7SAlexey Zhikhartsev 44002077da7SAlexey Zhikhartsev SwitchInst *getInstr() const { return Instr; } 44102077da7SAlexey Zhikhartsev const SmallVector<SelectInstToUnfold, 4> getSelectInsts() { 44202077da7SAlexey Zhikhartsev return SelectInsts; 44302077da7SAlexey Zhikhartsev } 44402077da7SAlexey Zhikhartsev 44502077da7SAlexey Zhikhartsev private: 4468b0d7634SAlex Zhikhartsev /// Do a use-def chain traversal starting from the switch condition to see if 4478b0d7634SAlex Zhikhartsev /// \p SI is a potential condidate. 4488b0d7634SAlex Zhikhartsev /// 4498b0d7634SAlex Zhikhartsev /// Also, collect select instructions to unfold. 4508b0d7634SAlex Zhikhartsev bool isCandidate(const SwitchInst *SI) { 451c9325f8aSUsman Nadeem std::deque<std::pair<Value *, BasicBlock *>> Q; 45202077da7SAlexey Zhikhartsev SmallSet<Value *, 16> SeenValues; 45302077da7SAlexey Zhikhartsev SelectInsts.clear(); 45402077da7SAlexey Zhikhartsev 4558b0d7634SAlex Zhikhartsev Value *SICond = SI->getCondition(); 4568b0d7634SAlex Zhikhartsev LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n"); 4578b0d7634SAlex Zhikhartsev if (!isa<PHINode>(SICond)) 45802077da7SAlexey Zhikhartsev return false; 45902077da7SAlexey Zhikhartsev 4606b53ada6SXChy // The switch must be in a loop. 461c9325f8aSUsman Nadeem const Loop *L = LI->getLoopFor(SI->getParent()); 462c9325f8aSUsman Nadeem if (!L) 4636b53ada6SXChy return false; 4646b53ada6SXChy 465c9325f8aSUsman Nadeem addToQueue(SICond, nullptr, Q, SeenValues); 46602077da7SAlexey Zhikhartsev 46702077da7SAlexey Zhikhartsev while (!Q.empty()) { 468c9325f8aSUsman Nadeem Value *Current = Q.front().first; 469c9325f8aSUsman Nadeem BasicBlock *CurrentIncomingBB = Q.front().second; 47002077da7SAlexey Zhikhartsev Q.pop_front(); 47102077da7SAlexey Zhikhartsev 47202077da7SAlexey Zhikhartsev if (auto *Phi = dyn_cast<PHINode>(Current)) { 473c9325f8aSUsman Nadeem for (BasicBlock *IncomingBB : Phi->blocks()) { 474c9325f8aSUsman Nadeem Value *Incoming = Phi->getIncomingValueForBlock(IncomingBB); 475c9325f8aSUsman Nadeem addToQueue(Incoming, IncomingBB, Q, SeenValues); 47602077da7SAlexey Zhikhartsev } 4778b0d7634SAlex Zhikhartsev LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n"); 47802077da7SAlexey Zhikhartsev } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) { 47902077da7SAlexey Zhikhartsev if (!isValidSelectInst(SelI)) 48002077da7SAlexey Zhikhartsev return false; 481c9325f8aSUsman Nadeem addToQueue(SelI->getTrueValue(), CurrentIncomingBB, Q, SeenValues); 482c9325f8aSUsman Nadeem addToQueue(SelI->getFalseValue(), CurrentIncomingBB, Q, SeenValues); 4838b0d7634SAlex Zhikhartsev LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n"); 48402077da7SAlexey Zhikhartsev if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back())) 48502077da7SAlexey Zhikhartsev SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse)); 4868b0d7634SAlex Zhikhartsev } else if (isa<Constant>(Current)) { 4878b0d7634SAlex Zhikhartsev LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n"); 4888b0d7634SAlex Zhikhartsev continue; 48902077da7SAlexey Zhikhartsev } else { 4908b0d7634SAlex Zhikhartsev LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n"); 4918b0d7634SAlex Zhikhartsev // Allow unpredictable values. The hope is that those will be the 4928b0d7634SAlex Zhikhartsev // initial switch values that can be ignored (they will hit the 4938b0d7634SAlex Zhikhartsev // unthreaded switch) but this assumption will get checked later after 4948b0d7634SAlex Zhikhartsev // paths have been enumerated (in function getStateDefMap). 495c9325f8aSUsman Nadeem 496c9325f8aSUsman Nadeem // If the unpredictable value comes from the same inner loop it is 497c9325f8aSUsman Nadeem // likely that it will also be on the enumerated paths, causing us to 498c9325f8aSUsman Nadeem // exit after we have enumerated all the paths. This heuristic save 499c9325f8aSUsman Nadeem // compile time because a search for all the paths can become expensive. 500c9325f8aSUsman Nadeem if (EarlyExitHeuristic && 501c9325f8aSUsman Nadeem L->contains(LI->getLoopFor(CurrentIncomingBB))) { 502c9325f8aSUsman Nadeem LLVM_DEBUG(dbgs() 503c9325f8aSUsman Nadeem << "\tExiting early due to unpredictability heuristic.\n"); 504c9325f8aSUsman Nadeem return false; 505c9325f8aSUsman Nadeem } 506c9325f8aSUsman Nadeem 5078b0d7634SAlex Zhikhartsev continue; 50802077da7SAlexey Zhikhartsev } 50902077da7SAlexey Zhikhartsev } 51002077da7SAlexey Zhikhartsev 51102077da7SAlexey Zhikhartsev return true; 51202077da7SAlexey Zhikhartsev } 51302077da7SAlexey Zhikhartsev 514c9325f8aSUsman Nadeem void addToQueue(Value *Val, BasicBlock *BB, 515c9325f8aSUsman Nadeem std::deque<std::pair<Value *, BasicBlock *>> &Q, 51602077da7SAlexey Zhikhartsev SmallSet<Value *, 16> &SeenValues) { 51723a26e71SKazu Hirata if (SeenValues.insert(Val).second) 518c9325f8aSUsman Nadeem Q.push_back({Val, BB}); 51902077da7SAlexey Zhikhartsev } 52002077da7SAlexey Zhikhartsev 52102077da7SAlexey Zhikhartsev bool isValidSelectInst(SelectInst *SI) { 52202077da7SAlexey Zhikhartsev if (!SI->hasOneUse()) 52302077da7SAlexey Zhikhartsev return false; 52402077da7SAlexey Zhikhartsev 52502077da7SAlexey Zhikhartsev Instruction *SIUse = dyn_cast<Instruction>(SI->user_back()); 52602077da7SAlexey Zhikhartsev // The use of the select inst should be either a phi or another select. 52702077da7SAlexey Zhikhartsev if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse))) 52802077da7SAlexey Zhikhartsev return false; 52902077da7SAlexey Zhikhartsev 53002077da7SAlexey Zhikhartsev BasicBlock *SIBB = SI->getParent(); 53102077da7SAlexey Zhikhartsev 53202077da7SAlexey Zhikhartsev // Currently, we can only expand select instructions in basic blocks with 53302077da7SAlexey Zhikhartsev // one successor. 53402077da7SAlexey Zhikhartsev BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator()); 53502077da7SAlexey Zhikhartsev if (!SITerm || !SITerm->isUnconditional()) 53602077da7SAlexey Zhikhartsev return false; 53702077da7SAlexey Zhikhartsev 5387fa41d8aSXChy // Only fold the select coming from directly where it is defined. 5397fa41d8aSXChy PHINode *PHIUser = dyn_cast<PHINode>(SIUse); 5407fa41d8aSXChy if (PHIUser && PHIUser->getIncomingBlock(*SI->use_begin()) != SIBB) 54102077da7SAlexey Zhikhartsev return false; 54202077da7SAlexey Zhikhartsev 54302077da7SAlexey Zhikhartsev // If select will not be sunk during unfolding, and it is in the same basic 54402077da7SAlexey Zhikhartsev // block as another state defining select, then cannot unfold both. 54502077da7SAlexey Zhikhartsev for (SelectInstToUnfold SIToUnfold : SelectInsts) { 54602077da7SAlexey Zhikhartsev SelectInst *PrevSI = SIToUnfold.getInst(); 54702077da7SAlexey Zhikhartsev if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI && 54802077da7SAlexey Zhikhartsev PrevSI->getParent() == SI->getParent()) 54902077da7SAlexey Zhikhartsev return false; 55002077da7SAlexey Zhikhartsev } 55102077da7SAlexey Zhikhartsev 55202077da7SAlexey Zhikhartsev return true; 55302077da7SAlexey Zhikhartsev } 55402077da7SAlexey Zhikhartsev 5556b53ada6SXChy LoopInfo *LI; 55602077da7SAlexey Zhikhartsev SwitchInst *Instr = nullptr; 55702077da7SAlexey Zhikhartsev SmallVector<SelectInstToUnfold, 4> SelectInsts; 55802077da7SAlexey Zhikhartsev }; 55902077da7SAlexey Zhikhartsev 56002077da7SAlexey Zhikhartsev struct AllSwitchPaths { 561c229f767SXChy AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE, 562b167ada8SUsman Nadeem LoopInfo *LI, Loop *L) 563c229f767SXChy : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE), 564b167ada8SUsman Nadeem LI(LI), SwitchOuterLoop(L) {} 56502077da7SAlexey Zhikhartsev 56602077da7SAlexey Zhikhartsev std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; } 56702077da7SAlexey Zhikhartsev unsigned getNumThreadingPaths() { return TPaths.size(); } 56802077da7SAlexey Zhikhartsev SwitchInst *getSwitchInst() { return Switch; } 56902077da7SAlexey Zhikhartsev BasicBlock *getSwitchBlock() { return SwitchBlock; } 57002077da7SAlexey Zhikhartsev 57102077da7SAlexey Zhikhartsev void run() { 572b167ada8SUsman Nadeem StateDefMap StateDef = getStateDefMap(); 5738b0d7634SAlex Zhikhartsev if (StateDef.empty()) { 5748b0d7634SAlex Zhikhartsev ORE->emit([&]() { 5758b0d7634SAlex Zhikhartsev return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", 5768b0d7634SAlex Zhikhartsev Switch) 5778b0d7634SAlex Zhikhartsev << "Switch instruction is not predictable."; 5788b0d7634SAlex Zhikhartsev }); 5798b0d7634SAlex Zhikhartsev return; 5808b0d7634SAlex Zhikhartsev } 58102077da7SAlexey Zhikhartsev 582b167ada8SUsman Nadeem auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0)); 583b167ada8SUsman Nadeem auto *SwitchPhiDefBB = SwitchPhi->getParent(); 584b167ada8SUsman Nadeem VisitedBlocks VB; 585b167ada8SUsman Nadeem // Get paths from the determinator BBs to SwitchPhiDefBB 586b167ada8SUsman Nadeem std::vector<ThreadingPath> PathsToPhiDef = 587b167ada8SUsman Nadeem getPathsFromStateDefMap(StateDef, SwitchPhi, VB); 588b167ada8SUsman Nadeem if (SwitchPhiDefBB == SwitchBlock) { 589b167ada8SUsman Nadeem TPaths = std::move(PathsToPhiDef); 590b167ada8SUsman Nadeem return; 59102077da7SAlexey Zhikhartsev } 59202077da7SAlexey Zhikhartsev 593b167ada8SUsman Nadeem // Find and append paths from SwitchPhiDefBB to SwitchBlock. 594b167ada8SUsman Nadeem PathsType PathsToSwitchBB = 595b167ada8SUsman Nadeem paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1); 596b167ada8SUsman Nadeem if (PathsToSwitchBB.empty()) 597b167ada8SUsman Nadeem return; 59802077da7SAlexey Zhikhartsev 599b167ada8SUsman Nadeem std::vector<ThreadingPath> TempList; 600b167ada8SUsman Nadeem for (const ThreadingPath &Path : PathsToPhiDef) { 601b167ada8SUsman Nadeem for (const PathType &PathToSw : PathsToSwitchBB) { 602b167ada8SUsman Nadeem ThreadingPath PathCopy(Path); 603b167ada8SUsman Nadeem PathCopy.appendExcludingFirst(PathToSw); 604b167ada8SUsman Nadeem TempList.push_back(PathCopy); 60502077da7SAlexey Zhikhartsev } 60602077da7SAlexey Zhikhartsev } 607b167ada8SUsman Nadeem TPaths = std::move(TempList); 60802077da7SAlexey Zhikhartsev } 60902077da7SAlexey Zhikhartsev 61002077da7SAlexey Zhikhartsev private: 61102077da7SAlexey Zhikhartsev // Value: an instruction that defines a switch state; 61202077da7SAlexey Zhikhartsev // Key: the parent basic block of that instruction. 61302077da7SAlexey Zhikhartsev typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap; 614b167ada8SUsman Nadeem std::vector<ThreadingPath> getPathsFromStateDefMap(StateDefMap &StateDef, 615b167ada8SUsman Nadeem PHINode *Phi, 616b167ada8SUsman Nadeem VisitedBlocks &VB) { 617b167ada8SUsman Nadeem std::vector<ThreadingPath> Res; 618b167ada8SUsman Nadeem auto *PhiBB = Phi->getParent(); 619b167ada8SUsman Nadeem VB.insert(PhiBB); 62002077da7SAlexey Zhikhartsev 621b167ada8SUsman Nadeem VisitedBlocks UniqueBlocks; 622b167ada8SUsman Nadeem for (auto *IncomingBB : Phi->blocks()) { 623b167ada8SUsman Nadeem if (!UniqueBlocks.insert(IncomingBB).second) 624b167ada8SUsman Nadeem continue; 625b167ada8SUsman Nadeem if (!SwitchOuterLoop->contains(IncomingBB)) 626b167ada8SUsman Nadeem continue; 627b167ada8SUsman Nadeem 628b167ada8SUsman Nadeem Value *IncomingValue = Phi->getIncomingValueForBlock(IncomingBB); 629b167ada8SUsman Nadeem // We found the determinator. This is the start of our path. 630b167ada8SUsman Nadeem if (auto *C = dyn_cast<ConstantInt>(IncomingValue)) { 631b167ada8SUsman Nadeem // SwitchBlock is the determinator, unsupported unless its also the def. 632b167ada8SUsman Nadeem if (PhiBB == SwitchBlock && 633b167ada8SUsman Nadeem SwitchBlock != cast<PHINode>(Switch->getOperand(0))->getParent()) 634b167ada8SUsman Nadeem continue; 635b167ada8SUsman Nadeem ThreadingPath NewPath; 636b167ada8SUsman Nadeem NewPath.setDeterminator(PhiBB); 637b167ada8SUsman Nadeem NewPath.setExitValue(C); 638b167ada8SUsman Nadeem // Don't add SwitchBlock at the start, this is handled later. 639b167ada8SUsman Nadeem if (IncomingBB != SwitchBlock) 640b167ada8SUsman Nadeem NewPath.push_back(IncomingBB); 641b167ada8SUsman Nadeem NewPath.push_back(PhiBB); 642b167ada8SUsman Nadeem Res.push_back(NewPath); 643b167ada8SUsman Nadeem continue; 644b167ada8SUsman Nadeem } 645b167ada8SUsman Nadeem // Don't get into a cycle. 646b167ada8SUsman Nadeem if (VB.contains(IncomingBB) || IncomingBB == SwitchBlock) 647b167ada8SUsman Nadeem continue; 648b167ada8SUsman Nadeem // Recurse up the PHI chain. 649b167ada8SUsman Nadeem auto *IncomingPhi = dyn_cast<PHINode>(IncomingValue); 650b167ada8SUsman Nadeem if (!IncomingPhi) 651b167ada8SUsman Nadeem continue; 652b167ada8SUsman Nadeem auto *IncomingPhiDefBB = IncomingPhi->getParent(); 653b167ada8SUsman Nadeem if (!StateDef.contains(IncomingPhiDefBB)) 654b167ada8SUsman Nadeem continue; 655b167ada8SUsman Nadeem 656b167ada8SUsman Nadeem // Direct predecessor, just add to the path. 657b167ada8SUsman Nadeem if (IncomingPhiDefBB == IncomingBB) { 658b167ada8SUsman Nadeem std::vector<ThreadingPath> PredPaths = 659b167ada8SUsman Nadeem getPathsFromStateDefMap(StateDef, IncomingPhi, VB); 660b167ada8SUsman Nadeem for (ThreadingPath &Path : PredPaths) { 661b167ada8SUsman Nadeem Path.push_back(PhiBB); 662b167ada8SUsman Nadeem Res.push_back(std::move(Path)); 663b167ada8SUsman Nadeem } 664b167ada8SUsman Nadeem continue; 665b167ada8SUsman Nadeem } 666b167ada8SUsman Nadeem // Not a direct predecessor, find intermediate paths to append to the 667b167ada8SUsman Nadeem // existing path. 668b167ada8SUsman Nadeem if (VB.contains(IncomingPhiDefBB)) 669b167ada8SUsman Nadeem continue; 670b167ada8SUsman Nadeem 671b167ada8SUsman Nadeem PathsType IntermediatePaths; 672b167ada8SUsman Nadeem IntermediatePaths = 673b167ada8SUsman Nadeem paths(IncomingPhiDefBB, IncomingBB, VB, /* PathDepth = */ 1); 674b167ada8SUsman Nadeem if (IntermediatePaths.empty()) 675b167ada8SUsman Nadeem continue; 676b167ada8SUsman Nadeem 677b167ada8SUsman Nadeem std::vector<ThreadingPath> PredPaths = 678b167ada8SUsman Nadeem getPathsFromStateDefMap(StateDef, IncomingPhi, VB); 679b167ada8SUsman Nadeem for (const ThreadingPath &Path : PredPaths) { 680b167ada8SUsman Nadeem for (const PathType &IPath : IntermediatePaths) { 681b167ada8SUsman Nadeem ThreadingPath NewPath(Path); 682b167ada8SUsman Nadeem NewPath.appendExcludingFirst(IPath); 683b167ada8SUsman Nadeem NewPath.push_back(PhiBB); 684b167ada8SUsman Nadeem Res.push_back(NewPath); 685b167ada8SUsman Nadeem } 686b167ada8SUsman Nadeem } 687b167ada8SUsman Nadeem } 688b167ada8SUsman Nadeem VB.erase(PhiBB); 689b167ada8SUsman Nadeem return Res; 690b167ada8SUsman Nadeem } 691b167ada8SUsman Nadeem 692b167ada8SUsman Nadeem PathsType paths(BasicBlock *BB, BasicBlock *ToBB, VisitedBlocks &Visited, 693b167ada8SUsman Nadeem unsigned PathDepth) { 69402077da7SAlexey Zhikhartsev PathsType Res; 69502077da7SAlexey Zhikhartsev 69602077da7SAlexey Zhikhartsev // Stop exploring paths after visiting MaxPathLength blocks 69702077da7SAlexey Zhikhartsev if (PathDepth > MaxPathLength) { 69802077da7SAlexey Zhikhartsev ORE->emit([&]() { 69902077da7SAlexey Zhikhartsev return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached", 70002077da7SAlexey Zhikhartsev Switch) 70102077da7SAlexey Zhikhartsev << "Exploration stopped after visiting MaxPathLength=" 70202077da7SAlexey Zhikhartsev << ore::NV("MaxPathLength", MaxPathLength) << " blocks."; 70302077da7SAlexey Zhikhartsev }); 70402077da7SAlexey Zhikhartsev return Res; 70502077da7SAlexey Zhikhartsev } 70602077da7SAlexey Zhikhartsev 70702077da7SAlexey Zhikhartsev Visited.insert(BB); 708b167ada8SUsman Nadeem if (++NumVisited > MaxNumVisitiedPaths) 709b167ada8SUsman Nadeem return Res; 71002077da7SAlexey Zhikhartsev 711c229f767SXChy // Stop if we have reached the BB out of loop, since its successors have no 712c229f767SXChy // impact on the DFA. 713b167ada8SUsman Nadeem if (!SwitchOuterLoop->contains(BB)) 714c229f767SXChy return Res; 715c229f767SXChy 71602077da7SAlexey Zhikhartsev // Some blocks have multiple edges to the same successor, and this set 71702077da7SAlexey Zhikhartsev // is used to prevent a duplicate path from being generated 71802077da7SAlexey Zhikhartsev SmallSet<BasicBlock *, 4> Successors; 7193f7aea1aSNikita Popov for (BasicBlock *Succ : successors(BB)) { 7203f7aea1aSNikita Popov if (!Successors.insert(Succ).second) 72102077da7SAlexey Zhikhartsev continue; 72202077da7SAlexey Zhikhartsev 723b167ada8SUsman Nadeem // Found a cycle through the final block. 724b167ada8SUsman Nadeem if (Succ == ToBB) { 725b167ada8SUsman Nadeem Res.push_back({BB, ToBB}); 72602077da7SAlexey Zhikhartsev continue; 72702077da7SAlexey Zhikhartsev } 72802077da7SAlexey Zhikhartsev 72902077da7SAlexey Zhikhartsev // We have encountered a cycle, do not get caught in it 730380b8a60SNikita Popov if (Visited.contains(Succ)) 73102077da7SAlexey Zhikhartsev continue; 73202077da7SAlexey Zhikhartsev 733b167ada8SUsman Nadeem auto *CurrLoop = LI->getLoopFor(BB); 734b167ada8SUsman Nadeem // Unlikely to be beneficial. 735b167ada8SUsman Nadeem if (Succ == CurrLoop->getHeader()) 736b167ada8SUsman Nadeem continue; 737b167ada8SUsman Nadeem // Skip for now, revisit this condition later to see the impact on 738b167ada8SUsman Nadeem // coverage and compile time. 739b167ada8SUsman Nadeem if (LI->getLoopFor(Succ) != CurrLoop) 740b167ada8SUsman Nadeem continue; 741b167ada8SUsman Nadeem 742b167ada8SUsman Nadeem PathsType SuccPaths = paths(Succ, ToBB, Visited, PathDepth + 1); 743b167ada8SUsman Nadeem for (PathType &Path : SuccPaths) { 744b167ada8SUsman Nadeem Path.push_front(BB); 745b167ada8SUsman Nadeem Res.push_back(Path); 7468b0d7634SAlex Zhikhartsev if (Res.size() >= MaxNumPaths) { 7478b0d7634SAlex Zhikhartsev return Res; 7488b0d7634SAlex Zhikhartsev } 74902077da7SAlexey Zhikhartsev } 75002077da7SAlexey Zhikhartsev } 75102077da7SAlexey Zhikhartsev // This block could now be visited again from a different predecessor. Note 75202077da7SAlexey Zhikhartsev // that this will result in exponential runtime. Subpaths could possibly be 75302077da7SAlexey Zhikhartsev // cached but it takes a lot of memory to store them. 75402077da7SAlexey Zhikhartsev Visited.erase(BB); 75502077da7SAlexey Zhikhartsev return Res; 75602077da7SAlexey Zhikhartsev } 75702077da7SAlexey Zhikhartsev 75802077da7SAlexey Zhikhartsev /// Walk the use-def chain and collect all the state-defining instructions. 7598b0d7634SAlex Zhikhartsev /// 7608b0d7634SAlex Zhikhartsev /// Return an empty map if unpredictable values encountered inside the basic 7618b0d7634SAlex Zhikhartsev /// blocks of \p LoopPaths. 762b167ada8SUsman Nadeem StateDefMap getStateDefMap() const { 76302077da7SAlexey Zhikhartsev StateDefMap Res; 76402077da7SAlexey Zhikhartsev Value *FirstDef = Switch->getOperand(0); 7658b0d7634SAlex Zhikhartsev assert(isa<PHINode>(FirstDef) && "The first definition must be a phi."); 76602077da7SAlexey Zhikhartsev 76702077da7SAlexey Zhikhartsev SmallVector<PHINode *, 8> Stack; 76802077da7SAlexey Zhikhartsev Stack.push_back(dyn_cast<PHINode>(FirstDef)); 76902077da7SAlexey Zhikhartsev SmallSet<Value *, 16> SeenValues; 77002077da7SAlexey Zhikhartsev 77102077da7SAlexey Zhikhartsev while (!Stack.empty()) { 77284b07c9bSKazu Hirata PHINode *CurPhi = Stack.pop_back_val(); 77302077da7SAlexey Zhikhartsev 77402077da7SAlexey Zhikhartsev Res[CurPhi->getParent()] = CurPhi; 77502077da7SAlexey Zhikhartsev SeenValues.insert(CurPhi); 77602077da7SAlexey Zhikhartsev 7778b0d7634SAlex Zhikhartsev for (BasicBlock *IncomingBB : CurPhi->blocks()) { 7788b0d7634SAlex Zhikhartsev Value *Incoming = CurPhi->getIncomingValueForBlock(IncomingBB); 779b167ada8SUsman Nadeem bool IsOutsideLoops = !SwitchOuterLoop->contains(IncomingBB); 78002077da7SAlexey Zhikhartsev if (Incoming == FirstDef || isa<ConstantInt>(Incoming) || 7818b0d7634SAlex Zhikhartsev SeenValues.contains(Incoming) || IsOutsideLoops) { 78202077da7SAlexey Zhikhartsev continue; 78302077da7SAlexey Zhikhartsev } 78402077da7SAlexey Zhikhartsev 7858b0d7634SAlex Zhikhartsev // Any unpredictable value inside the loops means we must bail out. 7868b0d7634SAlex Zhikhartsev if (!isa<PHINode>(Incoming)) 7878b0d7634SAlex Zhikhartsev return StateDefMap(); 78802077da7SAlexey Zhikhartsev 78902077da7SAlexey Zhikhartsev Stack.push_back(cast<PHINode>(Incoming)); 79002077da7SAlexey Zhikhartsev } 79102077da7SAlexey Zhikhartsev } 79202077da7SAlexey Zhikhartsev 79302077da7SAlexey Zhikhartsev return Res; 79402077da7SAlexey Zhikhartsev } 79502077da7SAlexey Zhikhartsev 796b167ada8SUsman Nadeem unsigned NumVisited = 0; 79702077da7SAlexey Zhikhartsev SwitchInst *Switch; 79802077da7SAlexey Zhikhartsev BasicBlock *SwitchBlock; 79902077da7SAlexey Zhikhartsev OptimizationRemarkEmitter *ORE; 80002077da7SAlexey Zhikhartsev std::vector<ThreadingPath> TPaths; 801c229f767SXChy LoopInfo *LI; 802b167ada8SUsman Nadeem Loop *SwitchOuterLoop; 80302077da7SAlexey Zhikhartsev }; 80402077da7SAlexey Zhikhartsev 80502077da7SAlexey Zhikhartsev struct TransformDFA { 80602077da7SAlexey Zhikhartsev TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT, 80702077da7SAlexey Zhikhartsev AssumptionCache *AC, TargetTransformInfo *TTI, 80802077da7SAlexey Zhikhartsev OptimizationRemarkEmitter *ORE, 80902077da7SAlexey Zhikhartsev SmallPtrSet<const Value *, 32> EphValues) 81002077da7SAlexey Zhikhartsev : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE), 81102077da7SAlexey Zhikhartsev EphValues(EphValues) {} 81202077da7SAlexey Zhikhartsev 81302077da7SAlexey Zhikhartsev void run() { 81402077da7SAlexey Zhikhartsev if (isLegalAndProfitableToTransform()) { 81502077da7SAlexey Zhikhartsev createAllExitPaths(); 81602077da7SAlexey Zhikhartsev NumTransforms++; 81702077da7SAlexey Zhikhartsev } 81802077da7SAlexey Zhikhartsev } 81902077da7SAlexey Zhikhartsev 82002077da7SAlexey Zhikhartsev private: 82102077da7SAlexey Zhikhartsev /// This function performs both a legality check and profitability check at 82202077da7SAlexey Zhikhartsev /// the same time since it is convenient to do so. It iterates through all 82302077da7SAlexey Zhikhartsev /// blocks that will be cloned, and keeps track of the duplication cost. It 82402077da7SAlexey Zhikhartsev /// also returns false if it is illegal to clone some required block. 82502077da7SAlexey Zhikhartsev bool isLegalAndProfitableToTransform() { 82602077da7SAlexey Zhikhartsev CodeMetrics Metrics; 82702077da7SAlexey Zhikhartsev SwitchInst *Switch = SwitchPaths->getSwitchInst(); 82802077da7SAlexey Zhikhartsev 8292fba4694SXChy // Don't thread switch without multiple successors. 8302fba4694SXChy if (Switch->getNumSuccessors() <= 1) 8312fba4694SXChy return false; 8322fba4694SXChy 83302077da7SAlexey Zhikhartsev // Note that DuplicateBlockMap is not being used as intended here. It is 83402077da7SAlexey Zhikhartsev // just being used to ensure (BB, State) pairs are only counted once. 83502077da7SAlexey Zhikhartsev DuplicateBlockMap DuplicateMap; 83602077da7SAlexey Zhikhartsev 83702077da7SAlexey Zhikhartsev for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 83802077da7SAlexey Zhikhartsev PathType PathBBs = TPath.getPath(); 839019ffbf3SXChy APInt NextState = TPath.getExitValue(); 84002077da7SAlexey Zhikhartsev const BasicBlock *Determinator = TPath.getDeterminatorBB(); 84102077da7SAlexey Zhikhartsev 84202077da7SAlexey Zhikhartsev // Update Metrics for the Switch block, this is always cloned 84302077da7SAlexey Zhikhartsev BasicBlock *BB = SwitchPaths->getSwitchBlock(); 84402077da7SAlexey Zhikhartsev BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 84502077da7SAlexey Zhikhartsev if (!VisitedBB) { 84602077da7SAlexey Zhikhartsev Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 84702077da7SAlexey Zhikhartsev DuplicateMap[BB].push_back({BB, NextState}); 84802077da7SAlexey Zhikhartsev } 84902077da7SAlexey Zhikhartsev 85002077da7SAlexey Zhikhartsev // If the Switch block is the Determinator, then we can continue since 85102077da7SAlexey Zhikhartsev // this is the only block that is cloned and we already counted for it. 85202077da7SAlexey Zhikhartsev if (PathBBs.front() == Determinator) 85302077da7SAlexey Zhikhartsev continue; 85402077da7SAlexey Zhikhartsev 85502077da7SAlexey Zhikhartsev // Otherwise update Metrics for all blocks that will be cloned. If any 85602077da7SAlexey Zhikhartsev // block is already cloned and would be reused, don't double count it. 8575ea31555SKazu Hirata auto DetIt = llvm::find(PathBBs, Determinator); 85802077da7SAlexey Zhikhartsev for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 85902077da7SAlexey Zhikhartsev BB = *BBIt; 86002077da7SAlexey Zhikhartsev VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 86102077da7SAlexey Zhikhartsev if (VisitedBB) 86202077da7SAlexey Zhikhartsev continue; 86302077da7SAlexey Zhikhartsev Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 86402077da7SAlexey Zhikhartsev DuplicateMap[BB].push_back({BB, NextState}); 86502077da7SAlexey Zhikhartsev } 86602077da7SAlexey Zhikhartsev 86702077da7SAlexey Zhikhartsev if (Metrics.notDuplicatable) { 86802077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 86902077da7SAlexey Zhikhartsev << "non-duplicatable instructions.\n"); 87002077da7SAlexey Zhikhartsev ORE->emit([&]() { 87102077da7SAlexey Zhikhartsev return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst", 87202077da7SAlexey Zhikhartsev Switch) 87302077da7SAlexey Zhikhartsev << "Contains non-duplicatable instructions."; 87402077da7SAlexey Zhikhartsev }); 87502077da7SAlexey Zhikhartsev return false; 87602077da7SAlexey Zhikhartsev } 87702077da7SAlexey Zhikhartsev 878e0ac087fSSameer Sahasrabuddhe // FIXME: Allow jump threading with controlled convergence. 879e0ac087fSSameer Sahasrabuddhe if (Metrics.Convergence != ConvergenceKind::None) { 88002077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 88102077da7SAlexey Zhikhartsev << "convergent instructions.\n"); 88202077da7SAlexey Zhikhartsev ORE->emit([&]() { 88302077da7SAlexey Zhikhartsev return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) 88402077da7SAlexey Zhikhartsev << "Contains convergent instructions."; 88502077da7SAlexey Zhikhartsev }); 88602077da7SAlexey Zhikhartsev return false; 88702077da7SAlexey Zhikhartsev } 888f85c5079SPhilip Reames 889f85c5079SPhilip Reames if (!Metrics.NumInsts.isValid()) { 890f85c5079SPhilip Reames LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 891f85c5079SPhilip Reames << "instructions with invalid cost.\n"); 892f85c5079SPhilip Reames ORE->emit([&]() { 893f85c5079SPhilip Reames return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) 894f85c5079SPhilip Reames << "Contains instructions with invalid cost."; 895f85c5079SPhilip Reames }); 896f85c5079SPhilip Reames return false; 897f85c5079SPhilip Reames } 89802077da7SAlexey Zhikhartsev } 89902077da7SAlexey Zhikhartsev 9009c710ebbSDaniil Fukalov InstructionCost DuplicationCost = 0; 90102077da7SAlexey Zhikhartsev 90202077da7SAlexey Zhikhartsev unsigned JumpTableSize = 0; 90302077da7SAlexey Zhikhartsev TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr, 90402077da7SAlexey Zhikhartsev nullptr); 90502077da7SAlexey Zhikhartsev if (JumpTableSize == 0) { 90602077da7SAlexey Zhikhartsev // Factor in the number of conditional branches reduced from jump 90702077da7SAlexey Zhikhartsev // threading. Assume that lowering the switch block is implemented by 90802077da7SAlexey Zhikhartsev // using binary search, hence the LogBase2(). 90902077da7SAlexey Zhikhartsev unsigned CondBranches = 91002077da7SAlexey Zhikhartsev APInt(32, Switch->getNumSuccessors()).ceilLogBase2(); 9112fba4694SXChy assert(CondBranches > 0 && 9122fba4694SXChy "The threaded switch must have multiple branches"); 9139c710ebbSDaniil Fukalov DuplicationCost = Metrics.NumInsts / CondBranches; 91402077da7SAlexey Zhikhartsev } else { 91502077da7SAlexey Zhikhartsev // Compared with jump tables, the DFA optimizer removes an indirect branch 91602077da7SAlexey Zhikhartsev // on each loop iteration, thus making branch prediction more precise. The 91702077da7SAlexey Zhikhartsev // more branch targets there are, the more likely it is for the branch 91802077da7SAlexey Zhikhartsev // predictor to make a mistake, and the more benefit there is in the DFA 91902077da7SAlexey Zhikhartsev // optimizer. Thus, the more branch targets there are, the lower is the 92002077da7SAlexey Zhikhartsev // cost of the DFA opt. 9219c710ebbSDaniil Fukalov DuplicationCost = Metrics.NumInsts / JumpTableSize; 92202077da7SAlexey Zhikhartsev } 92302077da7SAlexey Zhikhartsev 92402077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block " 92502077da7SAlexey Zhikhartsev << SwitchPaths->getSwitchBlock()->getName() 92602077da7SAlexey Zhikhartsev << " is: " << DuplicationCost << "\n\n"); 92702077da7SAlexey Zhikhartsev 92802077da7SAlexey Zhikhartsev if (DuplicationCost > CostThreshold) { 92902077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the " 93002077da7SAlexey Zhikhartsev << "cost threshold.\n"); 93102077da7SAlexey Zhikhartsev ORE->emit([&]() { 93202077da7SAlexey Zhikhartsev return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch) 93302077da7SAlexey Zhikhartsev << "Duplication cost exceeds the cost threshold (cost=" 93402077da7SAlexey Zhikhartsev << ore::NV("Cost", DuplicationCost) 93502077da7SAlexey Zhikhartsev << ", threshold=" << ore::NV("Threshold", CostThreshold) << ")."; 93602077da7SAlexey Zhikhartsev }); 93702077da7SAlexey Zhikhartsev return false; 93802077da7SAlexey Zhikhartsev } 93902077da7SAlexey Zhikhartsev 94002077da7SAlexey Zhikhartsev ORE->emit([&]() { 94102077da7SAlexey Zhikhartsev return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch) 94202077da7SAlexey Zhikhartsev << "Switch statement jump-threaded."; 94302077da7SAlexey Zhikhartsev }); 94402077da7SAlexey Zhikhartsev 94502077da7SAlexey Zhikhartsev return true; 94602077da7SAlexey Zhikhartsev } 94702077da7SAlexey Zhikhartsev 94802077da7SAlexey Zhikhartsev /// Transform each threading path to effectively jump thread the DFA. 94902077da7SAlexey Zhikhartsev void createAllExitPaths() { 95002077da7SAlexey Zhikhartsev DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager); 95102077da7SAlexey Zhikhartsev 95202077da7SAlexey Zhikhartsev // Move the switch block to the end of the path, since it will be duplicated 95302077da7SAlexey Zhikhartsev BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock(); 95402077da7SAlexey Zhikhartsev for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 95502077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << TPath << "\n"); 956b167ada8SUsman Nadeem // TODO: Fix exit path creation logic so that we dont need this 957b167ada8SUsman Nadeem // placeholder. 958b167ada8SUsman Nadeem TPath.push_front(SwitchBlock); 95902077da7SAlexey Zhikhartsev } 96002077da7SAlexey Zhikhartsev 96102077da7SAlexey Zhikhartsev // Transform the ThreadingPaths and keep track of the cloned values 96202077da7SAlexey Zhikhartsev DuplicateBlockMap DuplicateMap; 96302077da7SAlexey Zhikhartsev DefMap NewDefs; 96402077da7SAlexey Zhikhartsev 96502077da7SAlexey Zhikhartsev SmallSet<BasicBlock *, 16> BlocksToClean; 96602077da7SAlexey Zhikhartsev for (BasicBlock *BB : successors(SwitchBlock)) 96702077da7SAlexey Zhikhartsev BlocksToClean.insert(BB); 96802077da7SAlexey Zhikhartsev 96902077da7SAlexey Zhikhartsev for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 97002077da7SAlexey Zhikhartsev createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); 97102077da7SAlexey Zhikhartsev NumPaths++; 97202077da7SAlexey Zhikhartsev } 97302077da7SAlexey Zhikhartsev 97402077da7SAlexey Zhikhartsev // After all paths are cloned, now update the last successor of the cloned 97502077da7SAlexey Zhikhartsev // path so it skips over the switch statement 97602077da7SAlexey Zhikhartsev for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) 97702077da7SAlexey Zhikhartsev updateLastSuccessor(TPath, DuplicateMap, &DTU); 97802077da7SAlexey Zhikhartsev 97902077da7SAlexey Zhikhartsev // For each instruction that was cloned and used outside, update its uses 98002077da7SAlexey Zhikhartsev updateSSA(NewDefs); 98102077da7SAlexey Zhikhartsev 98202077da7SAlexey Zhikhartsev // Clean PHI Nodes for the newly created blocks 98302077da7SAlexey Zhikhartsev for (BasicBlock *BB : BlocksToClean) 98402077da7SAlexey Zhikhartsev cleanPhiNodes(BB); 98502077da7SAlexey Zhikhartsev } 98602077da7SAlexey Zhikhartsev 98702077da7SAlexey Zhikhartsev /// For a specific ThreadingPath \p Path, create an exit path starting from 98802077da7SAlexey Zhikhartsev /// the determinator block. 98902077da7SAlexey Zhikhartsev /// 99002077da7SAlexey Zhikhartsev /// To remember the correct destination, we have to duplicate blocks 99102077da7SAlexey Zhikhartsev /// corresponding to each state. Also update the terminating instruction of 99202077da7SAlexey Zhikhartsev /// the predecessors, and phis in the successor blocks. 99302077da7SAlexey Zhikhartsev void createExitPath(DefMap &NewDefs, ThreadingPath &Path, 99402077da7SAlexey Zhikhartsev DuplicateBlockMap &DuplicateMap, 99502077da7SAlexey Zhikhartsev SmallSet<BasicBlock *, 16> &BlocksToClean, 99602077da7SAlexey Zhikhartsev DomTreeUpdater *DTU) { 997019ffbf3SXChy APInt NextState = Path.getExitValue(); 99802077da7SAlexey Zhikhartsev const BasicBlock *Determinator = Path.getDeterminatorBB(); 99902077da7SAlexey Zhikhartsev PathType PathBBs = Path.getPath(); 100002077da7SAlexey Zhikhartsev 100102077da7SAlexey Zhikhartsev // Don't select the placeholder block in front 100202077da7SAlexey Zhikhartsev if (PathBBs.front() == Determinator) 100302077da7SAlexey Zhikhartsev PathBBs.pop_front(); 100402077da7SAlexey Zhikhartsev 10055ea31555SKazu Hirata auto DetIt = llvm::find(PathBBs, Determinator); 10062c0fc0f3SXChy // When there is only one BB in PathBBs, the determinator takes itself as a 10072c0fc0f3SXChy // direct predecessor. 10082c0fc0f3SXChy BasicBlock *PrevBB = PathBBs.size() == 1 ? *DetIt : *std::prev(DetIt); 100902077da7SAlexey Zhikhartsev for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 101002077da7SAlexey Zhikhartsev BasicBlock *BB = *BBIt; 101102077da7SAlexey Zhikhartsev BlocksToClean.insert(BB); 101202077da7SAlexey Zhikhartsev 101302077da7SAlexey Zhikhartsev // We already cloned BB for this NextState, now just update the branch 101402077da7SAlexey Zhikhartsev // and continue. 101502077da7SAlexey Zhikhartsev BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap); 101602077da7SAlexey Zhikhartsev if (NextBB) { 101702077da7SAlexey Zhikhartsev updatePredecessor(PrevBB, BB, NextBB, DTU); 101802077da7SAlexey Zhikhartsev PrevBB = NextBB; 101902077da7SAlexey Zhikhartsev continue; 102002077da7SAlexey Zhikhartsev } 102102077da7SAlexey Zhikhartsev 102202077da7SAlexey Zhikhartsev // Clone the BB and update the successor of Prev to jump to the new block 102302077da7SAlexey Zhikhartsev BasicBlock *NewBB = cloneBlockAndUpdatePredecessor( 102402077da7SAlexey Zhikhartsev BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU); 102502077da7SAlexey Zhikhartsev DuplicateMap[BB].push_back({NewBB, NextState}); 102602077da7SAlexey Zhikhartsev BlocksToClean.insert(NewBB); 102702077da7SAlexey Zhikhartsev PrevBB = NewBB; 102802077da7SAlexey Zhikhartsev } 102902077da7SAlexey Zhikhartsev } 103002077da7SAlexey Zhikhartsev 103102077da7SAlexey Zhikhartsev /// Restore SSA form after cloning blocks. 103202077da7SAlexey Zhikhartsev /// 103302077da7SAlexey Zhikhartsev /// Each cloned block creates new defs for a variable, and the uses need to be 103402077da7SAlexey Zhikhartsev /// updated to reflect this. The uses may be replaced with a cloned value, or 103502077da7SAlexey Zhikhartsev /// some derived phi instruction. Note that all uses of a value defined in the 103602077da7SAlexey Zhikhartsev /// same block were already remapped when cloning the block. 103702077da7SAlexey Zhikhartsev void updateSSA(DefMap &NewDefs) { 103802077da7SAlexey Zhikhartsev SSAUpdaterBulk SSAUpdate; 103902077da7SAlexey Zhikhartsev SmallVector<Use *, 16> UsesToRename; 104002077da7SAlexey Zhikhartsev 1041c83c4b58SKazu Hirata for (const auto &KV : NewDefs) { 104202077da7SAlexey Zhikhartsev Instruction *I = KV.first; 104302077da7SAlexey Zhikhartsev BasicBlock *BB = I->getParent(); 104402077da7SAlexey Zhikhartsev std::vector<Instruction *> Cloned = KV.second; 104502077da7SAlexey Zhikhartsev 104602077da7SAlexey Zhikhartsev // Scan all uses of this instruction to see if it is used outside of its 104702077da7SAlexey Zhikhartsev // block, and if so, record them in UsesToRename. 104802077da7SAlexey Zhikhartsev for (Use &U : I->uses()) { 104902077da7SAlexey Zhikhartsev Instruction *User = cast<Instruction>(U.getUser()); 105002077da7SAlexey Zhikhartsev if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 105102077da7SAlexey Zhikhartsev if (UserPN->getIncomingBlock(U) == BB) 105202077da7SAlexey Zhikhartsev continue; 105302077da7SAlexey Zhikhartsev } else if (User->getParent() == BB) { 105402077da7SAlexey Zhikhartsev continue; 105502077da7SAlexey Zhikhartsev } 105602077da7SAlexey Zhikhartsev 105702077da7SAlexey Zhikhartsev UsesToRename.push_back(&U); 105802077da7SAlexey Zhikhartsev } 105902077da7SAlexey Zhikhartsev 106002077da7SAlexey Zhikhartsev // If there are no uses outside the block, we're done with this 106102077da7SAlexey Zhikhartsev // instruction. 106202077da7SAlexey Zhikhartsev if (UsesToRename.empty()) 106302077da7SAlexey Zhikhartsev continue; 106402077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I 106502077da7SAlexey Zhikhartsev << "\n"); 106602077da7SAlexey Zhikhartsev 106702077da7SAlexey Zhikhartsev // We found a use of I outside of BB. Rename all uses of I that are 106802077da7SAlexey Zhikhartsev // outside its block to be uses of the appropriate PHI node etc. See 106902077da7SAlexey Zhikhartsev // ValuesInBlocks with the values we know. 107002077da7SAlexey Zhikhartsev unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType()); 107102077da7SAlexey Zhikhartsev SSAUpdate.AddAvailableValue(VarNum, BB, I); 107202077da7SAlexey Zhikhartsev for (Instruction *New : Cloned) 107302077da7SAlexey Zhikhartsev SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New); 107402077da7SAlexey Zhikhartsev 107502077da7SAlexey Zhikhartsev while (!UsesToRename.empty()) 107602077da7SAlexey Zhikhartsev SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val()); 107702077da7SAlexey Zhikhartsev 107802077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "\n"); 107902077da7SAlexey Zhikhartsev } 108002077da7SAlexey Zhikhartsev // SSAUpdater handles phi placement and renaming uses with the appropriate 108102077da7SAlexey Zhikhartsev // value. 108202077da7SAlexey Zhikhartsev SSAUpdate.RewriteAllUses(DT); 108302077da7SAlexey Zhikhartsev } 108402077da7SAlexey Zhikhartsev 108502077da7SAlexey Zhikhartsev /// Clones a basic block, and adds it to the CFG. 108602077da7SAlexey Zhikhartsev /// 108702077da7SAlexey Zhikhartsev /// This function also includes updating phi nodes in the successors of the 108802077da7SAlexey Zhikhartsev /// BB, and remapping uses that were defined locally in the cloned BB. 108902077da7SAlexey Zhikhartsev BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB, 1090019ffbf3SXChy const APInt &NextState, 109102077da7SAlexey Zhikhartsev DuplicateBlockMap &DuplicateMap, 109202077da7SAlexey Zhikhartsev DefMap &NewDefs, 109302077da7SAlexey Zhikhartsev DomTreeUpdater *DTU) { 109402077da7SAlexey Zhikhartsev ValueToValueMapTy VMap; 109502077da7SAlexey Zhikhartsev BasicBlock *NewBB = CloneBasicBlock( 1096019ffbf3SXChy BB, VMap, ".jt" + std::to_string(NextState.getLimitedValue()), 1097019ffbf3SXChy BB->getParent()); 109802077da7SAlexey Zhikhartsev NewBB->moveAfter(BB); 109902077da7SAlexey Zhikhartsev NumCloned++; 110002077da7SAlexey Zhikhartsev 110102077da7SAlexey Zhikhartsev for (Instruction &I : *NewBB) { 110202077da7SAlexey Zhikhartsev // Do not remap operands of PHINode in case a definition in BB is an 110302077da7SAlexey Zhikhartsev // incoming value to a phi in the same block. This incoming value will 110402077da7SAlexey Zhikhartsev // be renamed later while restoring SSA. 110502077da7SAlexey Zhikhartsev if (isa<PHINode>(&I)) 110602077da7SAlexey Zhikhartsev continue; 110702077da7SAlexey Zhikhartsev RemapInstruction(&I, VMap, 110802077da7SAlexey Zhikhartsev RF_IgnoreMissingLocals | RF_NoModuleLevelChanges); 110902077da7SAlexey Zhikhartsev if (AssumeInst *II = dyn_cast<AssumeInst>(&I)) 111002077da7SAlexey Zhikhartsev AC->registerAssumption(II); 111102077da7SAlexey Zhikhartsev } 111202077da7SAlexey Zhikhartsev 111302077da7SAlexey Zhikhartsev updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap); 111402077da7SAlexey Zhikhartsev updatePredecessor(PrevBB, BB, NewBB, DTU); 111502077da7SAlexey Zhikhartsev updateDefMap(NewDefs, VMap); 111602077da7SAlexey Zhikhartsev 111702077da7SAlexey Zhikhartsev // Add all successors to the DominatorTree 111802077da7SAlexey Zhikhartsev SmallPtrSet<BasicBlock *, 4> SuccSet; 111902077da7SAlexey Zhikhartsev for (auto *SuccBB : successors(NewBB)) { 112002077da7SAlexey Zhikhartsev if (SuccSet.insert(SuccBB).second) 112102077da7SAlexey Zhikhartsev DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}}); 112202077da7SAlexey Zhikhartsev } 112302077da7SAlexey Zhikhartsev SuccSet.clear(); 112402077da7SAlexey Zhikhartsev return NewBB; 112502077da7SAlexey Zhikhartsev } 112602077da7SAlexey Zhikhartsev 112702077da7SAlexey Zhikhartsev /// Update the phi nodes in BB's successors. 112802077da7SAlexey Zhikhartsev /// 112902077da7SAlexey Zhikhartsev /// This means creating a new incoming value from NewBB with the new 113002077da7SAlexey Zhikhartsev /// instruction wherever there is an incoming value from BB. 113102077da7SAlexey Zhikhartsev void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB, 1132019ffbf3SXChy const APInt &NextState, ValueToValueMapTy &VMap, 113302077da7SAlexey Zhikhartsev DuplicateBlockMap &DuplicateMap) { 113402077da7SAlexey Zhikhartsev std::vector<BasicBlock *> BlocksToUpdate; 113502077da7SAlexey Zhikhartsev 113602077da7SAlexey Zhikhartsev // If BB is the last block in the path, we can simply update the one case 113702077da7SAlexey Zhikhartsev // successor that will be reached. 113802077da7SAlexey Zhikhartsev if (BB == SwitchPaths->getSwitchBlock()) { 113902077da7SAlexey Zhikhartsev SwitchInst *Switch = SwitchPaths->getSwitchInst(); 114002077da7SAlexey Zhikhartsev BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 114102077da7SAlexey Zhikhartsev BlocksToUpdate.push_back(NextCase); 114202077da7SAlexey Zhikhartsev BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap); 114302077da7SAlexey Zhikhartsev if (ClonedSucc) 114402077da7SAlexey Zhikhartsev BlocksToUpdate.push_back(ClonedSucc); 114502077da7SAlexey Zhikhartsev } 114602077da7SAlexey Zhikhartsev // Otherwise update phis in all successors. 114702077da7SAlexey Zhikhartsev else { 114802077da7SAlexey Zhikhartsev for (BasicBlock *Succ : successors(BB)) { 114902077da7SAlexey Zhikhartsev BlocksToUpdate.push_back(Succ); 115002077da7SAlexey Zhikhartsev 115102077da7SAlexey Zhikhartsev // Check if a successor has already been cloned for the particular exit 115202077da7SAlexey Zhikhartsev // value. In this case if a successor was already cloned, the phi nodes 115302077da7SAlexey Zhikhartsev // in the cloned block should be updated directly. 115402077da7SAlexey Zhikhartsev BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap); 115502077da7SAlexey Zhikhartsev if (ClonedSucc) 115602077da7SAlexey Zhikhartsev BlocksToUpdate.push_back(ClonedSucc); 115702077da7SAlexey Zhikhartsev } 115802077da7SAlexey Zhikhartsev } 115902077da7SAlexey Zhikhartsev 116002077da7SAlexey Zhikhartsev // If there is a phi with an incoming value from BB, create a new incoming 116102077da7SAlexey Zhikhartsev // value for the new predecessor ClonedBB. The value will either be the same 116202077da7SAlexey Zhikhartsev // value from BB or a cloned value. 116302077da7SAlexey Zhikhartsev for (BasicBlock *Succ : BlocksToUpdate) { 116402077da7SAlexey Zhikhartsev for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II); 116502077da7SAlexey Zhikhartsev ++II) { 116602077da7SAlexey Zhikhartsev Value *Incoming = Phi->getIncomingValueForBlock(BB); 116702077da7SAlexey Zhikhartsev if (Incoming) { 116802077da7SAlexey Zhikhartsev if (isa<Constant>(Incoming)) { 116902077da7SAlexey Zhikhartsev Phi->addIncoming(Incoming, ClonedBB); 117002077da7SAlexey Zhikhartsev continue; 117102077da7SAlexey Zhikhartsev } 117202077da7SAlexey Zhikhartsev Value *ClonedVal = VMap[Incoming]; 117302077da7SAlexey Zhikhartsev if (ClonedVal) 117402077da7SAlexey Zhikhartsev Phi->addIncoming(ClonedVal, ClonedBB); 117502077da7SAlexey Zhikhartsev else 117602077da7SAlexey Zhikhartsev Phi->addIncoming(Incoming, ClonedBB); 117702077da7SAlexey Zhikhartsev } 117802077da7SAlexey Zhikhartsev } 117902077da7SAlexey Zhikhartsev } 118002077da7SAlexey Zhikhartsev } 118102077da7SAlexey Zhikhartsev 118202077da7SAlexey Zhikhartsev /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all 118302077da7SAlexey Zhikhartsev /// other successors are kept as well. 118402077da7SAlexey Zhikhartsev void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB, 118502077da7SAlexey Zhikhartsev BasicBlock *NewBB, DomTreeUpdater *DTU) { 118602077da7SAlexey Zhikhartsev // When a path is reused, there is a chance that predecessors were already 118702077da7SAlexey Zhikhartsev // updated before. Check if the predecessor needs to be updated first. 118802077da7SAlexey Zhikhartsev if (!isPredecessor(OldBB, PrevBB)) 118902077da7SAlexey Zhikhartsev return; 119002077da7SAlexey Zhikhartsev 119102077da7SAlexey Zhikhartsev Instruction *PrevTerm = PrevBB->getTerminator(); 119202077da7SAlexey Zhikhartsev for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) { 119302077da7SAlexey Zhikhartsev if (PrevTerm->getSuccessor(Idx) == OldBB) { 119402077da7SAlexey Zhikhartsev OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true); 119502077da7SAlexey Zhikhartsev PrevTerm->setSuccessor(Idx, NewBB); 119602077da7SAlexey Zhikhartsev } 119702077da7SAlexey Zhikhartsev } 119802077da7SAlexey Zhikhartsev DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB}, 119902077da7SAlexey Zhikhartsev {DominatorTree::Insert, PrevBB, NewBB}}); 120002077da7SAlexey Zhikhartsev } 120102077da7SAlexey Zhikhartsev 120202077da7SAlexey Zhikhartsev /// Add new value mappings to the DefMap to keep track of all new definitions 120302077da7SAlexey Zhikhartsev /// for a particular instruction. These will be used while updating SSA form. 120402077da7SAlexey Zhikhartsev void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) { 12059d555b4aSOlle Fredriksson SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector; 12069d555b4aSOlle Fredriksson NewDefsVector.reserve(VMap.size()); 12079d555b4aSOlle Fredriksson 120802077da7SAlexey Zhikhartsev for (auto Entry : VMap) { 120902077da7SAlexey Zhikhartsev Instruction *Inst = 121002077da7SAlexey Zhikhartsev dyn_cast<Instruction>(const_cast<Value *>(Entry.first)); 121102077da7SAlexey Zhikhartsev if (!Inst || !Entry.second || isa<BranchInst>(Inst) || 121202077da7SAlexey Zhikhartsev isa<SwitchInst>(Inst)) { 121302077da7SAlexey Zhikhartsev continue; 121402077da7SAlexey Zhikhartsev } 121502077da7SAlexey Zhikhartsev 121602077da7SAlexey Zhikhartsev Instruction *Cloned = dyn_cast<Instruction>(Entry.second); 121702077da7SAlexey Zhikhartsev if (!Cloned) 121802077da7SAlexey Zhikhartsev continue; 121902077da7SAlexey Zhikhartsev 12209d555b4aSOlle Fredriksson NewDefsVector.push_back({Inst, Cloned}); 122102077da7SAlexey Zhikhartsev } 12229d555b4aSOlle Fredriksson 12239d555b4aSOlle Fredriksson // Sort the defs to get deterministic insertion order into NewDefs. 12249d555b4aSOlle Fredriksson sort(NewDefsVector, [](const auto &LHS, const auto &RHS) { 12259d555b4aSOlle Fredriksson if (LHS.first == RHS.first) 12269d555b4aSOlle Fredriksson return LHS.second->comesBefore(RHS.second); 12279d555b4aSOlle Fredriksson return LHS.first->comesBefore(RHS.first); 12289d555b4aSOlle Fredriksson }); 12299d555b4aSOlle Fredriksson 12309d555b4aSOlle Fredriksson for (const auto &KV : NewDefsVector) 12319d555b4aSOlle Fredriksson NewDefs[KV.first].push_back(KV.second); 123202077da7SAlexey Zhikhartsev } 123302077da7SAlexey Zhikhartsev 123402077da7SAlexey Zhikhartsev /// Update the last branch of a particular cloned path to point to the correct 123502077da7SAlexey Zhikhartsev /// case successor. 123602077da7SAlexey Zhikhartsev /// 123702077da7SAlexey Zhikhartsev /// Note that this is an optional step and would have been done in later 123802077da7SAlexey Zhikhartsev /// optimizations, but it makes the CFG significantly easier to work with. 123902077da7SAlexey Zhikhartsev void updateLastSuccessor(ThreadingPath &TPath, 124002077da7SAlexey Zhikhartsev DuplicateBlockMap &DuplicateMap, 124102077da7SAlexey Zhikhartsev DomTreeUpdater *DTU) { 1242019ffbf3SXChy APInt NextState = TPath.getExitValue(); 124302077da7SAlexey Zhikhartsev BasicBlock *BB = TPath.getPath().back(); 124402077da7SAlexey Zhikhartsev BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap); 124502077da7SAlexey Zhikhartsev 124602077da7SAlexey Zhikhartsev // Note multiple paths can end at the same block so check that it is not 124702077da7SAlexey Zhikhartsev // updated yet 124802077da7SAlexey Zhikhartsev if (!isa<SwitchInst>(LastBlock->getTerminator())) 124902077da7SAlexey Zhikhartsev return; 125002077da7SAlexey Zhikhartsev SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator()); 125102077da7SAlexey Zhikhartsev BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 125202077da7SAlexey Zhikhartsev 125302077da7SAlexey Zhikhartsev std::vector<DominatorTree::UpdateType> DTUpdates; 125402077da7SAlexey Zhikhartsev SmallPtrSet<BasicBlock *, 4> SuccSet; 125502077da7SAlexey Zhikhartsev for (BasicBlock *Succ : successors(LastBlock)) { 125602077da7SAlexey Zhikhartsev if (Succ != NextCase && SuccSet.insert(Succ).second) 125702077da7SAlexey Zhikhartsev DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ}); 125802077da7SAlexey Zhikhartsev } 125902077da7SAlexey Zhikhartsev 126002077da7SAlexey Zhikhartsev Switch->eraseFromParent(); 126102077da7SAlexey Zhikhartsev BranchInst::Create(NextCase, LastBlock); 126202077da7SAlexey Zhikhartsev 126302077da7SAlexey Zhikhartsev DTU->applyUpdates(DTUpdates); 126402077da7SAlexey Zhikhartsev } 126502077da7SAlexey Zhikhartsev 126602077da7SAlexey Zhikhartsev /// After cloning blocks, some of the phi nodes have extra incoming values 126702077da7SAlexey Zhikhartsev /// that are no longer used. This function removes them. 126802077da7SAlexey Zhikhartsev void cleanPhiNodes(BasicBlock *BB) { 126902077da7SAlexey Zhikhartsev // If BB is no longer reachable, remove any remaining phi nodes 127002077da7SAlexey Zhikhartsev if (pred_empty(BB)) { 127102077da7SAlexey Zhikhartsev std::vector<PHINode *> PhiToRemove; 127202077da7SAlexey Zhikhartsev for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 127302077da7SAlexey Zhikhartsev PhiToRemove.push_back(Phi); 127402077da7SAlexey Zhikhartsev } 127502077da7SAlexey Zhikhartsev for (PHINode *PN : PhiToRemove) { 127653dc0f10SNuno Lopes PN->replaceAllUsesWith(PoisonValue::get(PN->getType())); 127702077da7SAlexey Zhikhartsev PN->eraseFromParent(); 127802077da7SAlexey Zhikhartsev } 127902077da7SAlexey Zhikhartsev return; 128002077da7SAlexey Zhikhartsev } 128102077da7SAlexey Zhikhartsev 128202077da7SAlexey Zhikhartsev // Remove any incoming values that come from an invalid predecessor 128302077da7SAlexey Zhikhartsev for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 128402077da7SAlexey Zhikhartsev std::vector<BasicBlock *> BlocksToRemove; 128502077da7SAlexey Zhikhartsev for (BasicBlock *IncomingBB : Phi->blocks()) { 128602077da7SAlexey Zhikhartsev if (!isPredecessor(BB, IncomingBB)) 128702077da7SAlexey Zhikhartsev BlocksToRemove.push_back(IncomingBB); 128802077da7SAlexey Zhikhartsev } 128902077da7SAlexey Zhikhartsev for (BasicBlock *BB : BlocksToRemove) 129002077da7SAlexey Zhikhartsev Phi->removeIncomingValue(BB); 129102077da7SAlexey Zhikhartsev } 129202077da7SAlexey Zhikhartsev } 129302077da7SAlexey Zhikhartsev 129402077da7SAlexey Zhikhartsev /// Checks if BB was already cloned for a particular next state value. If it 129502077da7SAlexey Zhikhartsev /// was then it returns this cloned block, and otherwise null. 1296019ffbf3SXChy BasicBlock *getClonedBB(BasicBlock *BB, const APInt &NextState, 129702077da7SAlexey Zhikhartsev DuplicateBlockMap &DuplicateMap) { 129802077da7SAlexey Zhikhartsev CloneList ClonedBBs = DuplicateMap[BB]; 129902077da7SAlexey Zhikhartsev 130002077da7SAlexey Zhikhartsev // Find an entry in the CloneList with this NextState. If it exists then 130102077da7SAlexey Zhikhartsev // return the corresponding BB 130202077da7SAlexey Zhikhartsev auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) { 130302077da7SAlexey Zhikhartsev return C.State == NextState; 130402077da7SAlexey Zhikhartsev }); 130502077da7SAlexey Zhikhartsev return It != ClonedBBs.end() ? (*It).BB : nullptr; 130602077da7SAlexey Zhikhartsev } 130702077da7SAlexey Zhikhartsev 130802077da7SAlexey Zhikhartsev /// Helper to get the successor corresponding to a particular case value for 130902077da7SAlexey Zhikhartsev /// a switch statement. 1310019ffbf3SXChy BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) { 131102077da7SAlexey Zhikhartsev BasicBlock *NextCase = nullptr; 131202077da7SAlexey Zhikhartsev for (auto Case : Switch->cases()) { 1313019ffbf3SXChy if (Case.getCaseValue()->getValue() == NextState) { 131402077da7SAlexey Zhikhartsev NextCase = Case.getCaseSuccessor(); 131502077da7SAlexey Zhikhartsev break; 131602077da7SAlexey Zhikhartsev } 131702077da7SAlexey Zhikhartsev } 131802077da7SAlexey Zhikhartsev if (!NextCase) 131902077da7SAlexey Zhikhartsev NextCase = Switch->getDefaultDest(); 132002077da7SAlexey Zhikhartsev return NextCase; 132102077da7SAlexey Zhikhartsev } 132202077da7SAlexey Zhikhartsev 132302077da7SAlexey Zhikhartsev /// Returns true if IncomingBB is a predecessor of BB. 132402077da7SAlexey Zhikhartsev bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { 1325f83a88a1SKazu Hirata return llvm::is_contained(predecessors(BB), IncomingBB); 132602077da7SAlexey Zhikhartsev } 132702077da7SAlexey Zhikhartsev 132802077da7SAlexey Zhikhartsev AllSwitchPaths *SwitchPaths; 132902077da7SAlexey Zhikhartsev DominatorTree *DT; 133002077da7SAlexey Zhikhartsev AssumptionCache *AC; 133102077da7SAlexey Zhikhartsev TargetTransformInfo *TTI; 133202077da7SAlexey Zhikhartsev OptimizationRemarkEmitter *ORE; 133302077da7SAlexey Zhikhartsev SmallPtrSet<const Value *, 32> EphValues; 133402077da7SAlexey Zhikhartsev std::vector<ThreadingPath> TPaths; 133502077da7SAlexey Zhikhartsev }; 133602077da7SAlexey Zhikhartsev 133702077da7SAlexey Zhikhartsev bool DFAJumpThreading::run(Function &F) { 133802077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n"); 133902077da7SAlexey Zhikhartsev 134002077da7SAlexey Zhikhartsev if (F.hasOptSize()) { 134102077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n"); 134202077da7SAlexey Zhikhartsev return false; 134302077da7SAlexey Zhikhartsev } 134402077da7SAlexey Zhikhartsev 134502077da7SAlexey Zhikhartsev if (ClViewCfgBefore) 134602077da7SAlexey Zhikhartsev F.viewCFG(); 134702077da7SAlexey Zhikhartsev 134802077da7SAlexey Zhikhartsev SmallVector<AllSwitchPaths, 2> ThreadableLoops; 134902077da7SAlexey Zhikhartsev bool MadeChanges = false; 1350c229f767SXChy LoopInfoBroken = false; 135102077da7SAlexey Zhikhartsev 135202077da7SAlexey Zhikhartsev for (BasicBlock &BB : F) { 135302077da7SAlexey Zhikhartsev auto *SI = dyn_cast<SwitchInst>(BB.getTerminator()); 135402077da7SAlexey Zhikhartsev if (!SI) 135502077da7SAlexey Zhikhartsev continue; 135602077da7SAlexey Zhikhartsev 135702077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName() 13588b0d7634SAlex Zhikhartsev << " is a candidate\n"); 13596b53ada6SXChy MainSwitch Switch(SI, LI, ORE); 136002077da7SAlexey Zhikhartsev 1361b167ada8SUsman Nadeem if (!Switch.getInstr()) { 1362b167ada8SUsman Nadeem LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is not a " 1363b167ada8SUsman Nadeem << "candidate for jump threading\n"); 136402077da7SAlexey Zhikhartsev continue; 1365b167ada8SUsman Nadeem } 136602077da7SAlexey Zhikhartsev 136702077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a " 136802077da7SAlexey Zhikhartsev << "candidate for jump threading\n"); 136902077da7SAlexey Zhikhartsev LLVM_DEBUG(SI->dump()); 137002077da7SAlexey Zhikhartsev 137102077da7SAlexey Zhikhartsev unfoldSelectInstrs(DT, Switch.getSelectInsts()); 137202077da7SAlexey Zhikhartsev if (!Switch.getSelectInsts().empty()) 137302077da7SAlexey Zhikhartsev MadeChanges = true; 137402077da7SAlexey Zhikhartsev 1375b167ada8SUsman Nadeem AllSwitchPaths SwitchPaths(&Switch, ORE, LI, 1376b167ada8SUsman Nadeem LI->getLoopFor(&BB)->getOutermostLoop()); 137702077da7SAlexey Zhikhartsev SwitchPaths.run(); 137802077da7SAlexey Zhikhartsev 137902077da7SAlexey Zhikhartsev if (SwitchPaths.getNumThreadingPaths() > 0) { 138002077da7SAlexey Zhikhartsev ThreadableLoops.push_back(SwitchPaths); 138102077da7SAlexey Zhikhartsev 138202077da7SAlexey Zhikhartsev // For the time being limit this optimization to occurring once in a 138302077da7SAlexey Zhikhartsev // function since it can change the CFG significantly. This is not a 138402077da7SAlexey Zhikhartsev // strict requirement but it can cause buggy behavior if there is an 138502077da7SAlexey Zhikhartsev // overlap of blocks in different opportunities. There is a lot of room to 138602077da7SAlexey Zhikhartsev // experiment with catching more opportunities here. 1387c229f767SXChy // NOTE: To release this contraint, we must handle LoopInfo invalidation 138802077da7SAlexey Zhikhartsev break; 138902077da7SAlexey Zhikhartsev } 139002077da7SAlexey Zhikhartsev } 139102077da7SAlexey Zhikhartsev 1392c229f767SXChy #ifdef NDEBUG 1393c229f767SXChy LI->verify(*DT); 1394c229f767SXChy #endif 1395c229f767SXChy 139602077da7SAlexey Zhikhartsev SmallPtrSet<const Value *, 32> EphValues; 139702077da7SAlexey Zhikhartsev if (ThreadableLoops.size() > 0) 139802077da7SAlexey Zhikhartsev CodeMetrics::collectEphemeralValues(&F, AC, EphValues); 139902077da7SAlexey Zhikhartsev 140002077da7SAlexey Zhikhartsev for (AllSwitchPaths SwitchPaths : ThreadableLoops) { 140102077da7SAlexey Zhikhartsev TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); 140202077da7SAlexey Zhikhartsev Transform.run(); 140302077da7SAlexey Zhikhartsev MadeChanges = true; 1404c229f767SXChy LoopInfoBroken = true; 140502077da7SAlexey Zhikhartsev } 140602077da7SAlexey Zhikhartsev 140702077da7SAlexey Zhikhartsev #ifdef EXPENSIVE_CHECKS 140802077da7SAlexey Zhikhartsev assert(DT->verify(DominatorTree::VerificationLevel::Full)); 140902077da7SAlexey Zhikhartsev verifyFunction(F, &dbgs()); 141002077da7SAlexey Zhikhartsev #endif 141102077da7SAlexey Zhikhartsev 141202077da7SAlexey Zhikhartsev return MadeChanges; 141302077da7SAlexey Zhikhartsev } 141402077da7SAlexey Zhikhartsev 141502077da7SAlexey Zhikhartsev } // end anonymous namespace 141602077da7SAlexey Zhikhartsev 141702077da7SAlexey Zhikhartsev /// Integrate with the new Pass Manager 141802077da7SAlexey Zhikhartsev PreservedAnalyses DFAJumpThreadingPass::run(Function &F, 141902077da7SAlexey Zhikhartsev FunctionAnalysisManager &AM) { 142002077da7SAlexey Zhikhartsev AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F); 142102077da7SAlexey Zhikhartsev DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); 14226b53ada6SXChy LoopInfo &LI = AM.getResult<LoopAnalysis>(F); 142302077da7SAlexey Zhikhartsev TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); 142402077da7SAlexey Zhikhartsev OptimizationRemarkEmitter ORE(&F); 1425c229f767SXChy DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE); 1426c229f767SXChy if (!ThreadImpl.run(F)) 142702077da7SAlexey Zhikhartsev return PreservedAnalyses::all(); 142802077da7SAlexey Zhikhartsev 142902077da7SAlexey Zhikhartsev PreservedAnalyses PA; 143002077da7SAlexey Zhikhartsev PA.preserve<DominatorTreeAnalysis>(); 1431c229f767SXChy if (!ThreadImpl.LoopInfoBroken) 1432c229f767SXChy PA.preserve<LoopAnalysis>(); 143302077da7SAlexey Zhikhartsev return PA; 143402077da7SAlexey Zhikhartsev } 1435