102077da7SAlexey Zhikhartsev //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===// 202077da7SAlexey Zhikhartsev // 3c874dd53SChristopher Di Bella // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4c874dd53SChristopher Di Bella // See https://llvm.org/LICENSE.txt for license information. 5c874dd53SChristopher Di Bella // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 602077da7SAlexey Zhikhartsev // 702077da7SAlexey Zhikhartsev //===----------------------------------------------------------------------===// 802077da7SAlexey Zhikhartsev // 902077da7SAlexey Zhikhartsev // Transform each threading path to effectively jump thread the DFA. For 1002077da7SAlexey Zhikhartsev // example, the CFG below could be transformed as follows, where the cloned 1102077da7SAlexey Zhikhartsev // blocks unconditionally branch to the next correct case based on what is 1202077da7SAlexey Zhikhartsev // identified in the analysis. 1302077da7SAlexey Zhikhartsev // 1402077da7SAlexey Zhikhartsev // sw.bb sw.bb 1502077da7SAlexey Zhikhartsev // / | \ / | \ 1602077da7SAlexey Zhikhartsev // case1 case2 case3 case1 case2 case3 1702077da7SAlexey Zhikhartsev // \ | / | | | 1802077da7SAlexey Zhikhartsev // determinator det.2 det.3 det.1 1902077da7SAlexey Zhikhartsev // br sw.bb / | \ 2002077da7SAlexey Zhikhartsev // sw.bb.2 sw.bb.3 sw.bb.1 2102077da7SAlexey Zhikhartsev // br case2 br case3 br case1§ 2202077da7SAlexey Zhikhartsev // 2302077da7SAlexey Zhikhartsev // Definitions and Terminology: 2402077da7SAlexey Zhikhartsev // 2502077da7SAlexey Zhikhartsev // * Threading path: 2602077da7SAlexey Zhikhartsev // a list of basic blocks, the exit state, and the block that determines 2702077da7SAlexey Zhikhartsev // the next state, for which the following notation will be used: 2802077da7SAlexey Zhikhartsev // < path of BBs that form a cycle > [ state, determinator ] 2902077da7SAlexey Zhikhartsev // 3002077da7SAlexey Zhikhartsev // * Predictable switch: 3102077da7SAlexey Zhikhartsev // The switch variable is always a known constant so that all conditional 3202077da7SAlexey Zhikhartsev // jumps based on switch variable can be converted to unconditional jump. 3302077da7SAlexey Zhikhartsev // 3402077da7SAlexey Zhikhartsev // * Determinator: 3502077da7SAlexey Zhikhartsev // The basic block that determines the next state of the DFA. 3602077da7SAlexey Zhikhartsev // 3702077da7SAlexey Zhikhartsev // Representing the optimization in C-like pseudocode: the code pattern on the 3802077da7SAlexey Zhikhartsev // left could functionally be transformed to the right pattern if the switch 3902077da7SAlexey Zhikhartsev // condition is predictable. 4002077da7SAlexey Zhikhartsev // 4102077da7SAlexey Zhikhartsev // X = A goto A 4202077da7SAlexey Zhikhartsev // for (...) A: 4302077da7SAlexey Zhikhartsev // switch (X) ... 4402077da7SAlexey Zhikhartsev // case A goto B 4502077da7SAlexey Zhikhartsev // X = B B: 4602077da7SAlexey Zhikhartsev // case B ... 4702077da7SAlexey Zhikhartsev // X = C goto C 4802077da7SAlexey Zhikhartsev // 4902077da7SAlexey Zhikhartsev // The pass first checks that switch variable X is decided by the control flow 5002077da7SAlexey Zhikhartsev // path taken in the loop; for example, in case B, the next value of X is 5102077da7SAlexey Zhikhartsev // decided to be C. It then enumerates through all paths in the loop and labels 5202077da7SAlexey Zhikhartsev // the basic blocks where the next state is decided. 5302077da7SAlexey Zhikhartsev // 5402077da7SAlexey Zhikhartsev // Using this information it creates new paths that unconditionally branch to 5502077da7SAlexey Zhikhartsev // the next case. This involves cloning code, so it only gets triggered if the 5602077da7SAlexey Zhikhartsev // amount of code duplicated is below a threshold. 5702077da7SAlexey Zhikhartsev // 5802077da7SAlexey Zhikhartsev //===----------------------------------------------------------------------===// 5902077da7SAlexey Zhikhartsev 6002077da7SAlexey Zhikhartsev #include "llvm/Transforms/Scalar/DFAJumpThreading.h" 6102077da7SAlexey Zhikhartsev #include "llvm/ADT/APInt.h" 6202077da7SAlexey Zhikhartsev #include "llvm/ADT/DenseMap.h" 6302077da7SAlexey Zhikhartsev #include "llvm/ADT/SmallSet.h" 6402077da7SAlexey Zhikhartsev #include "llvm/ADT/Statistic.h" 6502077da7SAlexey Zhikhartsev #include "llvm/Analysis/AssumptionCache.h" 6602077da7SAlexey Zhikhartsev #include "llvm/Analysis/CodeMetrics.h" 67a494ae43Sserge-sans-paille #include "llvm/Analysis/DomTreeUpdater.h" 686b53ada6SXChy #include "llvm/Analysis/LoopInfo.h" 6902077da7SAlexey Zhikhartsev #include "llvm/Analysis/OptimizationRemarkEmitter.h" 7002077da7SAlexey Zhikhartsev #include "llvm/Analysis/TargetTransformInfo.h" 7102077da7SAlexey Zhikhartsev #include "llvm/IR/CFG.h" 7202077da7SAlexey Zhikhartsev #include "llvm/IR/Constants.h" 7302077da7SAlexey Zhikhartsev #include "llvm/IR/IntrinsicInst.h" 7402077da7SAlexey Zhikhartsev #include "llvm/Support/CommandLine.h" 7502077da7SAlexey Zhikhartsev #include "llvm/Support/Debug.h" 7602077da7SAlexey Zhikhartsev #include "llvm/Transforms/Utils/Cloning.h" 7702077da7SAlexey Zhikhartsev #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" 7802077da7SAlexey Zhikhartsev #include "llvm/Transforms/Utils/ValueMapper.h" 7902077da7SAlexey Zhikhartsev #include <deque> 8002077da7SAlexey Zhikhartsev 81f90a66a5Sserge-sans-paille #ifdef EXPENSIVE_CHECKS 82f90a66a5Sserge-sans-paille #include "llvm/IR/Verifier.h" 83f90a66a5Sserge-sans-paille #endif 84f90a66a5Sserge-sans-paille 8502077da7SAlexey Zhikhartsev using namespace llvm; 8602077da7SAlexey Zhikhartsev 8702077da7SAlexey Zhikhartsev #define DEBUG_TYPE "dfa-jump-threading" 8802077da7SAlexey Zhikhartsev 8902077da7SAlexey Zhikhartsev STATISTIC(NumTransforms, "Number of transformations done"); 9002077da7SAlexey Zhikhartsev STATISTIC(NumCloned, "Number of blocks cloned"); 9102077da7SAlexey Zhikhartsev STATISTIC(NumPaths, "Number of individual paths threaded"); 9202077da7SAlexey Zhikhartsev 9302077da7SAlexey Zhikhartsev static cl::opt<bool> 9402077da7SAlexey Zhikhartsev ClViewCfgBefore("dfa-jump-view-cfg-before", 9502077da7SAlexey Zhikhartsev cl::desc("View the CFG before DFA Jump Threading"), 9602077da7SAlexey Zhikhartsev cl::Hidden, cl::init(false)); 9702077da7SAlexey Zhikhartsev 98c9325f8aSUsman Nadeem static cl::opt<bool> EarlyExitHeuristic( 99c9325f8aSUsman Nadeem "dfa-early-exit-heuristic", 100c9325f8aSUsman Nadeem cl::desc("Exit early if an unpredictable value come from the same loop"), 101c9325f8aSUsman Nadeem cl::Hidden, cl::init(true)); 102c9325f8aSUsman Nadeem 10302077da7SAlexey Zhikhartsev static cl::opt<unsigned> MaxPathLength( 10402077da7SAlexey Zhikhartsev "dfa-max-path-length", 10502077da7SAlexey Zhikhartsev cl::desc("Max number of blocks searched to find a threading path"), 10602077da7SAlexey Zhikhartsev cl::Hidden, cl::init(20)); 10702077da7SAlexey Zhikhartsev 108b167ada8SUsman Nadeem static cl::opt<unsigned> MaxNumVisitiedPaths( 109b167ada8SUsman Nadeem "dfa-max-num-visited-paths", 110b167ada8SUsman Nadeem cl::desc( 111b167ada8SUsman Nadeem "Max number of blocks visited while enumerating paths around a switch"), 112*5fb57131SUsman Nadeem cl::Hidden, cl::init(2500)); 113b167ada8SUsman Nadeem 1147fa41d8aSXChy static cl::opt<unsigned> 1157fa41d8aSXChy MaxNumPaths("dfa-max-num-paths", 1168b0d7634SAlex Zhikhartsev cl::desc("Max number of paths enumerated around a switch"), 1178b0d7634SAlex Zhikhartsev cl::Hidden, cl::init(200)); 1188b0d7634SAlex Zhikhartsev 11902077da7SAlexey Zhikhartsev static cl::opt<unsigned> 12002077da7SAlexey Zhikhartsev CostThreshold("dfa-cost-threshold", 12102077da7SAlexey Zhikhartsev cl::desc("Maximum cost accepted for the transformation"), 12202077da7SAlexey Zhikhartsev cl::Hidden, cl::init(50)); 12302077da7SAlexey Zhikhartsev 12402077da7SAlexey Zhikhartsev namespace { 12502077da7SAlexey Zhikhartsev 12602077da7SAlexey Zhikhartsev class SelectInstToUnfold { 12702077da7SAlexey Zhikhartsev SelectInst *SI; 12802077da7SAlexey Zhikhartsev PHINode *SIUse; 12902077da7SAlexey Zhikhartsev 13002077da7SAlexey Zhikhartsev public: 13102077da7SAlexey Zhikhartsev SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {} 13202077da7SAlexey Zhikhartsev 13302077da7SAlexey Zhikhartsev SelectInst *getInst() { return SI; } 13402077da7SAlexey Zhikhartsev PHINode *getUse() { return SIUse; } 13502077da7SAlexey Zhikhartsev 13602077da7SAlexey Zhikhartsev explicit operator bool() const { return SI && SIUse; } 13702077da7SAlexey Zhikhartsev }; 13802077da7SAlexey Zhikhartsev 139c229f767SXChy void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, 14002077da7SAlexey Zhikhartsev std::vector<SelectInstToUnfold> *NewSIsToUnfold, 14102077da7SAlexey Zhikhartsev std::vector<BasicBlock *> *NewBBs); 14202077da7SAlexey Zhikhartsev 14302077da7SAlexey Zhikhartsev class DFAJumpThreading { 14402077da7SAlexey Zhikhartsev public: 1456b53ada6SXChy DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI, 14602077da7SAlexey Zhikhartsev TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE) 1476b53ada6SXChy : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {} 14802077da7SAlexey Zhikhartsev 14902077da7SAlexey Zhikhartsev bool run(Function &F); 150c229f767SXChy bool LoopInfoBroken; 15102077da7SAlexey Zhikhartsev 15202077da7SAlexey Zhikhartsev private: 15302077da7SAlexey Zhikhartsev void 15402077da7SAlexey Zhikhartsev unfoldSelectInstrs(DominatorTree *DT, 15502077da7SAlexey Zhikhartsev const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { 15602077da7SAlexey Zhikhartsev DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 15734d48279SKazu Hirata SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts); 15802077da7SAlexey Zhikhartsev 15902077da7SAlexey Zhikhartsev while (!Stack.empty()) { 16084b07c9bSKazu Hirata SelectInstToUnfold SIToUnfold = Stack.pop_back_val(); 16102077da7SAlexey Zhikhartsev 16202077da7SAlexey Zhikhartsev std::vector<SelectInstToUnfold> NewSIsToUnfold; 16302077da7SAlexey Zhikhartsev std::vector<BasicBlock *> NewBBs; 164c229f767SXChy unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs); 16502077da7SAlexey Zhikhartsev 16602077da7SAlexey Zhikhartsev // Put newly discovered select instructions into the work list. 16702077da7SAlexey Zhikhartsev for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold) 16802077da7SAlexey Zhikhartsev Stack.push_back(NewSIToUnfold); 16902077da7SAlexey Zhikhartsev } 17002077da7SAlexey Zhikhartsev } 17102077da7SAlexey Zhikhartsev 17202077da7SAlexey Zhikhartsev AssumptionCache *AC; 17302077da7SAlexey Zhikhartsev DominatorTree *DT; 1746b53ada6SXChy LoopInfo *LI; 17502077da7SAlexey Zhikhartsev TargetTransformInfo *TTI; 17602077da7SAlexey Zhikhartsev OptimizationRemarkEmitter *ORE; 17702077da7SAlexey Zhikhartsev }; 17802077da7SAlexey Zhikhartsev 17902077da7SAlexey Zhikhartsev } // end anonymous namespace 18002077da7SAlexey Zhikhartsev 18102077da7SAlexey Zhikhartsev namespace { 18202077da7SAlexey Zhikhartsev 18302077da7SAlexey Zhikhartsev /// Unfold the select instruction held in \p SIToUnfold by replacing it with 18402077da7SAlexey Zhikhartsev /// control flow. 18502077da7SAlexey Zhikhartsev /// 18602077da7SAlexey Zhikhartsev /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly 18702077da7SAlexey Zhikhartsev /// created basic blocks into \p NewBBs. 18802077da7SAlexey Zhikhartsev /// 18902077da7SAlexey Zhikhartsev /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. 190c229f767SXChy void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, 19102077da7SAlexey Zhikhartsev std::vector<SelectInstToUnfold> *NewSIsToUnfold, 19202077da7SAlexey Zhikhartsev std::vector<BasicBlock *> *NewBBs) { 19302077da7SAlexey Zhikhartsev SelectInst *SI = SIToUnfold.getInst(); 19402077da7SAlexey Zhikhartsev PHINode *SIUse = SIToUnfold.getUse(); 19502077da7SAlexey Zhikhartsev BasicBlock *StartBlock = SI->getParent(); 19602077da7SAlexey Zhikhartsev BranchInst *StartBlockTerm = 19702077da7SAlexey Zhikhartsev dyn_cast<BranchInst>(StartBlock->getTerminator()); 19802077da7SAlexey Zhikhartsev 199b167ada8SUsman Nadeem assert(StartBlockTerm); 20002077da7SAlexey Zhikhartsev assert(SI->hasOneUse()); 20102077da7SAlexey Zhikhartsev 202b167ada8SUsman Nadeem if (StartBlockTerm->isUnconditional()) { 203d4a38c8fSUsman Nadeem BasicBlock *EndBlock = StartBlock->getUniqueSuccessor(); 204b167ada8SUsman Nadeem // Arbitrarily choose the 'false' side for a new input value to the PHI. 205b167ada8SUsman Nadeem BasicBlock *NewBlock = BasicBlock::Create( 206b167ada8SUsman Nadeem SI->getContext(), Twine(SI->getName(), ".si.unfold.false"), 20702077da7SAlexey Zhikhartsev EndBlock->getParent(), EndBlock); 208b167ada8SUsman Nadeem NewBBs->push_back(NewBlock); 209b167ada8SUsman Nadeem BranchInst::Create(EndBlock, NewBlock); 210b167ada8SUsman Nadeem DTU->applyUpdates({{DominatorTree::Insert, NewBlock, EndBlock}}); 21102077da7SAlexey Zhikhartsev 212b167ada8SUsman Nadeem // StartBlock 213b167ada8SUsman Nadeem // | \ 214b167ada8SUsman Nadeem // | NewBlock 215b167ada8SUsman Nadeem // | / 216b167ada8SUsman Nadeem // EndBlock 21702077da7SAlexey Zhikhartsev Value *SIOp1 = SI->getTrueValue(); 21802077da7SAlexey Zhikhartsev Value *SIOp2 = SI->getFalseValue(); 21902077da7SAlexey Zhikhartsev 220b167ada8SUsman Nadeem PHINode *NewPhi = PHINode::Create(SIUse->getType(), 1, 221b167ada8SUsman Nadeem Twine(SIOp2->getName(), ".si.unfold.phi"), 222b167ada8SUsman Nadeem NewBlock->getFirstInsertionPt()); 223b167ada8SUsman Nadeem NewPhi->addIncoming(SIOp2, StartBlock); 224b167ada8SUsman Nadeem 225d4a38c8fSUsman Nadeem // Update any other PHI nodes in EndBlock. 226d4a38c8fSUsman Nadeem for (PHINode &Phi : EndBlock->phis()) { 227d4a38c8fSUsman Nadeem if (SIUse == &Phi) 228d4a38c8fSUsman Nadeem continue; 229d4a38c8fSUsman Nadeem Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlock); 230d4a38c8fSUsman Nadeem } 231d4a38c8fSUsman Nadeem 232d4a38c8fSUsman Nadeem // Update the phi node of SI, which is its only use. 233d4a38c8fSUsman Nadeem if (EndBlock == SIUse->getParent()) { 234d4a38c8fSUsman Nadeem SIUse->addIncoming(NewPhi, NewBlock); 235d4a38c8fSUsman Nadeem SIUse->replaceUsesOfWith(SI, SIOp1); 236d4a38c8fSUsman Nadeem } else { 237d4a38c8fSUsman Nadeem PHINode *EndPhi = PHINode::Create(SIUse->getType(), pred_size(EndBlock), 238d4a38c8fSUsman Nadeem Twine(SI->getName(), ".si.unfold.phi"), 239d4a38c8fSUsman Nadeem EndBlock->getFirstInsertionPt()); 240d4a38c8fSUsman Nadeem for (BasicBlock *Pred : predecessors(EndBlock)) { 241d4a38c8fSUsman Nadeem if (Pred != StartBlock && Pred != NewBlock) 242d4a38c8fSUsman Nadeem EndPhi->addIncoming(EndPhi, Pred); 243d4a38c8fSUsman Nadeem } 244d4a38c8fSUsman Nadeem 245d4a38c8fSUsman Nadeem EndPhi->addIncoming(SIOp1, StartBlock); 246d4a38c8fSUsman Nadeem EndPhi->addIncoming(NewPhi, NewBlock); 247d4a38c8fSUsman Nadeem SIUse->replaceUsesOfWith(SI, EndPhi); 248d4a38c8fSUsman Nadeem SIUse = EndPhi; 249d4a38c8fSUsman Nadeem } 250d4a38c8fSUsman Nadeem 251b167ada8SUsman Nadeem if (auto *OpSi = dyn_cast<SelectInst>(SIOp1)) 252b167ada8SUsman Nadeem NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, SIUse)); 253b167ada8SUsman Nadeem if (auto *OpSi = dyn_cast<SelectInst>(SIOp2)) 254b167ada8SUsman Nadeem NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, NewPhi)); 25502077da7SAlexey Zhikhartsev 256b167ada8SUsman Nadeem // Insert the real conditional branch based on the original condition. 257d4a38c8fSUsman Nadeem StartBlockTerm->eraseFromParent(); 258b167ada8SUsman Nadeem BranchInst::Create(EndBlock, NewBlock, SI->getCondition(), StartBlock); 259b167ada8SUsman Nadeem DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}, 260b167ada8SUsman Nadeem {DominatorTree::Insert, StartBlock, NewBlock}}); 261b167ada8SUsman Nadeem } else { 262d4a38c8fSUsman Nadeem BasicBlock *EndBlock = SIUse->getParent(); 263b167ada8SUsman Nadeem BasicBlock *NewBlockT = BasicBlock::Create( 264b167ada8SUsman Nadeem SI->getContext(), Twine(SI->getName(), ".si.unfold.true"), 265b167ada8SUsman Nadeem EndBlock->getParent(), EndBlock); 266b167ada8SUsman Nadeem BasicBlock *NewBlockF = BasicBlock::Create( 267b167ada8SUsman Nadeem SI->getContext(), Twine(SI->getName(), ".si.unfold.false"), 268b167ada8SUsman Nadeem EndBlock->getParent(), EndBlock); 269b167ada8SUsman Nadeem 270b167ada8SUsman Nadeem NewBBs->push_back(NewBlockT); 271b167ada8SUsman Nadeem NewBBs->push_back(NewBlockF); 272b167ada8SUsman Nadeem 273b167ada8SUsman Nadeem // Def only has one use in EndBlock. 274b167ada8SUsman Nadeem // Before transformation: 275b167ada8SUsman Nadeem // StartBlock(Def) 276b167ada8SUsman Nadeem // | \ 277b167ada8SUsman Nadeem // EndBlock OtherBlock 278b167ada8SUsman Nadeem // (Use) 279b167ada8SUsman Nadeem // 280b167ada8SUsman Nadeem // After transformation: 281b167ada8SUsman Nadeem // StartBlock(Def) 282b167ada8SUsman Nadeem // | \ 283b167ada8SUsman Nadeem // | OtherBlock 284b167ada8SUsman Nadeem // NewBlockT 285b167ada8SUsman Nadeem // | \ 286b167ada8SUsman Nadeem // | NewBlockF 287b167ada8SUsman Nadeem // | / 288b167ada8SUsman Nadeem // | / 289b167ada8SUsman Nadeem // EndBlock 290b167ada8SUsman Nadeem // (Use) 291b167ada8SUsman Nadeem BranchInst::Create(EndBlock, NewBlockF); 292b167ada8SUsman Nadeem // Insert the real conditional branch based on the original condition. 293b167ada8SUsman Nadeem BranchInst::Create(EndBlock, NewBlockF, SI->getCondition(), NewBlockT); 294b167ada8SUsman Nadeem DTU->applyUpdates({{DominatorTree::Insert, NewBlockT, NewBlockF}, 295b167ada8SUsman Nadeem {DominatorTree::Insert, NewBlockT, EndBlock}, 296b167ada8SUsman Nadeem {DominatorTree::Insert, NewBlockF, EndBlock}}); 297b167ada8SUsman Nadeem 298b167ada8SUsman Nadeem Value *TrueVal = SI->getTrueValue(); 299b167ada8SUsman Nadeem Value *FalseVal = SI->getFalseValue(); 300b167ada8SUsman Nadeem 301b167ada8SUsman Nadeem PHINode *NewPhiT = PHINode::Create( 302b167ada8SUsman Nadeem SIUse->getType(), 1, Twine(TrueVal->getName(), ".si.unfold.phi"), 303b167ada8SUsman Nadeem NewBlockT->getFirstInsertionPt()); 304b167ada8SUsman Nadeem PHINode *NewPhiF = PHINode::Create( 305b167ada8SUsman Nadeem SIUse->getType(), 1, Twine(FalseVal->getName(), ".si.unfold.phi"), 306b167ada8SUsman Nadeem NewBlockF->getFirstInsertionPt()); 307b167ada8SUsman Nadeem NewPhiT->addIncoming(TrueVal, StartBlock); 308b167ada8SUsman Nadeem NewPhiF->addIncoming(FalseVal, NewBlockT); 309b167ada8SUsman Nadeem 310b167ada8SUsman Nadeem if (auto *TrueSI = dyn_cast<SelectInst>(TrueVal)) 311b167ada8SUsman Nadeem NewSIsToUnfold->push_back(SelectInstToUnfold(TrueSI, NewPhiT)); 312b167ada8SUsman Nadeem if (auto *FalseSi = dyn_cast<SelectInst>(FalseVal)) 313b167ada8SUsman Nadeem NewSIsToUnfold->push_back(SelectInstToUnfold(FalseSi, NewPhiF)); 314b167ada8SUsman Nadeem 315b167ada8SUsman Nadeem SIUse->addIncoming(NewPhiT, NewBlockT); 316b167ada8SUsman Nadeem SIUse->addIncoming(NewPhiF, NewBlockF); 317b167ada8SUsman Nadeem SIUse->removeIncomingValue(StartBlock); 318b167ada8SUsman Nadeem 319b167ada8SUsman Nadeem // Update any other PHI nodes in EndBlock. 320b167ada8SUsman Nadeem for (PHINode &Phi : EndBlock->phis()) { 321b167ada8SUsman Nadeem if (SIUse == &Phi) 322b167ada8SUsman Nadeem continue; 323b167ada8SUsman Nadeem Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlockT); 324b167ada8SUsman Nadeem Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlockF); 325b167ada8SUsman Nadeem Phi.removeIncomingValue(StartBlock); 326b167ada8SUsman Nadeem } 327b167ada8SUsman Nadeem 328b167ada8SUsman Nadeem // Update the appropriate successor of the start block to point to the new 329b167ada8SUsman Nadeem // unfolded block. 330b167ada8SUsman Nadeem unsigned SuccNum = StartBlockTerm->getSuccessor(1) == EndBlock ? 1 : 0; 331b167ada8SUsman Nadeem StartBlockTerm->setSuccessor(SuccNum, NewBlockT); 332b167ada8SUsman Nadeem DTU->applyUpdates({{DominatorTree::Delete, StartBlock, EndBlock}, 333b167ada8SUsman Nadeem {DominatorTree::Insert, StartBlock, NewBlockT}}); 334b167ada8SUsman Nadeem } 33502077da7SAlexey Zhikhartsev 336c229f767SXChy // Preserve loop info 337c229f767SXChy if (Loop *L = LI->getLoopFor(SI->getParent())) { 338c229f767SXChy for (BasicBlock *NewBB : *NewBBs) 339c229f767SXChy L->addBasicBlockToLoop(NewBB, *LI); 340c229f767SXChy } 341c229f767SXChy 34202077da7SAlexey Zhikhartsev // The select is now dead. 3437fa41d8aSXChy assert(SI->use_empty() && "Select must be dead now"); 34402077da7SAlexey Zhikhartsev SI->eraseFromParent(); 34502077da7SAlexey Zhikhartsev } 34602077da7SAlexey Zhikhartsev 34702077da7SAlexey Zhikhartsev struct ClonedBlock { 34802077da7SAlexey Zhikhartsev BasicBlock *BB; 349019ffbf3SXChy APInt State; ///< \p State corresponds to the next value of a switch stmnt. 35002077da7SAlexey Zhikhartsev }; 35102077da7SAlexey Zhikhartsev 35202077da7SAlexey Zhikhartsev typedef std::deque<BasicBlock *> PathType; 35302077da7SAlexey Zhikhartsev typedef std::vector<PathType> PathsType; 354380b8a60SNikita Popov typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks; 35502077da7SAlexey Zhikhartsev typedef std::vector<ClonedBlock> CloneList; 35602077da7SAlexey Zhikhartsev 35702077da7SAlexey Zhikhartsev // This data structure keeps track of all blocks that have been cloned. If two 35802077da7SAlexey Zhikhartsev // different ThreadingPaths clone the same block for a certain state it should 35902077da7SAlexey Zhikhartsev // be reused, and it can be looked up in this map. 36002077da7SAlexey Zhikhartsev typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap; 36102077da7SAlexey Zhikhartsev 36202077da7SAlexey Zhikhartsev // This map keeps track of all the new definitions for an instruction. This 36302077da7SAlexey Zhikhartsev // information is needed when restoring SSA form after cloning blocks. 3649d555b4aSOlle Fredriksson typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap; 36502077da7SAlexey Zhikhartsev 36602077da7SAlexey Zhikhartsev inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { 36702077da7SAlexey Zhikhartsev OS << "< "; 36802077da7SAlexey Zhikhartsev for (const BasicBlock *BB : Path) { 36902077da7SAlexey Zhikhartsev std::string BBName; 37002077da7SAlexey Zhikhartsev if (BB->hasName()) 37102077da7SAlexey Zhikhartsev raw_string_ostream(BBName) << BB->getName(); 37202077da7SAlexey Zhikhartsev else 37302077da7SAlexey Zhikhartsev raw_string_ostream(BBName) << BB; 37402077da7SAlexey Zhikhartsev OS << BBName << " "; 37502077da7SAlexey Zhikhartsev } 37602077da7SAlexey Zhikhartsev OS << ">"; 37702077da7SAlexey Zhikhartsev return OS; 37802077da7SAlexey Zhikhartsev } 37902077da7SAlexey Zhikhartsev 38002077da7SAlexey Zhikhartsev /// ThreadingPath is a path in the control flow of a loop that can be threaded 38102077da7SAlexey Zhikhartsev /// by cloning necessary basic blocks and replacing conditional branches with 38202077da7SAlexey Zhikhartsev /// unconditional ones. A threading path includes a list of basic blocks, the 38302077da7SAlexey Zhikhartsev /// exit state, and the block that determines the next state. 38402077da7SAlexey Zhikhartsev struct ThreadingPath { 38502077da7SAlexey Zhikhartsev /// Exit value is DFA's exit state for the given path. 386019ffbf3SXChy APInt getExitValue() const { return ExitVal; } 38702077da7SAlexey Zhikhartsev void setExitValue(const ConstantInt *V) { 388019ffbf3SXChy ExitVal = V->getValue(); 38902077da7SAlexey Zhikhartsev IsExitValSet = true; 39002077da7SAlexey Zhikhartsev } 39102077da7SAlexey Zhikhartsev bool isExitValueSet() const { return IsExitValSet; } 39202077da7SAlexey Zhikhartsev 39302077da7SAlexey Zhikhartsev /// Determinator is the basic block that determines the next state of the DFA. 39402077da7SAlexey Zhikhartsev const BasicBlock *getDeterminatorBB() const { return DBB; } 39502077da7SAlexey Zhikhartsev void setDeterminator(const BasicBlock *BB) { DBB = BB; } 39602077da7SAlexey Zhikhartsev 39702077da7SAlexey Zhikhartsev /// Path is a list of basic blocks. 39802077da7SAlexey Zhikhartsev const PathType &getPath() const { return Path; } 39902077da7SAlexey Zhikhartsev void setPath(const PathType &NewPath) { Path = NewPath; } 400b167ada8SUsman Nadeem void push_back(BasicBlock *BB) { Path.push_back(BB); } 401b167ada8SUsman Nadeem void push_front(BasicBlock *BB) { Path.push_front(BB); } 402b167ada8SUsman Nadeem void appendExcludingFirst(const PathType &OtherPath) { 403b167ada8SUsman Nadeem Path.insert(Path.end(), OtherPath.begin() + 1, OtherPath.end()); 404b167ada8SUsman Nadeem } 40502077da7SAlexey Zhikhartsev 40602077da7SAlexey Zhikhartsev void print(raw_ostream &OS) const { 40702077da7SAlexey Zhikhartsev OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]"; 40802077da7SAlexey Zhikhartsev } 40902077da7SAlexey Zhikhartsev 41002077da7SAlexey Zhikhartsev private: 41102077da7SAlexey Zhikhartsev PathType Path; 412019ffbf3SXChy APInt ExitVal; 41302077da7SAlexey Zhikhartsev const BasicBlock *DBB = nullptr; 41402077da7SAlexey Zhikhartsev bool IsExitValSet = false; 41502077da7SAlexey Zhikhartsev }; 41602077da7SAlexey Zhikhartsev 41702077da7SAlexey Zhikhartsev #ifndef NDEBUG 41802077da7SAlexey Zhikhartsev inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) { 41902077da7SAlexey Zhikhartsev TPath.print(OS); 42002077da7SAlexey Zhikhartsev return OS; 42102077da7SAlexey Zhikhartsev } 42202077da7SAlexey Zhikhartsev #endif 42302077da7SAlexey Zhikhartsev 42402077da7SAlexey Zhikhartsev struct MainSwitch { 4256b53ada6SXChy MainSwitch(SwitchInst *SI, LoopInfo *LI, OptimizationRemarkEmitter *ORE) 4266b53ada6SXChy : LI(LI) { 4278b0d7634SAlex Zhikhartsev if (isCandidate(SI)) { 42802077da7SAlexey Zhikhartsev Instr = SI; 42902077da7SAlexey Zhikhartsev } else { 43002077da7SAlexey Zhikhartsev ORE->emit([&]() { 43102077da7SAlexey Zhikhartsev return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI) 43202077da7SAlexey Zhikhartsev << "Switch instruction is not predictable."; 43302077da7SAlexey Zhikhartsev }); 43402077da7SAlexey Zhikhartsev } 43502077da7SAlexey Zhikhartsev } 43602077da7SAlexey Zhikhartsev 43702077da7SAlexey Zhikhartsev virtual ~MainSwitch() = default; 43802077da7SAlexey Zhikhartsev 43902077da7SAlexey Zhikhartsev SwitchInst *getInstr() const { return Instr; } 44002077da7SAlexey Zhikhartsev const SmallVector<SelectInstToUnfold, 4> getSelectInsts() { 44102077da7SAlexey Zhikhartsev return SelectInsts; 44202077da7SAlexey Zhikhartsev } 44302077da7SAlexey Zhikhartsev 44402077da7SAlexey Zhikhartsev private: 4458b0d7634SAlex Zhikhartsev /// Do a use-def chain traversal starting from the switch condition to see if 4468b0d7634SAlex Zhikhartsev /// \p SI is a potential condidate. 4478b0d7634SAlex Zhikhartsev /// 4488b0d7634SAlex Zhikhartsev /// Also, collect select instructions to unfold. 4498b0d7634SAlex Zhikhartsev bool isCandidate(const SwitchInst *SI) { 450c9325f8aSUsman Nadeem std::deque<std::pair<Value *, BasicBlock *>> Q; 45102077da7SAlexey Zhikhartsev SmallSet<Value *, 16> SeenValues; 45202077da7SAlexey Zhikhartsev SelectInsts.clear(); 45302077da7SAlexey Zhikhartsev 4548b0d7634SAlex Zhikhartsev Value *SICond = SI->getCondition(); 4558b0d7634SAlex Zhikhartsev LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n"); 4568b0d7634SAlex Zhikhartsev if (!isa<PHINode>(SICond)) 45702077da7SAlexey Zhikhartsev return false; 45802077da7SAlexey Zhikhartsev 4596b53ada6SXChy // The switch must be in a loop. 460c9325f8aSUsman Nadeem const Loop *L = LI->getLoopFor(SI->getParent()); 461c9325f8aSUsman Nadeem if (!L) 4626b53ada6SXChy return false; 4636b53ada6SXChy 464c9325f8aSUsman Nadeem addToQueue(SICond, nullptr, Q, SeenValues); 46502077da7SAlexey Zhikhartsev 46602077da7SAlexey Zhikhartsev while (!Q.empty()) { 467c9325f8aSUsman Nadeem Value *Current = Q.front().first; 468c9325f8aSUsman Nadeem BasicBlock *CurrentIncomingBB = Q.front().second; 46902077da7SAlexey Zhikhartsev Q.pop_front(); 47002077da7SAlexey Zhikhartsev 47102077da7SAlexey Zhikhartsev if (auto *Phi = dyn_cast<PHINode>(Current)) { 472c9325f8aSUsman Nadeem for (BasicBlock *IncomingBB : Phi->blocks()) { 473c9325f8aSUsman Nadeem Value *Incoming = Phi->getIncomingValueForBlock(IncomingBB); 474c9325f8aSUsman Nadeem addToQueue(Incoming, IncomingBB, Q, SeenValues); 47502077da7SAlexey Zhikhartsev } 4768b0d7634SAlex Zhikhartsev LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n"); 47702077da7SAlexey Zhikhartsev } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) { 47802077da7SAlexey Zhikhartsev if (!isValidSelectInst(SelI)) 47902077da7SAlexey Zhikhartsev return false; 480c9325f8aSUsman Nadeem addToQueue(SelI->getTrueValue(), CurrentIncomingBB, Q, SeenValues); 481c9325f8aSUsman Nadeem addToQueue(SelI->getFalseValue(), CurrentIncomingBB, Q, SeenValues); 4828b0d7634SAlex Zhikhartsev LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n"); 48302077da7SAlexey Zhikhartsev if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back())) 48402077da7SAlexey Zhikhartsev SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse)); 4858b0d7634SAlex Zhikhartsev } else if (isa<Constant>(Current)) { 4868b0d7634SAlex Zhikhartsev LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n"); 4878b0d7634SAlex Zhikhartsev continue; 48802077da7SAlexey Zhikhartsev } else { 4898b0d7634SAlex Zhikhartsev LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n"); 4908b0d7634SAlex Zhikhartsev // Allow unpredictable values. The hope is that those will be the 4918b0d7634SAlex Zhikhartsev // initial switch values that can be ignored (they will hit the 4928b0d7634SAlex Zhikhartsev // unthreaded switch) but this assumption will get checked later after 4938b0d7634SAlex Zhikhartsev // paths have been enumerated (in function getStateDefMap). 494c9325f8aSUsman Nadeem 495c9325f8aSUsman Nadeem // If the unpredictable value comes from the same inner loop it is 496c9325f8aSUsman Nadeem // likely that it will also be on the enumerated paths, causing us to 497c9325f8aSUsman Nadeem // exit after we have enumerated all the paths. This heuristic save 498c9325f8aSUsman Nadeem // compile time because a search for all the paths can become expensive. 499c9325f8aSUsman Nadeem if (EarlyExitHeuristic && 500c9325f8aSUsman Nadeem L->contains(LI->getLoopFor(CurrentIncomingBB))) { 501c9325f8aSUsman Nadeem LLVM_DEBUG(dbgs() 502c9325f8aSUsman Nadeem << "\tExiting early due to unpredictability heuristic.\n"); 503c9325f8aSUsman Nadeem return false; 504c9325f8aSUsman Nadeem } 505c9325f8aSUsman Nadeem 5068b0d7634SAlex Zhikhartsev continue; 50702077da7SAlexey Zhikhartsev } 50802077da7SAlexey Zhikhartsev } 50902077da7SAlexey Zhikhartsev 51002077da7SAlexey Zhikhartsev return true; 51102077da7SAlexey Zhikhartsev } 51202077da7SAlexey Zhikhartsev 513c9325f8aSUsman Nadeem void addToQueue(Value *Val, BasicBlock *BB, 514c9325f8aSUsman Nadeem std::deque<std::pair<Value *, BasicBlock *>> &Q, 51502077da7SAlexey Zhikhartsev SmallSet<Value *, 16> &SeenValues) { 51623a26e71SKazu Hirata if (SeenValues.insert(Val).second) 517c9325f8aSUsman Nadeem Q.push_back({Val, BB}); 51802077da7SAlexey Zhikhartsev } 51902077da7SAlexey Zhikhartsev 52002077da7SAlexey Zhikhartsev bool isValidSelectInst(SelectInst *SI) { 52102077da7SAlexey Zhikhartsev if (!SI->hasOneUse()) 52202077da7SAlexey Zhikhartsev return false; 52302077da7SAlexey Zhikhartsev 52402077da7SAlexey Zhikhartsev Instruction *SIUse = dyn_cast<Instruction>(SI->user_back()); 52502077da7SAlexey Zhikhartsev // The use of the select inst should be either a phi or another select. 52602077da7SAlexey Zhikhartsev if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse))) 52702077da7SAlexey Zhikhartsev return false; 52802077da7SAlexey Zhikhartsev 52902077da7SAlexey Zhikhartsev BasicBlock *SIBB = SI->getParent(); 53002077da7SAlexey Zhikhartsev 53102077da7SAlexey Zhikhartsev // Currently, we can only expand select instructions in basic blocks with 53202077da7SAlexey Zhikhartsev // one successor. 53302077da7SAlexey Zhikhartsev BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator()); 53402077da7SAlexey Zhikhartsev if (!SITerm || !SITerm->isUnconditional()) 53502077da7SAlexey Zhikhartsev return false; 53602077da7SAlexey Zhikhartsev 5377fa41d8aSXChy // Only fold the select coming from directly where it is defined. 5387fa41d8aSXChy PHINode *PHIUser = dyn_cast<PHINode>(SIUse); 5397fa41d8aSXChy if (PHIUser && PHIUser->getIncomingBlock(*SI->use_begin()) != SIBB) 54002077da7SAlexey Zhikhartsev return false; 54102077da7SAlexey Zhikhartsev 54202077da7SAlexey Zhikhartsev // If select will not be sunk during unfolding, and it is in the same basic 54302077da7SAlexey Zhikhartsev // block as another state defining select, then cannot unfold both. 54402077da7SAlexey Zhikhartsev for (SelectInstToUnfold SIToUnfold : SelectInsts) { 54502077da7SAlexey Zhikhartsev SelectInst *PrevSI = SIToUnfold.getInst(); 54602077da7SAlexey Zhikhartsev if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI && 54702077da7SAlexey Zhikhartsev PrevSI->getParent() == SI->getParent()) 54802077da7SAlexey Zhikhartsev return false; 54902077da7SAlexey Zhikhartsev } 55002077da7SAlexey Zhikhartsev 55102077da7SAlexey Zhikhartsev return true; 55202077da7SAlexey Zhikhartsev } 55302077da7SAlexey Zhikhartsev 5546b53ada6SXChy LoopInfo *LI; 55502077da7SAlexey Zhikhartsev SwitchInst *Instr = nullptr; 55602077da7SAlexey Zhikhartsev SmallVector<SelectInstToUnfold, 4> SelectInsts; 55702077da7SAlexey Zhikhartsev }; 55802077da7SAlexey Zhikhartsev 55902077da7SAlexey Zhikhartsev struct AllSwitchPaths { 560c229f767SXChy AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE, 561b167ada8SUsman Nadeem LoopInfo *LI, Loop *L) 562c229f767SXChy : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE), 563b167ada8SUsman Nadeem LI(LI), SwitchOuterLoop(L) {} 56402077da7SAlexey Zhikhartsev 56502077da7SAlexey Zhikhartsev std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; } 56602077da7SAlexey Zhikhartsev unsigned getNumThreadingPaths() { return TPaths.size(); } 56702077da7SAlexey Zhikhartsev SwitchInst *getSwitchInst() { return Switch; } 56802077da7SAlexey Zhikhartsev BasicBlock *getSwitchBlock() { return SwitchBlock; } 56902077da7SAlexey Zhikhartsev 57002077da7SAlexey Zhikhartsev void run() { 571b167ada8SUsman Nadeem StateDefMap StateDef = getStateDefMap(); 5728b0d7634SAlex Zhikhartsev if (StateDef.empty()) { 5738b0d7634SAlex Zhikhartsev ORE->emit([&]() { 5748b0d7634SAlex Zhikhartsev return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", 5758b0d7634SAlex Zhikhartsev Switch) 5768b0d7634SAlex Zhikhartsev << "Switch instruction is not predictable."; 5778b0d7634SAlex Zhikhartsev }); 5788b0d7634SAlex Zhikhartsev return; 5798b0d7634SAlex Zhikhartsev } 58002077da7SAlexey Zhikhartsev 581b167ada8SUsman Nadeem auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0)); 582b167ada8SUsman Nadeem auto *SwitchPhiDefBB = SwitchPhi->getParent(); 583b167ada8SUsman Nadeem VisitedBlocks VB; 584b167ada8SUsman Nadeem // Get paths from the determinator BBs to SwitchPhiDefBB 585b167ada8SUsman Nadeem std::vector<ThreadingPath> PathsToPhiDef = 586b167ada8SUsman Nadeem getPathsFromStateDefMap(StateDef, SwitchPhi, VB); 587b167ada8SUsman Nadeem if (SwitchPhiDefBB == SwitchBlock) { 588b167ada8SUsman Nadeem TPaths = std::move(PathsToPhiDef); 589b167ada8SUsman Nadeem return; 59002077da7SAlexey Zhikhartsev } 59102077da7SAlexey Zhikhartsev 592b167ada8SUsman Nadeem // Find and append paths from SwitchPhiDefBB to SwitchBlock. 593b167ada8SUsman Nadeem PathsType PathsToSwitchBB = 594b167ada8SUsman Nadeem paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1); 595b167ada8SUsman Nadeem if (PathsToSwitchBB.empty()) 596b167ada8SUsman Nadeem return; 59702077da7SAlexey Zhikhartsev 598b167ada8SUsman Nadeem std::vector<ThreadingPath> TempList; 599b167ada8SUsman Nadeem for (const ThreadingPath &Path : PathsToPhiDef) { 600b167ada8SUsman Nadeem for (const PathType &PathToSw : PathsToSwitchBB) { 601b167ada8SUsman Nadeem ThreadingPath PathCopy(Path); 602b167ada8SUsman Nadeem PathCopy.appendExcludingFirst(PathToSw); 603b167ada8SUsman Nadeem TempList.push_back(PathCopy); 60402077da7SAlexey Zhikhartsev } 60502077da7SAlexey Zhikhartsev } 606b167ada8SUsman Nadeem TPaths = std::move(TempList); 60702077da7SAlexey Zhikhartsev } 60802077da7SAlexey Zhikhartsev 60902077da7SAlexey Zhikhartsev private: 61002077da7SAlexey Zhikhartsev // Value: an instruction that defines a switch state; 61102077da7SAlexey Zhikhartsev // Key: the parent basic block of that instruction. 61202077da7SAlexey Zhikhartsev typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap; 613b167ada8SUsman Nadeem std::vector<ThreadingPath> getPathsFromStateDefMap(StateDefMap &StateDef, 614b167ada8SUsman Nadeem PHINode *Phi, 615b167ada8SUsman Nadeem VisitedBlocks &VB) { 616b167ada8SUsman Nadeem std::vector<ThreadingPath> Res; 617b167ada8SUsman Nadeem auto *PhiBB = Phi->getParent(); 618b167ada8SUsman Nadeem VB.insert(PhiBB); 61902077da7SAlexey Zhikhartsev 620b167ada8SUsman Nadeem VisitedBlocks UniqueBlocks; 621b167ada8SUsman Nadeem for (auto *IncomingBB : Phi->blocks()) { 622b167ada8SUsman Nadeem if (!UniqueBlocks.insert(IncomingBB).second) 623b167ada8SUsman Nadeem continue; 624b167ada8SUsman Nadeem if (!SwitchOuterLoop->contains(IncomingBB)) 625b167ada8SUsman Nadeem continue; 626b167ada8SUsman Nadeem 627b167ada8SUsman Nadeem Value *IncomingValue = Phi->getIncomingValueForBlock(IncomingBB); 628b167ada8SUsman Nadeem // We found the determinator. This is the start of our path. 629b167ada8SUsman Nadeem if (auto *C = dyn_cast<ConstantInt>(IncomingValue)) { 630b167ada8SUsman Nadeem // SwitchBlock is the determinator, unsupported unless its also the def. 631b167ada8SUsman Nadeem if (PhiBB == SwitchBlock && 632b167ada8SUsman Nadeem SwitchBlock != cast<PHINode>(Switch->getOperand(0))->getParent()) 633b167ada8SUsman Nadeem continue; 634b167ada8SUsman Nadeem ThreadingPath NewPath; 635b167ada8SUsman Nadeem NewPath.setDeterminator(PhiBB); 636b167ada8SUsman Nadeem NewPath.setExitValue(C); 637b167ada8SUsman Nadeem // Don't add SwitchBlock at the start, this is handled later. 638b167ada8SUsman Nadeem if (IncomingBB != SwitchBlock) 639b167ada8SUsman Nadeem NewPath.push_back(IncomingBB); 640b167ada8SUsman Nadeem NewPath.push_back(PhiBB); 641b167ada8SUsman Nadeem Res.push_back(NewPath); 642b167ada8SUsman Nadeem continue; 643b167ada8SUsman Nadeem } 644b167ada8SUsman Nadeem // Don't get into a cycle. 645b167ada8SUsman Nadeem if (VB.contains(IncomingBB) || IncomingBB == SwitchBlock) 646b167ada8SUsman Nadeem continue; 647b167ada8SUsman Nadeem // Recurse up the PHI chain. 648b167ada8SUsman Nadeem auto *IncomingPhi = dyn_cast<PHINode>(IncomingValue); 649b167ada8SUsman Nadeem if (!IncomingPhi) 650b167ada8SUsman Nadeem continue; 651b167ada8SUsman Nadeem auto *IncomingPhiDefBB = IncomingPhi->getParent(); 652b167ada8SUsman Nadeem if (!StateDef.contains(IncomingPhiDefBB)) 653b167ada8SUsman Nadeem continue; 654b167ada8SUsman Nadeem 655b167ada8SUsman Nadeem // Direct predecessor, just add to the path. 656b167ada8SUsman Nadeem if (IncomingPhiDefBB == IncomingBB) { 657b167ada8SUsman Nadeem std::vector<ThreadingPath> PredPaths = 658b167ada8SUsman Nadeem getPathsFromStateDefMap(StateDef, IncomingPhi, VB); 659b167ada8SUsman Nadeem for (ThreadingPath &Path : PredPaths) { 660b167ada8SUsman Nadeem Path.push_back(PhiBB); 661b167ada8SUsman Nadeem Res.push_back(std::move(Path)); 662b167ada8SUsman Nadeem } 663b167ada8SUsman Nadeem continue; 664b167ada8SUsman Nadeem } 665b167ada8SUsman Nadeem // Not a direct predecessor, find intermediate paths to append to the 666b167ada8SUsman Nadeem // existing path. 667b167ada8SUsman Nadeem if (VB.contains(IncomingPhiDefBB)) 668b167ada8SUsman Nadeem continue; 669b167ada8SUsman Nadeem 670b167ada8SUsman Nadeem PathsType IntermediatePaths; 671b167ada8SUsman Nadeem IntermediatePaths = 672b167ada8SUsman Nadeem paths(IncomingPhiDefBB, IncomingBB, VB, /* PathDepth = */ 1); 673b167ada8SUsman Nadeem if (IntermediatePaths.empty()) 674b167ada8SUsman Nadeem continue; 675b167ada8SUsman Nadeem 676b167ada8SUsman Nadeem std::vector<ThreadingPath> PredPaths = 677b167ada8SUsman Nadeem getPathsFromStateDefMap(StateDef, IncomingPhi, VB); 678b167ada8SUsman Nadeem for (const ThreadingPath &Path : PredPaths) { 679b167ada8SUsman Nadeem for (const PathType &IPath : IntermediatePaths) { 680b167ada8SUsman Nadeem ThreadingPath NewPath(Path); 681b167ada8SUsman Nadeem NewPath.appendExcludingFirst(IPath); 682b167ada8SUsman Nadeem NewPath.push_back(PhiBB); 683b167ada8SUsman Nadeem Res.push_back(NewPath); 684b167ada8SUsman Nadeem } 685b167ada8SUsman Nadeem } 686b167ada8SUsman Nadeem } 687b167ada8SUsman Nadeem VB.erase(PhiBB); 688b167ada8SUsman Nadeem return Res; 689b167ada8SUsman Nadeem } 690b167ada8SUsman Nadeem 691b167ada8SUsman Nadeem PathsType paths(BasicBlock *BB, BasicBlock *ToBB, VisitedBlocks &Visited, 692b167ada8SUsman Nadeem unsigned PathDepth) { 69302077da7SAlexey Zhikhartsev PathsType Res; 69402077da7SAlexey Zhikhartsev 69502077da7SAlexey Zhikhartsev // Stop exploring paths after visiting MaxPathLength blocks 69602077da7SAlexey Zhikhartsev if (PathDepth > MaxPathLength) { 69702077da7SAlexey Zhikhartsev ORE->emit([&]() { 69802077da7SAlexey Zhikhartsev return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached", 69902077da7SAlexey Zhikhartsev Switch) 70002077da7SAlexey Zhikhartsev << "Exploration stopped after visiting MaxPathLength=" 70102077da7SAlexey Zhikhartsev << ore::NV("MaxPathLength", MaxPathLength) << " blocks."; 70202077da7SAlexey Zhikhartsev }); 70302077da7SAlexey Zhikhartsev return Res; 70402077da7SAlexey Zhikhartsev } 70502077da7SAlexey Zhikhartsev 70602077da7SAlexey Zhikhartsev Visited.insert(BB); 707b167ada8SUsman Nadeem if (++NumVisited > MaxNumVisitiedPaths) 708b167ada8SUsman Nadeem return Res; 70902077da7SAlexey Zhikhartsev 710c229f767SXChy // Stop if we have reached the BB out of loop, since its successors have no 711c229f767SXChy // impact on the DFA. 712b167ada8SUsman Nadeem if (!SwitchOuterLoop->contains(BB)) 713c229f767SXChy return Res; 714c229f767SXChy 71502077da7SAlexey Zhikhartsev // Some blocks have multiple edges to the same successor, and this set 71602077da7SAlexey Zhikhartsev // is used to prevent a duplicate path from being generated 71702077da7SAlexey Zhikhartsev SmallSet<BasicBlock *, 4> Successors; 7183f7aea1aSNikita Popov for (BasicBlock *Succ : successors(BB)) { 7193f7aea1aSNikita Popov if (!Successors.insert(Succ).second) 72002077da7SAlexey Zhikhartsev continue; 72102077da7SAlexey Zhikhartsev 722b167ada8SUsman Nadeem // Found a cycle through the final block. 723b167ada8SUsman Nadeem if (Succ == ToBB) { 724b167ada8SUsman Nadeem Res.push_back({BB, ToBB}); 72502077da7SAlexey Zhikhartsev continue; 72602077da7SAlexey Zhikhartsev } 72702077da7SAlexey Zhikhartsev 72802077da7SAlexey Zhikhartsev // We have encountered a cycle, do not get caught in it 729380b8a60SNikita Popov if (Visited.contains(Succ)) 73002077da7SAlexey Zhikhartsev continue; 73102077da7SAlexey Zhikhartsev 732b167ada8SUsman Nadeem auto *CurrLoop = LI->getLoopFor(BB); 733b167ada8SUsman Nadeem // Unlikely to be beneficial. 734b167ada8SUsman Nadeem if (Succ == CurrLoop->getHeader()) 735b167ada8SUsman Nadeem continue; 736b167ada8SUsman Nadeem // Skip for now, revisit this condition later to see the impact on 737b167ada8SUsman Nadeem // coverage and compile time. 738b167ada8SUsman Nadeem if (LI->getLoopFor(Succ) != CurrLoop) 739b167ada8SUsman Nadeem continue; 740b167ada8SUsman Nadeem 741b167ada8SUsman Nadeem PathsType SuccPaths = paths(Succ, ToBB, Visited, PathDepth + 1); 742b167ada8SUsman Nadeem for (PathType &Path : SuccPaths) { 743b167ada8SUsman Nadeem Path.push_front(BB); 744b167ada8SUsman Nadeem Res.push_back(Path); 7458b0d7634SAlex Zhikhartsev if (Res.size() >= MaxNumPaths) { 7468b0d7634SAlex Zhikhartsev return Res; 7478b0d7634SAlex Zhikhartsev } 74802077da7SAlexey Zhikhartsev } 74902077da7SAlexey Zhikhartsev } 75002077da7SAlexey Zhikhartsev // This block could now be visited again from a different predecessor. Note 75102077da7SAlexey Zhikhartsev // that this will result in exponential runtime. Subpaths could possibly be 75202077da7SAlexey Zhikhartsev // cached but it takes a lot of memory to store them. 75302077da7SAlexey Zhikhartsev Visited.erase(BB); 75402077da7SAlexey Zhikhartsev return Res; 75502077da7SAlexey Zhikhartsev } 75602077da7SAlexey Zhikhartsev 757*5fb57131SUsman Nadeem /// Walk the use-def chain and collect all the state-defining blocks and the 758*5fb57131SUsman Nadeem /// PHI nodes in those blocks that define the state. 759b167ada8SUsman Nadeem StateDefMap getStateDefMap() const { 76002077da7SAlexey Zhikhartsev StateDefMap Res; 761*5fb57131SUsman Nadeem PHINode *FirstDef = dyn_cast<PHINode>(Switch->getOperand(0)); 762*5fb57131SUsman Nadeem assert(FirstDef && "The first definition must be a phi."); 76302077da7SAlexey Zhikhartsev 76402077da7SAlexey Zhikhartsev SmallVector<PHINode *, 8> Stack; 765*5fb57131SUsman Nadeem Stack.push_back(FirstDef); 76602077da7SAlexey Zhikhartsev SmallSet<Value *, 16> SeenValues; 76702077da7SAlexey Zhikhartsev 76802077da7SAlexey Zhikhartsev while (!Stack.empty()) { 76984b07c9bSKazu Hirata PHINode *CurPhi = Stack.pop_back_val(); 77002077da7SAlexey Zhikhartsev 77102077da7SAlexey Zhikhartsev Res[CurPhi->getParent()] = CurPhi; 77202077da7SAlexey Zhikhartsev SeenValues.insert(CurPhi); 77302077da7SAlexey Zhikhartsev 7748b0d7634SAlex Zhikhartsev for (BasicBlock *IncomingBB : CurPhi->blocks()) { 775*5fb57131SUsman Nadeem PHINode *IncomingPhi = 776*5fb57131SUsman Nadeem dyn_cast<PHINode>(CurPhi->getIncomingValueForBlock(IncomingBB)); 777*5fb57131SUsman Nadeem if (!IncomingPhi) 77802077da7SAlexey Zhikhartsev continue; 779*5fb57131SUsman Nadeem bool IsOutsideLoops = !SwitchOuterLoop->contains(IncomingBB); 780*5fb57131SUsman Nadeem if (SeenValues.contains(IncomingPhi) || IsOutsideLoops) 781*5fb57131SUsman Nadeem continue; 78202077da7SAlexey Zhikhartsev 783*5fb57131SUsman Nadeem Stack.push_back(IncomingPhi); 78402077da7SAlexey Zhikhartsev } 78502077da7SAlexey Zhikhartsev } 78602077da7SAlexey Zhikhartsev 78702077da7SAlexey Zhikhartsev return Res; 78802077da7SAlexey Zhikhartsev } 78902077da7SAlexey Zhikhartsev 790b167ada8SUsman Nadeem unsigned NumVisited = 0; 79102077da7SAlexey Zhikhartsev SwitchInst *Switch; 79202077da7SAlexey Zhikhartsev BasicBlock *SwitchBlock; 79302077da7SAlexey Zhikhartsev OptimizationRemarkEmitter *ORE; 79402077da7SAlexey Zhikhartsev std::vector<ThreadingPath> TPaths; 795c229f767SXChy LoopInfo *LI; 796b167ada8SUsman Nadeem Loop *SwitchOuterLoop; 79702077da7SAlexey Zhikhartsev }; 79802077da7SAlexey Zhikhartsev 79902077da7SAlexey Zhikhartsev struct TransformDFA { 80002077da7SAlexey Zhikhartsev TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT, 80102077da7SAlexey Zhikhartsev AssumptionCache *AC, TargetTransformInfo *TTI, 80202077da7SAlexey Zhikhartsev OptimizationRemarkEmitter *ORE, 80302077da7SAlexey Zhikhartsev SmallPtrSet<const Value *, 32> EphValues) 80402077da7SAlexey Zhikhartsev : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE), 80502077da7SAlexey Zhikhartsev EphValues(EphValues) {} 80602077da7SAlexey Zhikhartsev 80702077da7SAlexey Zhikhartsev void run() { 80802077da7SAlexey Zhikhartsev if (isLegalAndProfitableToTransform()) { 80902077da7SAlexey Zhikhartsev createAllExitPaths(); 81002077da7SAlexey Zhikhartsev NumTransforms++; 81102077da7SAlexey Zhikhartsev } 81202077da7SAlexey Zhikhartsev } 81302077da7SAlexey Zhikhartsev 81402077da7SAlexey Zhikhartsev private: 81502077da7SAlexey Zhikhartsev /// This function performs both a legality check and profitability check at 81602077da7SAlexey Zhikhartsev /// the same time since it is convenient to do so. It iterates through all 81702077da7SAlexey Zhikhartsev /// blocks that will be cloned, and keeps track of the duplication cost. It 81802077da7SAlexey Zhikhartsev /// also returns false if it is illegal to clone some required block. 81902077da7SAlexey Zhikhartsev bool isLegalAndProfitableToTransform() { 82002077da7SAlexey Zhikhartsev CodeMetrics Metrics; 82102077da7SAlexey Zhikhartsev SwitchInst *Switch = SwitchPaths->getSwitchInst(); 82202077da7SAlexey Zhikhartsev 8232fba4694SXChy // Don't thread switch without multiple successors. 8242fba4694SXChy if (Switch->getNumSuccessors() <= 1) 8252fba4694SXChy return false; 8262fba4694SXChy 82702077da7SAlexey Zhikhartsev // Note that DuplicateBlockMap is not being used as intended here. It is 82802077da7SAlexey Zhikhartsev // just being used to ensure (BB, State) pairs are only counted once. 82902077da7SAlexey Zhikhartsev DuplicateBlockMap DuplicateMap; 83002077da7SAlexey Zhikhartsev 83102077da7SAlexey Zhikhartsev for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 83202077da7SAlexey Zhikhartsev PathType PathBBs = TPath.getPath(); 833019ffbf3SXChy APInt NextState = TPath.getExitValue(); 83402077da7SAlexey Zhikhartsev const BasicBlock *Determinator = TPath.getDeterminatorBB(); 83502077da7SAlexey Zhikhartsev 83602077da7SAlexey Zhikhartsev // Update Metrics for the Switch block, this is always cloned 83702077da7SAlexey Zhikhartsev BasicBlock *BB = SwitchPaths->getSwitchBlock(); 83802077da7SAlexey Zhikhartsev BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 83902077da7SAlexey Zhikhartsev if (!VisitedBB) { 84002077da7SAlexey Zhikhartsev Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 84102077da7SAlexey Zhikhartsev DuplicateMap[BB].push_back({BB, NextState}); 84202077da7SAlexey Zhikhartsev } 84302077da7SAlexey Zhikhartsev 84402077da7SAlexey Zhikhartsev // If the Switch block is the Determinator, then we can continue since 84502077da7SAlexey Zhikhartsev // this is the only block that is cloned and we already counted for it. 84602077da7SAlexey Zhikhartsev if (PathBBs.front() == Determinator) 84702077da7SAlexey Zhikhartsev continue; 84802077da7SAlexey Zhikhartsev 84902077da7SAlexey Zhikhartsev // Otherwise update Metrics for all blocks that will be cloned. If any 85002077da7SAlexey Zhikhartsev // block is already cloned and would be reused, don't double count it. 8515ea31555SKazu Hirata auto DetIt = llvm::find(PathBBs, Determinator); 85202077da7SAlexey Zhikhartsev for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 85302077da7SAlexey Zhikhartsev BB = *BBIt; 85402077da7SAlexey Zhikhartsev VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 85502077da7SAlexey Zhikhartsev if (VisitedBB) 85602077da7SAlexey Zhikhartsev continue; 85702077da7SAlexey Zhikhartsev Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 85802077da7SAlexey Zhikhartsev DuplicateMap[BB].push_back({BB, NextState}); 85902077da7SAlexey Zhikhartsev } 86002077da7SAlexey Zhikhartsev 86102077da7SAlexey Zhikhartsev if (Metrics.notDuplicatable) { 86202077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 86302077da7SAlexey Zhikhartsev << "non-duplicatable instructions.\n"); 86402077da7SAlexey Zhikhartsev ORE->emit([&]() { 86502077da7SAlexey Zhikhartsev return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst", 86602077da7SAlexey Zhikhartsev Switch) 86702077da7SAlexey Zhikhartsev << "Contains non-duplicatable instructions."; 86802077da7SAlexey Zhikhartsev }); 86902077da7SAlexey Zhikhartsev return false; 87002077da7SAlexey Zhikhartsev } 87102077da7SAlexey Zhikhartsev 872e0ac087fSSameer Sahasrabuddhe // FIXME: Allow jump threading with controlled convergence. 873e0ac087fSSameer Sahasrabuddhe if (Metrics.Convergence != ConvergenceKind::None) { 87402077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 87502077da7SAlexey Zhikhartsev << "convergent instructions.\n"); 87602077da7SAlexey Zhikhartsev ORE->emit([&]() { 87702077da7SAlexey Zhikhartsev return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) 87802077da7SAlexey Zhikhartsev << "Contains convergent instructions."; 87902077da7SAlexey Zhikhartsev }); 88002077da7SAlexey Zhikhartsev return false; 88102077da7SAlexey Zhikhartsev } 882f85c5079SPhilip Reames 883f85c5079SPhilip Reames if (!Metrics.NumInsts.isValid()) { 884f85c5079SPhilip Reames LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 885f85c5079SPhilip Reames << "instructions with invalid cost.\n"); 886f85c5079SPhilip Reames ORE->emit([&]() { 887f85c5079SPhilip Reames return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) 888f85c5079SPhilip Reames << "Contains instructions with invalid cost."; 889f85c5079SPhilip Reames }); 890f85c5079SPhilip Reames return false; 891f85c5079SPhilip Reames } 89202077da7SAlexey Zhikhartsev } 89302077da7SAlexey Zhikhartsev 8949c710ebbSDaniil Fukalov InstructionCost DuplicationCost = 0; 89502077da7SAlexey Zhikhartsev 89602077da7SAlexey Zhikhartsev unsigned JumpTableSize = 0; 89702077da7SAlexey Zhikhartsev TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr, 89802077da7SAlexey Zhikhartsev nullptr); 89902077da7SAlexey Zhikhartsev if (JumpTableSize == 0) { 90002077da7SAlexey Zhikhartsev // Factor in the number of conditional branches reduced from jump 90102077da7SAlexey Zhikhartsev // threading. Assume that lowering the switch block is implemented by 90202077da7SAlexey Zhikhartsev // using binary search, hence the LogBase2(). 90302077da7SAlexey Zhikhartsev unsigned CondBranches = 90402077da7SAlexey Zhikhartsev APInt(32, Switch->getNumSuccessors()).ceilLogBase2(); 9052fba4694SXChy assert(CondBranches > 0 && 9062fba4694SXChy "The threaded switch must have multiple branches"); 9079c710ebbSDaniil Fukalov DuplicationCost = Metrics.NumInsts / CondBranches; 90802077da7SAlexey Zhikhartsev } else { 90902077da7SAlexey Zhikhartsev // Compared with jump tables, the DFA optimizer removes an indirect branch 91002077da7SAlexey Zhikhartsev // on each loop iteration, thus making branch prediction more precise. The 91102077da7SAlexey Zhikhartsev // more branch targets there are, the more likely it is for the branch 91202077da7SAlexey Zhikhartsev // predictor to make a mistake, and the more benefit there is in the DFA 91302077da7SAlexey Zhikhartsev // optimizer. Thus, the more branch targets there are, the lower is the 91402077da7SAlexey Zhikhartsev // cost of the DFA opt. 9159c710ebbSDaniil Fukalov DuplicationCost = Metrics.NumInsts / JumpTableSize; 91602077da7SAlexey Zhikhartsev } 91702077da7SAlexey Zhikhartsev 91802077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block " 91902077da7SAlexey Zhikhartsev << SwitchPaths->getSwitchBlock()->getName() 92002077da7SAlexey Zhikhartsev << " is: " << DuplicationCost << "\n\n"); 92102077da7SAlexey Zhikhartsev 92202077da7SAlexey Zhikhartsev if (DuplicationCost > CostThreshold) { 92302077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the " 92402077da7SAlexey Zhikhartsev << "cost threshold.\n"); 92502077da7SAlexey Zhikhartsev ORE->emit([&]() { 92602077da7SAlexey Zhikhartsev return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch) 92702077da7SAlexey Zhikhartsev << "Duplication cost exceeds the cost threshold (cost=" 92802077da7SAlexey Zhikhartsev << ore::NV("Cost", DuplicationCost) 92902077da7SAlexey Zhikhartsev << ", threshold=" << ore::NV("Threshold", CostThreshold) << ")."; 93002077da7SAlexey Zhikhartsev }); 93102077da7SAlexey Zhikhartsev return false; 93202077da7SAlexey Zhikhartsev } 93302077da7SAlexey Zhikhartsev 93402077da7SAlexey Zhikhartsev ORE->emit([&]() { 93502077da7SAlexey Zhikhartsev return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch) 93602077da7SAlexey Zhikhartsev << "Switch statement jump-threaded."; 93702077da7SAlexey Zhikhartsev }); 93802077da7SAlexey Zhikhartsev 93902077da7SAlexey Zhikhartsev return true; 94002077da7SAlexey Zhikhartsev } 94102077da7SAlexey Zhikhartsev 94202077da7SAlexey Zhikhartsev /// Transform each threading path to effectively jump thread the DFA. 94302077da7SAlexey Zhikhartsev void createAllExitPaths() { 94402077da7SAlexey Zhikhartsev DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager); 94502077da7SAlexey Zhikhartsev 94602077da7SAlexey Zhikhartsev // Move the switch block to the end of the path, since it will be duplicated 94702077da7SAlexey Zhikhartsev BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock(); 94802077da7SAlexey Zhikhartsev for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 94902077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << TPath << "\n"); 950b167ada8SUsman Nadeem // TODO: Fix exit path creation logic so that we dont need this 951b167ada8SUsman Nadeem // placeholder. 952b167ada8SUsman Nadeem TPath.push_front(SwitchBlock); 95302077da7SAlexey Zhikhartsev } 95402077da7SAlexey Zhikhartsev 95502077da7SAlexey Zhikhartsev // Transform the ThreadingPaths and keep track of the cloned values 95602077da7SAlexey Zhikhartsev DuplicateBlockMap DuplicateMap; 95702077da7SAlexey Zhikhartsev DefMap NewDefs; 95802077da7SAlexey Zhikhartsev 95902077da7SAlexey Zhikhartsev SmallSet<BasicBlock *, 16> BlocksToClean; 96002077da7SAlexey Zhikhartsev for (BasicBlock *BB : successors(SwitchBlock)) 96102077da7SAlexey Zhikhartsev BlocksToClean.insert(BB); 96202077da7SAlexey Zhikhartsev 96302077da7SAlexey Zhikhartsev for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 96402077da7SAlexey Zhikhartsev createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); 96502077da7SAlexey Zhikhartsev NumPaths++; 96602077da7SAlexey Zhikhartsev } 96702077da7SAlexey Zhikhartsev 96802077da7SAlexey Zhikhartsev // After all paths are cloned, now update the last successor of the cloned 96902077da7SAlexey Zhikhartsev // path so it skips over the switch statement 97002077da7SAlexey Zhikhartsev for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) 97102077da7SAlexey Zhikhartsev updateLastSuccessor(TPath, DuplicateMap, &DTU); 97202077da7SAlexey Zhikhartsev 97302077da7SAlexey Zhikhartsev // For each instruction that was cloned and used outside, update its uses 97402077da7SAlexey Zhikhartsev updateSSA(NewDefs); 97502077da7SAlexey Zhikhartsev 97602077da7SAlexey Zhikhartsev // Clean PHI Nodes for the newly created blocks 97702077da7SAlexey Zhikhartsev for (BasicBlock *BB : BlocksToClean) 97802077da7SAlexey Zhikhartsev cleanPhiNodes(BB); 97902077da7SAlexey Zhikhartsev } 98002077da7SAlexey Zhikhartsev 98102077da7SAlexey Zhikhartsev /// For a specific ThreadingPath \p Path, create an exit path starting from 98202077da7SAlexey Zhikhartsev /// the determinator block. 98302077da7SAlexey Zhikhartsev /// 98402077da7SAlexey Zhikhartsev /// To remember the correct destination, we have to duplicate blocks 98502077da7SAlexey Zhikhartsev /// corresponding to each state. Also update the terminating instruction of 98602077da7SAlexey Zhikhartsev /// the predecessors, and phis in the successor blocks. 98702077da7SAlexey Zhikhartsev void createExitPath(DefMap &NewDefs, ThreadingPath &Path, 98802077da7SAlexey Zhikhartsev DuplicateBlockMap &DuplicateMap, 98902077da7SAlexey Zhikhartsev SmallSet<BasicBlock *, 16> &BlocksToClean, 99002077da7SAlexey Zhikhartsev DomTreeUpdater *DTU) { 991019ffbf3SXChy APInt NextState = Path.getExitValue(); 99202077da7SAlexey Zhikhartsev const BasicBlock *Determinator = Path.getDeterminatorBB(); 99302077da7SAlexey Zhikhartsev PathType PathBBs = Path.getPath(); 99402077da7SAlexey Zhikhartsev 99502077da7SAlexey Zhikhartsev // Don't select the placeholder block in front 99602077da7SAlexey Zhikhartsev if (PathBBs.front() == Determinator) 99702077da7SAlexey Zhikhartsev PathBBs.pop_front(); 99802077da7SAlexey Zhikhartsev 9995ea31555SKazu Hirata auto DetIt = llvm::find(PathBBs, Determinator); 10002c0fc0f3SXChy // When there is only one BB in PathBBs, the determinator takes itself as a 10012c0fc0f3SXChy // direct predecessor. 10022c0fc0f3SXChy BasicBlock *PrevBB = PathBBs.size() == 1 ? *DetIt : *std::prev(DetIt); 100302077da7SAlexey Zhikhartsev for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 100402077da7SAlexey Zhikhartsev BasicBlock *BB = *BBIt; 100502077da7SAlexey Zhikhartsev BlocksToClean.insert(BB); 100602077da7SAlexey Zhikhartsev 100702077da7SAlexey Zhikhartsev // We already cloned BB for this NextState, now just update the branch 100802077da7SAlexey Zhikhartsev // and continue. 100902077da7SAlexey Zhikhartsev BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap); 101002077da7SAlexey Zhikhartsev if (NextBB) { 101102077da7SAlexey Zhikhartsev updatePredecessor(PrevBB, BB, NextBB, DTU); 101202077da7SAlexey Zhikhartsev PrevBB = NextBB; 101302077da7SAlexey Zhikhartsev continue; 101402077da7SAlexey Zhikhartsev } 101502077da7SAlexey Zhikhartsev 101602077da7SAlexey Zhikhartsev // Clone the BB and update the successor of Prev to jump to the new block 101702077da7SAlexey Zhikhartsev BasicBlock *NewBB = cloneBlockAndUpdatePredecessor( 101802077da7SAlexey Zhikhartsev BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU); 101902077da7SAlexey Zhikhartsev DuplicateMap[BB].push_back({NewBB, NextState}); 102002077da7SAlexey Zhikhartsev BlocksToClean.insert(NewBB); 102102077da7SAlexey Zhikhartsev PrevBB = NewBB; 102202077da7SAlexey Zhikhartsev } 102302077da7SAlexey Zhikhartsev } 102402077da7SAlexey Zhikhartsev 102502077da7SAlexey Zhikhartsev /// Restore SSA form after cloning blocks. 102602077da7SAlexey Zhikhartsev /// 102702077da7SAlexey Zhikhartsev /// Each cloned block creates new defs for a variable, and the uses need to be 102802077da7SAlexey Zhikhartsev /// updated to reflect this. The uses may be replaced with a cloned value, or 102902077da7SAlexey Zhikhartsev /// some derived phi instruction. Note that all uses of a value defined in the 103002077da7SAlexey Zhikhartsev /// same block were already remapped when cloning the block. 103102077da7SAlexey Zhikhartsev void updateSSA(DefMap &NewDefs) { 103202077da7SAlexey Zhikhartsev SSAUpdaterBulk SSAUpdate; 103302077da7SAlexey Zhikhartsev SmallVector<Use *, 16> UsesToRename; 103402077da7SAlexey Zhikhartsev 1035c83c4b58SKazu Hirata for (const auto &KV : NewDefs) { 103602077da7SAlexey Zhikhartsev Instruction *I = KV.first; 103702077da7SAlexey Zhikhartsev BasicBlock *BB = I->getParent(); 103802077da7SAlexey Zhikhartsev std::vector<Instruction *> Cloned = KV.second; 103902077da7SAlexey Zhikhartsev 104002077da7SAlexey Zhikhartsev // Scan all uses of this instruction to see if it is used outside of its 104102077da7SAlexey Zhikhartsev // block, and if so, record them in UsesToRename. 104202077da7SAlexey Zhikhartsev for (Use &U : I->uses()) { 104302077da7SAlexey Zhikhartsev Instruction *User = cast<Instruction>(U.getUser()); 104402077da7SAlexey Zhikhartsev if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 104502077da7SAlexey Zhikhartsev if (UserPN->getIncomingBlock(U) == BB) 104602077da7SAlexey Zhikhartsev continue; 104702077da7SAlexey Zhikhartsev } else if (User->getParent() == BB) { 104802077da7SAlexey Zhikhartsev continue; 104902077da7SAlexey Zhikhartsev } 105002077da7SAlexey Zhikhartsev 105102077da7SAlexey Zhikhartsev UsesToRename.push_back(&U); 105202077da7SAlexey Zhikhartsev } 105302077da7SAlexey Zhikhartsev 105402077da7SAlexey Zhikhartsev // If there are no uses outside the block, we're done with this 105502077da7SAlexey Zhikhartsev // instruction. 105602077da7SAlexey Zhikhartsev if (UsesToRename.empty()) 105702077da7SAlexey Zhikhartsev continue; 105802077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I 105902077da7SAlexey Zhikhartsev << "\n"); 106002077da7SAlexey Zhikhartsev 106102077da7SAlexey Zhikhartsev // We found a use of I outside of BB. Rename all uses of I that are 106202077da7SAlexey Zhikhartsev // outside its block to be uses of the appropriate PHI node etc. See 106302077da7SAlexey Zhikhartsev // ValuesInBlocks with the values we know. 106402077da7SAlexey Zhikhartsev unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType()); 106502077da7SAlexey Zhikhartsev SSAUpdate.AddAvailableValue(VarNum, BB, I); 106602077da7SAlexey Zhikhartsev for (Instruction *New : Cloned) 106702077da7SAlexey Zhikhartsev SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New); 106802077da7SAlexey Zhikhartsev 106902077da7SAlexey Zhikhartsev while (!UsesToRename.empty()) 107002077da7SAlexey Zhikhartsev SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val()); 107102077da7SAlexey Zhikhartsev 107202077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "\n"); 107302077da7SAlexey Zhikhartsev } 107402077da7SAlexey Zhikhartsev // SSAUpdater handles phi placement and renaming uses with the appropriate 107502077da7SAlexey Zhikhartsev // value. 107602077da7SAlexey Zhikhartsev SSAUpdate.RewriteAllUses(DT); 107702077da7SAlexey Zhikhartsev } 107802077da7SAlexey Zhikhartsev 107902077da7SAlexey Zhikhartsev /// Clones a basic block, and adds it to the CFG. 108002077da7SAlexey Zhikhartsev /// 108102077da7SAlexey Zhikhartsev /// This function also includes updating phi nodes in the successors of the 108202077da7SAlexey Zhikhartsev /// BB, and remapping uses that were defined locally in the cloned BB. 108302077da7SAlexey Zhikhartsev BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB, 1084019ffbf3SXChy const APInt &NextState, 108502077da7SAlexey Zhikhartsev DuplicateBlockMap &DuplicateMap, 108602077da7SAlexey Zhikhartsev DefMap &NewDefs, 108702077da7SAlexey Zhikhartsev DomTreeUpdater *DTU) { 108802077da7SAlexey Zhikhartsev ValueToValueMapTy VMap; 108902077da7SAlexey Zhikhartsev BasicBlock *NewBB = CloneBasicBlock( 1090019ffbf3SXChy BB, VMap, ".jt" + std::to_string(NextState.getLimitedValue()), 1091019ffbf3SXChy BB->getParent()); 109202077da7SAlexey Zhikhartsev NewBB->moveAfter(BB); 109302077da7SAlexey Zhikhartsev NumCloned++; 109402077da7SAlexey Zhikhartsev 109502077da7SAlexey Zhikhartsev for (Instruction &I : *NewBB) { 109602077da7SAlexey Zhikhartsev // Do not remap operands of PHINode in case a definition in BB is an 109702077da7SAlexey Zhikhartsev // incoming value to a phi in the same block. This incoming value will 109802077da7SAlexey Zhikhartsev // be renamed later while restoring SSA. 109902077da7SAlexey Zhikhartsev if (isa<PHINode>(&I)) 110002077da7SAlexey Zhikhartsev continue; 110102077da7SAlexey Zhikhartsev RemapInstruction(&I, VMap, 110202077da7SAlexey Zhikhartsev RF_IgnoreMissingLocals | RF_NoModuleLevelChanges); 110302077da7SAlexey Zhikhartsev if (AssumeInst *II = dyn_cast<AssumeInst>(&I)) 110402077da7SAlexey Zhikhartsev AC->registerAssumption(II); 110502077da7SAlexey Zhikhartsev } 110602077da7SAlexey Zhikhartsev 110702077da7SAlexey Zhikhartsev updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap); 110802077da7SAlexey Zhikhartsev updatePredecessor(PrevBB, BB, NewBB, DTU); 110902077da7SAlexey Zhikhartsev updateDefMap(NewDefs, VMap); 111002077da7SAlexey Zhikhartsev 111102077da7SAlexey Zhikhartsev // Add all successors to the DominatorTree 111202077da7SAlexey Zhikhartsev SmallPtrSet<BasicBlock *, 4> SuccSet; 111302077da7SAlexey Zhikhartsev for (auto *SuccBB : successors(NewBB)) { 111402077da7SAlexey Zhikhartsev if (SuccSet.insert(SuccBB).second) 111502077da7SAlexey Zhikhartsev DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}}); 111602077da7SAlexey Zhikhartsev } 111702077da7SAlexey Zhikhartsev SuccSet.clear(); 111802077da7SAlexey Zhikhartsev return NewBB; 111902077da7SAlexey Zhikhartsev } 112002077da7SAlexey Zhikhartsev 112102077da7SAlexey Zhikhartsev /// Update the phi nodes in BB's successors. 112202077da7SAlexey Zhikhartsev /// 112302077da7SAlexey Zhikhartsev /// This means creating a new incoming value from NewBB with the new 112402077da7SAlexey Zhikhartsev /// instruction wherever there is an incoming value from BB. 112502077da7SAlexey Zhikhartsev void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB, 1126019ffbf3SXChy const APInt &NextState, ValueToValueMapTy &VMap, 112702077da7SAlexey Zhikhartsev DuplicateBlockMap &DuplicateMap) { 112802077da7SAlexey Zhikhartsev std::vector<BasicBlock *> BlocksToUpdate; 112902077da7SAlexey Zhikhartsev 113002077da7SAlexey Zhikhartsev // If BB is the last block in the path, we can simply update the one case 113102077da7SAlexey Zhikhartsev // successor that will be reached. 113202077da7SAlexey Zhikhartsev if (BB == SwitchPaths->getSwitchBlock()) { 113302077da7SAlexey Zhikhartsev SwitchInst *Switch = SwitchPaths->getSwitchInst(); 113402077da7SAlexey Zhikhartsev BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 113502077da7SAlexey Zhikhartsev BlocksToUpdate.push_back(NextCase); 113602077da7SAlexey Zhikhartsev BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap); 113702077da7SAlexey Zhikhartsev if (ClonedSucc) 113802077da7SAlexey Zhikhartsev BlocksToUpdate.push_back(ClonedSucc); 113902077da7SAlexey Zhikhartsev } 114002077da7SAlexey Zhikhartsev // Otherwise update phis in all successors. 114102077da7SAlexey Zhikhartsev else { 114202077da7SAlexey Zhikhartsev for (BasicBlock *Succ : successors(BB)) { 114302077da7SAlexey Zhikhartsev BlocksToUpdate.push_back(Succ); 114402077da7SAlexey Zhikhartsev 114502077da7SAlexey Zhikhartsev // Check if a successor has already been cloned for the particular exit 114602077da7SAlexey Zhikhartsev // value. In this case if a successor was already cloned, the phi nodes 114702077da7SAlexey Zhikhartsev // in the cloned block should be updated directly. 114802077da7SAlexey Zhikhartsev BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap); 114902077da7SAlexey Zhikhartsev if (ClonedSucc) 115002077da7SAlexey Zhikhartsev BlocksToUpdate.push_back(ClonedSucc); 115102077da7SAlexey Zhikhartsev } 115202077da7SAlexey Zhikhartsev } 115302077da7SAlexey Zhikhartsev 115402077da7SAlexey Zhikhartsev // If there is a phi with an incoming value from BB, create a new incoming 115502077da7SAlexey Zhikhartsev // value for the new predecessor ClonedBB. The value will either be the same 115602077da7SAlexey Zhikhartsev // value from BB or a cloned value. 115702077da7SAlexey Zhikhartsev for (BasicBlock *Succ : BlocksToUpdate) { 115802077da7SAlexey Zhikhartsev for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II); 115902077da7SAlexey Zhikhartsev ++II) { 116002077da7SAlexey Zhikhartsev Value *Incoming = Phi->getIncomingValueForBlock(BB); 116102077da7SAlexey Zhikhartsev if (Incoming) { 116202077da7SAlexey Zhikhartsev if (isa<Constant>(Incoming)) { 116302077da7SAlexey Zhikhartsev Phi->addIncoming(Incoming, ClonedBB); 116402077da7SAlexey Zhikhartsev continue; 116502077da7SAlexey Zhikhartsev } 116602077da7SAlexey Zhikhartsev Value *ClonedVal = VMap[Incoming]; 116702077da7SAlexey Zhikhartsev if (ClonedVal) 116802077da7SAlexey Zhikhartsev Phi->addIncoming(ClonedVal, ClonedBB); 116902077da7SAlexey Zhikhartsev else 117002077da7SAlexey Zhikhartsev Phi->addIncoming(Incoming, ClonedBB); 117102077da7SAlexey Zhikhartsev } 117202077da7SAlexey Zhikhartsev } 117302077da7SAlexey Zhikhartsev } 117402077da7SAlexey Zhikhartsev } 117502077da7SAlexey Zhikhartsev 117602077da7SAlexey Zhikhartsev /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all 117702077da7SAlexey Zhikhartsev /// other successors are kept as well. 117802077da7SAlexey Zhikhartsev void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB, 117902077da7SAlexey Zhikhartsev BasicBlock *NewBB, DomTreeUpdater *DTU) { 118002077da7SAlexey Zhikhartsev // When a path is reused, there is a chance that predecessors were already 118102077da7SAlexey Zhikhartsev // updated before. Check if the predecessor needs to be updated first. 118202077da7SAlexey Zhikhartsev if (!isPredecessor(OldBB, PrevBB)) 118302077da7SAlexey Zhikhartsev return; 118402077da7SAlexey Zhikhartsev 118502077da7SAlexey Zhikhartsev Instruction *PrevTerm = PrevBB->getTerminator(); 118602077da7SAlexey Zhikhartsev for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) { 118702077da7SAlexey Zhikhartsev if (PrevTerm->getSuccessor(Idx) == OldBB) { 118802077da7SAlexey Zhikhartsev OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true); 118902077da7SAlexey Zhikhartsev PrevTerm->setSuccessor(Idx, NewBB); 119002077da7SAlexey Zhikhartsev } 119102077da7SAlexey Zhikhartsev } 119202077da7SAlexey Zhikhartsev DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB}, 119302077da7SAlexey Zhikhartsev {DominatorTree::Insert, PrevBB, NewBB}}); 119402077da7SAlexey Zhikhartsev } 119502077da7SAlexey Zhikhartsev 119602077da7SAlexey Zhikhartsev /// Add new value mappings to the DefMap to keep track of all new definitions 119702077da7SAlexey Zhikhartsev /// for a particular instruction. These will be used while updating SSA form. 119802077da7SAlexey Zhikhartsev void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) { 11999d555b4aSOlle Fredriksson SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector; 12009d555b4aSOlle Fredriksson NewDefsVector.reserve(VMap.size()); 12019d555b4aSOlle Fredriksson 120202077da7SAlexey Zhikhartsev for (auto Entry : VMap) { 120302077da7SAlexey Zhikhartsev Instruction *Inst = 120402077da7SAlexey Zhikhartsev dyn_cast<Instruction>(const_cast<Value *>(Entry.first)); 120502077da7SAlexey Zhikhartsev if (!Inst || !Entry.second || isa<BranchInst>(Inst) || 120602077da7SAlexey Zhikhartsev isa<SwitchInst>(Inst)) { 120702077da7SAlexey Zhikhartsev continue; 120802077da7SAlexey Zhikhartsev } 120902077da7SAlexey Zhikhartsev 121002077da7SAlexey Zhikhartsev Instruction *Cloned = dyn_cast<Instruction>(Entry.second); 121102077da7SAlexey Zhikhartsev if (!Cloned) 121202077da7SAlexey Zhikhartsev continue; 121302077da7SAlexey Zhikhartsev 12149d555b4aSOlle Fredriksson NewDefsVector.push_back({Inst, Cloned}); 121502077da7SAlexey Zhikhartsev } 12169d555b4aSOlle Fredriksson 12179d555b4aSOlle Fredriksson // Sort the defs to get deterministic insertion order into NewDefs. 12189d555b4aSOlle Fredriksson sort(NewDefsVector, [](const auto &LHS, const auto &RHS) { 12199d555b4aSOlle Fredriksson if (LHS.first == RHS.first) 12209d555b4aSOlle Fredriksson return LHS.second->comesBefore(RHS.second); 12219d555b4aSOlle Fredriksson return LHS.first->comesBefore(RHS.first); 12229d555b4aSOlle Fredriksson }); 12239d555b4aSOlle Fredriksson 12249d555b4aSOlle Fredriksson for (const auto &KV : NewDefsVector) 12259d555b4aSOlle Fredriksson NewDefs[KV.first].push_back(KV.second); 122602077da7SAlexey Zhikhartsev } 122702077da7SAlexey Zhikhartsev 122802077da7SAlexey Zhikhartsev /// Update the last branch of a particular cloned path to point to the correct 122902077da7SAlexey Zhikhartsev /// case successor. 123002077da7SAlexey Zhikhartsev /// 123102077da7SAlexey Zhikhartsev /// Note that this is an optional step and would have been done in later 123202077da7SAlexey Zhikhartsev /// optimizations, but it makes the CFG significantly easier to work with. 123302077da7SAlexey Zhikhartsev void updateLastSuccessor(ThreadingPath &TPath, 123402077da7SAlexey Zhikhartsev DuplicateBlockMap &DuplicateMap, 123502077da7SAlexey Zhikhartsev DomTreeUpdater *DTU) { 1236019ffbf3SXChy APInt NextState = TPath.getExitValue(); 123702077da7SAlexey Zhikhartsev BasicBlock *BB = TPath.getPath().back(); 123802077da7SAlexey Zhikhartsev BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap); 123902077da7SAlexey Zhikhartsev 124002077da7SAlexey Zhikhartsev // Note multiple paths can end at the same block so check that it is not 124102077da7SAlexey Zhikhartsev // updated yet 124202077da7SAlexey Zhikhartsev if (!isa<SwitchInst>(LastBlock->getTerminator())) 124302077da7SAlexey Zhikhartsev return; 124402077da7SAlexey Zhikhartsev SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator()); 124502077da7SAlexey Zhikhartsev BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 124602077da7SAlexey Zhikhartsev 124702077da7SAlexey Zhikhartsev std::vector<DominatorTree::UpdateType> DTUpdates; 124802077da7SAlexey Zhikhartsev SmallPtrSet<BasicBlock *, 4> SuccSet; 124902077da7SAlexey Zhikhartsev for (BasicBlock *Succ : successors(LastBlock)) { 125002077da7SAlexey Zhikhartsev if (Succ != NextCase && SuccSet.insert(Succ).second) 125102077da7SAlexey Zhikhartsev DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ}); 125202077da7SAlexey Zhikhartsev } 125302077da7SAlexey Zhikhartsev 125402077da7SAlexey Zhikhartsev Switch->eraseFromParent(); 125502077da7SAlexey Zhikhartsev BranchInst::Create(NextCase, LastBlock); 125602077da7SAlexey Zhikhartsev 125702077da7SAlexey Zhikhartsev DTU->applyUpdates(DTUpdates); 125802077da7SAlexey Zhikhartsev } 125902077da7SAlexey Zhikhartsev 126002077da7SAlexey Zhikhartsev /// After cloning blocks, some of the phi nodes have extra incoming values 126102077da7SAlexey Zhikhartsev /// that are no longer used. This function removes them. 126202077da7SAlexey Zhikhartsev void cleanPhiNodes(BasicBlock *BB) { 126302077da7SAlexey Zhikhartsev // If BB is no longer reachable, remove any remaining phi nodes 126402077da7SAlexey Zhikhartsev if (pred_empty(BB)) { 126502077da7SAlexey Zhikhartsev std::vector<PHINode *> PhiToRemove; 126602077da7SAlexey Zhikhartsev for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 126702077da7SAlexey Zhikhartsev PhiToRemove.push_back(Phi); 126802077da7SAlexey Zhikhartsev } 126902077da7SAlexey Zhikhartsev for (PHINode *PN : PhiToRemove) { 127053dc0f10SNuno Lopes PN->replaceAllUsesWith(PoisonValue::get(PN->getType())); 127102077da7SAlexey Zhikhartsev PN->eraseFromParent(); 127202077da7SAlexey Zhikhartsev } 127302077da7SAlexey Zhikhartsev return; 127402077da7SAlexey Zhikhartsev } 127502077da7SAlexey Zhikhartsev 127602077da7SAlexey Zhikhartsev // Remove any incoming values that come from an invalid predecessor 127702077da7SAlexey Zhikhartsev for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 127802077da7SAlexey Zhikhartsev std::vector<BasicBlock *> BlocksToRemove; 127902077da7SAlexey Zhikhartsev for (BasicBlock *IncomingBB : Phi->blocks()) { 128002077da7SAlexey Zhikhartsev if (!isPredecessor(BB, IncomingBB)) 128102077da7SAlexey Zhikhartsev BlocksToRemove.push_back(IncomingBB); 128202077da7SAlexey Zhikhartsev } 128302077da7SAlexey Zhikhartsev for (BasicBlock *BB : BlocksToRemove) 128402077da7SAlexey Zhikhartsev Phi->removeIncomingValue(BB); 128502077da7SAlexey Zhikhartsev } 128602077da7SAlexey Zhikhartsev } 128702077da7SAlexey Zhikhartsev 128802077da7SAlexey Zhikhartsev /// Checks if BB was already cloned for a particular next state value. If it 128902077da7SAlexey Zhikhartsev /// was then it returns this cloned block, and otherwise null. 1290019ffbf3SXChy BasicBlock *getClonedBB(BasicBlock *BB, const APInt &NextState, 129102077da7SAlexey Zhikhartsev DuplicateBlockMap &DuplicateMap) { 129202077da7SAlexey Zhikhartsev CloneList ClonedBBs = DuplicateMap[BB]; 129302077da7SAlexey Zhikhartsev 129402077da7SAlexey Zhikhartsev // Find an entry in the CloneList with this NextState. If it exists then 129502077da7SAlexey Zhikhartsev // return the corresponding BB 129602077da7SAlexey Zhikhartsev auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) { 129702077da7SAlexey Zhikhartsev return C.State == NextState; 129802077da7SAlexey Zhikhartsev }); 129902077da7SAlexey Zhikhartsev return It != ClonedBBs.end() ? (*It).BB : nullptr; 130002077da7SAlexey Zhikhartsev } 130102077da7SAlexey Zhikhartsev 130202077da7SAlexey Zhikhartsev /// Helper to get the successor corresponding to a particular case value for 130302077da7SAlexey Zhikhartsev /// a switch statement. 1304019ffbf3SXChy BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) { 130502077da7SAlexey Zhikhartsev BasicBlock *NextCase = nullptr; 130602077da7SAlexey Zhikhartsev for (auto Case : Switch->cases()) { 1307019ffbf3SXChy if (Case.getCaseValue()->getValue() == NextState) { 130802077da7SAlexey Zhikhartsev NextCase = Case.getCaseSuccessor(); 130902077da7SAlexey Zhikhartsev break; 131002077da7SAlexey Zhikhartsev } 131102077da7SAlexey Zhikhartsev } 131202077da7SAlexey Zhikhartsev if (!NextCase) 131302077da7SAlexey Zhikhartsev NextCase = Switch->getDefaultDest(); 131402077da7SAlexey Zhikhartsev return NextCase; 131502077da7SAlexey Zhikhartsev } 131602077da7SAlexey Zhikhartsev 131702077da7SAlexey Zhikhartsev /// Returns true if IncomingBB is a predecessor of BB. 131802077da7SAlexey Zhikhartsev bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { 1319f83a88a1SKazu Hirata return llvm::is_contained(predecessors(BB), IncomingBB); 132002077da7SAlexey Zhikhartsev } 132102077da7SAlexey Zhikhartsev 132202077da7SAlexey Zhikhartsev AllSwitchPaths *SwitchPaths; 132302077da7SAlexey Zhikhartsev DominatorTree *DT; 132402077da7SAlexey Zhikhartsev AssumptionCache *AC; 132502077da7SAlexey Zhikhartsev TargetTransformInfo *TTI; 132602077da7SAlexey Zhikhartsev OptimizationRemarkEmitter *ORE; 132702077da7SAlexey Zhikhartsev SmallPtrSet<const Value *, 32> EphValues; 132802077da7SAlexey Zhikhartsev std::vector<ThreadingPath> TPaths; 132902077da7SAlexey Zhikhartsev }; 133002077da7SAlexey Zhikhartsev 133102077da7SAlexey Zhikhartsev bool DFAJumpThreading::run(Function &F) { 133202077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n"); 133302077da7SAlexey Zhikhartsev 133402077da7SAlexey Zhikhartsev if (F.hasOptSize()) { 133502077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n"); 133602077da7SAlexey Zhikhartsev return false; 133702077da7SAlexey Zhikhartsev } 133802077da7SAlexey Zhikhartsev 133902077da7SAlexey Zhikhartsev if (ClViewCfgBefore) 134002077da7SAlexey Zhikhartsev F.viewCFG(); 134102077da7SAlexey Zhikhartsev 134202077da7SAlexey Zhikhartsev SmallVector<AllSwitchPaths, 2> ThreadableLoops; 134302077da7SAlexey Zhikhartsev bool MadeChanges = false; 1344c229f767SXChy LoopInfoBroken = false; 134502077da7SAlexey Zhikhartsev 134602077da7SAlexey Zhikhartsev for (BasicBlock &BB : F) { 134702077da7SAlexey Zhikhartsev auto *SI = dyn_cast<SwitchInst>(BB.getTerminator()); 134802077da7SAlexey Zhikhartsev if (!SI) 134902077da7SAlexey Zhikhartsev continue; 135002077da7SAlexey Zhikhartsev 135102077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName() 13528b0d7634SAlex Zhikhartsev << " is a candidate\n"); 13536b53ada6SXChy MainSwitch Switch(SI, LI, ORE); 135402077da7SAlexey Zhikhartsev 1355b167ada8SUsman Nadeem if (!Switch.getInstr()) { 1356b167ada8SUsman Nadeem LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is not a " 1357b167ada8SUsman Nadeem << "candidate for jump threading\n"); 135802077da7SAlexey Zhikhartsev continue; 1359b167ada8SUsman Nadeem } 136002077da7SAlexey Zhikhartsev 136102077da7SAlexey Zhikhartsev LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a " 136202077da7SAlexey Zhikhartsev << "candidate for jump threading\n"); 136302077da7SAlexey Zhikhartsev LLVM_DEBUG(SI->dump()); 136402077da7SAlexey Zhikhartsev 136502077da7SAlexey Zhikhartsev unfoldSelectInstrs(DT, Switch.getSelectInsts()); 136602077da7SAlexey Zhikhartsev if (!Switch.getSelectInsts().empty()) 136702077da7SAlexey Zhikhartsev MadeChanges = true; 136802077da7SAlexey Zhikhartsev 1369b167ada8SUsman Nadeem AllSwitchPaths SwitchPaths(&Switch, ORE, LI, 1370b167ada8SUsman Nadeem LI->getLoopFor(&BB)->getOutermostLoop()); 137102077da7SAlexey Zhikhartsev SwitchPaths.run(); 137202077da7SAlexey Zhikhartsev 137302077da7SAlexey Zhikhartsev if (SwitchPaths.getNumThreadingPaths() > 0) { 137402077da7SAlexey Zhikhartsev ThreadableLoops.push_back(SwitchPaths); 137502077da7SAlexey Zhikhartsev 137602077da7SAlexey Zhikhartsev // For the time being limit this optimization to occurring once in a 137702077da7SAlexey Zhikhartsev // function since it can change the CFG significantly. This is not a 137802077da7SAlexey Zhikhartsev // strict requirement but it can cause buggy behavior if there is an 137902077da7SAlexey Zhikhartsev // overlap of blocks in different opportunities. There is a lot of room to 138002077da7SAlexey Zhikhartsev // experiment with catching more opportunities here. 1381c229f767SXChy // NOTE: To release this contraint, we must handle LoopInfo invalidation 138202077da7SAlexey Zhikhartsev break; 138302077da7SAlexey Zhikhartsev } 138402077da7SAlexey Zhikhartsev } 138502077da7SAlexey Zhikhartsev 1386c229f767SXChy #ifdef NDEBUG 1387c229f767SXChy LI->verify(*DT); 1388c229f767SXChy #endif 1389c229f767SXChy 139002077da7SAlexey Zhikhartsev SmallPtrSet<const Value *, 32> EphValues; 139102077da7SAlexey Zhikhartsev if (ThreadableLoops.size() > 0) 139202077da7SAlexey Zhikhartsev CodeMetrics::collectEphemeralValues(&F, AC, EphValues); 139302077da7SAlexey Zhikhartsev 139402077da7SAlexey Zhikhartsev for (AllSwitchPaths SwitchPaths : ThreadableLoops) { 139502077da7SAlexey Zhikhartsev TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); 139602077da7SAlexey Zhikhartsev Transform.run(); 139702077da7SAlexey Zhikhartsev MadeChanges = true; 1398c229f767SXChy LoopInfoBroken = true; 139902077da7SAlexey Zhikhartsev } 140002077da7SAlexey Zhikhartsev 140102077da7SAlexey Zhikhartsev #ifdef EXPENSIVE_CHECKS 140202077da7SAlexey Zhikhartsev assert(DT->verify(DominatorTree::VerificationLevel::Full)); 140302077da7SAlexey Zhikhartsev verifyFunction(F, &dbgs()); 140402077da7SAlexey Zhikhartsev #endif 140502077da7SAlexey Zhikhartsev 140602077da7SAlexey Zhikhartsev return MadeChanges; 140702077da7SAlexey Zhikhartsev } 140802077da7SAlexey Zhikhartsev 140902077da7SAlexey Zhikhartsev } // end anonymous namespace 141002077da7SAlexey Zhikhartsev 141102077da7SAlexey Zhikhartsev /// Integrate with the new Pass Manager 141202077da7SAlexey Zhikhartsev PreservedAnalyses DFAJumpThreadingPass::run(Function &F, 141302077da7SAlexey Zhikhartsev FunctionAnalysisManager &AM) { 141402077da7SAlexey Zhikhartsev AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F); 141502077da7SAlexey Zhikhartsev DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); 14166b53ada6SXChy LoopInfo &LI = AM.getResult<LoopAnalysis>(F); 141702077da7SAlexey Zhikhartsev TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); 141802077da7SAlexey Zhikhartsev OptimizationRemarkEmitter ORE(&F); 1419c229f767SXChy DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE); 1420c229f767SXChy if (!ThreadImpl.run(F)) 142102077da7SAlexey Zhikhartsev return PreservedAnalyses::all(); 142202077da7SAlexey Zhikhartsev 142302077da7SAlexey Zhikhartsev PreservedAnalyses PA; 142402077da7SAlexey Zhikhartsev PA.preserve<DominatorTreeAnalysis>(); 1425c229f767SXChy if (!ThreadImpl.LoopInfoBroken) 1426c229f767SXChy PA.preserve<LoopAnalysis>(); 142702077da7SAlexey Zhikhartsev return PA; 142802077da7SAlexey Zhikhartsev } 1429