1*fe6060f1SDimitry Andric //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===// 2*fe6060f1SDimitry Andric // 3*fe6060f1SDimitry Andric // The LLVM Compiler Infrastructure 4*fe6060f1SDimitry Andric // 5*fe6060f1SDimitry Andric // This file is distributed under the University of Illinois Open Source 6*fe6060f1SDimitry Andric // License. See LICENSE.TXT for details. 7*fe6060f1SDimitry Andric // 8*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 9*fe6060f1SDimitry Andric // 10*fe6060f1SDimitry Andric // Transform each threading path to effectively jump thread the DFA. For 11*fe6060f1SDimitry Andric // example, the CFG below could be transformed as follows, where the cloned 12*fe6060f1SDimitry Andric // blocks unconditionally branch to the next correct case based on what is 13*fe6060f1SDimitry Andric // identified in the analysis. 14*fe6060f1SDimitry Andric // 15*fe6060f1SDimitry Andric // sw.bb sw.bb 16*fe6060f1SDimitry Andric // / | \ / | \ 17*fe6060f1SDimitry Andric // case1 case2 case3 case1 case2 case3 18*fe6060f1SDimitry Andric // \ | / | | | 19*fe6060f1SDimitry Andric // determinator det.2 det.3 det.1 20*fe6060f1SDimitry Andric // br sw.bb / | \ 21*fe6060f1SDimitry Andric // sw.bb.2 sw.bb.3 sw.bb.1 22*fe6060f1SDimitry Andric // br case2 br case3 br case1§ 23*fe6060f1SDimitry Andric // 24*fe6060f1SDimitry Andric // Definitions and Terminology: 25*fe6060f1SDimitry Andric // 26*fe6060f1SDimitry Andric // * Threading path: 27*fe6060f1SDimitry Andric // a list of basic blocks, the exit state, and the block that determines 28*fe6060f1SDimitry Andric // the next state, for which the following notation will be used: 29*fe6060f1SDimitry Andric // < path of BBs that form a cycle > [ state, determinator ] 30*fe6060f1SDimitry Andric // 31*fe6060f1SDimitry Andric // * Predictable switch: 32*fe6060f1SDimitry Andric // The switch variable is always a known constant so that all conditional 33*fe6060f1SDimitry Andric // jumps based on switch variable can be converted to unconditional jump. 34*fe6060f1SDimitry Andric // 35*fe6060f1SDimitry Andric // * Determinator: 36*fe6060f1SDimitry Andric // The basic block that determines the next state of the DFA. 37*fe6060f1SDimitry Andric // 38*fe6060f1SDimitry Andric // Representing the optimization in C-like pseudocode: the code pattern on the 39*fe6060f1SDimitry Andric // left could functionally be transformed to the right pattern if the switch 40*fe6060f1SDimitry Andric // condition is predictable. 41*fe6060f1SDimitry Andric // 42*fe6060f1SDimitry Andric // X = A goto A 43*fe6060f1SDimitry Andric // for (...) A: 44*fe6060f1SDimitry Andric // switch (X) ... 45*fe6060f1SDimitry Andric // case A goto B 46*fe6060f1SDimitry Andric // X = B B: 47*fe6060f1SDimitry Andric // case B ... 48*fe6060f1SDimitry Andric // X = C goto C 49*fe6060f1SDimitry Andric // 50*fe6060f1SDimitry Andric // The pass first checks that switch variable X is decided by the control flow 51*fe6060f1SDimitry Andric // path taken in the loop; for example, in case B, the next value of X is 52*fe6060f1SDimitry Andric // decided to be C. It then enumerates through all paths in the loop and labels 53*fe6060f1SDimitry Andric // the basic blocks where the next state is decided. 54*fe6060f1SDimitry Andric // 55*fe6060f1SDimitry Andric // Using this information it creates new paths that unconditionally branch to 56*fe6060f1SDimitry Andric // the next case. This involves cloning code, so it only gets triggered if the 57*fe6060f1SDimitry Andric // amount of code duplicated is below a threshold. 58*fe6060f1SDimitry Andric // 59*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 60*fe6060f1SDimitry Andric 61*fe6060f1SDimitry Andric #include "llvm/Transforms/Scalar/DFAJumpThreading.h" 62*fe6060f1SDimitry Andric #include "llvm/ADT/APInt.h" 63*fe6060f1SDimitry Andric #include "llvm/ADT/DenseMap.h" 64*fe6060f1SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h" 65*fe6060f1SDimitry Andric #include "llvm/ADT/SmallSet.h" 66*fe6060f1SDimitry Andric #include "llvm/ADT/Statistic.h" 67*fe6060f1SDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 68*fe6060f1SDimitry Andric #include "llvm/Analysis/CodeMetrics.h" 69*fe6060f1SDimitry Andric #include "llvm/Analysis/LoopIterator.h" 70*fe6060f1SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 71*fe6060f1SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 72*fe6060f1SDimitry Andric #include "llvm/IR/CFG.h" 73*fe6060f1SDimitry Andric #include "llvm/IR/Constants.h" 74*fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 75*fe6060f1SDimitry Andric #include "llvm/IR/Verifier.h" 76*fe6060f1SDimitry Andric #include "llvm/InitializePasses.h" 77*fe6060f1SDimitry Andric #include "llvm/Pass.h" 78*fe6060f1SDimitry Andric #include "llvm/Support/CommandLine.h" 79*fe6060f1SDimitry Andric #include "llvm/Support/Debug.h" 80*fe6060f1SDimitry Andric #include "llvm/Transforms/Scalar.h" 81*fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 82*fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 83*fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" 84*fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h" 85*fe6060f1SDimitry Andric #include <algorithm> 86*fe6060f1SDimitry Andric #include <deque> 87*fe6060f1SDimitry Andric #include <unordered_map> 88*fe6060f1SDimitry Andric #include <unordered_set> 89*fe6060f1SDimitry Andric 90*fe6060f1SDimitry Andric using namespace llvm; 91*fe6060f1SDimitry Andric 92*fe6060f1SDimitry Andric #define DEBUG_TYPE "dfa-jump-threading" 93*fe6060f1SDimitry Andric 94*fe6060f1SDimitry Andric STATISTIC(NumTransforms, "Number of transformations done"); 95*fe6060f1SDimitry Andric STATISTIC(NumCloned, "Number of blocks cloned"); 96*fe6060f1SDimitry Andric STATISTIC(NumPaths, "Number of individual paths threaded"); 97*fe6060f1SDimitry Andric 98*fe6060f1SDimitry Andric static cl::opt<bool> 99*fe6060f1SDimitry Andric ClViewCfgBefore("dfa-jump-view-cfg-before", 100*fe6060f1SDimitry Andric cl::desc("View the CFG before DFA Jump Threading"), 101*fe6060f1SDimitry Andric cl::Hidden, cl::init(false)); 102*fe6060f1SDimitry Andric 103*fe6060f1SDimitry Andric static cl::opt<unsigned> MaxPathLength( 104*fe6060f1SDimitry Andric "dfa-max-path-length", 105*fe6060f1SDimitry Andric cl::desc("Max number of blocks searched to find a threading path"), 106*fe6060f1SDimitry Andric cl::Hidden, cl::init(20)); 107*fe6060f1SDimitry Andric 108*fe6060f1SDimitry Andric static cl::opt<unsigned> 109*fe6060f1SDimitry Andric CostThreshold("dfa-cost-threshold", 110*fe6060f1SDimitry Andric cl::desc("Maximum cost accepted for the transformation"), 111*fe6060f1SDimitry Andric cl::Hidden, cl::init(50)); 112*fe6060f1SDimitry Andric 113*fe6060f1SDimitry Andric namespace { 114*fe6060f1SDimitry Andric 115*fe6060f1SDimitry Andric class SelectInstToUnfold { 116*fe6060f1SDimitry Andric SelectInst *SI; 117*fe6060f1SDimitry Andric PHINode *SIUse; 118*fe6060f1SDimitry Andric 119*fe6060f1SDimitry Andric public: 120*fe6060f1SDimitry Andric SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {} 121*fe6060f1SDimitry Andric 122*fe6060f1SDimitry Andric SelectInst *getInst() { return SI; } 123*fe6060f1SDimitry Andric PHINode *getUse() { return SIUse; } 124*fe6060f1SDimitry Andric 125*fe6060f1SDimitry Andric explicit operator bool() const { return SI && SIUse; } 126*fe6060f1SDimitry Andric }; 127*fe6060f1SDimitry Andric 128*fe6060f1SDimitry Andric void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, 129*fe6060f1SDimitry Andric std::vector<SelectInstToUnfold> *NewSIsToUnfold, 130*fe6060f1SDimitry Andric std::vector<BasicBlock *> *NewBBs); 131*fe6060f1SDimitry Andric 132*fe6060f1SDimitry Andric class DFAJumpThreading { 133*fe6060f1SDimitry Andric public: 134*fe6060f1SDimitry Andric DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, 135*fe6060f1SDimitry Andric TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE) 136*fe6060f1SDimitry Andric : AC(AC), DT(DT), TTI(TTI), ORE(ORE) {} 137*fe6060f1SDimitry Andric 138*fe6060f1SDimitry Andric bool run(Function &F); 139*fe6060f1SDimitry Andric 140*fe6060f1SDimitry Andric private: 141*fe6060f1SDimitry Andric void 142*fe6060f1SDimitry Andric unfoldSelectInstrs(DominatorTree *DT, 143*fe6060f1SDimitry Andric const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { 144*fe6060f1SDimitry Andric DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 145*fe6060f1SDimitry Andric SmallVector<SelectInstToUnfold, 4> Stack; 146*fe6060f1SDimitry Andric for (SelectInstToUnfold SIToUnfold : SelectInsts) 147*fe6060f1SDimitry Andric Stack.push_back(SIToUnfold); 148*fe6060f1SDimitry Andric 149*fe6060f1SDimitry Andric while (!Stack.empty()) { 150*fe6060f1SDimitry Andric SelectInstToUnfold SIToUnfold = Stack.back(); 151*fe6060f1SDimitry Andric Stack.pop_back(); 152*fe6060f1SDimitry Andric 153*fe6060f1SDimitry Andric std::vector<SelectInstToUnfold> NewSIsToUnfold; 154*fe6060f1SDimitry Andric std::vector<BasicBlock *> NewBBs; 155*fe6060f1SDimitry Andric unfold(&DTU, SIToUnfold, &NewSIsToUnfold, &NewBBs); 156*fe6060f1SDimitry Andric 157*fe6060f1SDimitry Andric // Put newly discovered select instructions into the work list. 158*fe6060f1SDimitry Andric for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold) 159*fe6060f1SDimitry Andric Stack.push_back(NewSIToUnfold); 160*fe6060f1SDimitry Andric } 161*fe6060f1SDimitry Andric } 162*fe6060f1SDimitry Andric 163*fe6060f1SDimitry Andric AssumptionCache *AC; 164*fe6060f1SDimitry Andric DominatorTree *DT; 165*fe6060f1SDimitry Andric TargetTransformInfo *TTI; 166*fe6060f1SDimitry Andric OptimizationRemarkEmitter *ORE; 167*fe6060f1SDimitry Andric }; 168*fe6060f1SDimitry Andric 169*fe6060f1SDimitry Andric class DFAJumpThreadingLegacyPass : public FunctionPass { 170*fe6060f1SDimitry Andric public: 171*fe6060f1SDimitry Andric static char ID; // Pass identification 172*fe6060f1SDimitry Andric DFAJumpThreadingLegacyPass() : FunctionPass(ID) {} 173*fe6060f1SDimitry Andric 174*fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 175*fe6060f1SDimitry Andric AU.addRequired<AssumptionCacheTracker>(); 176*fe6060f1SDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 177*fe6060f1SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 178*fe6060f1SDimitry Andric AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 179*fe6060f1SDimitry Andric } 180*fe6060f1SDimitry Andric 181*fe6060f1SDimitry Andric bool runOnFunction(Function &F) override { 182*fe6060f1SDimitry Andric if (skipFunction(F)) 183*fe6060f1SDimitry Andric return false; 184*fe6060f1SDimitry Andric 185*fe6060f1SDimitry Andric AssumptionCache *AC = 186*fe6060f1SDimitry Andric &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 187*fe6060f1SDimitry Andric DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 188*fe6060f1SDimitry Andric TargetTransformInfo *TTI = 189*fe6060f1SDimitry Andric &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 190*fe6060f1SDimitry Andric OptimizationRemarkEmitter *ORE = 191*fe6060f1SDimitry Andric &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 192*fe6060f1SDimitry Andric 193*fe6060f1SDimitry Andric return DFAJumpThreading(AC, DT, TTI, ORE).run(F); 194*fe6060f1SDimitry Andric } 195*fe6060f1SDimitry Andric }; 196*fe6060f1SDimitry Andric } // end anonymous namespace 197*fe6060f1SDimitry Andric 198*fe6060f1SDimitry Andric char DFAJumpThreadingLegacyPass::ID = 0; 199*fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(DFAJumpThreadingLegacyPass, "dfa-jump-threading", 200*fe6060f1SDimitry Andric "DFA Jump Threading", false, false) 201*fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 202*fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 203*fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 204*fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 205*fe6060f1SDimitry Andric INITIALIZE_PASS_END(DFAJumpThreadingLegacyPass, "dfa-jump-threading", 206*fe6060f1SDimitry Andric "DFA Jump Threading", false, false) 207*fe6060f1SDimitry Andric 208*fe6060f1SDimitry Andric // Public interface to the DFA Jump Threading pass 209*fe6060f1SDimitry Andric FunctionPass *llvm::createDFAJumpThreadingPass() { 210*fe6060f1SDimitry Andric return new DFAJumpThreadingLegacyPass(); 211*fe6060f1SDimitry Andric } 212*fe6060f1SDimitry Andric 213*fe6060f1SDimitry Andric namespace { 214*fe6060f1SDimitry Andric 215*fe6060f1SDimitry Andric /// Create a new basic block and sink \p SIToSink into it. 216*fe6060f1SDimitry Andric void createBasicBlockAndSinkSelectInst( 217*fe6060f1SDimitry Andric DomTreeUpdater *DTU, SelectInst *SI, PHINode *SIUse, SelectInst *SIToSink, 218*fe6060f1SDimitry Andric BasicBlock *EndBlock, StringRef NewBBName, BasicBlock **NewBlock, 219*fe6060f1SDimitry Andric BranchInst **NewBranch, std::vector<SelectInstToUnfold> *NewSIsToUnfold, 220*fe6060f1SDimitry Andric std::vector<BasicBlock *> *NewBBs) { 221*fe6060f1SDimitry Andric assert(SIToSink->hasOneUse()); 222*fe6060f1SDimitry Andric assert(NewBlock); 223*fe6060f1SDimitry Andric assert(NewBranch); 224*fe6060f1SDimitry Andric *NewBlock = BasicBlock::Create(SI->getContext(), NewBBName, 225*fe6060f1SDimitry Andric EndBlock->getParent(), EndBlock); 226*fe6060f1SDimitry Andric NewBBs->push_back(*NewBlock); 227*fe6060f1SDimitry Andric *NewBranch = BranchInst::Create(EndBlock, *NewBlock); 228*fe6060f1SDimitry Andric SIToSink->moveBefore(*NewBranch); 229*fe6060f1SDimitry Andric NewSIsToUnfold->push_back(SelectInstToUnfold(SIToSink, SIUse)); 230*fe6060f1SDimitry Andric DTU->applyUpdates({{DominatorTree::Insert, *NewBlock, EndBlock}}); 231*fe6060f1SDimitry Andric } 232*fe6060f1SDimitry Andric 233*fe6060f1SDimitry Andric /// Unfold the select instruction held in \p SIToUnfold by replacing it with 234*fe6060f1SDimitry Andric /// control flow. 235*fe6060f1SDimitry Andric /// 236*fe6060f1SDimitry Andric /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly 237*fe6060f1SDimitry Andric /// created basic blocks into \p NewBBs. 238*fe6060f1SDimitry Andric /// 239*fe6060f1SDimitry Andric /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. 240*fe6060f1SDimitry Andric void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, 241*fe6060f1SDimitry Andric std::vector<SelectInstToUnfold> *NewSIsToUnfold, 242*fe6060f1SDimitry Andric std::vector<BasicBlock *> *NewBBs) { 243*fe6060f1SDimitry Andric SelectInst *SI = SIToUnfold.getInst(); 244*fe6060f1SDimitry Andric PHINode *SIUse = SIToUnfold.getUse(); 245*fe6060f1SDimitry Andric BasicBlock *StartBlock = SI->getParent(); 246*fe6060f1SDimitry Andric BasicBlock *EndBlock = SIUse->getParent(); 247*fe6060f1SDimitry Andric BranchInst *StartBlockTerm = 248*fe6060f1SDimitry Andric dyn_cast<BranchInst>(StartBlock->getTerminator()); 249*fe6060f1SDimitry Andric 250*fe6060f1SDimitry Andric assert(StartBlockTerm && StartBlockTerm->isUnconditional()); 251*fe6060f1SDimitry Andric assert(SI->hasOneUse()); 252*fe6060f1SDimitry Andric 253*fe6060f1SDimitry Andric // These are the new basic blocks for the conditional branch. 254*fe6060f1SDimitry Andric // At least one will become an actual new basic block. 255*fe6060f1SDimitry Andric BasicBlock *TrueBlock = nullptr; 256*fe6060f1SDimitry Andric BasicBlock *FalseBlock = nullptr; 257*fe6060f1SDimitry Andric BranchInst *TrueBranch = nullptr; 258*fe6060f1SDimitry Andric BranchInst *FalseBranch = nullptr; 259*fe6060f1SDimitry Andric 260*fe6060f1SDimitry Andric // Sink select instructions to be able to unfold them later. 261*fe6060f1SDimitry Andric if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getTrueValue())) { 262*fe6060f1SDimitry Andric createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock, 263*fe6060f1SDimitry Andric "si.unfold.true", &TrueBlock, &TrueBranch, 264*fe6060f1SDimitry Andric NewSIsToUnfold, NewBBs); 265*fe6060f1SDimitry Andric } 266*fe6060f1SDimitry Andric if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getFalseValue())) { 267*fe6060f1SDimitry Andric createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock, 268*fe6060f1SDimitry Andric "si.unfold.false", &FalseBlock, 269*fe6060f1SDimitry Andric &FalseBranch, NewSIsToUnfold, NewBBs); 270*fe6060f1SDimitry Andric } 271*fe6060f1SDimitry Andric 272*fe6060f1SDimitry Andric // If there was nothing to sink, then arbitrarily choose the 'false' side 273*fe6060f1SDimitry Andric // for a new input value to the PHI. 274*fe6060f1SDimitry Andric if (!TrueBlock && !FalseBlock) { 275*fe6060f1SDimitry Andric FalseBlock = BasicBlock::Create(SI->getContext(), "si.unfold.false", 276*fe6060f1SDimitry Andric EndBlock->getParent(), EndBlock); 277*fe6060f1SDimitry Andric NewBBs->push_back(FalseBlock); 278*fe6060f1SDimitry Andric BranchInst::Create(EndBlock, FalseBlock); 279*fe6060f1SDimitry Andric DTU->applyUpdates({{DominatorTree::Insert, FalseBlock, EndBlock}}); 280*fe6060f1SDimitry Andric } 281*fe6060f1SDimitry Andric 282*fe6060f1SDimitry Andric // Insert the real conditional branch based on the original condition. 283*fe6060f1SDimitry Andric // If we did not create a new block for one of the 'true' or 'false' paths 284*fe6060f1SDimitry Andric // of the condition, it means that side of the branch goes to the end block 285*fe6060f1SDimitry Andric // directly and the path originates from the start block from the point of 286*fe6060f1SDimitry Andric // view of the new PHI. 287*fe6060f1SDimitry Andric BasicBlock *TT = EndBlock; 288*fe6060f1SDimitry Andric BasicBlock *FT = EndBlock; 289*fe6060f1SDimitry Andric if (TrueBlock && FalseBlock) { 290*fe6060f1SDimitry Andric // A diamond. 291*fe6060f1SDimitry Andric TT = TrueBlock; 292*fe6060f1SDimitry Andric FT = FalseBlock; 293*fe6060f1SDimitry Andric 294*fe6060f1SDimitry Andric // Update the phi node of SI. 295*fe6060f1SDimitry Andric SIUse->removeIncomingValue(StartBlock, /* DeletePHIIfEmpty = */ false); 296*fe6060f1SDimitry Andric SIUse->addIncoming(SI->getTrueValue(), TrueBlock); 297*fe6060f1SDimitry Andric SIUse->addIncoming(SI->getFalseValue(), FalseBlock); 298*fe6060f1SDimitry Andric 299*fe6060f1SDimitry Andric // Update any other PHI nodes in EndBlock. 300*fe6060f1SDimitry Andric for (PHINode &Phi : EndBlock->phis()) { 301*fe6060f1SDimitry Andric if (&Phi != SIUse) { 302*fe6060f1SDimitry Andric Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), TrueBlock); 303*fe6060f1SDimitry Andric Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), FalseBlock); 304*fe6060f1SDimitry Andric } 305*fe6060f1SDimitry Andric } 306*fe6060f1SDimitry Andric } else { 307*fe6060f1SDimitry Andric BasicBlock *NewBlock = nullptr; 308*fe6060f1SDimitry Andric Value *SIOp1 = SI->getTrueValue(); 309*fe6060f1SDimitry Andric Value *SIOp2 = SI->getFalseValue(); 310*fe6060f1SDimitry Andric 311*fe6060f1SDimitry Andric // A triangle pointing right. 312*fe6060f1SDimitry Andric if (!TrueBlock) { 313*fe6060f1SDimitry Andric NewBlock = FalseBlock; 314*fe6060f1SDimitry Andric FT = FalseBlock; 315*fe6060f1SDimitry Andric } 316*fe6060f1SDimitry Andric // A triangle pointing left. 317*fe6060f1SDimitry Andric else { 318*fe6060f1SDimitry Andric NewBlock = TrueBlock; 319*fe6060f1SDimitry Andric TT = TrueBlock; 320*fe6060f1SDimitry Andric std::swap(SIOp1, SIOp2); 321*fe6060f1SDimitry Andric } 322*fe6060f1SDimitry Andric 323*fe6060f1SDimitry Andric // Update the phi node of SI. 324*fe6060f1SDimitry Andric for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) { 325*fe6060f1SDimitry Andric if (SIUse->getIncomingBlock(Idx) == StartBlock) 326*fe6060f1SDimitry Andric SIUse->setIncomingValue(Idx, SIOp1); 327*fe6060f1SDimitry Andric } 328*fe6060f1SDimitry Andric SIUse->addIncoming(SIOp2, NewBlock); 329*fe6060f1SDimitry Andric 330*fe6060f1SDimitry Andric // Update any other PHI nodes in EndBlock. 331*fe6060f1SDimitry Andric for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast<PHINode>(II); 332*fe6060f1SDimitry Andric ++II) { 333*fe6060f1SDimitry Andric if (Phi != SIUse) 334*fe6060f1SDimitry Andric Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), NewBlock); 335*fe6060f1SDimitry Andric } 336*fe6060f1SDimitry Andric } 337*fe6060f1SDimitry Andric StartBlockTerm->eraseFromParent(); 338*fe6060f1SDimitry Andric BranchInst::Create(TT, FT, SI->getCondition(), StartBlock); 339*fe6060f1SDimitry Andric DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT}, 340*fe6060f1SDimitry Andric {DominatorTree::Insert, StartBlock, FT}}); 341*fe6060f1SDimitry Andric 342*fe6060f1SDimitry Andric // The select is now dead. 343*fe6060f1SDimitry Andric SI->eraseFromParent(); 344*fe6060f1SDimitry Andric } 345*fe6060f1SDimitry Andric 346*fe6060f1SDimitry Andric struct ClonedBlock { 347*fe6060f1SDimitry Andric BasicBlock *BB; 348*fe6060f1SDimitry Andric uint64_t State; ///< \p State corresponds to the next value of a switch stmnt. 349*fe6060f1SDimitry Andric }; 350*fe6060f1SDimitry Andric 351*fe6060f1SDimitry Andric typedef std::deque<BasicBlock *> PathType; 352*fe6060f1SDimitry Andric typedef std::vector<PathType> PathsType; 353*fe6060f1SDimitry Andric typedef std::set<const BasicBlock *> VisitedBlocks; 354*fe6060f1SDimitry Andric typedef std::vector<ClonedBlock> CloneList; 355*fe6060f1SDimitry Andric 356*fe6060f1SDimitry Andric // This data structure keeps track of all blocks that have been cloned. If two 357*fe6060f1SDimitry Andric // different ThreadingPaths clone the same block for a certain state it should 358*fe6060f1SDimitry Andric // be reused, and it can be looked up in this map. 359*fe6060f1SDimitry Andric typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap; 360*fe6060f1SDimitry Andric 361*fe6060f1SDimitry Andric // This map keeps track of all the new definitions for an instruction. This 362*fe6060f1SDimitry Andric // information is needed when restoring SSA form after cloning blocks. 363*fe6060f1SDimitry Andric typedef DenseMap<Instruction *, std::vector<Instruction *>> DefMap; 364*fe6060f1SDimitry Andric 365*fe6060f1SDimitry Andric inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { 366*fe6060f1SDimitry Andric OS << "< "; 367*fe6060f1SDimitry Andric for (const BasicBlock *BB : Path) { 368*fe6060f1SDimitry Andric std::string BBName; 369*fe6060f1SDimitry Andric if (BB->hasName()) 370*fe6060f1SDimitry Andric raw_string_ostream(BBName) << BB->getName(); 371*fe6060f1SDimitry Andric else 372*fe6060f1SDimitry Andric raw_string_ostream(BBName) << BB; 373*fe6060f1SDimitry Andric OS << BBName << " "; 374*fe6060f1SDimitry Andric } 375*fe6060f1SDimitry Andric OS << ">"; 376*fe6060f1SDimitry Andric return OS; 377*fe6060f1SDimitry Andric } 378*fe6060f1SDimitry Andric 379*fe6060f1SDimitry Andric /// ThreadingPath is a path in the control flow of a loop that can be threaded 380*fe6060f1SDimitry Andric /// by cloning necessary basic blocks and replacing conditional branches with 381*fe6060f1SDimitry Andric /// unconditional ones. A threading path includes a list of basic blocks, the 382*fe6060f1SDimitry Andric /// exit state, and the block that determines the next state. 383*fe6060f1SDimitry Andric struct ThreadingPath { 384*fe6060f1SDimitry Andric /// Exit value is DFA's exit state for the given path. 385*fe6060f1SDimitry Andric uint64_t getExitValue() const { return ExitVal; } 386*fe6060f1SDimitry Andric void setExitValue(const ConstantInt *V) { 387*fe6060f1SDimitry Andric ExitVal = V->getZExtValue(); 388*fe6060f1SDimitry Andric IsExitValSet = true; 389*fe6060f1SDimitry Andric } 390*fe6060f1SDimitry Andric bool isExitValueSet() const { return IsExitValSet; } 391*fe6060f1SDimitry Andric 392*fe6060f1SDimitry Andric /// Determinator is the basic block that determines the next state of the DFA. 393*fe6060f1SDimitry Andric const BasicBlock *getDeterminatorBB() const { return DBB; } 394*fe6060f1SDimitry Andric void setDeterminator(const BasicBlock *BB) { DBB = BB; } 395*fe6060f1SDimitry Andric 396*fe6060f1SDimitry Andric /// Path is a list of basic blocks. 397*fe6060f1SDimitry Andric const PathType &getPath() const { return Path; } 398*fe6060f1SDimitry Andric void setPath(const PathType &NewPath) { Path = NewPath; } 399*fe6060f1SDimitry Andric 400*fe6060f1SDimitry Andric void print(raw_ostream &OS) const { 401*fe6060f1SDimitry Andric OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]"; 402*fe6060f1SDimitry Andric } 403*fe6060f1SDimitry Andric 404*fe6060f1SDimitry Andric private: 405*fe6060f1SDimitry Andric PathType Path; 406*fe6060f1SDimitry Andric uint64_t ExitVal; 407*fe6060f1SDimitry Andric const BasicBlock *DBB = nullptr; 408*fe6060f1SDimitry Andric bool IsExitValSet = false; 409*fe6060f1SDimitry Andric }; 410*fe6060f1SDimitry Andric 411*fe6060f1SDimitry Andric #ifndef NDEBUG 412*fe6060f1SDimitry Andric inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) { 413*fe6060f1SDimitry Andric TPath.print(OS); 414*fe6060f1SDimitry Andric return OS; 415*fe6060f1SDimitry Andric } 416*fe6060f1SDimitry Andric #endif 417*fe6060f1SDimitry Andric 418*fe6060f1SDimitry Andric struct MainSwitch { 419*fe6060f1SDimitry Andric MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) { 420*fe6060f1SDimitry Andric if (isPredictable(SI)) { 421*fe6060f1SDimitry Andric Instr = SI; 422*fe6060f1SDimitry Andric } else { 423*fe6060f1SDimitry Andric ORE->emit([&]() { 424*fe6060f1SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI) 425*fe6060f1SDimitry Andric << "Switch instruction is not predictable."; 426*fe6060f1SDimitry Andric }); 427*fe6060f1SDimitry Andric } 428*fe6060f1SDimitry Andric } 429*fe6060f1SDimitry Andric 430*fe6060f1SDimitry Andric virtual ~MainSwitch() = default; 431*fe6060f1SDimitry Andric 432*fe6060f1SDimitry Andric SwitchInst *getInstr() const { return Instr; } 433*fe6060f1SDimitry Andric const SmallVector<SelectInstToUnfold, 4> getSelectInsts() { 434*fe6060f1SDimitry Andric return SelectInsts; 435*fe6060f1SDimitry Andric } 436*fe6060f1SDimitry Andric 437*fe6060f1SDimitry Andric private: 438*fe6060f1SDimitry Andric /// Do a use-def chain traversal. Make sure the value of the switch variable 439*fe6060f1SDimitry Andric /// is always a known constant. This means that all conditional jumps based on 440*fe6060f1SDimitry Andric /// switch variable can be converted to unconditional jumps. 441*fe6060f1SDimitry Andric bool isPredictable(const SwitchInst *SI) { 442*fe6060f1SDimitry Andric std::deque<Instruction *> Q; 443*fe6060f1SDimitry Andric SmallSet<Value *, 16> SeenValues; 444*fe6060f1SDimitry Andric SelectInsts.clear(); 445*fe6060f1SDimitry Andric 446*fe6060f1SDimitry Andric Value *FirstDef = SI->getOperand(0); 447*fe6060f1SDimitry Andric auto *Inst = dyn_cast<Instruction>(FirstDef); 448*fe6060f1SDimitry Andric 449*fe6060f1SDimitry Andric // If this is a function argument or another non-instruction, then give up. 450*fe6060f1SDimitry Andric // We are interested in loop local variables. 451*fe6060f1SDimitry Andric if (!Inst) 452*fe6060f1SDimitry Andric return false; 453*fe6060f1SDimitry Andric 454*fe6060f1SDimitry Andric // Require the first definition to be a PHINode 455*fe6060f1SDimitry Andric if (!isa<PHINode>(Inst)) 456*fe6060f1SDimitry Andric return false; 457*fe6060f1SDimitry Andric 458*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "\tisPredictable() FirstDef: " << *Inst << "\n"); 459*fe6060f1SDimitry Andric 460*fe6060f1SDimitry Andric Q.push_back(Inst); 461*fe6060f1SDimitry Andric SeenValues.insert(FirstDef); 462*fe6060f1SDimitry Andric 463*fe6060f1SDimitry Andric while (!Q.empty()) { 464*fe6060f1SDimitry Andric Instruction *Current = Q.front(); 465*fe6060f1SDimitry Andric Q.pop_front(); 466*fe6060f1SDimitry Andric 467*fe6060f1SDimitry Andric if (auto *Phi = dyn_cast<PHINode>(Current)) { 468*fe6060f1SDimitry Andric for (Value *Incoming : Phi->incoming_values()) { 469*fe6060f1SDimitry Andric if (!isPredictableValue(Incoming, SeenValues)) 470*fe6060f1SDimitry Andric return false; 471*fe6060f1SDimitry Andric addInstToQueue(Incoming, Q, SeenValues); 472*fe6060f1SDimitry Andric } 473*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "\tisPredictable() phi: " << *Phi << "\n"); 474*fe6060f1SDimitry Andric } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) { 475*fe6060f1SDimitry Andric if (!isValidSelectInst(SelI)) 476*fe6060f1SDimitry Andric return false; 477*fe6060f1SDimitry Andric if (!isPredictableValue(SelI->getTrueValue(), SeenValues) || 478*fe6060f1SDimitry Andric !isPredictableValue(SelI->getFalseValue(), SeenValues)) { 479*fe6060f1SDimitry Andric return false; 480*fe6060f1SDimitry Andric } 481*fe6060f1SDimitry Andric addInstToQueue(SelI->getTrueValue(), Q, SeenValues); 482*fe6060f1SDimitry Andric addInstToQueue(SelI->getFalseValue(), Q, SeenValues); 483*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "\tisPredictable() select: " << *SelI << "\n"); 484*fe6060f1SDimitry Andric if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back())) 485*fe6060f1SDimitry Andric SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse)); 486*fe6060f1SDimitry Andric } else { 487*fe6060f1SDimitry Andric // If it is neither a phi nor a select, then we give up. 488*fe6060f1SDimitry Andric return false; 489*fe6060f1SDimitry Andric } 490*fe6060f1SDimitry Andric } 491*fe6060f1SDimitry Andric 492*fe6060f1SDimitry Andric return true; 493*fe6060f1SDimitry Andric } 494*fe6060f1SDimitry Andric 495*fe6060f1SDimitry Andric bool isPredictableValue(Value *InpVal, SmallSet<Value *, 16> &SeenValues) { 496*fe6060f1SDimitry Andric if (SeenValues.find(InpVal) != SeenValues.end()) 497*fe6060f1SDimitry Andric return true; 498*fe6060f1SDimitry Andric 499*fe6060f1SDimitry Andric if (isa<ConstantInt>(InpVal)) 500*fe6060f1SDimitry Andric return true; 501*fe6060f1SDimitry Andric 502*fe6060f1SDimitry Andric // If this is a function argument or another non-instruction, then give up. 503*fe6060f1SDimitry Andric if (!isa<Instruction>(InpVal)) 504*fe6060f1SDimitry Andric return false; 505*fe6060f1SDimitry Andric 506*fe6060f1SDimitry Andric return true; 507*fe6060f1SDimitry Andric } 508*fe6060f1SDimitry Andric 509*fe6060f1SDimitry Andric void addInstToQueue(Value *Val, std::deque<Instruction *> &Q, 510*fe6060f1SDimitry Andric SmallSet<Value *, 16> &SeenValues) { 511*fe6060f1SDimitry Andric if (SeenValues.find(Val) != SeenValues.end()) 512*fe6060f1SDimitry Andric return; 513*fe6060f1SDimitry Andric if (Instruction *I = dyn_cast<Instruction>(Val)) 514*fe6060f1SDimitry Andric Q.push_back(I); 515*fe6060f1SDimitry Andric SeenValues.insert(Val); 516*fe6060f1SDimitry Andric } 517*fe6060f1SDimitry Andric 518*fe6060f1SDimitry Andric bool isValidSelectInst(SelectInst *SI) { 519*fe6060f1SDimitry Andric if (!SI->hasOneUse()) 520*fe6060f1SDimitry Andric return false; 521*fe6060f1SDimitry Andric 522*fe6060f1SDimitry Andric Instruction *SIUse = dyn_cast<Instruction>(SI->user_back()); 523*fe6060f1SDimitry Andric // The use of the select inst should be either a phi or another select. 524*fe6060f1SDimitry Andric if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse))) 525*fe6060f1SDimitry Andric return false; 526*fe6060f1SDimitry Andric 527*fe6060f1SDimitry Andric BasicBlock *SIBB = SI->getParent(); 528*fe6060f1SDimitry Andric 529*fe6060f1SDimitry Andric // Currently, we can only expand select instructions in basic blocks with 530*fe6060f1SDimitry Andric // one successor. 531*fe6060f1SDimitry Andric BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator()); 532*fe6060f1SDimitry Andric if (!SITerm || !SITerm->isUnconditional()) 533*fe6060f1SDimitry Andric return false; 534*fe6060f1SDimitry Andric 535*fe6060f1SDimitry Andric if (isa<PHINode>(SIUse) && 536*fe6060f1SDimitry Andric SIBB->getSingleSuccessor() != dyn_cast<Instruction>(SIUse)->getParent()) 537*fe6060f1SDimitry Andric return false; 538*fe6060f1SDimitry Andric 539*fe6060f1SDimitry Andric // If select will not be sunk during unfolding, and it is in the same basic 540*fe6060f1SDimitry Andric // block as another state defining select, then cannot unfold both. 541*fe6060f1SDimitry Andric for (SelectInstToUnfold SIToUnfold : SelectInsts) { 542*fe6060f1SDimitry Andric SelectInst *PrevSI = SIToUnfold.getInst(); 543*fe6060f1SDimitry Andric if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI && 544*fe6060f1SDimitry Andric PrevSI->getParent() == SI->getParent()) 545*fe6060f1SDimitry Andric return false; 546*fe6060f1SDimitry Andric } 547*fe6060f1SDimitry Andric 548*fe6060f1SDimitry Andric return true; 549*fe6060f1SDimitry Andric } 550*fe6060f1SDimitry Andric 551*fe6060f1SDimitry Andric SwitchInst *Instr = nullptr; 552*fe6060f1SDimitry Andric SmallVector<SelectInstToUnfold, 4> SelectInsts; 553*fe6060f1SDimitry Andric }; 554*fe6060f1SDimitry Andric 555*fe6060f1SDimitry Andric struct AllSwitchPaths { 556*fe6060f1SDimitry Andric AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE) 557*fe6060f1SDimitry Andric : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), 558*fe6060f1SDimitry Andric ORE(ORE) {} 559*fe6060f1SDimitry Andric 560*fe6060f1SDimitry Andric std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; } 561*fe6060f1SDimitry Andric unsigned getNumThreadingPaths() { return TPaths.size(); } 562*fe6060f1SDimitry Andric SwitchInst *getSwitchInst() { return Switch; } 563*fe6060f1SDimitry Andric BasicBlock *getSwitchBlock() { return SwitchBlock; } 564*fe6060f1SDimitry Andric 565*fe6060f1SDimitry Andric void run() { 566*fe6060f1SDimitry Andric VisitedBlocks Visited; 567*fe6060f1SDimitry Andric PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1); 568*fe6060f1SDimitry Andric StateDefMap StateDef = getStateDefMap(); 569*fe6060f1SDimitry Andric 570*fe6060f1SDimitry Andric for (PathType Path : LoopPaths) { 571*fe6060f1SDimitry Andric ThreadingPath TPath; 572*fe6060f1SDimitry Andric 573*fe6060f1SDimitry Andric const BasicBlock *PrevBB = Path.back(); 574*fe6060f1SDimitry Andric for (const BasicBlock *BB : Path) { 575*fe6060f1SDimitry Andric if (StateDef.count(BB) != 0) { 576*fe6060f1SDimitry Andric const PHINode *Phi = dyn_cast<PHINode>(StateDef[BB]); 577*fe6060f1SDimitry Andric assert(Phi && "Expected a state-defining instr to be a phi node."); 578*fe6060f1SDimitry Andric 579*fe6060f1SDimitry Andric const Value *V = Phi->getIncomingValueForBlock(PrevBB); 580*fe6060f1SDimitry Andric if (const ConstantInt *C = dyn_cast<const ConstantInt>(V)) { 581*fe6060f1SDimitry Andric TPath.setExitValue(C); 582*fe6060f1SDimitry Andric TPath.setDeterminator(BB); 583*fe6060f1SDimitry Andric TPath.setPath(Path); 584*fe6060f1SDimitry Andric } 585*fe6060f1SDimitry Andric } 586*fe6060f1SDimitry Andric 587*fe6060f1SDimitry Andric // Switch block is the determinator, this is the final exit value. 588*fe6060f1SDimitry Andric if (TPath.isExitValueSet() && BB == Path.front()) 589*fe6060f1SDimitry Andric break; 590*fe6060f1SDimitry Andric 591*fe6060f1SDimitry Andric PrevBB = BB; 592*fe6060f1SDimitry Andric } 593*fe6060f1SDimitry Andric 594*fe6060f1SDimitry Andric if (TPath.isExitValueSet()) 595*fe6060f1SDimitry Andric TPaths.push_back(TPath); 596*fe6060f1SDimitry Andric } 597*fe6060f1SDimitry Andric } 598*fe6060f1SDimitry Andric 599*fe6060f1SDimitry Andric private: 600*fe6060f1SDimitry Andric // Value: an instruction that defines a switch state; 601*fe6060f1SDimitry Andric // Key: the parent basic block of that instruction. 602*fe6060f1SDimitry Andric typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap; 603*fe6060f1SDimitry Andric 604*fe6060f1SDimitry Andric PathsType paths(BasicBlock *BB, VisitedBlocks &Visited, 605*fe6060f1SDimitry Andric unsigned PathDepth) const { 606*fe6060f1SDimitry Andric PathsType Res; 607*fe6060f1SDimitry Andric 608*fe6060f1SDimitry Andric // Stop exploring paths after visiting MaxPathLength blocks 609*fe6060f1SDimitry Andric if (PathDepth > MaxPathLength) { 610*fe6060f1SDimitry Andric ORE->emit([&]() { 611*fe6060f1SDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached", 612*fe6060f1SDimitry Andric Switch) 613*fe6060f1SDimitry Andric << "Exploration stopped after visiting MaxPathLength=" 614*fe6060f1SDimitry Andric << ore::NV("MaxPathLength", MaxPathLength) << " blocks."; 615*fe6060f1SDimitry Andric }); 616*fe6060f1SDimitry Andric return Res; 617*fe6060f1SDimitry Andric } 618*fe6060f1SDimitry Andric 619*fe6060f1SDimitry Andric Visited.insert(BB); 620*fe6060f1SDimitry Andric 621*fe6060f1SDimitry Andric // Some blocks have multiple edges to the same successor, and this set 622*fe6060f1SDimitry Andric // is used to prevent a duplicate path from being generated 623*fe6060f1SDimitry Andric SmallSet<BasicBlock *, 4> Successors; 624*fe6060f1SDimitry Andric 625*fe6060f1SDimitry Andric for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) { 626*fe6060f1SDimitry Andric BasicBlock *Succ = *SI; 627*fe6060f1SDimitry Andric 628*fe6060f1SDimitry Andric if (Successors.find(Succ) != Successors.end()) 629*fe6060f1SDimitry Andric continue; 630*fe6060f1SDimitry Andric Successors.insert(Succ); 631*fe6060f1SDimitry Andric 632*fe6060f1SDimitry Andric // Found a cycle through the SwitchBlock 633*fe6060f1SDimitry Andric if (Succ == SwitchBlock) { 634*fe6060f1SDimitry Andric Res.push_back({BB}); 635*fe6060f1SDimitry Andric continue; 636*fe6060f1SDimitry Andric } 637*fe6060f1SDimitry Andric 638*fe6060f1SDimitry Andric // We have encountered a cycle, do not get caught in it 639*fe6060f1SDimitry Andric if (Visited.find(Succ) != Visited.end()) 640*fe6060f1SDimitry Andric continue; 641*fe6060f1SDimitry Andric 642*fe6060f1SDimitry Andric PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1); 643*fe6060f1SDimitry Andric for (PathType Path : SuccPaths) { 644*fe6060f1SDimitry Andric PathType NewPath(Path); 645*fe6060f1SDimitry Andric NewPath.push_front(BB); 646*fe6060f1SDimitry Andric Res.push_back(NewPath); 647*fe6060f1SDimitry Andric } 648*fe6060f1SDimitry Andric } 649*fe6060f1SDimitry Andric // This block could now be visited again from a different predecessor. Note 650*fe6060f1SDimitry Andric // that this will result in exponential runtime. Subpaths could possibly be 651*fe6060f1SDimitry Andric // cached but it takes a lot of memory to store them. 652*fe6060f1SDimitry Andric Visited.erase(BB); 653*fe6060f1SDimitry Andric return Res; 654*fe6060f1SDimitry Andric } 655*fe6060f1SDimitry Andric 656*fe6060f1SDimitry Andric /// Walk the use-def chain and collect all the state-defining instructions. 657*fe6060f1SDimitry Andric StateDefMap getStateDefMap() const { 658*fe6060f1SDimitry Andric StateDefMap Res; 659*fe6060f1SDimitry Andric 660*fe6060f1SDimitry Andric Value *FirstDef = Switch->getOperand(0); 661*fe6060f1SDimitry Andric 662*fe6060f1SDimitry Andric assert(isa<PHINode>(FirstDef) && "After select unfolding, all state " 663*fe6060f1SDimitry Andric "definitions are expected to be phi " 664*fe6060f1SDimitry Andric "nodes."); 665*fe6060f1SDimitry Andric 666*fe6060f1SDimitry Andric SmallVector<PHINode *, 8> Stack; 667*fe6060f1SDimitry Andric Stack.push_back(dyn_cast<PHINode>(FirstDef)); 668*fe6060f1SDimitry Andric SmallSet<Value *, 16> SeenValues; 669*fe6060f1SDimitry Andric 670*fe6060f1SDimitry Andric while (!Stack.empty()) { 671*fe6060f1SDimitry Andric PHINode *CurPhi = Stack.back(); 672*fe6060f1SDimitry Andric Stack.pop_back(); 673*fe6060f1SDimitry Andric 674*fe6060f1SDimitry Andric Res[CurPhi->getParent()] = CurPhi; 675*fe6060f1SDimitry Andric SeenValues.insert(CurPhi); 676*fe6060f1SDimitry Andric 677*fe6060f1SDimitry Andric for (Value *Incoming : CurPhi->incoming_values()) { 678*fe6060f1SDimitry Andric if (Incoming == FirstDef || isa<ConstantInt>(Incoming) || 679*fe6060f1SDimitry Andric SeenValues.find(Incoming) != SeenValues.end()) { 680*fe6060f1SDimitry Andric continue; 681*fe6060f1SDimitry Andric } 682*fe6060f1SDimitry Andric 683*fe6060f1SDimitry Andric assert(isa<PHINode>(Incoming) && "After select unfolding, all state " 684*fe6060f1SDimitry Andric "definitions are expected to be phi " 685*fe6060f1SDimitry Andric "nodes."); 686*fe6060f1SDimitry Andric 687*fe6060f1SDimitry Andric Stack.push_back(cast<PHINode>(Incoming)); 688*fe6060f1SDimitry Andric } 689*fe6060f1SDimitry Andric } 690*fe6060f1SDimitry Andric 691*fe6060f1SDimitry Andric return Res; 692*fe6060f1SDimitry Andric } 693*fe6060f1SDimitry Andric 694*fe6060f1SDimitry Andric SwitchInst *Switch; 695*fe6060f1SDimitry Andric BasicBlock *SwitchBlock; 696*fe6060f1SDimitry Andric OptimizationRemarkEmitter *ORE; 697*fe6060f1SDimitry Andric std::vector<ThreadingPath> TPaths; 698*fe6060f1SDimitry Andric }; 699*fe6060f1SDimitry Andric 700*fe6060f1SDimitry Andric struct TransformDFA { 701*fe6060f1SDimitry Andric TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT, 702*fe6060f1SDimitry Andric AssumptionCache *AC, TargetTransformInfo *TTI, 703*fe6060f1SDimitry Andric OptimizationRemarkEmitter *ORE, 704*fe6060f1SDimitry Andric SmallPtrSet<const Value *, 32> EphValues) 705*fe6060f1SDimitry Andric : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE), 706*fe6060f1SDimitry Andric EphValues(EphValues) {} 707*fe6060f1SDimitry Andric 708*fe6060f1SDimitry Andric void run() { 709*fe6060f1SDimitry Andric if (isLegalAndProfitableToTransform()) { 710*fe6060f1SDimitry Andric createAllExitPaths(); 711*fe6060f1SDimitry Andric NumTransforms++; 712*fe6060f1SDimitry Andric } 713*fe6060f1SDimitry Andric } 714*fe6060f1SDimitry Andric 715*fe6060f1SDimitry Andric private: 716*fe6060f1SDimitry Andric /// This function performs both a legality check and profitability check at 717*fe6060f1SDimitry Andric /// the same time since it is convenient to do so. It iterates through all 718*fe6060f1SDimitry Andric /// blocks that will be cloned, and keeps track of the duplication cost. It 719*fe6060f1SDimitry Andric /// also returns false if it is illegal to clone some required block. 720*fe6060f1SDimitry Andric bool isLegalAndProfitableToTransform() { 721*fe6060f1SDimitry Andric CodeMetrics Metrics; 722*fe6060f1SDimitry Andric SwitchInst *Switch = SwitchPaths->getSwitchInst(); 723*fe6060f1SDimitry Andric 724*fe6060f1SDimitry Andric // Note that DuplicateBlockMap is not being used as intended here. It is 725*fe6060f1SDimitry Andric // just being used to ensure (BB, State) pairs are only counted once. 726*fe6060f1SDimitry Andric DuplicateBlockMap DuplicateMap; 727*fe6060f1SDimitry Andric 728*fe6060f1SDimitry Andric for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 729*fe6060f1SDimitry Andric PathType PathBBs = TPath.getPath(); 730*fe6060f1SDimitry Andric uint64_t NextState = TPath.getExitValue(); 731*fe6060f1SDimitry Andric const BasicBlock *Determinator = TPath.getDeterminatorBB(); 732*fe6060f1SDimitry Andric 733*fe6060f1SDimitry Andric // Update Metrics for the Switch block, this is always cloned 734*fe6060f1SDimitry Andric BasicBlock *BB = SwitchPaths->getSwitchBlock(); 735*fe6060f1SDimitry Andric BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 736*fe6060f1SDimitry Andric if (!VisitedBB) { 737*fe6060f1SDimitry Andric Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 738*fe6060f1SDimitry Andric DuplicateMap[BB].push_back({BB, NextState}); 739*fe6060f1SDimitry Andric } 740*fe6060f1SDimitry Andric 741*fe6060f1SDimitry Andric // If the Switch block is the Determinator, then we can continue since 742*fe6060f1SDimitry Andric // this is the only block that is cloned and we already counted for it. 743*fe6060f1SDimitry Andric if (PathBBs.front() == Determinator) 744*fe6060f1SDimitry Andric continue; 745*fe6060f1SDimitry Andric 746*fe6060f1SDimitry Andric // Otherwise update Metrics for all blocks that will be cloned. If any 747*fe6060f1SDimitry Andric // block is already cloned and would be reused, don't double count it. 748*fe6060f1SDimitry Andric auto DetIt = std::find(PathBBs.begin(), PathBBs.end(), Determinator); 749*fe6060f1SDimitry Andric for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 750*fe6060f1SDimitry Andric BB = *BBIt; 751*fe6060f1SDimitry Andric VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 752*fe6060f1SDimitry Andric if (VisitedBB) 753*fe6060f1SDimitry Andric continue; 754*fe6060f1SDimitry Andric Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 755*fe6060f1SDimitry Andric DuplicateMap[BB].push_back({BB, NextState}); 756*fe6060f1SDimitry Andric } 757*fe6060f1SDimitry Andric 758*fe6060f1SDimitry Andric if (Metrics.notDuplicatable) { 759*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 760*fe6060f1SDimitry Andric << "non-duplicatable instructions.\n"); 761*fe6060f1SDimitry Andric ORE->emit([&]() { 762*fe6060f1SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst", 763*fe6060f1SDimitry Andric Switch) 764*fe6060f1SDimitry Andric << "Contains non-duplicatable instructions."; 765*fe6060f1SDimitry Andric }); 766*fe6060f1SDimitry Andric return false; 767*fe6060f1SDimitry Andric } 768*fe6060f1SDimitry Andric 769*fe6060f1SDimitry Andric if (Metrics.convergent) { 770*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 771*fe6060f1SDimitry Andric << "convergent instructions.\n"); 772*fe6060f1SDimitry Andric ORE->emit([&]() { 773*fe6060f1SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) 774*fe6060f1SDimitry Andric << "Contains convergent instructions."; 775*fe6060f1SDimitry Andric }); 776*fe6060f1SDimitry Andric return false; 777*fe6060f1SDimitry Andric } 778*fe6060f1SDimitry Andric } 779*fe6060f1SDimitry Andric 780*fe6060f1SDimitry Andric unsigned DuplicationCost = 0; 781*fe6060f1SDimitry Andric 782*fe6060f1SDimitry Andric unsigned JumpTableSize = 0; 783*fe6060f1SDimitry Andric TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr, 784*fe6060f1SDimitry Andric nullptr); 785*fe6060f1SDimitry Andric if (JumpTableSize == 0) { 786*fe6060f1SDimitry Andric // Factor in the number of conditional branches reduced from jump 787*fe6060f1SDimitry Andric // threading. Assume that lowering the switch block is implemented by 788*fe6060f1SDimitry Andric // using binary search, hence the LogBase2(). 789*fe6060f1SDimitry Andric unsigned CondBranches = 790*fe6060f1SDimitry Andric APInt(32, Switch->getNumSuccessors()).ceilLogBase2(); 791*fe6060f1SDimitry Andric DuplicationCost = Metrics.NumInsts / CondBranches; 792*fe6060f1SDimitry Andric } else { 793*fe6060f1SDimitry Andric // Compared with jump tables, the DFA optimizer removes an indirect branch 794*fe6060f1SDimitry Andric // on each loop iteration, thus making branch prediction more precise. The 795*fe6060f1SDimitry Andric // more branch targets there are, the more likely it is for the branch 796*fe6060f1SDimitry Andric // predictor to make a mistake, and the more benefit there is in the DFA 797*fe6060f1SDimitry Andric // optimizer. Thus, the more branch targets there are, the lower is the 798*fe6060f1SDimitry Andric // cost of the DFA opt. 799*fe6060f1SDimitry Andric DuplicationCost = Metrics.NumInsts / JumpTableSize; 800*fe6060f1SDimitry Andric } 801*fe6060f1SDimitry Andric 802*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block " 803*fe6060f1SDimitry Andric << SwitchPaths->getSwitchBlock()->getName() 804*fe6060f1SDimitry Andric << " is: " << DuplicationCost << "\n\n"); 805*fe6060f1SDimitry Andric 806*fe6060f1SDimitry Andric if (DuplicationCost > CostThreshold) { 807*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the " 808*fe6060f1SDimitry Andric << "cost threshold.\n"); 809*fe6060f1SDimitry Andric ORE->emit([&]() { 810*fe6060f1SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch) 811*fe6060f1SDimitry Andric << "Duplication cost exceeds the cost threshold (cost=" 812*fe6060f1SDimitry Andric << ore::NV("Cost", DuplicationCost) 813*fe6060f1SDimitry Andric << ", threshold=" << ore::NV("Threshold", CostThreshold) << ")."; 814*fe6060f1SDimitry Andric }); 815*fe6060f1SDimitry Andric return false; 816*fe6060f1SDimitry Andric } 817*fe6060f1SDimitry Andric 818*fe6060f1SDimitry Andric ORE->emit([&]() { 819*fe6060f1SDimitry Andric return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch) 820*fe6060f1SDimitry Andric << "Switch statement jump-threaded."; 821*fe6060f1SDimitry Andric }); 822*fe6060f1SDimitry Andric 823*fe6060f1SDimitry Andric return true; 824*fe6060f1SDimitry Andric } 825*fe6060f1SDimitry Andric 826*fe6060f1SDimitry Andric /// Transform each threading path to effectively jump thread the DFA. 827*fe6060f1SDimitry Andric void createAllExitPaths() { 828*fe6060f1SDimitry Andric DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager); 829*fe6060f1SDimitry Andric 830*fe6060f1SDimitry Andric // Move the switch block to the end of the path, since it will be duplicated 831*fe6060f1SDimitry Andric BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock(); 832*fe6060f1SDimitry Andric for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 833*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << TPath << "\n"); 834*fe6060f1SDimitry Andric PathType NewPath(TPath.getPath()); 835*fe6060f1SDimitry Andric NewPath.push_back(SwitchBlock); 836*fe6060f1SDimitry Andric TPath.setPath(NewPath); 837*fe6060f1SDimitry Andric } 838*fe6060f1SDimitry Andric 839*fe6060f1SDimitry Andric // Transform the ThreadingPaths and keep track of the cloned values 840*fe6060f1SDimitry Andric DuplicateBlockMap DuplicateMap; 841*fe6060f1SDimitry Andric DefMap NewDefs; 842*fe6060f1SDimitry Andric 843*fe6060f1SDimitry Andric SmallSet<BasicBlock *, 16> BlocksToClean; 844*fe6060f1SDimitry Andric for (BasicBlock *BB : successors(SwitchBlock)) 845*fe6060f1SDimitry Andric BlocksToClean.insert(BB); 846*fe6060f1SDimitry Andric 847*fe6060f1SDimitry Andric for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 848*fe6060f1SDimitry Andric createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); 849*fe6060f1SDimitry Andric NumPaths++; 850*fe6060f1SDimitry Andric } 851*fe6060f1SDimitry Andric 852*fe6060f1SDimitry Andric // After all paths are cloned, now update the last successor of the cloned 853*fe6060f1SDimitry Andric // path so it skips over the switch statement 854*fe6060f1SDimitry Andric for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) 855*fe6060f1SDimitry Andric updateLastSuccessor(TPath, DuplicateMap, &DTU); 856*fe6060f1SDimitry Andric 857*fe6060f1SDimitry Andric // For each instruction that was cloned and used outside, update its uses 858*fe6060f1SDimitry Andric updateSSA(NewDefs); 859*fe6060f1SDimitry Andric 860*fe6060f1SDimitry Andric // Clean PHI Nodes for the newly created blocks 861*fe6060f1SDimitry Andric for (BasicBlock *BB : BlocksToClean) 862*fe6060f1SDimitry Andric cleanPhiNodes(BB); 863*fe6060f1SDimitry Andric } 864*fe6060f1SDimitry Andric 865*fe6060f1SDimitry Andric /// For a specific ThreadingPath \p Path, create an exit path starting from 866*fe6060f1SDimitry Andric /// the determinator block. 867*fe6060f1SDimitry Andric /// 868*fe6060f1SDimitry Andric /// To remember the correct destination, we have to duplicate blocks 869*fe6060f1SDimitry Andric /// corresponding to each state. Also update the terminating instruction of 870*fe6060f1SDimitry Andric /// the predecessors, and phis in the successor blocks. 871*fe6060f1SDimitry Andric void createExitPath(DefMap &NewDefs, ThreadingPath &Path, 872*fe6060f1SDimitry Andric DuplicateBlockMap &DuplicateMap, 873*fe6060f1SDimitry Andric SmallSet<BasicBlock *, 16> &BlocksToClean, 874*fe6060f1SDimitry Andric DomTreeUpdater *DTU) { 875*fe6060f1SDimitry Andric uint64_t NextState = Path.getExitValue(); 876*fe6060f1SDimitry Andric const BasicBlock *Determinator = Path.getDeterminatorBB(); 877*fe6060f1SDimitry Andric PathType PathBBs = Path.getPath(); 878*fe6060f1SDimitry Andric 879*fe6060f1SDimitry Andric // Don't select the placeholder block in front 880*fe6060f1SDimitry Andric if (PathBBs.front() == Determinator) 881*fe6060f1SDimitry Andric PathBBs.pop_front(); 882*fe6060f1SDimitry Andric 883*fe6060f1SDimitry Andric auto DetIt = std::find(PathBBs.begin(), PathBBs.end(), Determinator); 884*fe6060f1SDimitry Andric auto Prev = std::prev(DetIt); 885*fe6060f1SDimitry Andric BasicBlock *PrevBB = *Prev; 886*fe6060f1SDimitry Andric for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 887*fe6060f1SDimitry Andric BasicBlock *BB = *BBIt; 888*fe6060f1SDimitry Andric BlocksToClean.insert(BB); 889*fe6060f1SDimitry Andric 890*fe6060f1SDimitry Andric // We already cloned BB for this NextState, now just update the branch 891*fe6060f1SDimitry Andric // and continue. 892*fe6060f1SDimitry Andric BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap); 893*fe6060f1SDimitry Andric if (NextBB) { 894*fe6060f1SDimitry Andric updatePredecessor(PrevBB, BB, NextBB, DTU); 895*fe6060f1SDimitry Andric PrevBB = NextBB; 896*fe6060f1SDimitry Andric continue; 897*fe6060f1SDimitry Andric } 898*fe6060f1SDimitry Andric 899*fe6060f1SDimitry Andric // Clone the BB and update the successor of Prev to jump to the new block 900*fe6060f1SDimitry Andric BasicBlock *NewBB = cloneBlockAndUpdatePredecessor( 901*fe6060f1SDimitry Andric BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU); 902*fe6060f1SDimitry Andric DuplicateMap[BB].push_back({NewBB, NextState}); 903*fe6060f1SDimitry Andric BlocksToClean.insert(NewBB); 904*fe6060f1SDimitry Andric PrevBB = NewBB; 905*fe6060f1SDimitry Andric } 906*fe6060f1SDimitry Andric } 907*fe6060f1SDimitry Andric 908*fe6060f1SDimitry Andric /// Restore SSA form after cloning blocks. 909*fe6060f1SDimitry Andric /// 910*fe6060f1SDimitry Andric /// Each cloned block creates new defs for a variable, and the uses need to be 911*fe6060f1SDimitry Andric /// updated to reflect this. The uses may be replaced with a cloned value, or 912*fe6060f1SDimitry Andric /// some derived phi instruction. Note that all uses of a value defined in the 913*fe6060f1SDimitry Andric /// same block were already remapped when cloning the block. 914*fe6060f1SDimitry Andric void updateSSA(DefMap &NewDefs) { 915*fe6060f1SDimitry Andric SSAUpdaterBulk SSAUpdate; 916*fe6060f1SDimitry Andric SmallVector<Use *, 16> UsesToRename; 917*fe6060f1SDimitry Andric 918*fe6060f1SDimitry Andric for (auto KV : NewDefs) { 919*fe6060f1SDimitry Andric Instruction *I = KV.first; 920*fe6060f1SDimitry Andric BasicBlock *BB = I->getParent(); 921*fe6060f1SDimitry Andric std::vector<Instruction *> Cloned = KV.second; 922*fe6060f1SDimitry Andric 923*fe6060f1SDimitry Andric // Scan all uses of this instruction to see if it is used outside of its 924*fe6060f1SDimitry Andric // block, and if so, record them in UsesToRename. 925*fe6060f1SDimitry Andric for (Use &U : I->uses()) { 926*fe6060f1SDimitry Andric Instruction *User = cast<Instruction>(U.getUser()); 927*fe6060f1SDimitry Andric if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 928*fe6060f1SDimitry Andric if (UserPN->getIncomingBlock(U) == BB) 929*fe6060f1SDimitry Andric continue; 930*fe6060f1SDimitry Andric } else if (User->getParent() == BB) { 931*fe6060f1SDimitry Andric continue; 932*fe6060f1SDimitry Andric } 933*fe6060f1SDimitry Andric 934*fe6060f1SDimitry Andric UsesToRename.push_back(&U); 935*fe6060f1SDimitry Andric } 936*fe6060f1SDimitry Andric 937*fe6060f1SDimitry Andric // If there are no uses outside the block, we're done with this 938*fe6060f1SDimitry Andric // instruction. 939*fe6060f1SDimitry Andric if (UsesToRename.empty()) 940*fe6060f1SDimitry Andric continue; 941*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I 942*fe6060f1SDimitry Andric << "\n"); 943*fe6060f1SDimitry Andric 944*fe6060f1SDimitry Andric // We found a use of I outside of BB. Rename all uses of I that are 945*fe6060f1SDimitry Andric // outside its block to be uses of the appropriate PHI node etc. See 946*fe6060f1SDimitry Andric // ValuesInBlocks with the values we know. 947*fe6060f1SDimitry Andric unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType()); 948*fe6060f1SDimitry Andric SSAUpdate.AddAvailableValue(VarNum, BB, I); 949*fe6060f1SDimitry Andric for (Instruction *New : Cloned) 950*fe6060f1SDimitry Andric SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New); 951*fe6060f1SDimitry Andric 952*fe6060f1SDimitry Andric while (!UsesToRename.empty()) 953*fe6060f1SDimitry Andric SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val()); 954*fe6060f1SDimitry Andric 955*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 956*fe6060f1SDimitry Andric } 957*fe6060f1SDimitry Andric // SSAUpdater handles phi placement and renaming uses with the appropriate 958*fe6060f1SDimitry Andric // value. 959*fe6060f1SDimitry Andric SSAUpdate.RewriteAllUses(DT); 960*fe6060f1SDimitry Andric } 961*fe6060f1SDimitry Andric 962*fe6060f1SDimitry Andric /// Clones a basic block, and adds it to the CFG. 963*fe6060f1SDimitry Andric /// 964*fe6060f1SDimitry Andric /// This function also includes updating phi nodes in the successors of the 965*fe6060f1SDimitry Andric /// BB, and remapping uses that were defined locally in the cloned BB. 966*fe6060f1SDimitry Andric BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB, 967*fe6060f1SDimitry Andric uint64_t NextState, 968*fe6060f1SDimitry Andric DuplicateBlockMap &DuplicateMap, 969*fe6060f1SDimitry Andric DefMap &NewDefs, 970*fe6060f1SDimitry Andric DomTreeUpdater *DTU) { 971*fe6060f1SDimitry Andric ValueToValueMapTy VMap; 972*fe6060f1SDimitry Andric BasicBlock *NewBB = CloneBasicBlock( 973*fe6060f1SDimitry Andric BB, VMap, ".jt" + std::to_string(NextState), BB->getParent()); 974*fe6060f1SDimitry Andric NewBB->moveAfter(BB); 975*fe6060f1SDimitry Andric NumCloned++; 976*fe6060f1SDimitry Andric 977*fe6060f1SDimitry Andric for (Instruction &I : *NewBB) { 978*fe6060f1SDimitry Andric // Do not remap operands of PHINode in case a definition in BB is an 979*fe6060f1SDimitry Andric // incoming value to a phi in the same block. This incoming value will 980*fe6060f1SDimitry Andric // be renamed later while restoring SSA. 981*fe6060f1SDimitry Andric if (isa<PHINode>(&I)) 982*fe6060f1SDimitry Andric continue; 983*fe6060f1SDimitry Andric RemapInstruction(&I, VMap, 984*fe6060f1SDimitry Andric RF_IgnoreMissingLocals | RF_NoModuleLevelChanges); 985*fe6060f1SDimitry Andric if (AssumeInst *II = dyn_cast<AssumeInst>(&I)) 986*fe6060f1SDimitry Andric AC->registerAssumption(II); 987*fe6060f1SDimitry Andric } 988*fe6060f1SDimitry Andric 989*fe6060f1SDimitry Andric updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap); 990*fe6060f1SDimitry Andric updatePredecessor(PrevBB, BB, NewBB, DTU); 991*fe6060f1SDimitry Andric updateDefMap(NewDefs, VMap); 992*fe6060f1SDimitry Andric 993*fe6060f1SDimitry Andric // Add all successors to the DominatorTree 994*fe6060f1SDimitry Andric SmallPtrSet<BasicBlock *, 4> SuccSet; 995*fe6060f1SDimitry Andric for (auto *SuccBB : successors(NewBB)) { 996*fe6060f1SDimitry Andric if (SuccSet.insert(SuccBB).second) 997*fe6060f1SDimitry Andric DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}}); 998*fe6060f1SDimitry Andric } 999*fe6060f1SDimitry Andric SuccSet.clear(); 1000*fe6060f1SDimitry Andric return NewBB; 1001*fe6060f1SDimitry Andric } 1002*fe6060f1SDimitry Andric 1003*fe6060f1SDimitry Andric /// Update the phi nodes in BB's successors. 1004*fe6060f1SDimitry Andric /// 1005*fe6060f1SDimitry Andric /// This means creating a new incoming value from NewBB with the new 1006*fe6060f1SDimitry Andric /// instruction wherever there is an incoming value from BB. 1007*fe6060f1SDimitry Andric void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB, 1008*fe6060f1SDimitry Andric uint64_t NextState, ValueToValueMapTy &VMap, 1009*fe6060f1SDimitry Andric DuplicateBlockMap &DuplicateMap) { 1010*fe6060f1SDimitry Andric std::vector<BasicBlock *> BlocksToUpdate; 1011*fe6060f1SDimitry Andric 1012*fe6060f1SDimitry Andric // If BB is the last block in the path, we can simply update the one case 1013*fe6060f1SDimitry Andric // successor that will be reached. 1014*fe6060f1SDimitry Andric if (BB == SwitchPaths->getSwitchBlock()) { 1015*fe6060f1SDimitry Andric SwitchInst *Switch = SwitchPaths->getSwitchInst(); 1016*fe6060f1SDimitry Andric BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 1017*fe6060f1SDimitry Andric BlocksToUpdate.push_back(NextCase); 1018*fe6060f1SDimitry Andric BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap); 1019*fe6060f1SDimitry Andric if (ClonedSucc) 1020*fe6060f1SDimitry Andric BlocksToUpdate.push_back(ClonedSucc); 1021*fe6060f1SDimitry Andric } 1022*fe6060f1SDimitry Andric // Otherwise update phis in all successors. 1023*fe6060f1SDimitry Andric else { 1024*fe6060f1SDimitry Andric for (BasicBlock *Succ : successors(BB)) { 1025*fe6060f1SDimitry Andric BlocksToUpdate.push_back(Succ); 1026*fe6060f1SDimitry Andric 1027*fe6060f1SDimitry Andric // Check if a successor has already been cloned for the particular exit 1028*fe6060f1SDimitry Andric // value. In this case if a successor was already cloned, the phi nodes 1029*fe6060f1SDimitry Andric // in the cloned block should be updated directly. 1030*fe6060f1SDimitry Andric BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap); 1031*fe6060f1SDimitry Andric if (ClonedSucc) 1032*fe6060f1SDimitry Andric BlocksToUpdate.push_back(ClonedSucc); 1033*fe6060f1SDimitry Andric } 1034*fe6060f1SDimitry Andric } 1035*fe6060f1SDimitry Andric 1036*fe6060f1SDimitry Andric // If there is a phi with an incoming value from BB, create a new incoming 1037*fe6060f1SDimitry Andric // value for the new predecessor ClonedBB. The value will either be the same 1038*fe6060f1SDimitry Andric // value from BB or a cloned value. 1039*fe6060f1SDimitry Andric for (BasicBlock *Succ : BlocksToUpdate) { 1040*fe6060f1SDimitry Andric for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II); 1041*fe6060f1SDimitry Andric ++II) { 1042*fe6060f1SDimitry Andric Value *Incoming = Phi->getIncomingValueForBlock(BB); 1043*fe6060f1SDimitry Andric if (Incoming) { 1044*fe6060f1SDimitry Andric if (isa<Constant>(Incoming)) { 1045*fe6060f1SDimitry Andric Phi->addIncoming(Incoming, ClonedBB); 1046*fe6060f1SDimitry Andric continue; 1047*fe6060f1SDimitry Andric } 1048*fe6060f1SDimitry Andric Value *ClonedVal = VMap[Incoming]; 1049*fe6060f1SDimitry Andric if (ClonedVal) 1050*fe6060f1SDimitry Andric Phi->addIncoming(ClonedVal, ClonedBB); 1051*fe6060f1SDimitry Andric else 1052*fe6060f1SDimitry Andric Phi->addIncoming(Incoming, ClonedBB); 1053*fe6060f1SDimitry Andric } 1054*fe6060f1SDimitry Andric } 1055*fe6060f1SDimitry Andric } 1056*fe6060f1SDimitry Andric } 1057*fe6060f1SDimitry Andric 1058*fe6060f1SDimitry Andric /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all 1059*fe6060f1SDimitry Andric /// other successors are kept as well. 1060*fe6060f1SDimitry Andric void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB, 1061*fe6060f1SDimitry Andric BasicBlock *NewBB, DomTreeUpdater *DTU) { 1062*fe6060f1SDimitry Andric // When a path is reused, there is a chance that predecessors were already 1063*fe6060f1SDimitry Andric // updated before. Check if the predecessor needs to be updated first. 1064*fe6060f1SDimitry Andric if (!isPredecessor(OldBB, PrevBB)) 1065*fe6060f1SDimitry Andric return; 1066*fe6060f1SDimitry Andric 1067*fe6060f1SDimitry Andric Instruction *PrevTerm = PrevBB->getTerminator(); 1068*fe6060f1SDimitry Andric for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) { 1069*fe6060f1SDimitry Andric if (PrevTerm->getSuccessor(Idx) == OldBB) { 1070*fe6060f1SDimitry Andric OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true); 1071*fe6060f1SDimitry Andric PrevTerm->setSuccessor(Idx, NewBB); 1072*fe6060f1SDimitry Andric } 1073*fe6060f1SDimitry Andric } 1074*fe6060f1SDimitry Andric DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB}, 1075*fe6060f1SDimitry Andric {DominatorTree::Insert, PrevBB, NewBB}}); 1076*fe6060f1SDimitry Andric } 1077*fe6060f1SDimitry Andric 1078*fe6060f1SDimitry Andric /// Add new value mappings to the DefMap to keep track of all new definitions 1079*fe6060f1SDimitry Andric /// for a particular instruction. These will be used while updating SSA form. 1080*fe6060f1SDimitry Andric void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) { 1081*fe6060f1SDimitry Andric for (auto Entry : VMap) { 1082*fe6060f1SDimitry Andric Instruction *Inst = 1083*fe6060f1SDimitry Andric dyn_cast<Instruction>(const_cast<Value *>(Entry.first)); 1084*fe6060f1SDimitry Andric if (!Inst || !Entry.second || isa<BranchInst>(Inst) || 1085*fe6060f1SDimitry Andric isa<SwitchInst>(Inst)) { 1086*fe6060f1SDimitry Andric continue; 1087*fe6060f1SDimitry Andric } 1088*fe6060f1SDimitry Andric 1089*fe6060f1SDimitry Andric Instruction *Cloned = dyn_cast<Instruction>(Entry.second); 1090*fe6060f1SDimitry Andric if (!Cloned) 1091*fe6060f1SDimitry Andric continue; 1092*fe6060f1SDimitry Andric 1093*fe6060f1SDimitry Andric if (NewDefs.find(Inst) == NewDefs.end()) 1094*fe6060f1SDimitry Andric NewDefs[Inst] = {Cloned}; 1095*fe6060f1SDimitry Andric else 1096*fe6060f1SDimitry Andric NewDefs[Inst].push_back(Cloned); 1097*fe6060f1SDimitry Andric } 1098*fe6060f1SDimitry Andric } 1099*fe6060f1SDimitry Andric 1100*fe6060f1SDimitry Andric /// Update the last branch of a particular cloned path to point to the correct 1101*fe6060f1SDimitry Andric /// case successor. 1102*fe6060f1SDimitry Andric /// 1103*fe6060f1SDimitry Andric /// Note that this is an optional step and would have been done in later 1104*fe6060f1SDimitry Andric /// optimizations, but it makes the CFG significantly easier to work with. 1105*fe6060f1SDimitry Andric void updateLastSuccessor(ThreadingPath &TPath, 1106*fe6060f1SDimitry Andric DuplicateBlockMap &DuplicateMap, 1107*fe6060f1SDimitry Andric DomTreeUpdater *DTU) { 1108*fe6060f1SDimitry Andric uint64_t NextState = TPath.getExitValue(); 1109*fe6060f1SDimitry Andric BasicBlock *BB = TPath.getPath().back(); 1110*fe6060f1SDimitry Andric BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap); 1111*fe6060f1SDimitry Andric 1112*fe6060f1SDimitry Andric // Note multiple paths can end at the same block so check that it is not 1113*fe6060f1SDimitry Andric // updated yet 1114*fe6060f1SDimitry Andric if (!isa<SwitchInst>(LastBlock->getTerminator())) 1115*fe6060f1SDimitry Andric return; 1116*fe6060f1SDimitry Andric SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator()); 1117*fe6060f1SDimitry Andric BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 1118*fe6060f1SDimitry Andric 1119*fe6060f1SDimitry Andric std::vector<DominatorTree::UpdateType> DTUpdates; 1120*fe6060f1SDimitry Andric SmallPtrSet<BasicBlock *, 4> SuccSet; 1121*fe6060f1SDimitry Andric for (BasicBlock *Succ : successors(LastBlock)) { 1122*fe6060f1SDimitry Andric if (Succ != NextCase && SuccSet.insert(Succ).second) 1123*fe6060f1SDimitry Andric DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ}); 1124*fe6060f1SDimitry Andric } 1125*fe6060f1SDimitry Andric 1126*fe6060f1SDimitry Andric Switch->eraseFromParent(); 1127*fe6060f1SDimitry Andric BranchInst::Create(NextCase, LastBlock); 1128*fe6060f1SDimitry Andric 1129*fe6060f1SDimitry Andric DTU->applyUpdates(DTUpdates); 1130*fe6060f1SDimitry Andric } 1131*fe6060f1SDimitry Andric 1132*fe6060f1SDimitry Andric /// After cloning blocks, some of the phi nodes have extra incoming values 1133*fe6060f1SDimitry Andric /// that are no longer used. This function removes them. 1134*fe6060f1SDimitry Andric void cleanPhiNodes(BasicBlock *BB) { 1135*fe6060f1SDimitry Andric // If BB is no longer reachable, remove any remaining phi nodes 1136*fe6060f1SDimitry Andric if (pred_empty(BB)) { 1137*fe6060f1SDimitry Andric std::vector<PHINode *> PhiToRemove; 1138*fe6060f1SDimitry Andric for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 1139*fe6060f1SDimitry Andric PhiToRemove.push_back(Phi); 1140*fe6060f1SDimitry Andric } 1141*fe6060f1SDimitry Andric for (PHINode *PN : PhiToRemove) { 1142*fe6060f1SDimitry Andric PN->replaceAllUsesWith(UndefValue::get(PN->getType())); 1143*fe6060f1SDimitry Andric PN->eraseFromParent(); 1144*fe6060f1SDimitry Andric } 1145*fe6060f1SDimitry Andric return; 1146*fe6060f1SDimitry Andric } 1147*fe6060f1SDimitry Andric 1148*fe6060f1SDimitry Andric // Remove any incoming values that come from an invalid predecessor 1149*fe6060f1SDimitry Andric for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 1150*fe6060f1SDimitry Andric std::vector<BasicBlock *> BlocksToRemove; 1151*fe6060f1SDimitry Andric for (BasicBlock *IncomingBB : Phi->blocks()) { 1152*fe6060f1SDimitry Andric if (!isPredecessor(BB, IncomingBB)) 1153*fe6060f1SDimitry Andric BlocksToRemove.push_back(IncomingBB); 1154*fe6060f1SDimitry Andric } 1155*fe6060f1SDimitry Andric for (BasicBlock *BB : BlocksToRemove) 1156*fe6060f1SDimitry Andric Phi->removeIncomingValue(BB); 1157*fe6060f1SDimitry Andric } 1158*fe6060f1SDimitry Andric } 1159*fe6060f1SDimitry Andric 1160*fe6060f1SDimitry Andric /// Checks if BB was already cloned for a particular next state value. If it 1161*fe6060f1SDimitry Andric /// was then it returns this cloned block, and otherwise null. 1162*fe6060f1SDimitry Andric BasicBlock *getClonedBB(BasicBlock *BB, uint64_t NextState, 1163*fe6060f1SDimitry Andric DuplicateBlockMap &DuplicateMap) { 1164*fe6060f1SDimitry Andric CloneList ClonedBBs = DuplicateMap[BB]; 1165*fe6060f1SDimitry Andric 1166*fe6060f1SDimitry Andric // Find an entry in the CloneList with this NextState. If it exists then 1167*fe6060f1SDimitry Andric // return the corresponding BB 1168*fe6060f1SDimitry Andric auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) { 1169*fe6060f1SDimitry Andric return C.State == NextState; 1170*fe6060f1SDimitry Andric }); 1171*fe6060f1SDimitry Andric return It != ClonedBBs.end() ? (*It).BB : nullptr; 1172*fe6060f1SDimitry Andric } 1173*fe6060f1SDimitry Andric 1174*fe6060f1SDimitry Andric /// Helper to get the successor corresponding to a particular case value for 1175*fe6060f1SDimitry Andric /// a switch statement. 1176*fe6060f1SDimitry Andric BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, uint64_t NextState) { 1177*fe6060f1SDimitry Andric BasicBlock *NextCase = nullptr; 1178*fe6060f1SDimitry Andric for (auto Case : Switch->cases()) { 1179*fe6060f1SDimitry Andric if (Case.getCaseValue()->getZExtValue() == NextState) { 1180*fe6060f1SDimitry Andric NextCase = Case.getCaseSuccessor(); 1181*fe6060f1SDimitry Andric break; 1182*fe6060f1SDimitry Andric } 1183*fe6060f1SDimitry Andric } 1184*fe6060f1SDimitry Andric if (!NextCase) 1185*fe6060f1SDimitry Andric NextCase = Switch->getDefaultDest(); 1186*fe6060f1SDimitry Andric return NextCase; 1187*fe6060f1SDimitry Andric } 1188*fe6060f1SDimitry Andric 1189*fe6060f1SDimitry Andric /// Returns true if IncomingBB is a predecessor of BB. 1190*fe6060f1SDimitry Andric bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { 1191*fe6060f1SDimitry Andric return llvm::find(predecessors(BB), IncomingBB) != pred_end(BB); 1192*fe6060f1SDimitry Andric } 1193*fe6060f1SDimitry Andric 1194*fe6060f1SDimitry Andric AllSwitchPaths *SwitchPaths; 1195*fe6060f1SDimitry Andric DominatorTree *DT; 1196*fe6060f1SDimitry Andric AssumptionCache *AC; 1197*fe6060f1SDimitry Andric TargetTransformInfo *TTI; 1198*fe6060f1SDimitry Andric OptimizationRemarkEmitter *ORE; 1199*fe6060f1SDimitry Andric SmallPtrSet<const Value *, 32> EphValues; 1200*fe6060f1SDimitry Andric std::vector<ThreadingPath> TPaths; 1201*fe6060f1SDimitry Andric }; 1202*fe6060f1SDimitry Andric 1203*fe6060f1SDimitry Andric bool DFAJumpThreading::run(Function &F) { 1204*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n"); 1205*fe6060f1SDimitry Andric 1206*fe6060f1SDimitry Andric if (F.hasOptSize()) { 1207*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n"); 1208*fe6060f1SDimitry Andric return false; 1209*fe6060f1SDimitry Andric } 1210*fe6060f1SDimitry Andric 1211*fe6060f1SDimitry Andric if (ClViewCfgBefore) 1212*fe6060f1SDimitry Andric F.viewCFG(); 1213*fe6060f1SDimitry Andric 1214*fe6060f1SDimitry Andric SmallVector<AllSwitchPaths, 2> ThreadableLoops; 1215*fe6060f1SDimitry Andric bool MadeChanges = false; 1216*fe6060f1SDimitry Andric 1217*fe6060f1SDimitry Andric for (BasicBlock &BB : F) { 1218*fe6060f1SDimitry Andric auto *SI = dyn_cast<SwitchInst>(BB.getTerminator()); 1219*fe6060f1SDimitry Andric if (!SI) 1220*fe6060f1SDimitry Andric continue; 1221*fe6060f1SDimitry Andric 1222*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName() 1223*fe6060f1SDimitry Andric << " is predictable\n"); 1224*fe6060f1SDimitry Andric MainSwitch Switch(SI, ORE); 1225*fe6060f1SDimitry Andric 1226*fe6060f1SDimitry Andric if (!Switch.getInstr()) 1227*fe6060f1SDimitry Andric continue; 1228*fe6060f1SDimitry Andric 1229*fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a " 1230*fe6060f1SDimitry Andric << "candidate for jump threading\n"); 1231*fe6060f1SDimitry Andric LLVM_DEBUG(SI->dump()); 1232*fe6060f1SDimitry Andric 1233*fe6060f1SDimitry Andric unfoldSelectInstrs(DT, Switch.getSelectInsts()); 1234*fe6060f1SDimitry Andric if (!Switch.getSelectInsts().empty()) 1235*fe6060f1SDimitry Andric MadeChanges = true; 1236*fe6060f1SDimitry Andric 1237*fe6060f1SDimitry Andric AllSwitchPaths SwitchPaths(&Switch, ORE); 1238*fe6060f1SDimitry Andric SwitchPaths.run(); 1239*fe6060f1SDimitry Andric 1240*fe6060f1SDimitry Andric if (SwitchPaths.getNumThreadingPaths() > 0) { 1241*fe6060f1SDimitry Andric ThreadableLoops.push_back(SwitchPaths); 1242*fe6060f1SDimitry Andric 1243*fe6060f1SDimitry Andric // For the time being limit this optimization to occurring once in a 1244*fe6060f1SDimitry Andric // function since it can change the CFG significantly. This is not a 1245*fe6060f1SDimitry Andric // strict requirement but it can cause buggy behavior if there is an 1246*fe6060f1SDimitry Andric // overlap of blocks in different opportunities. There is a lot of room to 1247*fe6060f1SDimitry Andric // experiment with catching more opportunities here. 1248*fe6060f1SDimitry Andric break; 1249*fe6060f1SDimitry Andric } 1250*fe6060f1SDimitry Andric } 1251*fe6060f1SDimitry Andric 1252*fe6060f1SDimitry Andric SmallPtrSet<const Value *, 32> EphValues; 1253*fe6060f1SDimitry Andric if (ThreadableLoops.size() > 0) 1254*fe6060f1SDimitry Andric CodeMetrics::collectEphemeralValues(&F, AC, EphValues); 1255*fe6060f1SDimitry Andric 1256*fe6060f1SDimitry Andric for (AllSwitchPaths SwitchPaths : ThreadableLoops) { 1257*fe6060f1SDimitry Andric TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); 1258*fe6060f1SDimitry Andric Transform.run(); 1259*fe6060f1SDimitry Andric MadeChanges = true; 1260*fe6060f1SDimitry Andric } 1261*fe6060f1SDimitry Andric 1262*fe6060f1SDimitry Andric #ifdef EXPENSIVE_CHECKS 1263*fe6060f1SDimitry Andric assert(DT->verify(DominatorTree::VerificationLevel::Full)); 1264*fe6060f1SDimitry Andric verifyFunction(F, &dbgs()); 1265*fe6060f1SDimitry Andric #endif 1266*fe6060f1SDimitry Andric 1267*fe6060f1SDimitry Andric return MadeChanges; 1268*fe6060f1SDimitry Andric } 1269*fe6060f1SDimitry Andric 1270*fe6060f1SDimitry Andric } // end anonymous namespace 1271*fe6060f1SDimitry Andric 1272*fe6060f1SDimitry Andric /// Integrate with the new Pass Manager 1273*fe6060f1SDimitry Andric PreservedAnalyses DFAJumpThreadingPass::run(Function &F, 1274*fe6060f1SDimitry Andric FunctionAnalysisManager &AM) { 1275*fe6060f1SDimitry Andric AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F); 1276*fe6060f1SDimitry Andric DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); 1277*fe6060f1SDimitry Andric TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); 1278*fe6060f1SDimitry Andric OptimizationRemarkEmitter ORE(&F); 1279*fe6060f1SDimitry Andric 1280*fe6060f1SDimitry Andric if (!DFAJumpThreading(&AC, &DT, &TTI, &ORE).run(F)) 1281*fe6060f1SDimitry Andric return PreservedAnalyses::all(); 1282*fe6060f1SDimitry Andric 1283*fe6060f1SDimitry Andric PreservedAnalyses PA; 1284*fe6060f1SDimitry Andric PA.preserve<DominatorTreeAnalysis>(); 1285*fe6060f1SDimitry Andric return PA; 1286*fe6060f1SDimitry Andric } 1287