1fe6060f1SDimitry Andric //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===// 2fe6060f1SDimitry Andric // 3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6fe6060f1SDimitry Andric // 7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8fe6060f1SDimitry Andric // 9fe6060f1SDimitry Andric // Transform each threading path to effectively jump thread the DFA. For 10fe6060f1SDimitry Andric // example, the CFG below could be transformed as follows, where the cloned 11fe6060f1SDimitry Andric // blocks unconditionally branch to the next correct case based on what is 12fe6060f1SDimitry Andric // identified in the analysis. 13fe6060f1SDimitry Andric // 14fe6060f1SDimitry Andric // sw.bb sw.bb 15fe6060f1SDimitry Andric // / | \ / | \ 16fe6060f1SDimitry Andric // case1 case2 case3 case1 case2 case3 17fe6060f1SDimitry Andric // \ | / | | | 18fe6060f1SDimitry Andric // determinator det.2 det.3 det.1 19fe6060f1SDimitry Andric // br sw.bb / | \ 20fe6060f1SDimitry Andric // sw.bb.2 sw.bb.3 sw.bb.1 21fe6060f1SDimitry Andric // br case2 br case3 br case1§ 22fe6060f1SDimitry Andric // 23fe6060f1SDimitry Andric // Definitions and Terminology: 24fe6060f1SDimitry Andric // 25fe6060f1SDimitry Andric // * Threading path: 26fe6060f1SDimitry Andric // a list of basic blocks, the exit state, and the block that determines 27fe6060f1SDimitry Andric // the next state, for which the following notation will be used: 28fe6060f1SDimitry Andric // < path of BBs that form a cycle > [ state, determinator ] 29fe6060f1SDimitry Andric // 30fe6060f1SDimitry Andric // * Predictable switch: 31fe6060f1SDimitry Andric // The switch variable is always a known constant so that all conditional 32fe6060f1SDimitry Andric // jumps based on switch variable can be converted to unconditional jump. 33fe6060f1SDimitry Andric // 34fe6060f1SDimitry Andric // * Determinator: 35fe6060f1SDimitry Andric // The basic block that determines the next state of the DFA. 36fe6060f1SDimitry Andric // 37fe6060f1SDimitry Andric // Representing the optimization in C-like pseudocode: the code pattern on the 38fe6060f1SDimitry Andric // left could functionally be transformed to the right pattern if the switch 39fe6060f1SDimitry Andric // condition is predictable. 40fe6060f1SDimitry Andric // 41fe6060f1SDimitry Andric // X = A goto A 42fe6060f1SDimitry Andric // for (...) A: 43fe6060f1SDimitry Andric // switch (X) ... 44fe6060f1SDimitry Andric // case A goto B 45fe6060f1SDimitry Andric // X = B B: 46fe6060f1SDimitry Andric // case B ... 47fe6060f1SDimitry Andric // X = C goto C 48fe6060f1SDimitry Andric // 49fe6060f1SDimitry Andric // The pass first checks that switch variable X is decided by the control flow 50fe6060f1SDimitry Andric // path taken in the loop; for example, in case B, the next value of X is 51fe6060f1SDimitry Andric // decided to be C. It then enumerates through all paths in the loop and labels 52fe6060f1SDimitry Andric // the basic blocks where the next state is decided. 53fe6060f1SDimitry Andric // 54fe6060f1SDimitry Andric // Using this information it creates new paths that unconditionally branch to 55fe6060f1SDimitry Andric // the next case. This involves cloning code, so it only gets triggered if the 56fe6060f1SDimitry Andric // amount of code duplicated is below a threshold. 57fe6060f1SDimitry Andric // 58fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 59fe6060f1SDimitry Andric 60fe6060f1SDimitry Andric #include "llvm/Transforms/Scalar/DFAJumpThreading.h" 61fe6060f1SDimitry Andric #include "llvm/ADT/APInt.h" 62fe6060f1SDimitry Andric #include "llvm/ADT/DenseMap.h" 63fe6060f1SDimitry Andric #include "llvm/ADT/SmallSet.h" 64fe6060f1SDimitry Andric #include "llvm/ADT/Statistic.h" 65fe6060f1SDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 66fe6060f1SDimitry Andric #include "llvm/Analysis/CodeMetrics.h" 6781ad6265SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h" 68fe6060f1SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 69fe6060f1SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 70fe6060f1SDimitry Andric #include "llvm/IR/CFG.h" 71fe6060f1SDimitry Andric #include "llvm/IR/Constants.h" 72fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 73fe6060f1SDimitry Andric #include "llvm/Support/CommandLine.h" 74fe6060f1SDimitry Andric #include "llvm/Support/Debug.h" 75fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 76fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" 77fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h" 78fe6060f1SDimitry Andric #include <algorithm> 79fe6060f1SDimitry Andric #include <deque> 80fe6060f1SDimitry Andric 8181ad6265SDimitry Andric #ifdef EXPENSIVE_CHECKS 8281ad6265SDimitry Andric #include "llvm/IR/Verifier.h" 8381ad6265SDimitry Andric #endif 8481ad6265SDimitry Andric 85fe6060f1SDimitry Andric using namespace llvm; 86fe6060f1SDimitry Andric 87fe6060f1SDimitry Andric #define DEBUG_TYPE "dfa-jump-threading" 88fe6060f1SDimitry Andric 89fe6060f1SDimitry Andric STATISTIC(NumTransforms, "Number of transformations done"); 90fe6060f1SDimitry Andric STATISTIC(NumCloned, "Number of blocks cloned"); 91fe6060f1SDimitry Andric STATISTIC(NumPaths, "Number of individual paths threaded"); 92fe6060f1SDimitry Andric 93fe6060f1SDimitry Andric static cl::opt<bool> 94fe6060f1SDimitry Andric ClViewCfgBefore("dfa-jump-view-cfg-before", 95fe6060f1SDimitry Andric cl::desc("View the CFG before DFA Jump Threading"), 96fe6060f1SDimitry Andric cl::Hidden, cl::init(false)); 97fe6060f1SDimitry Andric 98fe6060f1SDimitry Andric static cl::opt<unsigned> MaxPathLength( 99fe6060f1SDimitry Andric "dfa-max-path-length", 100fe6060f1SDimitry Andric cl::desc("Max number of blocks searched to find a threading path"), 101fe6060f1SDimitry Andric cl::Hidden, cl::init(20)); 102fe6060f1SDimitry Andric 103*5f757f3fSDimitry Andric static cl::opt<unsigned> 104*5f757f3fSDimitry Andric MaxNumPaths("dfa-max-num-paths", 10581ad6265SDimitry Andric cl::desc("Max number of paths enumerated around a switch"), 10681ad6265SDimitry Andric cl::Hidden, cl::init(200)); 10781ad6265SDimitry Andric 108fe6060f1SDimitry Andric static cl::opt<unsigned> 109fe6060f1SDimitry Andric CostThreshold("dfa-cost-threshold", 110fe6060f1SDimitry Andric cl::desc("Maximum cost accepted for the transformation"), 111fe6060f1SDimitry Andric cl::Hidden, cl::init(50)); 112fe6060f1SDimitry Andric 113fe6060f1SDimitry Andric namespace { 114fe6060f1SDimitry Andric 115fe6060f1SDimitry Andric class SelectInstToUnfold { 116fe6060f1SDimitry Andric SelectInst *SI; 117fe6060f1SDimitry Andric PHINode *SIUse; 118fe6060f1SDimitry Andric 119fe6060f1SDimitry Andric public: 120fe6060f1SDimitry Andric SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {} 121fe6060f1SDimitry Andric 122fe6060f1SDimitry Andric SelectInst *getInst() { return SI; } 123fe6060f1SDimitry Andric PHINode *getUse() { return SIUse; } 124fe6060f1SDimitry Andric 125fe6060f1SDimitry Andric explicit operator bool() const { return SI && SIUse; } 126fe6060f1SDimitry Andric }; 127fe6060f1SDimitry Andric 128fe6060f1SDimitry Andric void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, 129fe6060f1SDimitry Andric std::vector<SelectInstToUnfold> *NewSIsToUnfold, 130fe6060f1SDimitry Andric std::vector<BasicBlock *> *NewBBs); 131fe6060f1SDimitry Andric 132fe6060f1SDimitry Andric class DFAJumpThreading { 133fe6060f1SDimitry Andric public: 134fe6060f1SDimitry Andric DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, 135fe6060f1SDimitry Andric TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE) 136fe6060f1SDimitry Andric : AC(AC), DT(DT), TTI(TTI), ORE(ORE) {} 137fe6060f1SDimitry Andric 138fe6060f1SDimitry Andric bool run(Function &F); 139fe6060f1SDimitry Andric 140fe6060f1SDimitry Andric private: 141fe6060f1SDimitry Andric void 142fe6060f1SDimitry Andric unfoldSelectInstrs(DominatorTree *DT, 143fe6060f1SDimitry Andric const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { 144fe6060f1SDimitry Andric DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 145fe6060f1SDimitry Andric SmallVector<SelectInstToUnfold, 4> Stack; 146fe6060f1SDimitry Andric for (SelectInstToUnfold SIToUnfold : SelectInsts) 147fe6060f1SDimitry Andric Stack.push_back(SIToUnfold); 148fe6060f1SDimitry Andric 149fe6060f1SDimitry Andric while (!Stack.empty()) { 150349cc55cSDimitry Andric SelectInstToUnfold SIToUnfold = Stack.pop_back_val(); 151fe6060f1SDimitry Andric 152fe6060f1SDimitry Andric std::vector<SelectInstToUnfold> NewSIsToUnfold; 153fe6060f1SDimitry Andric std::vector<BasicBlock *> NewBBs; 154fe6060f1SDimitry Andric unfold(&DTU, SIToUnfold, &NewSIsToUnfold, &NewBBs); 155fe6060f1SDimitry Andric 156fe6060f1SDimitry Andric // Put newly discovered select instructions into the work list. 157fe6060f1SDimitry Andric for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold) 158fe6060f1SDimitry Andric Stack.push_back(NewSIToUnfold); 159fe6060f1SDimitry Andric } 160fe6060f1SDimitry Andric } 161fe6060f1SDimitry Andric 162fe6060f1SDimitry Andric AssumptionCache *AC; 163fe6060f1SDimitry Andric DominatorTree *DT; 164fe6060f1SDimitry Andric TargetTransformInfo *TTI; 165fe6060f1SDimitry Andric OptimizationRemarkEmitter *ORE; 166fe6060f1SDimitry Andric }; 167fe6060f1SDimitry Andric 168fe6060f1SDimitry Andric } // end anonymous namespace 169fe6060f1SDimitry Andric 170fe6060f1SDimitry Andric namespace { 171fe6060f1SDimitry Andric 172fe6060f1SDimitry Andric /// Create a new basic block and sink \p SIToSink into it. 173fe6060f1SDimitry Andric void createBasicBlockAndSinkSelectInst( 174fe6060f1SDimitry Andric DomTreeUpdater *DTU, SelectInst *SI, PHINode *SIUse, SelectInst *SIToSink, 175fe6060f1SDimitry Andric BasicBlock *EndBlock, StringRef NewBBName, BasicBlock **NewBlock, 176fe6060f1SDimitry Andric BranchInst **NewBranch, std::vector<SelectInstToUnfold> *NewSIsToUnfold, 177fe6060f1SDimitry Andric std::vector<BasicBlock *> *NewBBs) { 178fe6060f1SDimitry Andric assert(SIToSink->hasOneUse()); 179fe6060f1SDimitry Andric assert(NewBlock); 180fe6060f1SDimitry Andric assert(NewBranch); 181fe6060f1SDimitry Andric *NewBlock = BasicBlock::Create(SI->getContext(), NewBBName, 182fe6060f1SDimitry Andric EndBlock->getParent(), EndBlock); 183fe6060f1SDimitry Andric NewBBs->push_back(*NewBlock); 184fe6060f1SDimitry Andric *NewBranch = BranchInst::Create(EndBlock, *NewBlock); 185fe6060f1SDimitry Andric SIToSink->moveBefore(*NewBranch); 186fe6060f1SDimitry Andric NewSIsToUnfold->push_back(SelectInstToUnfold(SIToSink, SIUse)); 187fe6060f1SDimitry Andric DTU->applyUpdates({{DominatorTree::Insert, *NewBlock, EndBlock}}); 188fe6060f1SDimitry Andric } 189fe6060f1SDimitry Andric 190fe6060f1SDimitry Andric /// Unfold the select instruction held in \p SIToUnfold by replacing it with 191fe6060f1SDimitry Andric /// control flow. 192fe6060f1SDimitry Andric /// 193fe6060f1SDimitry Andric /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly 194fe6060f1SDimitry Andric /// created basic blocks into \p NewBBs. 195fe6060f1SDimitry Andric /// 196fe6060f1SDimitry Andric /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. 197fe6060f1SDimitry Andric void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, 198fe6060f1SDimitry Andric std::vector<SelectInstToUnfold> *NewSIsToUnfold, 199fe6060f1SDimitry Andric std::vector<BasicBlock *> *NewBBs) { 200fe6060f1SDimitry Andric SelectInst *SI = SIToUnfold.getInst(); 201fe6060f1SDimitry Andric PHINode *SIUse = SIToUnfold.getUse(); 202fe6060f1SDimitry Andric BasicBlock *StartBlock = SI->getParent(); 203fe6060f1SDimitry Andric BasicBlock *EndBlock = SIUse->getParent(); 204fe6060f1SDimitry Andric BranchInst *StartBlockTerm = 205fe6060f1SDimitry Andric dyn_cast<BranchInst>(StartBlock->getTerminator()); 206fe6060f1SDimitry Andric 207fe6060f1SDimitry Andric assert(StartBlockTerm && StartBlockTerm->isUnconditional()); 208fe6060f1SDimitry Andric assert(SI->hasOneUse()); 209fe6060f1SDimitry Andric 210fe6060f1SDimitry Andric // These are the new basic blocks for the conditional branch. 211fe6060f1SDimitry Andric // At least one will become an actual new basic block. 212fe6060f1SDimitry Andric BasicBlock *TrueBlock = nullptr; 213fe6060f1SDimitry Andric BasicBlock *FalseBlock = nullptr; 214fe6060f1SDimitry Andric BranchInst *TrueBranch = nullptr; 215fe6060f1SDimitry Andric BranchInst *FalseBranch = nullptr; 216fe6060f1SDimitry Andric 217fe6060f1SDimitry Andric // Sink select instructions to be able to unfold them later. 218fe6060f1SDimitry Andric if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getTrueValue())) { 219fe6060f1SDimitry Andric createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock, 220fe6060f1SDimitry Andric "si.unfold.true", &TrueBlock, &TrueBranch, 221fe6060f1SDimitry Andric NewSIsToUnfold, NewBBs); 222fe6060f1SDimitry Andric } 223fe6060f1SDimitry Andric if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getFalseValue())) { 224fe6060f1SDimitry Andric createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock, 225fe6060f1SDimitry Andric "si.unfold.false", &FalseBlock, 226fe6060f1SDimitry Andric &FalseBranch, NewSIsToUnfold, NewBBs); 227fe6060f1SDimitry Andric } 228fe6060f1SDimitry Andric 229fe6060f1SDimitry Andric // If there was nothing to sink, then arbitrarily choose the 'false' side 230fe6060f1SDimitry Andric // for a new input value to the PHI. 231fe6060f1SDimitry Andric if (!TrueBlock && !FalseBlock) { 232fe6060f1SDimitry Andric FalseBlock = BasicBlock::Create(SI->getContext(), "si.unfold.false", 233fe6060f1SDimitry Andric EndBlock->getParent(), EndBlock); 234fe6060f1SDimitry Andric NewBBs->push_back(FalseBlock); 235fe6060f1SDimitry Andric BranchInst::Create(EndBlock, FalseBlock); 236fe6060f1SDimitry Andric DTU->applyUpdates({{DominatorTree::Insert, FalseBlock, EndBlock}}); 237fe6060f1SDimitry Andric } 238fe6060f1SDimitry Andric 239fe6060f1SDimitry Andric // Insert the real conditional branch based on the original condition. 240fe6060f1SDimitry Andric // If we did not create a new block for one of the 'true' or 'false' paths 241fe6060f1SDimitry Andric // of the condition, it means that side of the branch goes to the end block 242fe6060f1SDimitry Andric // directly and the path originates from the start block from the point of 243fe6060f1SDimitry Andric // view of the new PHI. 244fe6060f1SDimitry Andric BasicBlock *TT = EndBlock; 245fe6060f1SDimitry Andric BasicBlock *FT = EndBlock; 246fe6060f1SDimitry Andric if (TrueBlock && FalseBlock) { 247fe6060f1SDimitry Andric // A diamond. 248fe6060f1SDimitry Andric TT = TrueBlock; 249fe6060f1SDimitry Andric FT = FalseBlock; 250fe6060f1SDimitry Andric 251fe6060f1SDimitry Andric // Update the phi node of SI. 252fe6060f1SDimitry Andric SIUse->addIncoming(SI->getTrueValue(), TrueBlock); 253fe6060f1SDimitry Andric SIUse->addIncoming(SI->getFalseValue(), FalseBlock); 254fe6060f1SDimitry Andric 255fe6060f1SDimitry Andric // Update any other PHI nodes in EndBlock. 256fe6060f1SDimitry Andric for (PHINode &Phi : EndBlock->phis()) { 257fe6060f1SDimitry Andric if (&Phi != SIUse) { 258*5f757f3fSDimitry Andric Value *OrigValue = Phi.getIncomingValueForBlock(StartBlock); 259*5f757f3fSDimitry Andric Phi.addIncoming(OrigValue, TrueBlock); 260*5f757f3fSDimitry Andric Phi.addIncoming(OrigValue, FalseBlock); 261fe6060f1SDimitry Andric } 262*5f757f3fSDimitry Andric 263*5f757f3fSDimitry Andric // Remove incoming place of original StartBlock, which comes in a indirect 264*5f757f3fSDimitry Andric // way (through TrueBlock and FalseBlock) now. 265*5f757f3fSDimitry Andric Phi.removeIncomingValue(StartBlock, /* DeletePHIIfEmpty = */ false); 266fe6060f1SDimitry Andric } 267fe6060f1SDimitry Andric } else { 268fe6060f1SDimitry Andric BasicBlock *NewBlock = nullptr; 269fe6060f1SDimitry Andric Value *SIOp1 = SI->getTrueValue(); 270fe6060f1SDimitry Andric Value *SIOp2 = SI->getFalseValue(); 271fe6060f1SDimitry Andric 272fe6060f1SDimitry Andric // A triangle pointing right. 273fe6060f1SDimitry Andric if (!TrueBlock) { 274fe6060f1SDimitry Andric NewBlock = FalseBlock; 275fe6060f1SDimitry Andric FT = FalseBlock; 276fe6060f1SDimitry Andric } 277fe6060f1SDimitry Andric // A triangle pointing left. 278fe6060f1SDimitry Andric else { 279fe6060f1SDimitry Andric NewBlock = TrueBlock; 280fe6060f1SDimitry Andric TT = TrueBlock; 281fe6060f1SDimitry Andric std::swap(SIOp1, SIOp2); 282fe6060f1SDimitry Andric } 283fe6060f1SDimitry Andric 284fe6060f1SDimitry Andric // Update the phi node of SI. 285fe6060f1SDimitry Andric for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) { 286fe6060f1SDimitry Andric if (SIUse->getIncomingBlock(Idx) == StartBlock) 287fe6060f1SDimitry Andric SIUse->setIncomingValue(Idx, SIOp1); 288fe6060f1SDimitry Andric } 289fe6060f1SDimitry Andric SIUse->addIncoming(SIOp2, NewBlock); 290fe6060f1SDimitry Andric 291fe6060f1SDimitry Andric // Update any other PHI nodes in EndBlock. 292fe6060f1SDimitry Andric for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast<PHINode>(II); 293fe6060f1SDimitry Andric ++II) { 294fe6060f1SDimitry Andric if (Phi != SIUse) 295fe6060f1SDimitry Andric Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), NewBlock); 296fe6060f1SDimitry Andric } 297fe6060f1SDimitry Andric } 298fe6060f1SDimitry Andric StartBlockTerm->eraseFromParent(); 299fe6060f1SDimitry Andric BranchInst::Create(TT, FT, SI->getCondition(), StartBlock); 300fe6060f1SDimitry Andric DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT}, 301fe6060f1SDimitry Andric {DominatorTree::Insert, StartBlock, FT}}); 302fe6060f1SDimitry Andric 303fe6060f1SDimitry Andric // The select is now dead. 304*5f757f3fSDimitry Andric assert(SI->use_empty() && "Select must be dead now"); 305fe6060f1SDimitry Andric SI->eraseFromParent(); 306fe6060f1SDimitry Andric } 307fe6060f1SDimitry Andric 308fe6060f1SDimitry Andric struct ClonedBlock { 309fe6060f1SDimitry Andric BasicBlock *BB; 310fe6060f1SDimitry Andric uint64_t State; ///< \p State corresponds to the next value of a switch stmnt. 311fe6060f1SDimitry Andric }; 312fe6060f1SDimitry Andric 313fe6060f1SDimitry Andric typedef std::deque<BasicBlock *> PathType; 314fe6060f1SDimitry Andric typedef std::vector<PathType> PathsType; 315349cc55cSDimitry Andric typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks; 316fe6060f1SDimitry Andric typedef std::vector<ClonedBlock> CloneList; 317fe6060f1SDimitry Andric 318fe6060f1SDimitry Andric // This data structure keeps track of all blocks that have been cloned. If two 319fe6060f1SDimitry Andric // different ThreadingPaths clone the same block for a certain state it should 320fe6060f1SDimitry Andric // be reused, and it can be looked up in this map. 321fe6060f1SDimitry Andric typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap; 322fe6060f1SDimitry Andric 323fe6060f1SDimitry Andric // This map keeps track of all the new definitions for an instruction. This 324fe6060f1SDimitry Andric // information is needed when restoring SSA form after cloning blocks. 3251fd87a68SDimitry Andric typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap; 326fe6060f1SDimitry Andric 327fe6060f1SDimitry Andric inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { 328fe6060f1SDimitry Andric OS << "< "; 329fe6060f1SDimitry Andric for (const BasicBlock *BB : Path) { 330fe6060f1SDimitry Andric std::string BBName; 331fe6060f1SDimitry Andric if (BB->hasName()) 332fe6060f1SDimitry Andric raw_string_ostream(BBName) << BB->getName(); 333fe6060f1SDimitry Andric else 334fe6060f1SDimitry Andric raw_string_ostream(BBName) << BB; 335fe6060f1SDimitry Andric OS << BBName << " "; 336fe6060f1SDimitry Andric } 337fe6060f1SDimitry Andric OS << ">"; 338fe6060f1SDimitry Andric return OS; 339fe6060f1SDimitry Andric } 340fe6060f1SDimitry Andric 341fe6060f1SDimitry Andric /// ThreadingPath is a path in the control flow of a loop that can be threaded 342fe6060f1SDimitry Andric /// by cloning necessary basic blocks and replacing conditional branches with 343fe6060f1SDimitry Andric /// unconditional ones. A threading path includes a list of basic blocks, the 344fe6060f1SDimitry Andric /// exit state, and the block that determines the next state. 345fe6060f1SDimitry Andric struct ThreadingPath { 346fe6060f1SDimitry Andric /// Exit value is DFA's exit state for the given path. 347fe6060f1SDimitry Andric uint64_t getExitValue() const { return ExitVal; } 348fe6060f1SDimitry Andric void setExitValue(const ConstantInt *V) { 349fe6060f1SDimitry Andric ExitVal = V->getZExtValue(); 350fe6060f1SDimitry Andric IsExitValSet = true; 351fe6060f1SDimitry Andric } 352fe6060f1SDimitry Andric bool isExitValueSet() const { return IsExitValSet; } 353fe6060f1SDimitry Andric 354fe6060f1SDimitry Andric /// Determinator is the basic block that determines the next state of the DFA. 355fe6060f1SDimitry Andric const BasicBlock *getDeterminatorBB() const { return DBB; } 356fe6060f1SDimitry Andric void setDeterminator(const BasicBlock *BB) { DBB = BB; } 357fe6060f1SDimitry Andric 358fe6060f1SDimitry Andric /// Path is a list of basic blocks. 359fe6060f1SDimitry Andric const PathType &getPath() const { return Path; } 360fe6060f1SDimitry Andric void setPath(const PathType &NewPath) { Path = NewPath; } 361fe6060f1SDimitry Andric 362fe6060f1SDimitry Andric void print(raw_ostream &OS) const { 363fe6060f1SDimitry Andric OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]"; 364fe6060f1SDimitry Andric } 365fe6060f1SDimitry Andric 366fe6060f1SDimitry Andric private: 367fe6060f1SDimitry Andric PathType Path; 368fe6060f1SDimitry Andric uint64_t ExitVal; 369fe6060f1SDimitry Andric const BasicBlock *DBB = nullptr; 370fe6060f1SDimitry Andric bool IsExitValSet = false; 371fe6060f1SDimitry Andric }; 372fe6060f1SDimitry Andric 373fe6060f1SDimitry Andric #ifndef NDEBUG 374fe6060f1SDimitry Andric inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) { 375fe6060f1SDimitry Andric TPath.print(OS); 376fe6060f1SDimitry Andric return OS; 377fe6060f1SDimitry Andric } 378fe6060f1SDimitry Andric #endif 379fe6060f1SDimitry Andric 380fe6060f1SDimitry Andric struct MainSwitch { 381fe6060f1SDimitry Andric MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) { 38281ad6265SDimitry Andric if (isCandidate(SI)) { 383fe6060f1SDimitry Andric Instr = SI; 384fe6060f1SDimitry Andric } else { 385fe6060f1SDimitry Andric ORE->emit([&]() { 386fe6060f1SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI) 387fe6060f1SDimitry Andric << "Switch instruction is not predictable."; 388fe6060f1SDimitry Andric }); 389fe6060f1SDimitry Andric } 390fe6060f1SDimitry Andric } 391fe6060f1SDimitry Andric 392fe6060f1SDimitry Andric virtual ~MainSwitch() = default; 393fe6060f1SDimitry Andric 394fe6060f1SDimitry Andric SwitchInst *getInstr() const { return Instr; } 395fe6060f1SDimitry Andric const SmallVector<SelectInstToUnfold, 4> getSelectInsts() { 396fe6060f1SDimitry Andric return SelectInsts; 397fe6060f1SDimitry Andric } 398fe6060f1SDimitry Andric 399fe6060f1SDimitry Andric private: 40081ad6265SDimitry Andric /// Do a use-def chain traversal starting from the switch condition to see if 40181ad6265SDimitry Andric /// \p SI is a potential condidate. 40281ad6265SDimitry Andric /// 40381ad6265SDimitry Andric /// Also, collect select instructions to unfold. 40481ad6265SDimitry Andric bool isCandidate(const SwitchInst *SI) { 40581ad6265SDimitry Andric std::deque<Value *> Q; 406fe6060f1SDimitry Andric SmallSet<Value *, 16> SeenValues; 407fe6060f1SDimitry Andric SelectInsts.clear(); 408fe6060f1SDimitry Andric 40981ad6265SDimitry Andric Value *SICond = SI->getCondition(); 41081ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n"); 41181ad6265SDimitry Andric if (!isa<PHINode>(SICond)) 412fe6060f1SDimitry Andric return false; 413fe6060f1SDimitry Andric 41481ad6265SDimitry Andric addToQueue(SICond, Q, SeenValues); 415fe6060f1SDimitry Andric 416fe6060f1SDimitry Andric while (!Q.empty()) { 41781ad6265SDimitry Andric Value *Current = Q.front(); 418fe6060f1SDimitry Andric Q.pop_front(); 419fe6060f1SDimitry Andric 420fe6060f1SDimitry Andric if (auto *Phi = dyn_cast<PHINode>(Current)) { 421fe6060f1SDimitry Andric for (Value *Incoming : Phi->incoming_values()) { 42281ad6265SDimitry Andric addToQueue(Incoming, Q, SeenValues); 423fe6060f1SDimitry Andric } 42481ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n"); 425fe6060f1SDimitry Andric } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) { 426fe6060f1SDimitry Andric if (!isValidSelectInst(SelI)) 427fe6060f1SDimitry Andric return false; 42881ad6265SDimitry Andric addToQueue(SelI->getTrueValue(), Q, SeenValues); 42981ad6265SDimitry Andric addToQueue(SelI->getFalseValue(), Q, SeenValues); 43081ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n"); 431fe6060f1SDimitry Andric if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back())) 432fe6060f1SDimitry Andric SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse)); 43381ad6265SDimitry Andric } else if (isa<Constant>(Current)) { 43481ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n"); 43581ad6265SDimitry Andric continue; 436fe6060f1SDimitry Andric } else { 43781ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n"); 43881ad6265SDimitry Andric // Allow unpredictable values. The hope is that those will be the 43981ad6265SDimitry Andric // initial switch values that can be ignored (they will hit the 44081ad6265SDimitry Andric // unthreaded switch) but this assumption will get checked later after 44181ad6265SDimitry Andric // paths have been enumerated (in function getStateDefMap). 44281ad6265SDimitry Andric continue; 443fe6060f1SDimitry Andric } 444fe6060f1SDimitry Andric } 445fe6060f1SDimitry Andric 446fe6060f1SDimitry Andric return true; 447fe6060f1SDimitry Andric } 448fe6060f1SDimitry Andric 44981ad6265SDimitry Andric void addToQueue(Value *Val, std::deque<Value *> &Q, 450fe6060f1SDimitry Andric SmallSet<Value *, 16> &SeenValues) { 451349cc55cSDimitry Andric if (SeenValues.contains(Val)) 452fe6060f1SDimitry Andric return; 45381ad6265SDimitry Andric Q.push_back(Val); 454fe6060f1SDimitry Andric SeenValues.insert(Val); 455fe6060f1SDimitry Andric } 456fe6060f1SDimitry Andric 457fe6060f1SDimitry Andric bool isValidSelectInst(SelectInst *SI) { 458fe6060f1SDimitry Andric if (!SI->hasOneUse()) 459fe6060f1SDimitry Andric return false; 460fe6060f1SDimitry Andric 461fe6060f1SDimitry Andric Instruction *SIUse = dyn_cast<Instruction>(SI->user_back()); 462fe6060f1SDimitry Andric // The use of the select inst should be either a phi or another select. 463fe6060f1SDimitry Andric if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse))) 464fe6060f1SDimitry Andric return false; 465fe6060f1SDimitry Andric 466fe6060f1SDimitry Andric BasicBlock *SIBB = SI->getParent(); 467fe6060f1SDimitry Andric 468fe6060f1SDimitry Andric // Currently, we can only expand select instructions in basic blocks with 469fe6060f1SDimitry Andric // one successor. 470fe6060f1SDimitry Andric BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator()); 471fe6060f1SDimitry Andric if (!SITerm || !SITerm->isUnconditional()) 472fe6060f1SDimitry Andric return false; 473fe6060f1SDimitry Andric 474*5f757f3fSDimitry Andric // Only fold the select coming from directly where it is defined. 475*5f757f3fSDimitry Andric PHINode *PHIUser = dyn_cast<PHINode>(SIUse); 476*5f757f3fSDimitry Andric if (PHIUser && PHIUser->getIncomingBlock(*SI->use_begin()) != SIBB) 477fe6060f1SDimitry Andric return false; 478fe6060f1SDimitry Andric 479fe6060f1SDimitry Andric // If select will not be sunk during unfolding, and it is in the same basic 480fe6060f1SDimitry Andric // block as another state defining select, then cannot unfold both. 481fe6060f1SDimitry Andric for (SelectInstToUnfold SIToUnfold : SelectInsts) { 482fe6060f1SDimitry Andric SelectInst *PrevSI = SIToUnfold.getInst(); 483fe6060f1SDimitry Andric if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI && 484fe6060f1SDimitry Andric PrevSI->getParent() == SI->getParent()) 485fe6060f1SDimitry Andric return false; 486fe6060f1SDimitry Andric } 487fe6060f1SDimitry Andric 488fe6060f1SDimitry Andric return true; 489fe6060f1SDimitry Andric } 490fe6060f1SDimitry Andric 491fe6060f1SDimitry Andric SwitchInst *Instr = nullptr; 492fe6060f1SDimitry Andric SmallVector<SelectInstToUnfold, 4> SelectInsts; 493fe6060f1SDimitry Andric }; 494fe6060f1SDimitry Andric 495fe6060f1SDimitry Andric struct AllSwitchPaths { 496fe6060f1SDimitry Andric AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE) 497fe6060f1SDimitry Andric : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), 498fe6060f1SDimitry Andric ORE(ORE) {} 499fe6060f1SDimitry Andric 500fe6060f1SDimitry Andric std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; } 501fe6060f1SDimitry Andric unsigned getNumThreadingPaths() { return TPaths.size(); } 502fe6060f1SDimitry Andric SwitchInst *getSwitchInst() { return Switch; } 503fe6060f1SDimitry Andric BasicBlock *getSwitchBlock() { return SwitchBlock; } 504fe6060f1SDimitry Andric 505fe6060f1SDimitry Andric void run() { 506fe6060f1SDimitry Andric VisitedBlocks Visited; 507fe6060f1SDimitry Andric PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1); 50881ad6265SDimitry Andric StateDefMap StateDef = getStateDefMap(LoopPaths); 50981ad6265SDimitry Andric 51081ad6265SDimitry Andric if (StateDef.empty()) { 51181ad6265SDimitry Andric ORE->emit([&]() { 51281ad6265SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", 51381ad6265SDimitry Andric Switch) 51481ad6265SDimitry Andric << "Switch instruction is not predictable."; 51581ad6265SDimitry Andric }); 51681ad6265SDimitry Andric return; 51781ad6265SDimitry Andric } 518fe6060f1SDimitry Andric 519fe6060f1SDimitry Andric for (PathType Path : LoopPaths) { 520fe6060f1SDimitry Andric ThreadingPath TPath; 521fe6060f1SDimitry Andric 522fe6060f1SDimitry Andric const BasicBlock *PrevBB = Path.back(); 523fe6060f1SDimitry Andric for (const BasicBlock *BB : Path) { 524fe6060f1SDimitry Andric if (StateDef.count(BB) != 0) { 525fe6060f1SDimitry Andric const PHINode *Phi = dyn_cast<PHINode>(StateDef[BB]); 526fe6060f1SDimitry Andric assert(Phi && "Expected a state-defining instr to be a phi node."); 527fe6060f1SDimitry Andric 528fe6060f1SDimitry Andric const Value *V = Phi->getIncomingValueForBlock(PrevBB); 529fe6060f1SDimitry Andric if (const ConstantInt *C = dyn_cast<const ConstantInt>(V)) { 530fe6060f1SDimitry Andric TPath.setExitValue(C); 531fe6060f1SDimitry Andric TPath.setDeterminator(BB); 532fe6060f1SDimitry Andric TPath.setPath(Path); 533fe6060f1SDimitry Andric } 534fe6060f1SDimitry Andric } 535fe6060f1SDimitry Andric 536fe6060f1SDimitry Andric // Switch block is the determinator, this is the final exit value. 537fe6060f1SDimitry Andric if (TPath.isExitValueSet() && BB == Path.front()) 538fe6060f1SDimitry Andric break; 539fe6060f1SDimitry Andric 540fe6060f1SDimitry Andric PrevBB = BB; 541fe6060f1SDimitry Andric } 542fe6060f1SDimitry Andric 5430eae32dcSDimitry Andric if (TPath.isExitValueSet() && isSupported(TPath)) 544fe6060f1SDimitry Andric TPaths.push_back(TPath); 545fe6060f1SDimitry Andric } 546fe6060f1SDimitry Andric } 547fe6060f1SDimitry Andric 548fe6060f1SDimitry Andric private: 549fe6060f1SDimitry Andric // Value: an instruction that defines a switch state; 550fe6060f1SDimitry Andric // Key: the parent basic block of that instruction. 551fe6060f1SDimitry Andric typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap; 552fe6060f1SDimitry Andric 553fe6060f1SDimitry Andric PathsType paths(BasicBlock *BB, VisitedBlocks &Visited, 554fe6060f1SDimitry Andric unsigned PathDepth) const { 555fe6060f1SDimitry Andric PathsType Res; 556fe6060f1SDimitry Andric 557fe6060f1SDimitry Andric // Stop exploring paths after visiting MaxPathLength blocks 558fe6060f1SDimitry Andric if (PathDepth > MaxPathLength) { 559fe6060f1SDimitry Andric ORE->emit([&]() { 560fe6060f1SDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached", 561fe6060f1SDimitry Andric Switch) 562fe6060f1SDimitry Andric << "Exploration stopped after visiting MaxPathLength=" 563fe6060f1SDimitry Andric << ore::NV("MaxPathLength", MaxPathLength) << " blocks."; 564fe6060f1SDimitry Andric }); 565fe6060f1SDimitry Andric return Res; 566fe6060f1SDimitry Andric } 567fe6060f1SDimitry Andric 568fe6060f1SDimitry Andric Visited.insert(BB); 569fe6060f1SDimitry Andric 570fe6060f1SDimitry Andric // Some blocks have multiple edges to the same successor, and this set 571fe6060f1SDimitry Andric // is used to prevent a duplicate path from being generated 572fe6060f1SDimitry Andric SmallSet<BasicBlock *, 4> Successors; 573349cc55cSDimitry Andric for (BasicBlock *Succ : successors(BB)) { 574349cc55cSDimitry Andric if (!Successors.insert(Succ).second) 575fe6060f1SDimitry Andric continue; 576fe6060f1SDimitry Andric 577fe6060f1SDimitry Andric // Found a cycle through the SwitchBlock 578fe6060f1SDimitry Andric if (Succ == SwitchBlock) { 579fe6060f1SDimitry Andric Res.push_back({BB}); 580fe6060f1SDimitry Andric continue; 581fe6060f1SDimitry Andric } 582fe6060f1SDimitry Andric 583fe6060f1SDimitry Andric // We have encountered a cycle, do not get caught in it 584349cc55cSDimitry Andric if (Visited.contains(Succ)) 585fe6060f1SDimitry Andric continue; 586fe6060f1SDimitry Andric 587fe6060f1SDimitry Andric PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1); 58806c3fb27SDimitry Andric for (const PathType &Path : SuccPaths) { 589fe6060f1SDimitry Andric PathType NewPath(Path); 590fe6060f1SDimitry Andric NewPath.push_front(BB); 591fe6060f1SDimitry Andric Res.push_back(NewPath); 59281ad6265SDimitry Andric if (Res.size() >= MaxNumPaths) { 59381ad6265SDimitry Andric return Res; 59481ad6265SDimitry Andric } 595fe6060f1SDimitry Andric } 596fe6060f1SDimitry Andric } 597fe6060f1SDimitry Andric // This block could now be visited again from a different predecessor. Note 598fe6060f1SDimitry Andric // that this will result in exponential runtime. Subpaths could possibly be 599fe6060f1SDimitry Andric // cached but it takes a lot of memory to store them. 600fe6060f1SDimitry Andric Visited.erase(BB); 601fe6060f1SDimitry Andric return Res; 602fe6060f1SDimitry Andric } 603fe6060f1SDimitry Andric 604fe6060f1SDimitry Andric /// Walk the use-def chain and collect all the state-defining instructions. 60581ad6265SDimitry Andric /// 60681ad6265SDimitry Andric /// Return an empty map if unpredictable values encountered inside the basic 60781ad6265SDimitry Andric /// blocks of \p LoopPaths. 60881ad6265SDimitry Andric StateDefMap getStateDefMap(const PathsType &LoopPaths) const { 609fe6060f1SDimitry Andric StateDefMap Res; 610fe6060f1SDimitry Andric 61181ad6265SDimitry Andric // Basic blocks belonging to any of the loops around the switch statement. 61281ad6265SDimitry Andric SmallPtrSet<BasicBlock *, 16> LoopBBs; 61381ad6265SDimitry Andric for (const PathType &Path : LoopPaths) { 61481ad6265SDimitry Andric for (BasicBlock *BB : Path) 61581ad6265SDimitry Andric LoopBBs.insert(BB); 61681ad6265SDimitry Andric } 61781ad6265SDimitry Andric 618fe6060f1SDimitry Andric Value *FirstDef = Switch->getOperand(0); 619fe6060f1SDimitry Andric 62081ad6265SDimitry Andric assert(isa<PHINode>(FirstDef) && "The first definition must be a phi."); 621fe6060f1SDimitry Andric 622fe6060f1SDimitry Andric SmallVector<PHINode *, 8> Stack; 623fe6060f1SDimitry Andric Stack.push_back(dyn_cast<PHINode>(FirstDef)); 624fe6060f1SDimitry Andric SmallSet<Value *, 16> SeenValues; 625fe6060f1SDimitry Andric 626fe6060f1SDimitry Andric while (!Stack.empty()) { 627349cc55cSDimitry Andric PHINode *CurPhi = Stack.pop_back_val(); 628fe6060f1SDimitry Andric 629fe6060f1SDimitry Andric Res[CurPhi->getParent()] = CurPhi; 630fe6060f1SDimitry Andric SeenValues.insert(CurPhi); 631fe6060f1SDimitry Andric 63281ad6265SDimitry Andric for (BasicBlock *IncomingBB : CurPhi->blocks()) { 63381ad6265SDimitry Andric Value *Incoming = CurPhi->getIncomingValueForBlock(IncomingBB); 63481ad6265SDimitry Andric bool IsOutsideLoops = LoopBBs.count(IncomingBB) == 0; 635fe6060f1SDimitry Andric if (Incoming == FirstDef || isa<ConstantInt>(Incoming) || 63681ad6265SDimitry Andric SeenValues.contains(Incoming) || IsOutsideLoops) { 637fe6060f1SDimitry Andric continue; 638fe6060f1SDimitry Andric } 639fe6060f1SDimitry Andric 64081ad6265SDimitry Andric // Any unpredictable value inside the loops means we must bail out. 64181ad6265SDimitry Andric if (!isa<PHINode>(Incoming)) 64281ad6265SDimitry Andric return StateDefMap(); 643fe6060f1SDimitry Andric 644fe6060f1SDimitry Andric Stack.push_back(cast<PHINode>(Incoming)); 645fe6060f1SDimitry Andric } 646fe6060f1SDimitry Andric } 647fe6060f1SDimitry Andric 648fe6060f1SDimitry Andric return Res; 649fe6060f1SDimitry Andric } 650fe6060f1SDimitry Andric 6510eae32dcSDimitry Andric /// The determinator BB should precede the switch-defining BB. 6520eae32dcSDimitry Andric /// 6530eae32dcSDimitry Andric /// Otherwise, it is possible that the state defined in the determinator block 6540eae32dcSDimitry Andric /// defines the state for the next iteration of the loop, rather than for the 6550eae32dcSDimitry Andric /// current one. 6560eae32dcSDimitry Andric /// 6570eae32dcSDimitry Andric /// Currently supported paths: 6580eae32dcSDimitry Andric /// \code 6590eae32dcSDimitry Andric /// < switch bb1 determ def > [ 42, determ ] 6600eae32dcSDimitry Andric /// < switch_and_def bb1 determ > [ 42, determ ] 6610eae32dcSDimitry Andric /// < switch_and_def_and_determ bb1 > [ 42, switch_and_def_and_determ ] 6620eae32dcSDimitry Andric /// \endcode 6630eae32dcSDimitry Andric /// 6640eae32dcSDimitry Andric /// Unsupported paths: 6650eae32dcSDimitry Andric /// \code 6660eae32dcSDimitry Andric /// < switch bb1 def determ > [ 43, determ ] 6670eae32dcSDimitry Andric /// < switch_and_determ bb1 def > [ 43, switch_and_determ ] 6680eae32dcSDimitry Andric /// \endcode 6690eae32dcSDimitry Andric bool isSupported(const ThreadingPath &TPath) { 6700eae32dcSDimitry Andric Instruction *SwitchCondI = dyn_cast<Instruction>(Switch->getCondition()); 6710eae32dcSDimitry Andric assert(SwitchCondI); 6720eae32dcSDimitry Andric if (!SwitchCondI) 6730eae32dcSDimitry Andric return false; 6740eae32dcSDimitry Andric 6750eae32dcSDimitry Andric const BasicBlock *SwitchCondDefBB = SwitchCondI->getParent(); 6760eae32dcSDimitry Andric const BasicBlock *SwitchCondUseBB = Switch->getParent(); 6770eae32dcSDimitry Andric const BasicBlock *DeterminatorBB = TPath.getDeterminatorBB(); 6780eae32dcSDimitry Andric 6790eae32dcSDimitry Andric assert( 6800eae32dcSDimitry Andric SwitchCondUseBB == TPath.getPath().front() && 6810eae32dcSDimitry Andric "The first BB in a threading path should have the switch instruction"); 6820eae32dcSDimitry Andric if (SwitchCondUseBB != TPath.getPath().front()) 6830eae32dcSDimitry Andric return false; 6840eae32dcSDimitry Andric 6850eae32dcSDimitry Andric // Make DeterminatorBB the first element in Path. 6860eae32dcSDimitry Andric PathType Path = TPath.getPath(); 687bdd1243dSDimitry Andric auto ItDet = llvm::find(Path, DeterminatorBB); 6880eae32dcSDimitry Andric std::rotate(Path.begin(), ItDet, Path.end()); 6890eae32dcSDimitry Andric 6900eae32dcSDimitry Andric bool IsDetBBSeen = false; 6910eae32dcSDimitry Andric bool IsDefBBSeen = false; 6920eae32dcSDimitry Andric bool IsUseBBSeen = false; 6930eae32dcSDimitry Andric for (BasicBlock *BB : Path) { 6940eae32dcSDimitry Andric if (BB == DeterminatorBB) 6950eae32dcSDimitry Andric IsDetBBSeen = true; 6960eae32dcSDimitry Andric if (BB == SwitchCondDefBB) 6970eae32dcSDimitry Andric IsDefBBSeen = true; 6980eae32dcSDimitry Andric if (BB == SwitchCondUseBB) 6990eae32dcSDimitry Andric IsUseBBSeen = true; 7000eae32dcSDimitry Andric if (IsDetBBSeen && IsUseBBSeen && !IsDefBBSeen) 7010eae32dcSDimitry Andric return false; 7020eae32dcSDimitry Andric } 7030eae32dcSDimitry Andric 7040eae32dcSDimitry Andric return true; 7050eae32dcSDimitry Andric } 7060eae32dcSDimitry Andric 707fe6060f1SDimitry Andric SwitchInst *Switch; 708fe6060f1SDimitry Andric BasicBlock *SwitchBlock; 709fe6060f1SDimitry Andric OptimizationRemarkEmitter *ORE; 710fe6060f1SDimitry Andric std::vector<ThreadingPath> TPaths; 711fe6060f1SDimitry Andric }; 712fe6060f1SDimitry Andric 713fe6060f1SDimitry Andric struct TransformDFA { 714fe6060f1SDimitry Andric TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT, 715fe6060f1SDimitry Andric AssumptionCache *AC, TargetTransformInfo *TTI, 716fe6060f1SDimitry Andric OptimizationRemarkEmitter *ORE, 717fe6060f1SDimitry Andric SmallPtrSet<const Value *, 32> EphValues) 718fe6060f1SDimitry Andric : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE), 719fe6060f1SDimitry Andric EphValues(EphValues) {} 720fe6060f1SDimitry Andric 721fe6060f1SDimitry Andric void run() { 722fe6060f1SDimitry Andric if (isLegalAndProfitableToTransform()) { 723fe6060f1SDimitry Andric createAllExitPaths(); 724fe6060f1SDimitry Andric NumTransforms++; 725fe6060f1SDimitry Andric } 726fe6060f1SDimitry Andric } 727fe6060f1SDimitry Andric 728fe6060f1SDimitry Andric private: 729fe6060f1SDimitry Andric /// This function performs both a legality check and profitability check at 730fe6060f1SDimitry Andric /// the same time since it is convenient to do so. It iterates through all 731fe6060f1SDimitry Andric /// blocks that will be cloned, and keeps track of the duplication cost. It 732fe6060f1SDimitry Andric /// also returns false if it is illegal to clone some required block. 733fe6060f1SDimitry Andric bool isLegalAndProfitableToTransform() { 734fe6060f1SDimitry Andric CodeMetrics Metrics; 735fe6060f1SDimitry Andric SwitchInst *Switch = SwitchPaths->getSwitchInst(); 736fe6060f1SDimitry Andric 737*5f757f3fSDimitry Andric // Don't thread switch without multiple successors. 738*5f757f3fSDimitry Andric if (Switch->getNumSuccessors() <= 1) 739*5f757f3fSDimitry Andric return false; 740*5f757f3fSDimitry Andric 741fe6060f1SDimitry Andric // Note that DuplicateBlockMap is not being used as intended here. It is 742fe6060f1SDimitry Andric // just being used to ensure (BB, State) pairs are only counted once. 743fe6060f1SDimitry Andric DuplicateBlockMap DuplicateMap; 744fe6060f1SDimitry Andric 745fe6060f1SDimitry Andric for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 746fe6060f1SDimitry Andric PathType PathBBs = TPath.getPath(); 747fe6060f1SDimitry Andric uint64_t NextState = TPath.getExitValue(); 748fe6060f1SDimitry Andric const BasicBlock *Determinator = TPath.getDeterminatorBB(); 749fe6060f1SDimitry Andric 750fe6060f1SDimitry Andric // Update Metrics for the Switch block, this is always cloned 751fe6060f1SDimitry Andric BasicBlock *BB = SwitchPaths->getSwitchBlock(); 752fe6060f1SDimitry Andric BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 753fe6060f1SDimitry Andric if (!VisitedBB) { 754fe6060f1SDimitry Andric Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 755fe6060f1SDimitry Andric DuplicateMap[BB].push_back({BB, NextState}); 756fe6060f1SDimitry Andric } 757fe6060f1SDimitry Andric 758fe6060f1SDimitry Andric // If the Switch block is the Determinator, then we can continue since 759fe6060f1SDimitry Andric // this is the only block that is cloned and we already counted for it. 760fe6060f1SDimitry Andric if (PathBBs.front() == Determinator) 761fe6060f1SDimitry Andric continue; 762fe6060f1SDimitry Andric 763fe6060f1SDimitry Andric // Otherwise update Metrics for all blocks that will be cloned. If any 764fe6060f1SDimitry Andric // block is already cloned and would be reused, don't double count it. 765bdd1243dSDimitry Andric auto DetIt = llvm::find(PathBBs, Determinator); 766fe6060f1SDimitry Andric for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 767fe6060f1SDimitry Andric BB = *BBIt; 768fe6060f1SDimitry Andric VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 769fe6060f1SDimitry Andric if (VisitedBB) 770fe6060f1SDimitry Andric continue; 771fe6060f1SDimitry Andric Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 772fe6060f1SDimitry Andric DuplicateMap[BB].push_back({BB, NextState}); 773fe6060f1SDimitry Andric } 774fe6060f1SDimitry Andric 775fe6060f1SDimitry Andric if (Metrics.notDuplicatable) { 776fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 777fe6060f1SDimitry Andric << "non-duplicatable instructions.\n"); 778fe6060f1SDimitry Andric ORE->emit([&]() { 779fe6060f1SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst", 780fe6060f1SDimitry Andric Switch) 781fe6060f1SDimitry Andric << "Contains non-duplicatable instructions."; 782fe6060f1SDimitry Andric }); 783fe6060f1SDimitry Andric return false; 784fe6060f1SDimitry Andric } 785fe6060f1SDimitry Andric 786fe6060f1SDimitry Andric if (Metrics.convergent) { 787fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 788fe6060f1SDimitry Andric << "convergent instructions.\n"); 789fe6060f1SDimitry Andric ORE->emit([&]() { 790fe6060f1SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) 791fe6060f1SDimitry Andric << "Contains convergent instructions."; 792fe6060f1SDimitry Andric }); 793fe6060f1SDimitry Andric return false; 794fe6060f1SDimitry Andric } 79581ad6265SDimitry Andric 79681ad6265SDimitry Andric if (!Metrics.NumInsts.isValid()) { 79781ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 79881ad6265SDimitry Andric << "instructions with invalid cost.\n"); 79981ad6265SDimitry Andric ORE->emit([&]() { 80081ad6265SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) 80181ad6265SDimitry Andric << "Contains instructions with invalid cost."; 80281ad6265SDimitry Andric }); 80381ad6265SDimitry Andric return false; 80481ad6265SDimitry Andric } 805fe6060f1SDimitry Andric } 806fe6060f1SDimitry Andric 807bdd1243dSDimitry Andric InstructionCost DuplicationCost = 0; 808fe6060f1SDimitry Andric 809fe6060f1SDimitry Andric unsigned JumpTableSize = 0; 810fe6060f1SDimitry Andric TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr, 811fe6060f1SDimitry Andric nullptr); 812fe6060f1SDimitry Andric if (JumpTableSize == 0) { 813fe6060f1SDimitry Andric // Factor in the number of conditional branches reduced from jump 814fe6060f1SDimitry Andric // threading. Assume that lowering the switch block is implemented by 815fe6060f1SDimitry Andric // using binary search, hence the LogBase2(). 816fe6060f1SDimitry Andric unsigned CondBranches = 817fe6060f1SDimitry Andric APInt(32, Switch->getNumSuccessors()).ceilLogBase2(); 818*5f757f3fSDimitry Andric assert(CondBranches > 0 && 819*5f757f3fSDimitry Andric "The threaded switch must have multiple branches"); 820bdd1243dSDimitry Andric DuplicationCost = Metrics.NumInsts / CondBranches; 821fe6060f1SDimitry Andric } else { 822fe6060f1SDimitry Andric // Compared with jump tables, the DFA optimizer removes an indirect branch 823fe6060f1SDimitry Andric // on each loop iteration, thus making branch prediction more precise. The 824fe6060f1SDimitry Andric // more branch targets there are, the more likely it is for the branch 825fe6060f1SDimitry Andric // predictor to make a mistake, and the more benefit there is in the DFA 826fe6060f1SDimitry Andric // optimizer. Thus, the more branch targets there are, the lower is the 827fe6060f1SDimitry Andric // cost of the DFA opt. 828bdd1243dSDimitry Andric DuplicationCost = Metrics.NumInsts / JumpTableSize; 829fe6060f1SDimitry Andric } 830fe6060f1SDimitry Andric 831fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block " 832fe6060f1SDimitry Andric << SwitchPaths->getSwitchBlock()->getName() 833fe6060f1SDimitry Andric << " is: " << DuplicationCost << "\n\n"); 834fe6060f1SDimitry Andric 835fe6060f1SDimitry Andric if (DuplicationCost > CostThreshold) { 836fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the " 837fe6060f1SDimitry Andric << "cost threshold.\n"); 838fe6060f1SDimitry Andric ORE->emit([&]() { 839fe6060f1SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch) 840fe6060f1SDimitry Andric << "Duplication cost exceeds the cost threshold (cost=" 841fe6060f1SDimitry Andric << ore::NV("Cost", DuplicationCost) 842fe6060f1SDimitry Andric << ", threshold=" << ore::NV("Threshold", CostThreshold) << ")."; 843fe6060f1SDimitry Andric }); 844fe6060f1SDimitry Andric return false; 845fe6060f1SDimitry Andric } 846fe6060f1SDimitry Andric 847fe6060f1SDimitry Andric ORE->emit([&]() { 848fe6060f1SDimitry Andric return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch) 849fe6060f1SDimitry Andric << "Switch statement jump-threaded."; 850fe6060f1SDimitry Andric }); 851fe6060f1SDimitry Andric 852fe6060f1SDimitry Andric return true; 853fe6060f1SDimitry Andric } 854fe6060f1SDimitry Andric 855fe6060f1SDimitry Andric /// Transform each threading path to effectively jump thread the DFA. 856fe6060f1SDimitry Andric void createAllExitPaths() { 857fe6060f1SDimitry Andric DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager); 858fe6060f1SDimitry Andric 859fe6060f1SDimitry Andric // Move the switch block to the end of the path, since it will be duplicated 860fe6060f1SDimitry Andric BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock(); 861fe6060f1SDimitry Andric for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 862fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << TPath << "\n"); 863fe6060f1SDimitry Andric PathType NewPath(TPath.getPath()); 864fe6060f1SDimitry Andric NewPath.push_back(SwitchBlock); 865fe6060f1SDimitry Andric TPath.setPath(NewPath); 866fe6060f1SDimitry Andric } 867fe6060f1SDimitry Andric 868fe6060f1SDimitry Andric // Transform the ThreadingPaths and keep track of the cloned values 869fe6060f1SDimitry Andric DuplicateBlockMap DuplicateMap; 870fe6060f1SDimitry Andric DefMap NewDefs; 871fe6060f1SDimitry Andric 872fe6060f1SDimitry Andric SmallSet<BasicBlock *, 16> BlocksToClean; 873fe6060f1SDimitry Andric for (BasicBlock *BB : successors(SwitchBlock)) 874fe6060f1SDimitry Andric BlocksToClean.insert(BB); 875fe6060f1SDimitry Andric 876fe6060f1SDimitry Andric for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 877fe6060f1SDimitry Andric createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); 878fe6060f1SDimitry Andric NumPaths++; 879fe6060f1SDimitry Andric } 880fe6060f1SDimitry Andric 881fe6060f1SDimitry Andric // After all paths are cloned, now update the last successor of the cloned 882fe6060f1SDimitry Andric // path so it skips over the switch statement 883fe6060f1SDimitry Andric for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) 884fe6060f1SDimitry Andric updateLastSuccessor(TPath, DuplicateMap, &DTU); 885fe6060f1SDimitry Andric 886fe6060f1SDimitry Andric // For each instruction that was cloned and used outside, update its uses 887fe6060f1SDimitry Andric updateSSA(NewDefs); 888fe6060f1SDimitry Andric 889fe6060f1SDimitry Andric // Clean PHI Nodes for the newly created blocks 890fe6060f1SDimitry Andric for (BasicBlock *BB : BlocksToClean) 891fe6060f1SDimitry Andric cleanPhiNodes(BB); 892fe6060f1SDimitry Andric } 893fe6060f1SDimitry Andric 894fe6060f1SDimitry Andric /// For a specific ThreadingPath \p Path, create an exit path starting from 895fe6060f1SDimitry Andric /// the determinator block. 896fe6060f1SDimitry Andric /// 897fe6060f1SDimitry Andric /// To remember the correct destination, we have to duplicate blocks 898fe6060f1SDimitry Andric /// corresponding to each state. Also update the terminating instruction of 899fe6060f1SDimitry Andric /// the predecessors, and phis in the successor blocks. 900fe6060f1SDimitry Andric void createExitPath(DefMap &NewDefs, ThreadingPath &Path, 901fe6060f1SDimitry Andric DuplicateBlockMap &DuplicateMap, 902fe6060f1SDimitry Andric SmallSet<BasicBlock *, 16> &BlocksToClean, 903fe6060f1SDimitry Andric DomTreeUpdater *DTU) { 904fe6060f1SDimitry Andric uint64_t NextState = Path.getExitValue(); 905fe6060f1SDimitry Andric const BasicBlock *Determinator = Path.getDeterminatorBB(); 906fe6060f1SDimitry Andric PathType PathBBs = Path.getPath(); 907fe6060f1SDimitry Andric 908fe6060f1SDimitry Andric // Don't select the placeholder block in front 909fe6060f1SDimitry Andric if (PathBBs.front() == Determinator) 910fe6060f1SDimitry Andric PathBBs.pop_front(); 911fe6060f1SDimitry Andric 912bdd1243dSDimitry Andric auto DetIt = llvm::find(PathBBs, Determinator); 913fe6060f1SDimitry Andric auto Prev = std::prev(DetIt); 914fe6060f1SDimitry Andric BasicBlock *PrevBB = *Prev; 915fe6060f1SDimitry Andric for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 916fe6060f1SDimitry Andric BasicBlock *BB = *BBIt; 917fe6060f1SDimitry Andric BlocksToClean.insert(BB); 918fe6060f1SDimitry Andric 919fe6060f1SDimitry Andric // We already cloned BB for this NextState, now just update the branch 920fe6060f1SDimitry Andric // and continue. 921fe6060f1SDimitry Andric BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap); 922fe6060f1SDimitry Andric if (NextBB) { 923fe6060f1SDimitry Andric updatePredecessor(PrevBB, BB, NextBB, DTU); 924fe6060f1SDimitry Andric PrevBB = NextBB; 925fe6060f1SDimitry Andric continue; 926fe6060f1SDimitry Andric } 927fe6060f1SDimitry Andric 928fe6060f1SDimitry Andric // Clone the BB and update the successor of Prev to jump to the new block 929fe6060f1SDimitry Andric BasicBlock *NewBB = cloneBlockAndUpdatePredecessor( 930fe6060f1SDimitry Andric BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU); 931fe6060f1SDimitry Andric DuplicateMap[BB].push_back({NewBB, NextState}); 932fe6060f1SDimitry Andric BlocksToClean.insert(NewBB); 933fe6060f1SDimitry Andric PrevBB = NewBB; 934fe6060f1SDimitry Andric } 935fe6060f1SDimitry Andric } 936fe6060f1SDimitry Andric 937fe6060f1SDimitry Andric /// Restore SSA form after cloning blocks. 938fe6060f1SDimitry Andric /// 939fe6060f1SDimitry Andric /// Each cloned block creates new defs for a variable, and the uses need to be 940fe6060f1SDimitry Andric /// updated to reflect this. The uses may be replaced with a cloned value, or 941fe6060f1SDimitry Andric /// some derived phi instruction. Note that all uses of a value defined in the 942fe6060f1SDimitry Andric /// same block were already remapped when cloning the block. 943fe6060f1SDimitry Andric void updateSSA(DefMap &NewDefs) { 944fe6060f1SDimitry Andric SSAUpdaterBulk SSAUpdate; 945fe6060f1SDimitry Andric SmallVector<Use *, 16> UsesToRename; 946fe6060f1SDimitry Andric 94706c3fb27SDimitry Andric for (const auto &KV : NewDefs) { 948fe6060f1SDimitry Andric Instruction *I = KV.first; 949fe6060f1SDimitry Andric BasicBlock *BB = I->getParent(); 950fe6060f1SDimitry Andric std::vector<Instruction *> Cloned = KV.second; 951fe6060f1SDimitry Andric 952fe6060f1SDimitry Andric // Scan all uses of this instruction to see if it is used outside of its 953fe6060f1SDimitry Andric // block, and if so, record them in UsesToRename. 954fe6060f1SDimitry Andric for (Use &U : I->uses()) { 955fe6060f1SDimitry Andric Instruction *User = cast<Instruction>(U.getUser()); 956fe6060f1SDimitry Andric if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 957fe6060f1SDimitry Andric if (UserPN->getIncomingBlock(U) == BB) 958fe6060f1SDimitry Andric continue; 959fe6060f1SDimitry Andric } else if (User->getParent() == BB) { 960fe6060f1SDimitry Andric continue; 961fe6060f1SDimitry Andric } 962fe6060f1SDimitry Andric 963fe6060f1SDimitry Andric UsesToRename.push_back(&U); 964fe6060f1SDimitry Andric } 965fe6060f1SDimitry Andric 966fe6060f1SDimitry Andric // If there are no uses outside the block, we're done with this 967fe6060f1SDimitry Andric // instruction. 968fe6060f1SDimitry Andric if (UsesToRename.empty()) 969fe6060f1SDimitry Andric continue; 970fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I 971fe6060f1SDimitry Andric << "\n"); 972fe6060f1SDimitry Andric 973fe6060f1SDimitry Andric // We found a use of I outside of BB. Rename all uses of I that are 974fe6060f1SDimitry Andric // outside its block to be uses of the appropriate PHI node etc. See 975fe6060f1SDimitry Andric // ValuesInBlocks with the values we know. 976fe6060f1SDimitry Andric unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType()); 977fe6060f1SDimitry Andric SSAUpdate.AddAvailableValue(VarNum, BB, I); 978fe6060f1SDimitry Andric for (Instruction *New : Cloned) 979fe6060f1SDimitry Andric SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New); 980fe6060f1SDimitry Andric 981fe6060f1SDimitry Andric while (!UsesToRename.empty()) 982fe6060f1SDimitry Andric SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val()); 983fe6060f1SDimitry Andric 984fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 985fe6060f1SDimitry Andric } 986fe6060f1SDimitry Andric // SSAUpdater handles phi placement and renaming uses with the appropriate 987fe6060f1SDimitry Andric // value. 988fe6060f1SDimitry Andric SSAUpdate.RewriteAllUses(DT); 989fe6060f1SDimitry Andric } 990fe6060f1SDimitry Andric 991fe6060f1SDimitry Andric /// Clones a basic block, and adds it to the CFG. 992fe6060f1SDimitry Andric /// 993fe6060f1SDimitry Andric /// This function also includes updating phi nodes in the successors of the 994fe6060f1SDimitry Andric /// BB, and remapping uses that were defined locally in the cloned BB. 995fe6060f1SDimitry Andric BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB, 996fe6060f1SDimitry Andric uint64_t NextState, 997fe6060f1SDimitry Andric DuplicateBlockMap &DuplicateMap, 998fe6060f1SDimitry Andric DefMap &NewDefs, 999fe6060f1SDimitry Andric DomTreeUpdater *DTU) { 1000fe6060f1SDimitry Andric ValueToValueMapTy VMap; 1001fe6060f1SDimitry Andric BasicBlock *NewBB = CloneBasicBlock( 1002fe6060f1SDimitry Andric BB, VMap, ".jt" + std::to_string(NextState), BB->getParent()); 1003fe6060f1SDimitry Andric NewBB->moveAfter(BB); 1004fe6060f1SDimitry Andric NumCloned++; 1005fe6060f1SDimitry Andric 1006fe6060f1SDimitry Andric for (Instruction &I : *NewBB) { 1007fe6060f1SDimitry Andric // Do not remap operands of PHINode in case a definition in BB is an 1008fe6060f1SDimitry Andric // incoming value to a phi in the same block. This incoming value will 1009fe6060f1SDimitry Andric // be renamed later while restoring SSA. 1010fe6060f1SDimitry Andric if (isa<PHINode>(&I)) 1011fe6060f1SDimitry Andric continue; 1012fe6060f1SDimitry Andric RemapInstruction(&I, VMap, 1013fe6060f1SDimitry Andric RF_IgnoreMissingLocals | RF_NoModuleLevelChanges); 1014fe6060f1SDimitry Andric if (AssumeInst *II = dyn_cast<AssumeInst>(&I)) 1015fe6060f1SDimitry Andric AC->registerAssumption(II); 1016fe6060f1SDimitry Andric } 1017fe6060f1SDimitry Andric 1018fe6060f1SDimitry Andric updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap); 1019fe6060f1SDimitry Andric updatePredecessor(PrevBB, BB, NewBB, DTU); 1020fe6060f1SDimitry Andric updateDefMap(NewDefs, VMap); 1021fe6060f1SDimitry Andric 1022fe6060f1SDimitry Andric // Add all successors to the DominatorTree 1023fe6060f1SDimitry Andric SmallPtrSet<BasicBlock *, 4> SuccSet; 1024fe6060f1SDimitry Andric for (auto *SuccBB : successors(NewBB)) { 1025fe6060f1SDimitry Andric if (SuccSet.insert(SuccBB).second) 1026fe6060f1SDimitry Andric DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}}); 1027fe6060f1SDimitry Andric } 1028fe6060f1SDimitry Andric SuccSet.clear(); 1029fe6060f1SDimitry Andric return NewBB; 1030fe6060f1SDimitry Andric } 1031fe6060f1SDimitry Andric 1032fe6060f1SDimitry Andric /// Update the phi nodes in BB's successors. 1033fe6060f1SDimitry Andric /// 1034fe6060f1SDimitry Andric /// This means creating a new incoming value from NewBB with the new 1035fe6060f1SDimitry Andric /// instruction wherever there is an incoming value from BB. 1036fe6060f1SDimitry Andric void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB, 1037fe6060f1SDimitry Andric uint64_t NextState, ValueToValueMapTy &VMap, 1038fe6060f1SDimitry Andric DuplicateBlockMap &DuplicateMap) { 1039fe6060f1SDimitry Andric std::vector<BasicBlock *> BlocksToUpdate; 1040fe6060f1SDimitry Andric 1041fe6060f1SDimitry Andric // If BB is the last block in the path, we can simply update the one case 1042fe6060f1SDimitry Andric // successor that will be reached. 1043fe6060f1SDimitry Andric if (BB == SwitchPaths->getSwitchBlock()) { 1044fe6060f1SDimitry Andric SwitchInst *Switch = SwitchPaths->getSwitchInst(); 1045fe6060f1SDimitry Andric BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 1046fe6060f1SDimitry Andric BlocksToUpdate.push_back(NextCase); 1047fe6060f1SDimitry Andric BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap); 1048fe6060f1SDimitry Andric if (ClonedSucc) 1049fe6060f1SDimitry Andric BlocksToUpdate.push_back(ClonedSucc); 1050fe6060f1SDimitry Andric } 1051fe6060f1SDimitry Andric // Otherwise update phis in all successors. 1052fe6060f1SDimitry Andric else { 1053fe6060f1SDimitry Andric for (BasicBlock *Succ : successors(BB)) { 1054fe6060f1SDimitry Andric BlocksToUpdate.push_back(Succ); 1055fe6060f1SDimitry Andric 1056fe6060f1SDimitry Andric // Check if a successor has already been cloned for the particular exit 1057fe6060f1SDimitry Andric // value. In this case if a successor was already cloned, the phi nodes 1058fe6060f1SDimitry Andric // in the cloned block should be updated directly. 1059fe6060f1SDimitry Andric BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap); 1060fe6060f1SDimitry Andric if (ClonedSucc) 1061fe6060f1SDimitry Andric BlocksToUpdate.push_back(ClonedSucc); 1062fe6060f1SDimitry Andric } 1063fe6060f1SDimitry Andric } 1064fe6060f1SDimitry Andric 1065fe6060f1SDimitry Andric // If there is a phi with an incoming value from BB, create a new incoming 1066fe6060f1SDimitry Andric // value for the new predecessor ClonedBB. The value will either be the same 1067fe6060f1SDimitry Andric // value from BB or a cloned value. 1068fe6060f1SDimitry Andric for (BasicBlock *Succ : BlocksToUpdate) { 1069fe6060f1SDimitry Andric for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II); 1070fe6060f1SDimitry Andric ++II) { 1071fe6060f1SDimitry Andric Value *Incoming = Phi->getIncomingValueForBlock(BB); 1072fe6060f1SDimitry Andric if (Incoming) { 1073fe6060f1SDimitry Andric if (isa<Constant>(Incoming)) { 1074fe6060f1SDimitry Andric Phi->addIncoming(Incoming, ClonedBB); 1075fe6060f1SDimitry Andric continue; 1076fe6060f1SDimitry Andric } 1077fe6060f1SDimitry Andric Value *ClonedVal = VMap[Incoming]; 1078fe6060f1SDimitry Andric if (ClonedVal) 1079fe6060f1SDimitry Andric Phi->addIncoming(ClonedVal, ClonedBB); 1080fe6060f1SDimitry Andric else 1081fe6060f1SDimitry Andric Phi->addIncoming(Incoming, ClonedBB); 1082fe6060f1SDimitry Andric } 1083fe6060f1SDimitry Andric } 1084fe6060f1SDimitry Andric } 1085fe6060f1SDimitry Andric } 1086fe6060f1SDimitry Andric 1087fe6060f1SDimitry Andric /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all 1088fe6060f1SDimitry Andric /// other successors are kept as well. 1089fe6060f1SDimitry Andric void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB, 1090fe6060f1SDimitry Andric BasicBlock *NewBB, DomTreeUpdater *DTU) { 1091fe6060f1SDimitry Andric // When a path is reused, there is a chance that predecessors were already 1092fe6060f1SDimitry Andric // updated before. Check if the predecessor needs to be updated first. 1093fe6060f1SDimitry Andric if (!isPredecessor(OldBB, PrevBB)) 1094fe6060f1SDimitry Andric return; 1095fe6060f1SDimitry Andric 1096fe6060f1SDimitry Andric Instruction *PrevTerm = PrevBB->getTerminator(); 1097fe6060f1SDimitry Andric for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) { 1098fe6060f1SDimitry Andric if (PrevTerm->getSuccessor(Idx) == OldBB) { 1099fe6060f1SDimitry Andric OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true); 1100fe6060f1SDimitry Andric PrevTerm->setSuccessor(Idx, NewBB); 1101fe6060f1SDimitry Andric } 1102fe6060f1SDimitry Andric } 1103fe6060f1SDimitry Andric DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB}, 1104fe6060f1SDimitry Andric {DominatorTree::Insert, PrevBB, NewBB}}); 1105fe6060f1SDimitry Andric } 1106fe6060f1SDimitry Andric 1107fe6060f1SDimitry Andric /// Add new value mappings to the DefMap to keep track of all new definitions 1108fe6060f1SDimitry Andric /// for a particular instruction. These will be used while updating SSA form. 1109fe6060f1SDimitry Andric void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) { 11101fd87a68SDimitry Andric SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector; 11111fd87a68SDimitry Andric NewDefsVector.reserve(VMap.size()); 11121fd87a68SDimitry Andric 1113fe6060f1SDimitry Andric for (auto Entry : VMap) { 1114fe6060f1SDimitry Andric Instruction *Inst = 1115fe6060f1SDimitry Andric dyn_cast<Instruction>(const_cast<Value *>(Entry.first)); 1116fe6060f1SDimitry Andric if (!Inst || !Entry.second || isa<BranchInst>(Inst) || 1117fe6060f1SDimitry Andric isa<SwitchInst>(Inst)) { 1118fe6060f1SDimitry Andric continue; 1119fe6060f1SDimitry Andric } 1120fe6060f1SDimitry Andric 1121fe6060f1SDimitry Andric Instruction *Cloned = dyn_cast<Instruction>(Entry.second); 1122fe6060f1SDimitry Andric if (!Cloned) 1123fe6060f1SDimitry Andric continue; 1124fe6060f1SDimitry Andric 11251fd87a68SDimitry Andric NewDefsVector.push_back({Inst, Cloned}); 1126fe6060f1SDimitry Andric } 11271fd87a68SDimitry Andric 11281fd87a68SDimitry Andric // Sort the defs to get deterministic insertion order into NewDefs. 11291fd87a68SDimitry Andric sort(NewDefsVector, [](const auto &LHS, const auto &RHS) { 11301fd87a68SDimitry Andric if (LHS.first == RHS.first) 11311fd87a68SDimitry Andric return LHS.second->comesBefore(RHS.second); 11321fd87a68SDimitry Andric return LHS.first->comesBefore(RHS.first); 11331fd87a68SDimitry Andric }); 11341fd87a68SDimitry Andric 11351fd87a68SDimitry Andric for (const auto &KV : NewDefsVector) 11361fd87a68SDimitry Andric NewDefs[KV.first].push_back(KV.second); 1137fe6060f1SDimitry Andric } 1138fe6060f1SDimitry Andric 1139fe6060f1SDimitry Andric /// Update the last branch of a particular cloned path to point to the correct 1140fe6060f1SDimitry Andric /// case successor. 1141fe6060f1SDimitry Andric /// 1142fe6060f1SDimitry Andric /// Note that this is an optional step and would have been done in later 1143fe6060f1SDimitry Andric /// optimizations, but it makes the CFG significantly easier to work with. 1144fe6060f1SDimitry Andric void updateLastSuccessor(ThreadingPath &TPath, 1145fe6060f1SDimitry Andric DuplicateBlockMap &DuplicateMap, 1146fe6060f1SDimitry Andric DomTreeUpdater *DTU) { 1147fe6060f1SDimitry Andric uint64_t NextState = TPath.getExitValue(); 1148fe6060f1SDimitry Andric BasicBlock *BB = TPath.getPath().back(); 1149fe6060f1SDimitry Andric BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap); 1150fe6060f1SDimitry Andric 1151fe6060f1SDimitry Andric // Note multiple paths can end at the same block so check that it is not 1152fe6060f1SDimitry Andric // updated yet 1153fe6060f1SDimitry Andric if (!isa<SwitchInst>(LastBlock->getTerminator())) 1154fe6060f1SDimitry Andric return; 1155fe6060f1SDimitry Andric SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator()); 1156fe6060f1SDimitry Andric BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 1157fe6060f1SDimitry Andric 1158fe6060f1SDimitry Andric std::vector<DominatorTree::UpdateType> DTUpdates; 1159fe6060f1SDimitry Andric SmallPtrSet<BasicBlock *, 4> SuccSet; 1160fe6060f1SDimitry Andric for (BasicBlock *Succ : successors(LastBlock)) { 1161fe6060f1SDimitry Andric if (Succ != NextCase && SuccSet.insert(Succ).second) 1162fe6060f1SDimitry Andric DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ}); 1163fe6060f1SDimitry Andric } 1164fe6060f1SDimitry Andric 1165fe6060f1SDimitry Andric Switch->eraseFromParent(); 1166fe6060f1SDimitry Andric BranchInst::Create(NextCase, LastBlock); 1167fe6060f1SDimitry Andric 1168fe6060f1SDimitry Andric DTU->applyUpdates(DTUpdates); 1169fe6060f1SDimitry Andric } 1170fe6060f1SDimitry Andric 1171fe6060f1SDimitry Andric /// After cloning blocks, some of the phi nodes have extra incoming values 1172fe6060f1SDimitry Andric /// that are no longer used. This function removes them. 1173fe6060f1SDimitry Andric void cleanPhiNodes(BasicBlock *BB) { 1174fe6060f1SDimitry Andric // If BB is no longer reachable, remove any remaining phi nodes 1175fe6060f1SDimitry Andric if (pred_empty(BB)) { 1176fe6060f1SDimitry Andric std::vector<PHINode *> PhiToRemove; 1177fe6060f1SDimitry Andric for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 1178fe6060f1SDimitry Andric PhiToRemove.push_back(Phi); 1179fe6060f1SDimitry Andric } 1180fe6060f1SDimitry Andric for (PHINode *PN : PhiToRemove) { 118181ad6265SDimitry Andric PN->replaceAllUsesWith(PoisonValue::get(PN->getType())); 1182fe6060f1SDimitry Andric PN->eraseFromParent(); 1183fe6060f1SDimitry Andric } 1184fe6060f1SDimitry Andric return; 1185fe6060f1SDimitry Andric } 1186fe6060f1SDimitry Andric 1187fe6060f1SDimitry Andric // Remove any incoming values that come from an invalid predecessor 1188fe6060f1SDimitry Andric for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 1189fe6060f1SDimitry Andric std::vector<BasicBlock *> BlocksToRemove; 1190fe6060f1SDimitry Andric for (BasicBlock *IncomingBB : Phi->blocks()) { 1191fe6060f1SDimitry Andric if (!isPredecessor(BB, IncomingBB)) 1192fe6060f1SDimitry Andric BlocksToRemove.push_back(IncomingBB); 1193fe6060f1SDimitry Andric } 1194fe6060f1SDimitry Andric for (BasicBlock *BB : BlocksToRemove) 1195fe6060f1SDimitry Andric Phi->removeIncomingValue(BB); 1196fe6060f1SDimitry Andric } 1197fe6060f1SDimitry Andric } 1198fe6060f1SDimitry Andric 1199fe6060f1SDimitry Andric /// Checks if BB was already cloned for a particular next state value. If it 1200fe6060f1SDimitry Andric /// was then it returns this cloned block, and otherwise null. 1201fe6060f1SDimitry Andric BasicBlock *getClonedBB(BasicBlock *BB, uint64_t NextState, 1202fe6060f1SDimitry Andric DuplicateBlockMap &DuplicateMap) { 1203fe6060f1SDimitry Andric CloneList ClonedBBs = DuplicateMap[BB]; 1204fe6060f1SDimitry Andric 1205fe6060f1SDimitry Andric // Find an entry in the CloneList with this NextState. If it exists then 1206fe6060f1SDimitry Andric // return the corresponding BB 1207fe6060f1SDimitry Andric auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) { 1208fe6060f1SDimitry Andric return C.State == NextState; 1209fe6060f1SDimitry Andric }); 1210fe6060f1SDimitry Andric return It != ClonedBBs.end() ? (*It).BB : nullptr; 1211fe6060f1SDimitry Andric } 1212fe6060f1SDimitry Andric 1213fe6060f1SDimitry Andric /// Helper to get the successor corresponding to a particular case value for 1214fe6060f1SDimitry Andric /// a switch statement. 1215fe6060f1SDimitry Andric BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, uint64_t NextState) { 1216fe6060f1SDimitry Andric BasicBlock *NextCase = nullptr; 1217fe6060f1SDimitry Andric for (auto Case : Switch->cases()) { 1218fe6060f1SDimitry Andric if (Case.getCaseValue()->getZExtValue() == NextState) { 1219fe6060f1SDimitry Andric NextCase = Case.getCaseSuccessor(); 1220fe6060f1SDimitry Andric break; 1221fe6060f1SDimitry Andric } 1222fe6060f1SDimitry Andric } 1223fe6060f1SDimitry Andric if (!NextCase) 1224fe6060f1SDimitry Andric NextCase = Switch->getDefaultDest(); 1225fe6060f1SDimitry Andric return NextCase; 1226fe6060f1SDimitry Andric } 1227fe6060f1SDimitry Andric 1228fe6060f1SDimitry Andric /// Returns true if IncomingBB is a predecessor of BB. 1229fe6060f1SDimitry Andric bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { 123081ad6265SDimitry Andric return llvm::is_contained(predecessors(BB), IncomingBB); 1231fe6060f1SDimitry Andric } 1232fe6060f1SDimitry Andric 1233fe6060f1SDimitry Andric AllSwitchPaths *SwitchPaths; 1234fe6060f1SDimitry Andric DominatorTree *DT; 1235fe6060f1SDimitry Andric AssumptionCache *AC; 1236fe6060f1SDimitry Andric TargetTransformInfo *TTI; 1237fe6060f1SDimitry Andric OptimizationRemarkEmitter *ORE; 1238fe6060f1SDimitry Andric SmallPtrSet<const Value *, 32> EphValues; 1239fe6060f1SDimitry Andric std::vector<ThreadingPath> TPaths; 1240fe6060f1SDimitry Andric }; 1241fe6060f1SDimitry Andric 1242fe6060f1SDimitry Andric bool DFAJumpThreading::run(Function &F) { 1243fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n"); 1244fe6060f1SDimitry Andric 1245fe6060f1SDimitry Andric if (F.hasOptSize()) { 1246fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n"); 1247fe6060f1SDimitry Andric return false; 1248fe6060f1SDimitry Andric } 1249fe6060f1SDimitry Andric 1250fe6060f1SDimitry Andric if (ClViewCfgBefore) 1251fe6060f1SDimitry Andric F.viewCFG(); 1252fe6060f1SDimitry Andric 1253fe6060f1SDimitry Andric SmallVector<AllSwitchPaths, 2> ThreadableLoops; 1254fe6060f1SDimitry Andric bool MadeChanges = false; 1255fe6060f1SDimitry Andric 1256fe6060f1SDimitry Andric for (BasicBlock &BB : F) { 1257fe6060f1SDimitry Andric auto *SI = dyn_cast<SwitchInst>(BB.getTerminator()); 1258fe6060f1SDimitry Andric if (!SI) 1259fe6060f1SDimitry Andric continue; 1260fe6060f1SDimitry Andric 1261fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName() 126281ad6265SDimitry Andric << " is a candidate\n"); 1263fe6060f1SDimitry Andric MainSwitch Switch(SI, ORE); 1264fe6060f1SDimitry Andric 1265fe6060f1SDimitry Andric if (!Switch.getInstr()) 1266fe6060f1SDimitry Andric continue; 1267fe6060f1SDimitry Andric 1268fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a " 1269fe6060f1SDimitry Andric << "candidate for jump threading\n"); 1270fe6060f1SDimitry Andric LLVM_DEBUG(SI->dump()); 1271fe6060f1SDimitry Andric 1272fe6060f1SDimitry Andric unfoldSelectInstrs(DT, Switch.getSelectInsts()); 1273fe6060f1SDimitry Andric if (!Switch.getSelectInsts().empty()) 1274fe6060f1SDimitry Andric MadeChanges = true; 1275fe6060f1SDimitry Andric 1276fe6060f1SDimitry Andric AllSwitchPaths SwitchPaths(&Switch, ORE); 1277fe6060f1SDimitry Andric SwitchPaths.run(); 1278fe6060f1SDimitry Andric 1279fe6060f1SDimitry Andric if (SwitchPaths.getNumThreadingPaths() > 0) { 1280fe6060f1SDimitry Andric ThreadableLoops.push_back(SwitchPaths); 1281fe6060f1SDimitry Andric 1282fe6060f1SDimitry Andric // For the time being limit this optimization to occurring once in a 1283fe6060f1SDimitry Andric // function since it can change the CFG significantly. This is not a 1284fe6060f1SDimitry Andric // strict requirement but it can cause buggy behavior if there is an 1285fe6060f1SDimitry Andric // overlap of blocks in different opportunities. There is a lot of room to 1286fe6060f1SDimitry Andric // experiment with catching more opportunities here. 1287fe6060f1SDimitry Andric break; 1288fe6060f1SDimitry Andric } 1289fe6060f1SDimitry Andric } 1290fe6060f1SDimitry Andric 1291fe6060f1SDimitry Andric SmallPtrSet<const Value *, 32> EphValues; 1292fe6060f1SDimitry Andric if (ThreadableLoops.size() > 0) 1293fe6060f1SDimitry Andric CodeMetrics::collectEphemeralValues(&F, AC, EphValues); 1294fe6060f1SDimitry Andric 1295fe6060f1SDimitry Andric for (AllSwitchPaths SwitchPaths : ThreadableLoops) { 1296fe6060f1SDimitry Andric TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); 1297fe6060f1SDimitry Andric Transform.run(); 1298fe6060f1SDimitry Andric MadeChanges = true; 1299fe6060f1SDimitry Andric } 1300fe6060f1SDimitry Andric 1301fe6060f1SDimitry Andric #ifdef EXPENSIVE_CHECKS 1302fe6060f1SDimitry Andric assert(DT->verify(DominatorTree::VerificationLevel::Full)); 1303fe6060f1SDimitry Andric verifyFunction(F, &dbgs()); 1304fe6060f1SDimitry Andric #endif 1305fe6060f1SDimitry Andric 1306fe6060f1SDimitry Andric return MadeChanges; 1307fe6060f1SDimitry Andric } 1308fe6060f1SDimitry Andric 1309fe6060f1SDimitry Andric } // end anonymous namespace 1310fe6060f1SDimitry Andric 1311fe6060f1SDimitry Andric /// Integrate with the new Pass Manager 1312fe6060f1SDimitry Andric PreservedAnalyses DFAJumpThreadingPass::run(Function &F, 1313fe6060f1SDimitry Andric FunctionAnalysisManager &AM) { 1314fe6060f1SDimitry Andric AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F); 1315fe6060f1SDimitry Andric DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); 1316fe6060f1SDimitry Andric TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); 1317fe6060f1SDimitry Andric OptimizationRemarkEmitter ORE(&F); 1318fe6060f1SDimitry Andric 1319fe6060f1SDimitry Andric if (!DFAJumpThreading(&AC, &DT, &TTI, &ORE).run(F)) 1320fe6060f1SDimitry Andric return PreservedAnalyses::all(); 1321fe6060f1SDimitry Andric 1322fe6060f1SDimitry Andric PreservedAnalyses PA; 1323fe6060f1SDimitry Andric PA.preserve<DominatorTreeAnalysis>(); 1324fe6060f1SDimitry Andric return PA; 1325fe6060f1SDimitry Andric } 1326