xref: /llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp (revision 5fb57131b744c52f74919f9487f4a9fa69f455fb)
102077da7SAlexey Zhikhartsev //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//
202077da7SAlexey Zhikhartsev //
3c874dd53SChristopher Di Bella // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4c874dd53SChristopher Di Bella // See https://llvm.org/LICENSE.txt for license information.
5c874dd53SChristopher Di Bella // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
602077da7SAlexey Zhikhartsev //
702077da7SAlexey Zhikhartsev //===----------------------------------------------------------------------===//
802077da7SAlexey Zhikhartsev //
902077da7SAlexey Zhikhartsev // Transform each threading path to effectively jump thread the DFA. For
1002077da7SAlexey Zhikhartsev // example, the CFG below could be transformed as follows, where the cloned
1102077da7SAlexey Zhikhartsev // blocks unconditionally branch to the next correct case based on what is
1202077da7SAlexey Zhikhartsev // identified in the analysis.
1302077da7SAlexey Zhikhartsev //
1402077da7SAlexey Zhikhartsev //          sw.bb                        sw.bb
1502077da7SAlexey Zhikhartsev //        /   |   \                    /   |   \
1602077da7SAlexey Zhikhartsev //   case1  case2  case3          case1  case2  case3
1702077da7SAlexey Zhikhartsev //        \   |   /                 |      |      |
1802077da7SAlexey Zhikhartsev //       determinator            det.2   det.3  det.1
1902077da7SAlexey Zhikhartsev //        br sw.bb                /        |        \
2002077da7SAlexey Zhikhartsev //                          sw.bb.2     sw.bb.3     sw.bb.1
2102077da7SAlexey Zhikhartsev //                           br case2    br case3    br case1§
2202077da7SAlexey Zhikhartsev //
2302077da7SAlexey Zhikhartsev // Definitions and Terminology:
2402077da7SAlexey Zhikhartsev //
2502077da7SAlexey Zhikhartsev // * Threading path:
2602077da7SAlexey Zhikhartsev //   a list of basic blocks, the exit state, and the block that determines
2702077da7SAlexey Zhikhartsev //   the next state, for which the following notation will be used:
2802077da7SAlexey Zhikhartsev //   < path of BBs that form a cycle > [ state, determinator ]
2902077da7SAlexey Zhikhartsev //
3002077da7SAlexey Zhikhartsev // * Predictable switch:
3102077da7SAlexey Zhikhartsev //   The switch variable is always a known constant so that all conditional
3202077da7SAlexey Zhikhartsev //   jumps based on switch variable can be converted to unconditional jump.
3302077da7SAlexey Zhikhartsev //
3402077da7SAlexey Zhikhartsev // * Determinator:
3502077da7SAlexey Zhikhartsev //   The basic block that determines the next state of the DFA.
3602077da7SAlexey Zhikhartsev //
3702077da7SAlexey Zhikhartsev // Representing the optimization in C-like pseudocode: the code pattern on the
3802077da7SAlexey Zhikhartsev // left could functionally be transformed to the right pattern if the switch
3902077da7SAlexey Zhikhartsev // condition is predictable.
4002077da7SAlexey Zhikhartsev //
4102077da7SAlexey Zhikhartsev //  X = A                       goto A
4202077da7SAlexey Zhikhartsev //  for (...)                   A:
4302077da7SAlexey Zhikhartsev //    switch (X)                  ...
4402077da7SAlexey Zhikhartsev //      case A                    goto B
4502077da7SAlexey Zhikhartsev //        X = B                 B:
4602077da7SAlexey Zhikhartsev //      case B                    ...
4702077da7SAlexey Zhikhartsev //        X = C                   goto C
4802077da7SAlexey Zhikhartsev //
4902077da7SAlexey Zhikhartsev // The pass first checks that switch variable X is decided by the control flow
5002077da7SAlexey Zhikhartsev // path taken in the loop; for example, in case B, the next value of X is
5102077da7SAlexey Zhikhartsev // decided to be C. It then enumerates through all paths in the loop and labels
5202077da7SAlexey Zhikhartsev // the basic blocks where the next state is decided.
5302077da7SAlexey Zhikhartsev //
5402077da7SAlexey Zhikhartsev // Using this information it creates new paths that unconditionally branch to
5502077da7SAlexey Zhikhartsev // the next case. This involves cloning code, so it only gets triggered if the
5602077da7SAlexey Zhikhartsev // amount of code duplicated is below a threshold.
5702077da7SAlexey Zhikhartsev //
5802077da7SAlexey Zhikhartsev //===----------------------------------------------------------------------===//
5902077da7SAlexey Zhikhartsev 
6002077da7SAlexey Zhikhartsev #include "llvm/Transforms/Scalar/DFAJumpThreading.h"
6102077da7SAlexey Zhikhartsev #include "llvm/ADT/APInt.h"
6202077da7SAlexey Zhikhartsev #include "llvm/ADT/DenseMap.h"
6302077da7SAlexey Zhikhartsev #include "llvm/ADT/SmallSet.h"
6402077da7SAlexey Zhikhartsev #include "llvm/ADT/Statistic.h"
6502077da7SAlexey Zhikhartsev #include "llvm/Analysis/AssumptionCache.h"
6602077da7SAlexey Zhikhartsev #include "llvm/Analysis/CodeMetrics.h"
67a494ae43Sserge-sans-paille #include "llvm/Analysis/DomTreeUpdater.h"
686b53ada6SXChy #include "llvm/Analysis/LoopInfo.h"
6902077da7SAlexey Zhikhartsev #include "llvm/Analysis/OptimizationRemarkEmitter.h"
7002077da7SAlexey Zhikhartsev #include "llvm/Analysis/TargetTransformInfo.h"
7102077da7SAlexey Zhikhartsev #include "llvm/IR/CFG.h"
7202077da7SAlexey Zhikhartsev #include "llvm/IR/Constants.h"
7302077da7SAlexey Zhikhartsev #include "llvm/IR/IntrinsicInst.h"
7402077da7SAlexey Zhikhartsev #include "llvm/Support/CommandLine.h"
7502077da7SAlexey Zhikhartsev #include "llvm/Support/Debug.h"
7602077da7SAlexey Zhikhartsev #include "llvm/Transforms/Utils/Cloning.h"
7702077da7SAlexey Zhikhartsev #include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
7802077da7SAlexey Zhikhartsev #include "llvm/Transforms/Utils/ValueMapper.h"
7902077da7SAlexey Zhikhartsev #include <deque>
8002077da7SAlexey Zhikhartsev 
81f90a66a5Sserge-sans-paille #ifdef EXPENSIVE_CHECKS
82f90a66a5Sserge-sans-paille #include "llvm/IR/Verifier.h"
83f90a66a5Sserge-sans-paille #endif
84f90a66a5Sserge-sans-paille 
8502077da7SAlexey Zhikhartsev using namespace llvm;
8602077da7SAlexey Zhikhartsev 
8702077da7SAlexey Zhikhartsev #define DEBUG_TYPE "dfa-jump-threading"
8802077da7SAlexey Zhikhartsev 
8902077da7SAlexey Zhikhartsev STATISTIC(NumTransforms, "Number of transformations done");
9002077da7SAlexey Zhikhartsev STATISTIC(NumCloned, "Number of blocks cloned");
9102077da7SAlexey Zhikhartsev STATISTIC(NumPaths, "Number of individual paths threaded");
9202077da7SAlexey Zhikhartsev 
9302077da7SAlexey Zhikhartsev static cl::opt<bool>
9402077da7SAlexey Zhikhartsev     ClViewCfgBefore("dfa-jump-view-cfg-before",
9502077da7SAlexey Zhikhartsev                     cl::desc("View the CFG before DFA Jump Threading"),
9602077da7SAlexey Zhikhartsev                     cl::Hidden, cl::init(false));
9702077da7SAlexey Zhikhartsev 
98c9325f8aSUsman Nadeem static cl::opt<bool> EarlyExitHeuristic(
99c9325f8aSUsman Nadeem     "dfa-early-exit-heuristic",
100c9325f8aSUsman Nadeem     cl::desc("Exit early if an unpredictable value come from the same loop"),
101c9325f8aSUsman Nadeem     cl::Hidden, cl::init(true));
102c9325f8aSUsman Nadeem 
10302077da7SAlexey Zhikhartsev static cl::opt<unsigned> MaxPathLength(
10402077da7SAlexey Zhikhartsev     "dfa-max-path-length",
10502077da7SAlexey Zhikhartsev     cl::desc("Max number of blocks searched to find a threading path"),
10602077da7SAlexey Zhikhartsev     cl::Hidden, cl::init(20));
10702077da7SAlexey Zhikhartsev 
108b167ada8SUsman Nadeem static cl::opt<unsigned> MaxNumVisitiedPaths(
109b167ada8SUsman Nadeem     "dfa-max-num-visited-paths",
110b167ada8SUsman Nadeem     cl::desc(
111b167ada8SUsman Nadeem         "Max number of blocks visited while enumerating paths around a switch"),
112*5fb57131SUsman Nadeem     cl::Hidden, cl::init(2500));
113b167ada8SUsman Nadeem 
1147fa41d8aSXChy static cl::opt<unsigned>
1157fa41d8aSXChy     MaxNumPaths("dfa-max-num-paths",
1168b0d7634SAlex Zhikhartsev                 cl::desc("Max number of paths enumerated around a switch"),
1178b0d7634SAlex Zhikhartsev                 cl::Hidden, cl::init(200));
1188b0d7634SAlex Zhikhartsev 
11902077da7SAlexey Zhikhartsev static cl::opt<unsigned>
12002077da7SAlexey Zhikhartsev     CostThreshold("dfa-cost-threshold",
12102077da7SAlexey Zhikhartsev                   cl::desc("Maximum cost accepted for the transformation"),
12202077da7SAlexey Zhikhartsev                   cl::Hidden, cl::init(50));
12302077da7SAlexey Zhikhartsev 
12402077da7SAlexey Zhikhartsev namespace {
12502077da7SAlexey Zhikhartsev 
12602077da7SAlexey Zhikhartsev class SelectInstToUnfold {
12702077da7SAlexey Zhikhartsev   SelectInst *SI;
12802077da7SAlexey Zhikhartsev   PHINode *SIUse;
12902077da7SAlexey Zhikhartsev 
13002077da7SAlexey Zhikhartsev public:
13102077da7SAlexey Zhikhartsev   SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {}
13202077da7SAlexey Zhikhartsev 
13302077da7SAlexey Zhikhartsev   SelectInst *getInst() { return SI; }
13402077da7SAlexey Zhikhartsev   PHINode *getUse() { return SIUse; }
13502077da7SAlexey Zhikhartsev 
13602077da7SAlexey Zhikhartsev   explicit operator bool() const { return SI && SIUse; }
13702077da7SAlexey Zhikhartsev };
13802077da7SAlexey Zhikhartsev 
139c229f767SXChy void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
14002077da7SAlexey Zhikhartsev             std::vector<SelectInstToUnfold> *NewSIsToUnfold,
14102077da7SAlexey Zhikhartsev             std::vector<BasicBlock *> *NewBBs);
14202077da7SAlexey Zhikhartsev 
14302077da7SAlexey Zhikhartsev class DFAJumpThreading {
14402077da7SAlexey Zhikhartsev public:
1456b53ada6SXChy   DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
14602077da7SAlexey Zhikhartsev                    TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE)
1476b53ada6SXChy       : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}
14802077da7SAlexey Zhikhartsev 
14902077da7SAlexey Zhikhartsev   bool run(Function &F);
150c229f767SXChy   bool LoopInfoBroken;
15102077da7SAlexey Zhikhartsev 
15202077da7SAlexey Zhikhartsev private:
15302077da7SAlexey Zhikhartsev   void
15402077da7SAlexey Zhikhartsev   unfoldSelectInstrs(DominatorTree *DT,
15502077da7SAlexey Zhikhartsev                      const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
15602077da7SAlexey Zhikhartsev     DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
15734d48279SKazu Hirata     SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts);
15802077da7SAlexey Zhikhartsev 
15902077da7SAlexey Zhikhartsev     while (!Stack.empty()) {
16084b07c9bSKazu Hirata       SelectInstToUnfold SIToUnfold = Stack.pop_back_val();
16102077da7SAlexey Zhikhartsev 
16202077da7SAlexey Zhikhartsev       std::vector<SelectInstToUnfold> NewSIsToUnfold;
16302077da7SAlexey Zhikhartsev       std::vector<BasicBlock *> NewBBs;
164c229f767SXChy       unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs);
16502077da7SAlexey Zhikhartsev 
16602077da7SAlexey Zhikhartsev       // Put newly discovered select instructions into the work list.
16702077da7SAlexey Zhikhartsev       for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold)
16802077da7SAlexey Zhikhartsev         Stack.push_back(NewSIToUnfold);
16902077da7SAlexey Zhikhartsev     }
17002077da7SAlexey Zhikhartsev   }
17102077da7SAlexey Zhikhartsev 
17202077da7SAlexey Zhikhartsev   AssumptionCache *AC;
17302077da7SAlexey Zhikhartsev   DominatorTree *DT;
1746b53ada6SXChy   LoopInfo *LI;
17502077da7SAlexey Zhikhartsev   TargetTransformInfo *TTI;
17602077da7SAlexey Zhikhartsev   OptimizationRemarkEmitter *ORE;
17702077da7SAlexey Zhikhartsev };
17802077da7SAlexey Zhikhartsev 
17902077da7SAlexey Zhikhartsev } // end anonymous namespace
18002077da7SAlexey Zhikhartsev 
18102077da7SAlexey Zhikhartsev namespace {
18202077da7SAlexey Zhikhartsev 
18302077da7SAlexey Zhikhartsev /// Unfold the select instruction held in \p SIToUnfold by replacing it with
18402077da7SAlexey Zhikhartsev /// control flow.
18502077da7SAlexey Zhikhartsev ///
18602077da7SAlexey Zhikhartsev /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly
18702077da7SAlexey Zhikhartsev /// created basic blocks into \p NewBBs.
18802077da7SAlexey Zhikhartsev ///
18902077da7SAlexey Zhikhartsev /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
190c229f767SXChy void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
19102077da7SAlexey Zhikhartsev             std::vector<SelectInstToUnfold> *NewSIsToUnfold,
19202077da7SAlexey Zhikhartsev             std::vector<BasicBlock *> *NewBBs) {
19302077da7SAlexey Zhikhartsev   SelectInst *SI = SIToUnfold.getInst();
19402077da7SAlexey Zhikhartsev   PHINode *SIUse = SIToUnfold.getUse();
19502077da7SAlexey Zhikhartsev   BasicBlock *StartBlock = SI->getParent();
19602077da7SAlexey Zhikhartsev   BranchInst *StartBlockTerm =
19702077da7SAlexey Zhikhartsev       dyn_cast<BranchInst>(StartBlock->getTerminator());
19802077da7SAlexey Zhikhartsev 
199b167ada8SUsman Nadeem   assert(StartBlockTerm);
20002077da7SAlexey Zhikhartsev   assert(SI->hasOneUse());
20102077da7SAlexey Zhikhartsev 
202b167ada8SUsman Nadeem   if (StartBlockTerm->isUnconditional()) {
203d4a38c8fSUsman Nadeem     BasicBlock *EndBlock = StartBlock->getUniqueSuccessor();
204b167ada8SUsman Nadeem     // Arbitrarily choose the 'false' side for a new input value to the PHI.
205b167ada8SUsman Nadeem     BasicBlock *NewBlock = BasicBlock::Create(
206b167ada8SUsman Nadeem         SI->getContext(), Twine(SI->getName(), ".si.unfold.false"),
20702077da7SAlexey Zhikhartsev         EndBlock->getParent(), EndBlock);
208b167ada8SUsman Nadeem     NewBBs->push_back(NewBlock);
209b167ada8SUsman Nadeem     BranchInst::Create(EndBlock, NewBlock);
210b167ada8SUsman Nadeem     DTU->applyUpdates({{DominatorTree::Insert, NewBlock, EndBlock}});
21102077da7SAlexey Zhikhartsev 
212b167ada8SUsman Nadeem     // StartBlock
213b167ada8SUsman Nadeem     //   |  \
214b167ada8SUsman Nadeem     //   |  NewBlock
215b167ada8SUsman Nadeem     //   |  /
216b167ada8SUsman Nadeem     // EndBlock
21702077da7SAlexey Zhikhartsev     Value *SIOp1 = SI->getTrueValue();
21802077da7SAlexey Zhikhartsev     Value *SIOp2 = SI->getFalseValue();
21902077da7SAlexey Zhikhartsev 
220b167ada8SUsman Nadeem     PHINode *NewPhi = PHINode::Create(SIUse->getType(), 1,
221b167ada8SUsman Nadeem                                       Twine(SIOp2->getName(), ".si.unfold.phi"),
222b167ada8SUsman Nadeem                                       NewBlock->getFirstInsertionPt());
223b167ada8SUsman Nadeem     NewPhi->addIncoming(SIOp2, StartBlock);
224b167ada8SUsman Nadeem 
225d4a38c8fSUsman Nadeem     // Update any other PHI nodes in EndBlock.
226d4a38c8fSUsman Nadeem     for (PHINode &Phi : EndBlock->phis()) {
227d4a38c8fSUsman Nadeem       if (SIUse == &Phi)
228d4a38c8fSUsman Nadeem         continue;
229d4a38c8fSUsman Nadeem       Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlock);
230d4a38c8fSUsman Nadeem     }
231d4a38c8fSUsman Nadeem 
232d4a38c8fSUsman Nadeem     // Update the phi node of SI, which is its only use.
233d4a38c8fSUsman Nadeem     if (EndBlock == SIUse->getParent()) {
234d4a38c8fSUsman Nadeem       SIUse->addIncoming(NewPhi, NewBlock);
235d4a38c8fSUsman Nadeem       SIUse->replaceUsesOfWith(SI, SIOp1);
236d4a38c8fSUsman Nadeem     } else {
237d4a38c8fSUsman Nadeem       PHINode *EndPhi = PHINode::Create(SIUse->getType(), pred_size(EndBlock),
238d4a38c8fSUsman Nadeem                                         Twine(SI->getName(), ".si.unfold.phi"),
239d4a38c8fSUsman Nadeem                                         EndBlock->getFirstInsertionPt());
240d4a38c8fSUsman Nadeem       for (BasicBlock *Pred : predecessors(EndBlock)) {
241d4a38c8fSUsman Nadeem         if (Pred != StartBlock && Pred != NewBlock)
242d4a38c8fSUsman Nadeem           EndPhi->addIncoming(EndPhi, Pred);
243d4a38c8fSUsman Nadeem       }
244d4a38c8fSUsman Nadeem 
245d4a38c8fSUsman Nadeem       EndPhi->addIncoming(SIOp1, StartBlock);
246d4a38c8fSUsman Nadeem       EndPhi->addIncoming(NewPhi, NewBlock);
247d4a38c8fSUsman Nadeem       SIUse->replaceUsesOfWith(SI, EndPhi);
248d4a38c8fSUsman Nadeem       SIUse = EndPhi;
249d4a38c8fSUsman Nadeem     }
250d4a38c8fSUsman Nadeem 
251b167ada8SUsman Nadeem     if (auto *OpSi = dyn_cast<SelectInst>(SIOp1))
252b167ada8SUsman Nadeem       NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, SIUse));
253b167ada8SUsman Nadeem     if (auto *OpSi = dyn_cast<SelectInst>(SIOp2))
254b167ada8SUsman Nadeem       NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, NewPhi));
25502077da7SAlexey Zhikhartsev 
256b167ada8SUsman Nadeem     // Insert the real conditional branch based on the original condition.
257d4a38c8fSUsman Nadeem     StartBlockTerm->eraseFromParent();
258b167ada8SUsman Nadeem     BranchInst::Create(EndBlock, NewBlock, SI->getCondition(), StartBlock);
259b167ada8SUsman Nadeem     DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock},
260b167ada8SUsman Nadeem                        {DominatorTree::Insert, StartBlock, NewBlock}});
261b167ada8SUsman Nadeem   } else {
262d4a38c8fSUsman Nadeem     BasicBlock *EndBlock = SIUse->getParent();
263b167ada8SUsman Nadeem     BasicBlock *NewBlockT = BasicBlock::Create(
264b167ada8SUsman Nadeem         SI->getContext(), Twine(SI->getName(), ".si.unfold.true"),
265b167ada8SUsman Nadeem         EndBlock->getParent(), EndBlock);
266b167ada8SUsman Nadeem     BasicBlock *NewBlockF = BasicBlock::Create(
267b167ada8SUsman Nadeem         SI->getContext(), Twine(SI->getName(), ".si.unfold.false"),
268b167ada8SUsman Nadeem         EndBlock->getParent(), EndBlock);
269b167ada8SUsman Nadeem 
270b167ada8SUsman Nadeem     NewBBs->push_back(NewBlockT);
271b167ada8SUsman Nadeem     NewBBs->push_back(NewBlockF);
272b167ada8SUsman Nadeem 
273b167ada8SUsman Nadeem     // Def only has one use in EndBlock.
274b167ada8SUsman Nadeem     // Before transformation:
275b167ada8SUsman Nadeem     // StartBlock(Def)
276b167ada8SUsman Nadeem     //   |      \
277b167ada8SUsman Nadeem     // EndBlock  OtherBlock
278b167ada8SUsman Nadeem     //  (Use)
279b167ada8SUsman Nadeem     //
280b167ada8SUsman Nadeem     // After transformation:
281b167ada8SUsman Nadeem     // StartBlock(Def)
282b167ada8SUsman Nadeem     //   |      \
283b167ada8SUsman Nadeem     //   |       OtherBlock
284b167ada8SUsman Nadeem     // NewBlockT
285b167ada8SUsman Nadeem     //   |     \
286b167ada8SUsman Nadeem     //   |   NewBlockF
287b167ada8SUsman Nadeem     //   |      /
288b167ada8SUsman Nadeem     //   |     /
289b167ada8SUsman Nadeem     // EndBlock
290b167ada8SUsman Nadeem     //  (Use)
291b167ada8SUsman Nadeem     BranchInst::Create(EndBlock, NewBlockF);
292b167ada8SUsman Nadeem     // Insert the real conditional branch based on the original condition.
293b167ada8SUsman Nadeem     BranchInst::Create(EndBlock, NewBlockF, SI->getCondition(), NewBlockT);
294b167ada8SUsman Nadeem     DTU->applyUpdates({{DominatorTree::Insert, NewBlockT, NewBlockF},
295b167ada8SUsman Nadeem                        {DominatorTree::Insert, NewBlockT, EndBlock},
296b167ada8SUsman Nadeem                        {DominatorTree::Insert, NewBlockF, EndBlock}});
297b167ada8SUsman Nadeem 
298b167ada8SUsman Nadeem     Value *TrueVal = SI->getTrueValue();
299b167ada8SUsman Nadeem     Value *FalseVal = SI->getFalseValue();
300b167ada8SUsman Nadeem 
301b167ada8SUsman Nadeem     PHINode *NewPhiT = PHINode::Create(
302b167ada8SUsman Nadeem         SIUse->getType(), 1, Twine(TrueVal->getName(), ".si.unfold.phi"),
303b167ada8SUsman Nadeem         NewBlockT->getFirstInsertionPt());
304b167ada8SUsman Nadeem     PHINode *NewPhiF = PHINode::Create(
305b167ada8SUsman Nadeem         SIUse->getType(), 1, Twine(FalseVal->getName(), ".si.unfold.phi"),
306b167ada8SUsman Nadeem         NewBlockF->getFirstInsertionPt());
307b167ada8SUsman Nadeem     NewPhiT->addIncoming(TrueVal, StartBlock);
308b167ada8SUsman Nadeem     NewPhiF->addIncoming(FalseVal, NewBlockT);
309b167ada8SUsman Nadeem 
310b167ada8SUsman Nadeem     if (auto *TrueSI = dyn_cast<SelectInst>(TrueVal))
311b167ada8SUsman Nadeem       NewSIsToUnfold->push_back(SelectInstToUnfold(TrueSI, NewPhiT));
312b167ada8SUsman Nadeem     if (auto *FalseSi = dyn_cast<SelectInst>(FalseVal))
313b167ada8SUsman Nadeem       NewSIsToUnfold->push_back(SelectInstToUnfold(FalseSi, NewPhiF));
314b167ada8SUsman Nadeem 
315b167ada8SUsman Nadeem     SIUse->addIncoming(NewPhiT, NewBlockT);
316b167ada8SUsman Nadeem     SIUse->addIncoming(NewPhiF, NewBlockF);
317b167ada8SUsman Nadeem     SIUse->removeIncomingValue(StartBlock);
318b167ada8SUsman Nadeem 
319b167ada8SUsman Nadeem     // Update any other PHI nodes in EndBlock.
320b167ada8SUsman Nadeem     for (PHINode &Phi : EndBlock->phis()) {
321b167ada8SUsman Nadeem       if (SIUse == &Phi)
322b167ada8SUsman Nadeem         continue;
323b167ada8SUsman Nadeem       Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlockT);
324b167ada8SUsman Nadeem       Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlockF);
325b167ada8SUsman Nadeem       Phi.removeIncomingValue(StartBlock);
326b167ada8SUsman Nadeem     }
327b167ada8SUsman Nadeem 
328b167ada8SUsman Nadeem     // Update the appropriate successor of the start block to point to the new
329b167ada8SUsman Nadeem     // unfolded block.
330b167ada8SUsman Nadeem     unsigned SuccNum = StartBlockTerm->getSuccessor(1) == EndBlock ? 1 : 0;
331b167ada8SUsman Nadeem     StartBlockTerm->setSuccessor(SuccNum, NewBlockT);
332b167ada8SUsman Nadeem     DTU->applyUpdates({{DominatorTree::Delete, StartBlock, EndBlock},
333b167ada8SUsman Nadeem                        {DominatorTree::Insert, StartBlock, NewBlockT}});
334b167ada8SUsman Nadeem   }
33502077da7SAlexey Zhikhartsev 
336c229f767SXChy   // Preserve loop info
337c229f767SXChy   if (Loop *L = LI->getLoopFor(SI->getParent())) {
338c229f767SXChy     for (BasicBlock *NewBB : *NewBBs)
339c229f767SXChy       L->addBasicBlockToLoop(NewBB, *LI);
340c229f767SXChy   }
341c229f767SXChy 
34202077da7SAlexey Zhikhartsev   // The select is now dead.
3437fa41d8aSXChy   assert(SI->use_empty() && "Select must be dead now");
34402077da7SAlexey Zhikhartsev   SI->eraseFromParent();
34502077da7SAlexey Zhikhartsev }
34602077da7SAlexey Zhikhartsev 
34702077da7SAlexey Zhikhartsev struct ClonedBlock {
34802077da7SAlexey Zhikhartsev   BasicBlock *BB;
349019ffbf3SXChy   APInt State; ///< \p State corresponds to the next value of a switch stmnt.
35002077da7SAlexey Zhikhartsev };
35102077da7SAlexey Zhikhartsev 
35202077da7SAlexey Zhikhartsev typedef std::deque<BasicBlock *> PathType;
35302077da7SAlexey Zhikhartsev typedef std::vector<PathType> PathsType;
354380b8a60SNikita Popov typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
35502077da7SAlexey Zhikhartsev typedef std::vector<ClonedBlock> CloneList;
35602077da7SAlexey Zhikhartsev 
35702077da7SAlexey Zhikhartsev // This data structure keeps track of all blocks that have been cloned.  If two
35802077da7SAlexey Zhikhartsev // different ThreadingPaths clone the same block for a certain state it should
35902077da7SAlexey Zhikhartsev // be reused, and it can be looked up in this map.
36002077da7SAlexey Zhikhartsev typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;
36102077da7SAlexey Zhikhartsev 
36202077da7SAlexey Zhikhartsev // This map keeps track of all the new definitions for an instruction. This
36302077da7SAlexey Zhikhartsev // information is needed when restoring SSA form after cloning blocks.
3649d555b4aSOlle Fredriksson typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap;
36502077da7SAlexey Zhikhartsev 
36602077da7SAlexey Zhikhartsev inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
36702077da7SAlexey Zhikhartsev   OS << "< ";
36802077da7SAlexey Zhikhartsev   for (const BasicBlock *BB : Path) {
36902077da7SAlexey Zhikhartsev     std::string BBName;
37002077da7SAlexey Zhikhartsev     if (BB->hasName())
37102077da7SAlexey Zhikhartsev       raw_string_ostream(BBName) << BB->getName();
37202077da7SAlexey Zhikhartsev     else
37302077da7SAlexey Zhikhartsev       raw_string_ostream(BBName) << BB;
37402077da7SAlexey Zhikhartsev     OS << BBName << " ";
37502077da7SAlexey Zhikhartsev   }
37602077da7SAlexey Zhikhartsev   OS << ">";
37702077da7SAlexey Zhikhartsev   return OS;
37802077da7SAlexey Zhikhartsev }
37902077da7SAlexey Zhikhartsev 
38002077da7SAlexey Zhikhartsev /// ThreadingPath is a path in the control flow of a loop that can be threaded
38102077da7SAlexey Zhikhartsev /// by cloning necessary basic blocks and replacing conditional branches with
38202077da7SAlexey Zhikhartsev /// unconditional ones. A threading path includes a list of basic blocks, the
38302077da7SAlexey Zhikhartsev /// exit state, and the block that determines the next state.
38402077da7SAlexey Zhikhartsev struct ThreadingPath {
38502077da7SAlexey Zhikhartsev   /// Exit value is DFA's exit state for the given path.
386019ffbf3SXChy   APInt getExitValue() const { return ExitVal; }
38702077da7SAlexey Zhikhartsev   void setExitValue(const ConstantInt *V) {
388019ffbf3SXChy     ExitVal = V->getValue();
38902077da7SAlexey Zhikhartsev     IsExitValSet = true;
39002077da7SAlexey Zhikhartsev   }
39102077da7SAlexey Zhikhartsev   bool isExitValueSet() const { return IsExitValSet; }
39202077da7SAlexey Zhikhartsev 
39302077da7SAlexey Zhikhartsev   /// Determinator is the basic block that determines the next state of the DFA.
39402077da7SAlexey Zhikhartsev   const BasicBlock *getDeterminatorBB() const { return DBB; }
39502077da7SAlexey Zhikhartsev   void setDeterminator(const BasicBlock *BB) { DBB = BB; }
39602077da7SAlexey Zhikhartsev 
39702077da7SAlexey Zhikhartsev   /// Path is a list of basic blocks.
39802077da7SAlexey Zhikhartsev   const PathType &getPath() const { return Path; }
39902077da7SAlexey Zhikhartsev   void setPath(const PathType &NewPath) { Path = NewPath; }
400b167ada8SUsman Nadeem   void push_back(BasicBlock *BB) { Path.push_back(BB); }
401b167ada8SUsman Nadeem   void push_front(BasicBlock *BB) { Path.push_front(BB); }
402b167ada8SUsman Nadeem   void appendExcludingFirst(const PathType &OtherPath) {
403b167ada8SUsman Nadeem     Path.insert(Path.end(), OtherPath.begin() + 1, OtherPath.end());
404b167ada8SUsman Nadeem   }
40502077da7SAlexey Zhikhartsev 
40602077da7SAlexey Zhikhartsev   void print(raw_ostream &OS) const {
40702077da7SAlexey Zhikhartsev     OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]";
40802077da7SAlexey Zhikhartsev   }
40902077da7SAlexey Zhikhartsev 
41002077da7SAlexey Zhikhartsev private:
41102077da7SAlexey Zhikhartsev   PathType Path;
412019ffbf3SXChy   APInt ExitVal;
41302077da7SAlexey Zhikhartsev   const BasicBlock *DBB = nullptr;
41402077da7SAlexey Zhikhartsev   bool IsExitValSet = false;
41502077da7SAlexey Zhikhartsev };
41602077da7SAlexey Zhikhartsev 
41702077da7SAlexey Zhikhartsev #ifndef NDEBUG
41802077da7SAlexey Zhikhartsev inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
41902077da7SAlexey Zhikhartsev   TPath.print(OS);
42002077da7SAlexey Zhikhartsev   return OS;
42102077da7SAlexey Zhikhartsev }
42202077da7SAlexey Zhikhartsev #endif
42302077da7SAlexey Zhikhartsev 
42402077da7SAlexey Zhikhartsev struct MainSwitch {
4256b53ada6SXChy   MainSwitch(SwitchInst *SI, LoopInfo *LI, OptimizationRemarkEmitter *ORE)
4266b53ada6SXChy       : LI(LI) {
4278b0d7634SAlex Zhikhartsev     if (isCandidate(SI)) {
42802077da7SAlexey Zhikhartsev       Instr = SI;
42902077da7SAlexey Zhikhartsev     } else {
43002077da7SAlexey Zhikhartsev       ORE->emit([&]() {
43102077da7SAlexey Zhikhartsev         return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI)
43202077da7SAlexey Zhikhartsev                << "Switch instruction is not predictable.";
43302077da7SAlexey Zhikhartsev       });
43402077da7SAlexey Zhikhartsev     }
43502077da7SAlexey Zhikhartsev   }
43602077da7SAlexey Zhikhartsev 
43702077da7SAlexey Zhikhartsev   virtual ~MainSwitch() = default;
43802077da7SAlexey Zhikhartsev 
43902077da7SAlexey Zhikhartsev   SwitchInst *getInstr() const { return Instr; }
44002077da7SAlexey Zhikhartsev   const SmallVector<SelectInstToUnfold, 4> getSelectInsts() {
44102077da7SAlexey Zhikhartsev     return SelectInsts;
44202077da7SAlexey Zhikhartsev   }
44302077da7SAlexey Zhikhartsev 
44402077da7SAlexey Zhikhartsev private:
4458b0d7634SAlex Zhikhartsev   /// Do a use-def chain traversal starting from the switch condition to see if
4468b0d7634SAlex Zhikhartsev   /// \p SI is a potential condidate.
4478b0d7634SAlex Zhikhartsev   ///
4488b0d7634SAlex Zhikhartsev   /// Also, collect select instructions to unfold.
4498b0d7634SAlex Zhikhartsev   bool isCandidate(const SwitchInst *SI) {
450c9325f8aSUsman Nadeem     std::deque<std::pair<Value *, BasicBlock *>> Q;
45102077da7SAlexey Zhikhartsev     SmallSet<Value *, 16> SeenValues;
45202077da7SAlexey Zhikhartsev     SelectInsts.clear();
45302077da7SAlexey Zhikhartsev 
4548b0d7634SAlex Zhikhartsev     Value *SICond = SI->getCondition();
4558b0d7634SAlex Zhikhartsev     LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n");
4568b0d7634SAlex Zhikhartsev     if (!isa<PHINode>(SICond))
45702077da7SAlexey Zhikhartsev       return false;
45802077da7SAlexey Zhikhartsev 
4596b53ada6SXChy     // The switch must be in a loop.
460c9325f8aSUsman Nadeem     const Loop *L = LI->getLoopFor(SI->getParent());
461c9325f8aSUsman Nadeem     if (!L)
4626b53ada6SXChy       return false;
4636b53ada6SXChy 
464c9325f8aSUsman Nadeem     addToQueue(SICond, nullptr, Q, SeenValues);
46502077da7SAlexey Zhikhartsev 
46602077da7SAlexey Zhikhartsev     while (!Q.empty()) {
467c9325f8aSUsman Nadeem       Value *Current = Q.front().first;
468c9325f8aSUsman Nadeem       BasicBlock *CurrentIncomingBB = Q.front().second;
46902077da7SAlexey Zhikhartsev       Q.pop_front();
47002077da7SAlexey Zhikhartsev 
47102077da7SAlexey Zhikhartsev       if (auto *Phi = dyn_cast<PHINode>(Current)) {
472c9325f8aSUsman Nadeem         for (BasicBlock *IncomingBB : Phi->blocks()) {
473c9325f8aSUsman Nadeem           Value *Incoming = Phi->getIncomingValueForBlock(IncomingBB);
474c9325f8aSUsman Nadeem           addToQueue(Incoming, IncomingBB, Q, SeenValues);
47502077da7SAlexey Zhikhartsev         }
4768b0d7634SAlex Zhikhartsev         LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");
47702077da7SAlexey Zhikhartsev       } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) {
47802077da7SAlexey Zhikhartsev         if (!isValidSelectInst(SelI))
47902077da7SAlexey Zhikhartsev           return false;
480c9325f8aSUsman Nadeem         addToQueue(SelI->getTrueValue(), CurrentIncomingBB, Q, SeenValues);
481c9325f8aSUsman Nadeem         addToQueue(SelI->getFalseValue(), CurrentIncomingBB, Q, SeenValues);
4828b0d7634SAlex Zhikhartsev         LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");
48302077da7SAlexey Zhikhartsev         if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back()))
48402077da7SAlexey Zhikhartsev           SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));
4858b0d7634SAlex Zhikhartsev       } else if (isa<Constant>(Current)) {
4868b0d7634SAlex Zhikhartsev         LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n");
4878b0d7634SAlex Zhikhartsev         continue;
48802077da7SAlexey Zhikhartsev       } else {
4898b0d7634SAlex Zhikhartsev         LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n");
4908b0d7634SAlex Zhikhartsev         // Allow unpredictable values. The hope is that those will be the
4918b0d7634SAlex Zhikhartsev         // initial switch values that can be ignored (they will hit the
4928b0d7634SAlex Zhikhartsev         // unthreaded switch) but this assumption will get checked later after
4938b0d7634SAlex Zhikhartsev         // paths have been enumerated (in function getStateDefMap).
494c9325f8aSUsman Nadeem 
495c9325f8aSUsman Nadeem         // If the unpredictable value comes from the same inner loop it is
496c9325f8aSUsman Nadeem         // likely that it will also be on the enumerated paths, causing us to
497c9325f8aSUsman Nadeem         // exit after we have enumerated all the paths. This heuristic save
498c9325f8aSUsman Nadeem         // compile time because a search for all the paths can become expensive.
499c9325f8aSUsman Nadeem         if (EarlyExitHeuristic &&
500c9325f8aSUsman Nadeem             L->contains(LI->getLoopFor(CurrentIncomingBB))) {
501c9325f8aSUsman Nadeem           LLVM_DEBUG(dbgs()
502c9325f8aSUsman Nadeem                      << "\tExiting early due to unpredictability heuristic.\n");
503c9325f8aSUsman Nadeem           return false;
504c9325f8aSUsman Nadeem         }
505c9325f8aSUsman Nadeem 
5068b0d7634SAlex Zhikhartsev         continue;
50702077da7SAlexey Zhikhartsev       }
50802077da7SAlexey Zhikhartsev     }
50902077da7SAlexey Zhikhartsev 
51002077da7SAlexey Zhikhartsev     return true;
51102077da7SAlexey Zhikhartsev   }
51202077da7SAlexey Zhikhartsev 
513c9325f8aSUsman Nadeem   void addToQueue(Value *Val, BasicBlock *BB,
514c9325f8aSUsman Nadeem                   std::deque<std::pair<Value *, BasicBlock *>> &Q,
51502077da7SAlexey Zhikhartsev                   SmallSet<Value *, 16> &SeenValues) {
51623a26e71SKazu Hirata     if (SeenValues.insert(Val).second)
517c9325f8aSUsman Nadeem       Q.push_back({Val, BB});
51802077da7SAlexey Zhikhartsev   }
51902077da7SAlexey Zhikhartsev 
52002077da7SAlexey Zhikhartsev   bool isValidSelectInst(SelectInst *SI) {
52102077da7SAlexey Zhikhartsev     if (!SI->hasOneUse())
52202077da7SAlexey Zhikhartsev       return false;
52302077da7SAlexey Zhikhartsev 
52402077da7SAlexey Zhikhartsev     Instruction *SIUse = dyn_cast<Instruction>(SI->user_back());
52502077da7SAlexey Zhikhartsev     // The use of the select inst should be either a phi or another select.
52602077da7SAlexey Zhikhartsev     if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse)))
52702077da7SAlexey Zhikhartsev       return false;
52802077da7SAlexey Zhikhartsev 
52902077da7SAlexey Zhikhartsev     BasicBlock *SIBB = SI->getParent();
53002077da7SAlexey Zhikhartsev 
53102077da7SAlexey Zhikhartsev     // Currently, we can only expand select instructions in basic blocks with
53202077da7SAlexey Zhikhartsev     // one successor.
53302077da7SAlexey Zhikhartsev     BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator());
53402077da7SAlexey Zhikhartsev     if (!SITerm || !SITerm->isUnconditional())
53502077da7SAlexey Zhikhartsev       return false;
53602077da7SAlexey Zhikhartsev 
5377fa41d8aSXChy     // Only fold the select coming from directly where it is defined.
5387fa41d8aSXChy     PHINode *PHIUser = dyn_cast<PHINode>(SIUse);
5397fa41d8aSXChy     if (PHIUser && PHIUser->getIncomingBlock(*SI->use_begin()) != SIBB)
54002077da7SAlexey Zhikhartsev       return false;
54102077da7SAlexey Zhikhartsev 
54202077da7SAlexey Zhikhartsev     // If select will not be sunk during unfolding, and it is in the same basic
54302077da7SAlexey Zhikhartsev     // block as another state defining select, then cannot unfold both.
54402077da7SAlexey Zhikhartsev     for (SelectInstToUnfold SIToUnfold : SelectInsts) {
54502077da7SAlexey Zhikhartsev       SelectInst *PrevSI = SIToUnfold.getInst();
54602077da7SAlexey Zhikhartsev       if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI &&
54702077da7SAlexey Zhikhartsev           PrevSI->getParent() == SI->getParent())
54802077da7SAlexey Zhikhartsev         return false;
54902077da7SAlexey Zhikhartsev     }
55002077da7SAlexey Zhikhartsev 
55102077da7SAlexey Zhikhartsev     return true;
55202077da7SAlexey Zhikhartsev   }
55302077da7SAlexey Zhikhartsev 
5546b53ada6SXChy   LoopInfo *LI;
55502077da7SAlexey Zhikhartsev   SwitchInst *Instr = nullptr;
55602077da7SAlexey Zhikhartsev   SmallVector<SelectInstToUnfold, 4> SelectInsts;
55702077da7SAlexey Zhikhartsev };
55802077da7SAlexey Zhikhartsev 
55902077da7SAlexey Zhikhartsev struct AllSwitchPaths {
560c229f767SXChy   AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE,
561b167ada8SUsman Nadeem                  LoopInfo *LI, Loop *L)
562c229f767SXChy       : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE),
563b167ada8SUsman Nadeem         LI(LI), SwitchOuterLoop(L) {}
56402077da7SAlexey Zhikhartsev 
56502077da7SAlexey Zhikhartsev   std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; }
56602077da7SAlexey Zhikhartsev   unsigned getNumThreadingPaths() { return TPaths.size(); }
56702077da7SAlexey Zhikhartsev   SwitchInst *getSwitchInst() { return Switch; }
56802077da7SAlexey Zhikhartsev   BasicBlock *getSwitchBlock() { return SwitchBlock; }
56902077da7SAlexey Zhikhartsev 
57002077da7SAlexey Zhikhartsev   void run() {
571b167ada8SUsman Nadeem     StateDefMap StateDef = getStateDefMap();
5728b0d7634SAlex Zhikhartsev     if (StateDef.empty()) {
5738b0d7634SAlex Zhikhartsev       ORE->emit([&]() {
5748b0d7634SAlex Zhikhartsev         return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
5758b0d7634SAlex Zhikhartsev                                         Switch)
5768b0d7634SAlex Zhikhartsev                << "Switch instruction is not predictable.";
5778b0d7634SAlex Zhikhartsev       });
5788b0d7634SAlex Zhikhartsev       return;
5798b0d7634SAlex Zhikhartsev     }
58002077da7SAlexey Zhikhartsev 
581b167ada8SUsman Nadeem     auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0));
582b167ada8SUsman Nadeem     auto *SwitchPhiDefBB = SwitchPhi->getParent();
583b167ada8SUsman Nadeem     VisitedBlocks VB;
584b167ada8SUsman Nadeem     // Get paths from the determinator BBs to SwitchPhiDefBB
585b167ada8SUsman Nadeem     std::vector<ThreadingPath> PathsToPhiDef =
586b167ada8SUsman Nadeem         getPathsFromStateDefMap(StateDef, SwitchPhi, VB);
587b167ada8SUsman Nadeem     if (SwitchPhiDefBB == SwitchBlock) {
588b167ada8SUsman Nadeem       TPaths = std::move(PathsToPhiDef);
589b167ada8SUsman Nadeem       return;
59002077da7SAlexey Zhikhartsev     }
59102077da7SAlexey Zhikhartsev 
592b167ada8SUsman Nadeem     // Find and append paths from SwitchPhiDefBB to SwitchBlock.
593b167ada8SUsman Nadeem     PathsType PathsToSwitchBB =
594b167ada8SUsman Nadeem         paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1);
595b167ada8SUsman Nadeem     if (PathsToSwitchBB.empty())
596b167ada8SUsman Nadeem       return;
59702077da7SAlexey Zhikhartsev 
598b167ada8SUsman Nadeem     std::vector<ThreadingPath> TempList;
599b167ada8SUsman Nadeem     for (const ThreadingPath &Path : PathsToPhiDef) {
600b167ada8SUsman Nadeem       for (const PathType &PathToSw : PathsToSwitchBB) {
601b167ada8SUsman Nadeem         ThreadingPath PathCopy(Path);
602b167ada8SUsman Nadeem         PathCopy.appendExcludingFirst(PathToSw);
603b167ada8SUsman Nadeem         TempList.push_back(PathCopy);
60402077da7SAlexey Zhikhartsev       }
60502077da7SAlexey Zhikhartsev     }
606b167ada8SUsman Nadeem     TPaths = std::move(TempList);
60702077da7SAlexey Zhikhartsev   }
60802077da7SAlexey Zhikhartsev 
60902077da7SAlexey Zhikhartsev private:
61002077da7SAlexey Zhikhartsev   // Value: an instruction that defines a switch state;
61102077da7SAlexey Zhikhartsev   // Key: the parent basic block of that instruction.
61202077da7SAlexey Zhikhartsev   typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap;
613b167ada8SUsman Nadeem   std::vector<ThreadingPath> getPathsFromStateDefMap(StateDefMap &StateDef,
614b167ada8SUsman Nadeem                                                      PHINode *Phi,
615b167ada8SUsman Nadeem                                                      VisitedBlocks &VB) {
616b167ada8SUsman Nadeem     std::vector<ThreadingPath> Res;
617b167ada8SUsman Nadeem     auto *PhiBB = Phi->getParent();
618b167ada8SUsman Nadeem     VB.insert(PhiBB);
61902077da7SAlexey Zhikhartsev 
620b167ada8SUsman Nadeem     VisitedBlocks UniqueBlocks;
621b167ada8SUsman Nadeem     for (auto *IncomingBB : Phi->blocks()) {
622b167ada8SUsman Nadeem       if (!UniqueBlocks.insert(IncomingBB).second)
623b167ada8SUsman Nadeem         continue;
624b167ada8SUsman Nadeem       if (!SwitchOuterLoop->contains(IncomingBB))
625b167ada8SUsman Nadeem         continue;
626b167ada8SUsman Nadeem 
627b167ada8SUsman Nadeem       Value *IncomingValue = Phi->getIncomingValueForBlock(IncomingBB);
628b167ada8SUsman Nadeem       // We found the determinator. This is the start of our path.
629b167ada8SUsman Nadeem       if (auto *C = dyn_cast<ConstantInt>(IncomingValue)) {
630b167ada8SUsman Nadeem         // SwitchBlock is the determinator, unsupported unless its also the def.
631b167ada8SUsman Nadeem         if (PhiBB == SwitchBlock &&
632b167ada8SUsman Nadeem             SwitchBlock != cast<PHINode>(Switch->getOperand(0))->getParent())
633b167ada8SUsman Nadeem           continue;
634b167ada8SUsman Nadeem         ThreadingPath NewPath;
635b167ada8SUsman Nadeem         NewPath.setDeterminator(PhiBB);
636b167ada8SUsman Nadeem         NewPath.setExitValue(C);
637b167ada8SUsman Nadeem         // Don't add SwitchBlock at the start, this is handled later.
638b167ada8SUsman Nadeem         if (IncomingBB != SwitchBlock)
639b167ada8SUsman Nadeem           NewPath.push_back(IncomingBB);
640b167ada8SUsman Nadeem         NewPath.push_back(PhiBB);
641b167ada8SUsman Nadeem         Res.push_back(NewPath);
642b167ada8SUsman Nadeem         continue;
643b167ada8SUsman Nadeem       }
644b167ada8SUsman Nadeem       // Don't get into a cycle.
645b167ada8SUsman Nadeem       if (VB.contains(IncomingBB) || IncomingBB == SwitchBlock)
646b167ada8SUsman Nadeem         continue;
647b167ada8SUsman Nadeem       // Recurse up the PHI chain.
648b167ada8SUsman Nadeem       auto *IncomingPhi = dyn_cast<PHINode>(IncomingValue);
649b167ada8SUsman Nadeem       if (!IncomingPhi)
650b167ada8SUsman Nadeem         continue;
651b167ada8SUsman Nadeem       auto *IncomingPhiDefBB = IncomingPhi->getParent();
652b167ada8SUsman Nadeem       if (!StateDef.contains(IncomingPhiDefBB))
653b167ada8SUsman Nadeem         continue;
654b167ada8SUsman Nadeem 
655b167ada8SUsman Nadeem       // Direct predecessor, just add to the path.
656b167ada8SUsman Nadeem       if (IncomingPhiDefBB == IncomingBB) {
657b167ada8SUsman Nadeem         std::vector<ThreadingPath> PredPaths =
658b167ada8SUsman Nadeem             getPathsFromStateDefMap(StateDef, IncomingPhi, VB);
659b167ada8SUsman Nadeem         for (ThreadingPath &Path : PredPaths) {
660b167ada8SUsman Nadeem           Path.push_back(PhiBB);
661b167ada8SUsman Nadeem           Res.push_back(std::move(Path));
662b167ada8SUsman Nadeem         }
663b167ada8SUsman Nadeem         continue;
664b167ada8SUsman Nadeem       }
665b167ada8SUsman Nadeem       // Not a direct predecessor, find intermediate paths to append to the
666b167ada8SUsman Nadeem       // existing path.
667b167ada8SUsman Nadeem       if (VB.contains(IncomingPhiDefBB))
668b167ada8SUsman Nadeem         continue;
669b167ada8SUsman Nadeem 
670b167ada8SUsman Nadeem       PathsType IntermediatePaths;
671b167ada8SUsman Nadeem       IntermediatePaths =
672b167ada8SUsman Nadeem           paths(IncomingPhiDefBB, IncomingBB, VB, /* PathDepth = */ 1);
673b167ada8SUsman Nadeem       if (IntermediatePaths.empty())
674b167ada8SUsman Nadeem         continue;
675b167ada8SUsman Nadeem 
676b167ada8SUsman Nadeem       std::vector<ThreadingPath> PredPaths =
677b167ada8SUsman Nadeem           getPathsFromStateDefMap(StateDef, IncomingPhi, VB);
678b167ada8SUsman Nadeem       for (const ThreadingPath &Path : PredPaths) {
679b167ada8SUsman Nadeem         for (const PathType &IPath : IntermediatePaths) {
680b167ada8SUsman Nadeem           ThreadingPath NewPath(Path);
681b167ada8SUsman Nadeem           NewPath.appendExcludingFirst(IPath);
682b167ada8SUsman Nadeem           NewPath.push_back(PhiBB);
683b167ada8SUsman Nadeem           Res.push_back(NewPath);
684b167ada8SUsman Nadeem         }
685b167ada8SUsman Nadeem       }
686b167ada8SUsman Nadeem     }
687b167ada8SUsman Nadeem     VB.erase(PhiBB);
688b167ada8SUsman Nadeem     return Res;
689b167ada8SUsman Nadeem   }
690b167ada8SUsman Nadeem 
691b167ada8SUsman Nadeem   PathsType paths(BasicBlock *BB, BasicBlock *ToBB, VisitedBlocks &Visited,
692b167ada8SUsman Nadeem                   unsigned PathDepth) {
69302077da7SAlexey Zhikhartsev     PathsType Res;
69402077da7SAlexey Zhikhartsev 
69502077da7SAlexey Zhikhartsev     // Stop exploring paths after visiting MaxPathLength blocks
69602077da7SAlexey Zhikhartsev     if (PathDepth > MaxPathLength) {
69702077da7SAlexey Zhikhartsev       ORE->emit([&]() {
69802077da7SAlexey Zhikhartsev         return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached",
69902077da7SAlexey Zhikhartsev                                           Switch)
70002077da7SAlexey Zhikhartsev                << "Exploration stopped after visiting MaxPathLength="
70102077da7SAlexey Zhikhartsev                << ore::NV("MaxPathLength", MaxPathLength) << " blocks.";
70202077da7SAlexey Zhikhartsev       });
70302077da7SAlexey Zhikhartsev       return Res;
70402077da7SAlexey Zhikhartsev     }
70502077da7SAlexey Zhikhartsev 
70602077da7SAlexey Zhikhartsev     Visited.insert(BB);
707b167ada8SUsman Nadeem     if (++NumVisited > MaxNumVisitiedPaths)
708b167ada8SUsman Nadeem       return Res;
70902077da7SAlexey Zhikhartsev 
710c229f767SXChy     // Stop if we have reached the BB out of loop, since its successors have no
711c229f767SXChy     // impact on the DFA.
712b167ada8SUsman Nadeem     if (!SwitchOuterLoop->contains(BB))
713c229f767SXChy       return Res;
714c229f767SXChy 
71502077da7SAlexey Zhikhartsev     // Some blocks have multiple edges to the same successor, and this set
71602077da7SAlexey Zhikhartsev     // is used to prevent a duplicate path from being generated
71702077da7SAlexey Zhikhartsev     SmallSet<BasicBlock *, 4> Successors;
7183f7aea1aSNikita Popov     for (BasicBlock *Succ : successors(BB)) {
7193f7aea1aSNikita Popov       if (!Successors.insert(Succ).second)
72002077da7SAlexey Zhikhartsev         continue;
72102077da7SAlexey Zhikhartsev 
722b167ada8SUsman Nadeem       // Found a cycle through the final block.
723b167ada8SUsman Nadeem       if (Succ == ToBB) {
724b167ada8SUsman Nadeem         Res.push_back({BB, ToBB});
72502077da7SAlexey Zhikhartsev         continue;
72602077da7SAlexey Zhikhartsev       }
72702077da7SAlexey Zhikhartsev 
72802077da7SAlexey Zhikhartsev       // We have encountered a cycle, do not get caught in it
729380b8a60SNikita Popov       if (Visited.contains(Succ))
73002077da7SAlexey Zhikhartsev         continue;
73102077da7SAlexey Zhikhartsev 
732b167ada8SUsman Nadeem       auto *CurrLoop = LI->getLoopFor(BB);
733b167ada8SUsman Nadeem       // Unlikely to be beneficial.
734b167ada8SUsman Nadeem       if (Succ == CurrLoop->getHeader())
735b167ada8SUsman Nadeem         continue;
736b167ada8SUsman Nadeem       // Skip for now, revisit this condition later to see the impact on
737b167ada8SUsman Nadeem       // coverage and compile time.
738b167ada8SUsman Nadeem       if (LI->getLoopFor(Succ) != CurrLoop)
739b167ada8SUsman Nadeem         continue;
740b167ada8SUsman Nadeem 
741b167ada8SUsman Nadeem       PathsType SuccPaths = paths(Succ, ToBB, Visited, PathDepth + 1);
742b167ada8SUsman Nadeem       for (PathType &Path : SuccPaths) {
743b167ada8SUsman Nadeem         Path.push_front(BB);
744b167ada8SUsman Nadeem         Res.push_back(Path);
7458b0d7634SAlex Zhikhartsev         if (Res.size() >= MaxNumPaths) {
7468b0d7634SAlex Zhikhartsev           return Res;
7478b0d7634SAlex Zhikhartsev         }
74802077da7SAlexey Zhikhartsev       }
74902077da7SAlexey Zhikhartsev     }
75002077da7SAlexey Zhikhartsev     // This block could now be visited again from a different predecessor. Note
75102077da7SAlexey Zhikhartsev     // that this will result in exponential runtime. Subpaths could possibly be
75202077da7SAlexey Zhikhartsev     // cached but it takes a lot of memory to store them.
75302077da7SAlexey Zhikhartsev     Visited.erase(BB);
75402077da7SAlexey Zhikhartsev     return Res;
75502077da7SAlexey Zhikhartsev   }
75602077da7SAlexey Zhikhartsev 
757*5fb57131SUsman Nadeem   /// Walk the use-def chain and collect all the state-defining blocks and the
758*5fb57131SUsman Nadeem   /// PHI nodes in those blocks that define the state.
759b167ada8SUsman Nadeem   StateDefMap getStateDefMap() const {
76002077da7SAlexey Zhikhartsev     StateDefMap Res;
761*5fb57131SUsman Nadeem     PHINode *FirstDef = dyn_cast<PHINode>(Switch->getOperand(0));
762*5fb57131SUsman Nadeem     assert(FirstDef && "The first definition must be a phi.");
76302077da7SAlexey Zhikhartsev 
76402077da7SAlexey Zhikhartsev     SmallVector<PHINode *, 8> Stack;
765*5fb57131SUsman Nadeem     Stack.push_back(FirstDef);
76602077da7SAlexey Zhikhartsev     SmallSet<Value *, 16> SeenValues;
76702077da7SAlexey Zhikhartsev 
76802077da7SAlexey Zhikhartsev     while (!Stack.empty()) {
76984b07c9bSKazu Hirata       PHINode *CurPhi = Stack.pop_back_val();
77002077da7SAlexey Zhikhartsev 
77102077da7SAlexey Zhikhartsev       Res[CurPhi->getParent()] = CurPhi;
77202077da7SAlexey Zhikhartsev       SeenValues.insert(CurPhi);
77302077da7SAlexey Zhikhartsev 
7748b0d7634SAlex Zhikhartsev       for (BasicBlock *IncomingBB : CurPhi->blocks()) {
775*5fb57131SUsman Nadeem         PHINode *IncomingPhi =
776*5fb57131SUsman Nadeem             dyn_cast<PHINode>(CurPhi->getIncomingValueForBlock(IncomingBB));
777*5fb57131SUsman Nadeem         if (!IncomingPhi)
77802077da7SAlexey Zhikhartsev           continue;
779*5fb57131SUsman Nadeem         bool IsOutsideLoops = !SwitchOuterLoop->contains(IncomingBB);
780*5fb57131SUsman Nadeem         if (SeenValues.contains(IncomingPhi) || IsOutsideLoops)
781*5fb57131SUsman Nadeem           continue;
78202077da7SAlexey Zhikhartsev 
783*5fb57131SUsman Nadeem         Stack.push_back(IncomingPhi);
78402077da7SAlexey Zhikhartsev       }
78502077da7SAlexey Zhikhartsev     }
78602077da7SAlexey Zhikhartsev 
78702077da7SAlexey Zhikhartsev     return Res;
78802077da7SAlexey Zhikhartsev   }
78902077da7SAlexey Zhikhartsev 
790b167ada8SUsman Nadeem   unsigned NumVisited = 0;
79102077da7SAlexey Zhikhartsev   SwitchInst *Switch;
79202077da7SAlexey Zhikhartsev   BasicBlock *SwitchBlock;
79302077da7SAlexey Zhikhartsev   OptimizationRemarkEmitter *ORE;
79402077da7SAlexey Zhikhartsev   std::vector<ThreadingPath> TPaths;
795c229f767SXChy   LoopInfo *LI;
796b167ada8SUsman Nadeem   Loop *SwitchOuterLoop;
79702077da7SAlexey Zhikhartsev };
79802077da7SAlexey Zhikhartsev 
79902077da7SAlexey Zhikhartsev struct TransformDFA {
80002077da7SAlexey Zhikhartsev   TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT,
80102077da7SAlexey Zhikhartsev                AssumptionCache *AC, TargetTransformInfo *TTI,
80202077da7SAlexey Zhikhartsev                OptimizationRemarkEmitter *ORE,
80302077da7SAlexey Zhikhartsev                SmallPtrSet<const Value *, 32> EphValues)
80402077da7SAlexey Zhikhartsev       : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
80502077da7SAlexey Zhikhartsev         EphValues(EphValues) {}
80602077da7SAlexey Zhikhartsev 
80702077da7SAlexey Zhikhartsev   void run() {
80802077da7SAlexey Zhikhartsev     if (isLegalAndProfitableToTransform()) {
80902077da7SAlexey Zhikhartsev       createAllExitPaths();
81002077da7SAlexey Zhikhartsev       NumTransforms++;
81102077da7SAlexey Zhikhartsev     }
81202077da7SAlexey Zhikhartsev   }
81302077da7SAlexey Zhikhartsev 
81402077da7SAlexey Zhikhartsev private:
81502077da7SAlexey Zhikhartsev   /// This function performs both a legality check and profitability check at
81602077da7SAlexey Zhikhartsev   /// the same time since it is convenient to do so. It iterates through all
81702077da7SAlexey Zhikhartsev   /// blocks that will be cloned, and keeps track of the duplication cost. It
81802077da7SAlexey Zhikhartsev   /// also returns false if it is illegal to clone some required block.
81902077da7SAlexey Zhikhartsev   bool isLegalAndProfitableToTransform() {
82002077da7SAlexey Zhikhartsev     CodeMetrics Metrics;
82102077da7SAlexey Zhikhartsev     SwitchInst *Switch = SwitchPaths->getSwitchInst();
82202077da7SAlexey Zhikhartsev 
8232fba4694SXChy     // Don't thread switch without multiple successors.
8242fba4694SXChy     if (Switch->getNumSuccessors() <= 1)
8252fba4694SXChy       return false;
8262fba4694SXChy 
82702077da7SAlexey Zhikhartsev     // Note that DuplicateBlockMap is not being used as intended here. It is
82802077da7SAlexey Zhikhartsev     // just being used to ensure (BB, State) pairs are only counted once.
82902077da7SAlexey Zhikhartsev     DuplicateBlockMap DuplicateMap;
83002077da7SAlexey Zhikhartsev 
83102077da7SAlexey Zhikhartsev     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
83202077da7SAlexey Zhikhartsev       PathType PathBBs = TPath.getPath();
833019ffbf3SXChy       APInt NextState = TPath.getExitValue();
83402077da7SAlexey Zhikhartsev       const BasicBlock *Determinator = TPath.getDeterminatorBB();
83502077da7SAlexey Zhikhartsev 
83602077da7SAlexey Zhikhartsev       // Update Metrics for the Switch block, this is always cloned
83702077da7SAlexey Zhikhartsev       BasicBlock *BB = SwitchPaths->getSwitchBlock();
83802077da7SAlexey Zhikhartsev       BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
83902077da7SAlexey Zhikhartsev       if (!VisitedBB) {
84002077da7SAlexey Zhikhartsev         Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
84102077da7SAlexey Zhikhartsev         DuplicateMap[BB].push_back({BB, NextState});
84202077da7SAlexey Zhikhartsev       }
84302077da7SAlexey Zhikhartsev 
84402077da7SAlexey Zhikhartsev       // If the Switch block is the Determinator, then we can continue since
84502077da7SAlexey Zhikhartsev       // this is the only block that is cloned and we already counted for it.
84602077da7SAlexey Zhikhartsev       if (PathBBs.front() == Determinator)
84702077da7SAlexey Zhikhartsev         continue;
84802077da7SAlexey Zhikhartsev 
84902077da7SAlexey Zhikhartsev       // Otherwise update Metrics for all blocks that will be cloned. If any
85002077da7SAlexey Zhikhartsev       // block is already cloned and would be reused, don't double count it.
8515ea31555SKazu Hirata       auto DetIt = llvm::find(PathBBs, Determinator);
85202077da7SAlexey Zhikhartsev       for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
85302077da7SAlexey Zhikhartsev         BB = *BBIt;
85402077da7SAlexey Zhikhartsev         VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
85502077da7SAlexey Zhikhartsev         if (VisitedBB)
85602077da7SAlexey Zhikhartsev           continue;
85702077da7SAlexey Zhikhartsev         Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
85802077da7SAlexey Zhikhartsev         DuplicateMap[BB].push_back({BB, NextState});
85902077da7SAlexey Zhikhartsev       }
86002077da7SAlexey Zhikhartsev 
86102077da7SAlexey Zhikhartsev       if (Metrics.notDuplicatable) {
86202077da7SAlexey Zhikhartsev         LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
86302077da7SAlexey Zhikhartsev                           << "non-duplicatable instructions.\n");
86402077da7SAlexey Zhikhartsev         ORE->emit([&]() {
86502077da7SAlexey Zhikhartsev           return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst",
86602077da7SAlexey Zhikhartsev                                           Switch)
86702077da7SAlexey Zhikhartsev                  << "Contains non-duplicatable instructions.";
86802077da7SAlexey Zhikhartsev         });
86902077da7SAlexey Zhikhartsev         return false;
87002077da7SAlexey Zhikhartsev       }
87102077da7SAlexey Zhikhartsev 
872e0ac087fSSameer Sahasrabuddhe       // FIXME: Allow jump threading with controlled convergence.
873e0ac087fSSameer Sahasrabuddhe       if (Metrics.Convergence != ConvergenceKind::None) {
87402077da7SAlexey Zhikhartsev         LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
87502077da7SAlexey Zhikhartsev                           << "convergent instructions.\n");
87602077da7SAlexey Zhikhartsev         ORE->emit([&]() {
87702077da7SAlexey Zhikhartsev           return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
87802077da7SAlexey Zhikhartsev                  << "Contains convergent instructions.";
87902077da7SAlexey Zhikhartsev         });
88002077da7SAlexey Zhikhartsev         return false;
88102077da7SAlexey Zhikhartsev       }
882f85c5079SPhilip Reames 
883f85c5079SPhilip Reames       if (!Metrics.NumInsts.isValid()) {
884f85c5079SPhilip Reames         LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
885f85c5079SPhilip Reames                           << "instructions with invalid cost.\n");
886f85c5079SPhilip Reames         ORE->emit([&]() {
887f85c5079SPhilip Reames           return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
888f85c5079SPhilip Reames                  << "Contains instructions with invalid cost.";
889f85c5079SPhilip Reames         });
890f85c5079SPhilip Reames         return false;
891f85c5079SPhilip Reames       }
89202077da7SAlexey Zhikhartsev     }
89302077da7SAlexey Zhikhartsev 
8949c710ebbSDaniil Fukalov     InstructionCost DuplicationCost = 0;
89502077da7SAlexey Zhikhartsev 
89602077da7SAlexey Zhikhartsev     unsigned JumpTableSize = 0;
89702077da7SAlexey Zhikhartsev     TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr,
89802077da7SAlexey Zhikhartsev                                           nullptr);
89902077da7SAlexey Zhikhartsev     if (JumpTableSize == 0) {
90002077da7SAlexey Zhikhartsev       // Factor in the number of conditional branches reduced from jump
90102077da7SAlexey Zhikhartsev       // threading. Assume that lowering the switch block is implemented by
90202077da7SAlexey Zhikhartsev       // using binary search, hence the LogBase2().
90302077da7SAlexey Zhikhartsev       unsigned CondBranches =
90402077da7SAlexey Zhikhartsev           APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
9052fba4694SXChy       assert(CondBranches > 0 &&
9062fba4694SXChy              "The threaded switch must have multiple branches");
9079c710ebbSDaniil Fukalov       DuplicationCost = Metrics.NumInsts / CondBranches;
90802077da7SAlexey Zhikhartsev     } else {
90902077da7SAlexey Zhikhartsev       // Compared with jump tables, the DFA optimizer removes an indirect branch
91002077da7SAlexey Zhikhartsev       // on each loop iteration, thus making branch prediction more precise. The
91102077da7SAlexey Zhikhartsev       // more branch targets there are, the more likely it is for the branch
91202077da7SAlexey Zhikhartsev       // predictor to make a mistake, and the more benefit there is in the DFA
91302077da7SAlexey Zhikhartsev       // optimizer. Thus, the more branch targets there are, the lower is the
91402077da7SAlexey Zhikhartsev       // cost of the DFA opt.
9159c710ebbSDaniil Fukalov       DuplicationCost = Metrics.NumInsts / JumpTableSize;
91602077da7SAlexey Zhikhartsev     }
91702077da7SAlexey Zhikhartsev 
91802077da7SAlexey Zhikhartsev     LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
91902077da7SAlexey Zhikhartsev                       << SwitchPaths->getSwitchBlock()->getName()
92002077da7SAlexey Zhikhartsev                       << " is: " << DuplicationCost << "\n\n");
92102077da7SAlexey Zhikhartsev 
92202077da7SAlexey Zhikhartsev     if (DuplicationCost > CostThreshold) {
92302077da7SAlexey Zhikhartsev       LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the "
92402077da7SAlexey Zhikhartsev                         << "cost threshold.\n");
92502077da7SAlexey Zhikhartsev       ORE->emit([&]() {
92602077da7SAlexey Zhikhartsev         return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
92702077da7SAlexey Zhikhartsev                << "Duplication cost exceeds the cost threshold (cost="
92802077da7SAlexey Zhikhartsev                << ore::NV("Cost", DuplicationCost)
92902077da7SAlexey Zhikhartsev                << ", threshold=" << ore::NV("Threshold", CostThreshold) << ").";
93002077da7SAlexey Zhikhartsev       });
93102077da7SAlexey Zhikhartsev       return false;
93202077da7SAlexey Zhikhartsev     }
93302077da7SAlexey Zhikhartsev 
93402077da7SAlexey Zhikhartsev     ORE->emit([&]() {
93502077da7SAlexey Zhikhartsev       return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch)
93602077da7SAlexey Zhikhartsev              << "Switch statement jump-threaded.";
93702077da7SAlexey Zhikhartsev     });
93802077da7SAlexey Zhikhartsev 
93902077da7SAlexey Zhikhartsev     return true;
94002077da7SAlexey Zhikhartsev   }
94102077da7SAlexey Zhikhartsev 
94202077da7SAlexey Zhikhartsev   /// Transform each threading path to effectively jump thread the DFA.
94302077da7SAlexey Zhikhartsev   void createAllExitPaths() {
94402077da7SAlexey Zhikhartsev     DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager);
94502077da7SAlexey Zhikhartsev 
94602077da7SAlexey Zhikhartsev     // Move the switch block to the end of the path, since it will be duplicated
94702077da7SAlexey Zhikhartsev     BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
94802077da7SAlexey Zhikhartsev     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
94902077da7SAlexey Zhikhartsev       LLVM_DEBUG(dbgs() << TPath << "\n");
950b167ada8SUsman Nadeem       // TODO: Fix exit path creation logic so that we dont need this
951b167ada8SUsman Nadeem       // placeholder.
952b167ada8SUsman Nadeem       TPath.push_front(SwitchBlock);
95302077da7SAlexey Zhikhartsev     }
95402077da7SAlexey Zhikhartsev 
95502077da7SAlexey Zhikhartsev     // Transform the ThreadingPaths and keep track of the cloned values
95602077da7SAlexey Zhikhartsev     DuplicateBlockMap DuplicateMap;
95702077da7SAlexey Zhikhartsev     DefMap NewDefs;
95802077da7SAlexey Zhikhartsev 
95902077da7SAlexey Zhikhartsev     SmallSet<BasicBlock *, 16> BlocksToClean;
96002077da7SAlexey Zhikhartsev     for (BasicBlock *BB : successors(SwitchBlock))
96102077da7SAlexey Zhikhartsev       BlocksToClean.insert(BB);
96202077da7SAlexey Zhikhartsev 
96302077da7SAlexey Zhikhartsev     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
96402077da7SAlexey Zhikhartsev       createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
96502077da7SAlexey Zhikhartsev       NumPaths++;
96602077da7SAlexey Zhikhartsev     }
96702077da7SAlexey Zhikhartsev 
96802077da7SAlexey Zhikhartsev     // After all paths are cloned, now update the last successor of the cloned
96902077da7SAlexey Zhikhartsev     // path so it skips over the switch statement
97002077da7SAlexey Zhikhartsev     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
97102077da7SAlexey Zhikhartsev       updateLastSuccessor(TPath, DuplicateMap, &DTU);
97202077da7SAlexey Zhikhartsev 
97302077da7SAlexey Zhikhartsev     // For each instruction that was cloned and used outside, update its uses
97402077da7SAlexey Zhikhartsev     updateSSA(NewDefs);
97502077da7SAlexey Zhikhartsev 
97602077da7SAlexey Zhikhartsev     // Clean PHI Nodes for the newly created blocks
97702077da7SAlexey Zhikhartsev     for (BasicBlock *BB : BlocksToClean)
97802077da7SAlexey Zhikhartsev       cleanPhiNodes(BB);
97902077da7SAlexey Zhikhartsev   }
98002077da7SAlexey Zhikhartsev 
98102077da7SAlexey Zhikhartsev   /// For a specific ThreadingPath \p Path, create an exit path starting from
98202077da7SAlexey Zhikhartsev   /// the determinator block.
98302077da7SAlexey Zhikhartsev   ///
98402077da7SAlexey Zhikhartsev   /// To remember the correct destination, we have to duplicate blocks
98502077da7SAlexey Zhikhartsev   /// corresponding to each state. Also update the terminating instruction of
98602077da7SAlexey Zhikhartsev   /// the predecessors, and phis in the successor blocks.
98702077da7SAlexey Zhikhartsev   void createExitPath(DefMap &NewDefs, ThreadingPath &Path,
98802077da7SAlexey Zhikhartsev                       DuplicateBlockMap &DuplicateMap,
98902077da7SAlexey Zhikhartsev                       SmallSet<BasicBlock *, 16> &BlocksToClean,
99002077da7SAlexey Zhikhartsev                       DomTreeUpdater *DTU) {
991019ffbf3SXChy     APInt NextState = Path.getExitValue();
99202077da7SAlexey Zhikhartsev     const BasicBlock *Determinator = Path.getDeterminatorBB();
99302077da7SAlexey Zhikhartsev     PathType PathBBs = Path.getPath();
99402077da7SAlexey Zhikhartsev 
99502077da7SAlexey Zhikhartsev     // Don't select the placeholder block in front
99602077da7SAlexey Zhikhartsev     if (PathBBs.front() == Determinator)
99702077da7SAlexey Zhikhartsev       PathBBs.pop_front();
99802077da7SAlexey Zhikhartsev 
9995ea31555SKazu Hirata     auto DetIt = llvm::find(PathBBs, Determinator);
10002c0fc0f3SXChy     // When there is only one BB in PathBBs, the determinator takes itself as a
10012c0fc0f3SXChy     // direct predecessor.
10022c0fc0f3SXChy     BasicBlock *PrevBB = PathBBs.size() == 1 ? *DetIt : *std::prev(DetIt);
100302077da7SAlexey Zhikhartsev     for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
100402077da7SAlexey Zhikhartsev       BasicBlock *BB = *BBIt;
100502077da7SAlexey Zhikhartsev       BlocksToClean.insert(BB);
100602077da7SAlexey Zhikhartsev 
100702077da7SAlexey Zhikhartsev       // We already cloned BB for this NextState, now just update the branch
100802077da7SAlexey Zhikhartsev       // and continue.
100902077da7SAlexey Zhikhartsev       BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap);
101002077da7SAlexey Zhikhartsev       if (NextBB) {
101102077da7SAlexey Zhikhartsev         updatePredecessor(PrevBB, BB, NextBB, DTU);
101202077da7SAlexey Zhikhartsev         PrevBB = NextBB;
101302077da7SAlexey Zhikhartsev         continue;
101402077da7SAlexey Zhikhartsev       }
101502077da7SAlexey Zhikhartsev 
101602077da7SAlexey Zhikhartsev       // Clone the BB and update the successor of Prev to jump to the new block
101702077da7SAlexey Zhikhartsev       BasicBlock *NewBB = cloneBlockAndUpdatePredecessor(
101802077da7SAlexey Zhikhartsev           BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU);
101902077da7SAlexey Zhikhartsev       DuplicateMap[BB].push_back({NewBB, NextState});
102002077da7SAlexey Zhikhartsev       BlocksToClean.insert(NewBB);
102102077da7SAlexey Zhikhartsev       PrevBB = NewBB;
102202077da7SAlexey Zhikhartsev     }
102302077da7SAlexey Zhikhartsev   }
102402077da7SAlexey Zhikhartsev 
102502077da7SAlexey Zhikhartsev   /// Restore SSA form after cloning blocks.
102602077da7SAlexey Zhikhartsev   ///
102702077da7SAlexey Zhikhartsev   /// Each cloned block creates new defs for a variable, and the uses need to be
102802077da7SAlexey Zhikhartsev   /// updated to reflect this. The uses may be replaced with a cloned value, or
102902077da7SAlexey Zhikhartsev   /// some derived phi instruction. Note that all uses of a value defined in the
103002077da7SAlexey Zhikhartsev   /// same block were already remapped when cloning the block.
103102077da7SAlexey Zhikhartsev   void updateSSA(DefMap &NewDefs) {
103202077da7SAlexey Zhikhartsev     SSAUpdaterBulk SSAUpdate;
103302077da7SAlexey Zhikhartsev     SmallVector<Use *, 16> UsesToRename;
103402077da7SAlexey Zhikhartsev 
1035c83c4b58SKazu Hirata     for (const auto &KV : NewDefs) {
103602077da7SAlexey Zhikhartsev       Instruction *I = KV.first;
103702077da7SAlexey Zhikhartsev       BasicBlock *BB = I->getParent();
103802077da7SAlexey Zhikhartsev       std::vector<Instruction *> Cloned = KV.second;
103902077da7SAlexey Zhikhartsev 
104002077da7SAlexey Zhikhartsev       // Scan all uses of this instruction to see if it is used outside of its
104102077da7SAlexey Zhikhartsev       // block, and if so, record them in UsesToRename.
104202077da7SAlexey Zhikhartsev       for (Use &U : I->uses()) {
104302077da7SAlexey Zhikhartsev         Instruction *User = cast<Instruction>(U.getUser());
104402077da7SAlexey Zhikhartsev         if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
104502077da7SAlexey Zhikhartsev           if (UserPN->getIncomingBlock(U) == BB)
104602077da7SAlexey Zhikhartsev             continue;
104702077da7SAlexey Zhikhartsev         } else if (User->getParent() == BB) {
104802077da7SAlexey Zhikhartsev           continue;
104902077da7SAlexey Zhikhartsev         }
105002077da7SAlexey Zhikhartsev 
105102077da7SAlexey Zhikhartsev         UsesToRename.push_back(&U);
105202077da7SAlexey Zhikhartsev       }
105302077da7SAlexey Zhikhartsev 
105402077da7SAlexey Zhikhartsev       // If there are no uses outside the block, we're done with this
105502077da7SAlexey Zhikhartsev       // instruction.
105602077da7SAlexey Zhikhartsev       if (UsesToRename.empty())
105702077da7SAlexey Zhikhartsev         continue;
105802077da7SAlexey Zhikhartsev       LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I
105902077da7SAlexey Zhikhartsev                         << "\n");
106002077da7SAlexey Zhikhartsev 
106102077da7SAlexey Zhikhartsev       // We found a use of I outside of BB.  Rename all uses of I that are
106202077da7SAlexey Zhikhartsev       // outside its block to be uses of the appropriate PHI node etc.  See
106302077da7SAlexey Zhikhartsev       // ValuesInBlocks with the values we know.
106402077da7SAlexey Zhikhartsev       unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType());
106502077da7SAlexey Zhikhartsev       SSAUpdate.AddAvailableValue(VarNum, BB, I);
106602077da7SAlexey Zhikhartsev       for (Instruction *New : Cloned)
106702077da7SAlexey Zhikhartsev         SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New);
106802077da7SAlexey Zhikhartsev 
106902077da7SAlexey Zhikhartsev       while (!UsesToRename.empty())
107002077da7SAlexey Zhikhartsev         SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val());
107102077da7SAlexey Zhikhartsev 
107202077da7SAlexey Zhikhartsev       LLVM_DEBUG(dbgs() << "\n");
107302077da7SAlexey Zhikhartsev     }
107402077da7SAlexey Zhikhartsev     // SSAUpdater handles phi placement and renaming uses with the appropriate
107502077da7SAlexey Zhikhartsev     // value.
107602077da7SAlexey Zhikhartsev     SSAUpdate.RewriteAllUses(DT);
107702077da7SAlexey Zhikhartsev   }
107802077da7SAlexey Zhikhartsev 
107902077da7SAlexey Zhikhartsev   /// Clones a basic block, and adds it to the CFG.
108002077da7SAlexey Zhikhartsev   ///
108102077da7SAlexey Zhikhartsev   /// This function also includes updating phi nodes in the successors of the
108202077da7SAlexey Zhikhartsev   /// BB, and remapping uses that were defined locally in the cloned BB.
108302077da7SAlexey Zhikhartsev   BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB,
1084019ffbf3SXChy                                              const APInt &NextState,
108502077da7SAlexey Zhikhartsev                                              DuplicateBlockMap &DuplicateMap,
108602077da7SAlexey Zhikhartsev                                              DefMap &NewDefs,
108702077da7SAlexey Zhikhartsev                                              DomTreeUpdater *DTU) {
108802077da7SAlexey Zhikhartsev     ValueToValueMapTy VMap;
108902077da7SAlexey Zhikhartsev     BasicBlock *NewBB = CloneBasicBlock(
1090019ffbf3SXChy         BB, VMap, ".jt" + std::to_string(NextState.getLimitedValue()),
1091019ffbf3SXChy         BB->getParent());
109202077da7SAlexey Zhikhartsev     NewBB->moveAfter(BB);
109302077da7SAlexey Zhikhartsev     NumCloned++;
109402077da7SAlexey Zhikhartsev 
109502077da7SAlexey Zhikhartsev     for (Instruction &I : *NewBB) {
109602077da7SAlexey Zhikhartsev       // Do not remap operands of PHINode in case a definition in BB is an
109702077da7SAlexey Zhikhartsev       // incoming value to a phi in the same block. This incoming value will
109802077da7SAlexey Zhikhartsev       // be renamed later while restoring SSA.
109902077da7SAlexey Zhikhartsev       if (isa<PHINode>(&I))
110002077da7SAlexey Zhikhartsev         continue;
110102077da7SAlexey Zhikhartsev       RemapInstruction(&I, VMap,
110202077da7SAlexey Zhikhartsev                        RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);
110302077da7SAlexey Zhikhartsev       if (AssumeInst *II = dyn_cast<AssumeInst>(&I))
110402077da7SAlexey Zhikhartsev         AC->registerAssumption(II);
110502077da7SAlexey Zhikhartsev     }
110602077da7SAlexey Zhikhartsev 
110702077da7SAlexey Zhikhartsev     updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap);
110802077da7SAlexey Zhikhartsev     updatePredecessor(PrevBB, BB, NewBB, DTU);
110902077da7SAlexey Zhikhartsev     updateDefMap(NewDefs, VMap);
111002077da7SAlexey Zhikhartsev 
111102077da7SAlexey Zhikhartsev     // Add all successors to the DominatorTree
111202077da7SAlexey Zhikhartsev     SmallPtrSet<BasicBlock *, 4> SuccSet;
111302077da7SAlexey Zhikhartsev     for (auto *SuccBB : successors(NewBB)) {
111402077da7SAlexey Zhikhartsev       if (SuccSet.insert(SuccBB).second)
111502077da7SAlexey Zhikhartsev         DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}});
111602077da7SAlexey Zhikhartsev     }
111702077da7SAlexey Zhikhartsev     SuccSet.clear();
111802077da7SAlexey Zhikhartsev     return NewBB;
111902077da7SAlexey Zhikhartsev   }
112002077da7SAlexey Zhikhartsev 
112102077da7SAlexey Zhikhartsev   /// Update the phi nodes in BB's successors.
112202077da7SAlexey Zhikhartsev   ///
112302077da7SAlexey Zhikhartsev   /// This means creating a new incoming value from NewBB with the new
112402077da7SAlexey Zhikhartsev   /// instruction wherever there is an incoming value from BB.
112502077da7SAlexey Zhikhartsev   void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB,
1126019ffbf3SXChy                            const APInt &NextState, ValueToValueMapTy &VMap,
112702077da7SAlexey Zhikhartsev                            DuplicateBlockMap &DuplicateMap) {
112802077da7SAlexey Zhikhartsev     std::vector<BasicBlock *> BlocksToUpdate;
112902077da7SAlexey Zhikhartsev 
113002077da7SAlexey Zhikhartsev     // If BB is the last block in the path, we can simply update the one case
113102077da7SAlexey Zhikhartsev     // successor that will be reached.
113202077da7SAlexey Zhikhartsev     if (BB == SwitchPaths->getSwitchBlock()) {
113302077da7SAlexey Zhikhartsev       SwitchInst *Switch = SwitchPaths->getSwitchInst();
113402077da7SAlexey Zhikhartsev       BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
113502077da7SAlexey Zhikhartsev       BlocksToUpdate.push_back(NextCase);
113602077da7SAlexey Zhikhartsev       BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap);
113702077da7SAlexey Zhikhartsev       if (ClonedSucc)
113802077da7SAlexey Zhikhartsev         BlocksToUpdate.push_back(ClonedSucc);
113902077da7SAlexey Zhikhartsev     }
114002077da7SAlexey Zhikhartsev     // Otherwise update phis in all successors.
114102077da7SAlexey Zhikhartsev     else {
114202077da7SAlexey Zhikhartsev       for (BasicBlock *Succ : successors(BB)) {
114302077da7SAlexey Zhikhartsev         BlocksToUpdate.push_back(Succ);
114402077da7SAlexey Zhikhartsev 
114502077da7SAlexey Zhikhartsev         // Check if a successor has already been cloned for the particular exit
114602077da7SAlexey Zhikhartsev         // value. In this case if a successor was already cloned, the phi nodes
114702077da7SAlexey Zhikhartsev         // in the cloned block should be updated directly.
114802077da7SAlexey Zhikhartsev         BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap);
114902077da7SAlexey Zhikhartsev         if (ClonedSucc)
115002077da7SAlexey Zhikhartsev           BlocksToUpdate.push_back(ClonedSucc);
115102077da7SAlexey Zhikhartsev       }
115202077da7SAlexey Zhikhartsev     }
115302077da7SAlexey Zhikhartsev 
115402077da7SAlexey Zhikhartsev     // If there is a phi with an incoming value from BB, create a new incoming
115502077da7SAlexey Zhikhartsev     // value for the new predecessor ClonedBB. The value will either be the same
115602077da7SAlexey Zhikhartsev     // value from BB or a cloned value.
115702077da7SAlexey Zhikhartsev     for (BasicBlock *Succ : BlocksToUpdate) {
115802077da7SAlexey Zhikhartsev       for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II);
115902077da7SAlexey Zhikhartsev            ++II) {
116002077da7SAlexey Zhikhartsev         Value *Incoming = Phi->getIncomingValueForBlock(BB);
116102077da7SAlexey Zhikhartsev         if (Incoming) {
116202077da7SAlexey Zhikhartsev           if (isa<Constant>(Incoming)) {
116302077da7SAlexey Zhikhartsev             Phi->addIncoming(Incoming, ClonedBB);
116402077da7SAlexey Zhikhartsev             continue;
116502077da7SAlexey Zhikhartsev           }
116602077da7SAlexey Zhikhartsev           Value *ClonedVal = VMap[Incoming];
116702077da7SAlexey Zhikhartsev           if (ClonedVal)
116802077da7SAlexey Zhikhartsev             Phi->addIncoming(ClonedVal, ClonedBB);
116902077da7SAlexey Zhikhartsev           else
117002077da7SAlexey Zhikhartsev             Phi->addIncoming(Incoming, ClonedBB);
117102077da7SAlexey Zhikhartsev         }
117202077da7SAlexey Zhikhartsev       }
117302077da7SAlexey Zhikhartsev     }
117402077da7SAlexey Zhikhartsev   }
117502077da7SAlexey Zhikhartsev 
117602077da7SAlexey Zhikhartsev   /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all
117702077da7SAlexey Zhikhartsev   /// other successors are kept as well.
117802077da7SAlexey Zhikhartsev   void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB,
117902077da7SAlexey Zhikhartsev                          BasicBlock *NewBB, DomTreeUpdater *DTU) {
118002077da7SAlexey Zhikhartsev     // When a path is reused, there is a chance that predecessors were already
118102077da7SAlexey Zhikhartsev     // updated before. Check if the predecessor needs to be updated first.
118202077da7SAlexey Zhikhartsev     if (!isPredecessor(OldBB, PrevBB))
118302077da7SAlexey Zhikhartsev       return;
118402077da7SAlexey Zhikhartsev 
118502077da7SAlexey Zhikhartsev     Instruction *PrevTerm = PrevBB->getTerminator();
118602077da7SAlexey Zhikhartsev     for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) {
118702077da7SAlexey Zhikhartsev       if (PrevTerm->getSuccessor(Idx) == OldBB) {
118802077da7SAlexey Zhikhartsev         OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true);
118902077da7SAlexey Zhikhartsev         PrevTerm->setSuccessor(Idx, NewBB);
119002077da7SAlexey Zhikhartsev       }
119102077da7SAlexey Zhikhartsev     }
119202077da7SAlexey Zhikhartsev     DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB},
119302077da7SAlexey Zhikhartsev                        {DominatorTree::Insert, PrevBB, NewBB}});
119402077da7SAlexey Zhikhartsev   }
119502077da7SAlexey Zhikhartsev 
119602077da7SAlexey Zhikhartsev   /// Add new value mappings to the DefMap to keep track of all new definitions
119702077da7SAlexey Zhikhartsev   /// for a particular instruction. These will be used while updating SSA form.
119802077da7SAlexey Zhikhartsev   void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {
11999d555b4aSOlle Fredriksson     SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector;
12009d555b4aSOlle Fredriksson     NewDefsVector.reserve(VMap.size());
12019d555b4aSOlle Fredriksson 
120202077da7SAlexey Zhikhartsev     for (auto Entry : VMap) {
120302077da7SAlexey Zhikhartsev       Instruction *Inst =
120402077da7SAlexey Zhikhartsev           dyn_cast<Instruction>(const_cast<Value *>(Entry.first));
120502077da7SAlexey Zhikhartsev       if (!Inst || !Entry.second || isa<BranchInst>(Inst) ||
120602077da7SAlexey Zhikhartsev           isa<SwitchInst>(Inst)) {
120702077da7SAlexey Zhikhartsev         continue;
120802077da7SAlexey Zhikhartsev       }
120902077da7SAlexey Zhikhartsev 
121002077da7SAlexey Zhikhartsev       Instruction *Cloned = dyn_cast<Instruction>(Entry.second);
121102077da7SAlexey Zhikhartsev       if (!Cloned)
121202077da7SAlexey Zhikhartsev         continue;
121302077da7SAlexey Zhikhartsev 
12149d555b4aSOlle Fredriksson       NewDefsVector.push_back({Inst, Cloned});
121502077da7SAlexey Zhikhartsev     }
12169d555b4aSOlle Fredriksson 
12179d555b4aSOlle Fredriksson     // Sort the defs to get deterministic insertion order into NewDefs.
12189d555b4aSOlle Fredriksson     sort(NewDefsVector, [](const auto &LHS, const auto &RHS) {
12199d555b4aSOlle Fredriksson       if (LHS.first == RHS.first)
12209d555b4aSOlle Fredriksson         return LHS.second->comesBefore(RHS.second);
12219d555b4aSOlle Fredriksson       return LHS.first->comesBefore(RHS.first);
12229d555b4aSOlle Fredriksson     });
12239d555b4aSOlle Fredriksson 
12249d555b4aSOlle Fredriksson     for (const auto &KV : NewDefsVector)
12259d555b4aSOlle Fredriksson       NewDefs[KV.first].push_back(KV.second);
122602077da7SAlexey Zhikhartsev   }
122702077da7SAlexey Zhikhartsev 
122802077da7SAlexey Zhikhartsev   /// Update the last branch of a particular cloned path to point to the correct
122902077da7SAlexey Zhikhartsev   /// case successor.
123002077da7SAlexey Zhikhartsev   ///
123102077da7SAlexey Zhikhartsev   /// Note that this is an optional step and would have been done in later
123202077da7SAlexey Zhikhartsev   /// optimizations, but it makes the CFG significantly easier to work with.
123302077da7SAlexey Zhikhartsev   void updateLastSuccessor(ThreadingPath &TPath,
123402077da7SAlexey Zhikhartsev                            DuplicateBlockMap &DuplicateMap,
123502077da7SAlexey Zhikhartsev                            DomTreeUpdater *DTU) {
1236019ffbf3SXChy     APInt NextState = TPath.getExitValue();
123702077da7SAlexey Zhikhartsev     BasicBlock *BB = TPath.getPath().back();
123802077da7SAlexey Zhikhartsev     BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap);
123902077da7SAlexey Zhikhartsev 
124002077da7SAlexey Zhikhartsev     // Note multiple paths can end at the same block so check that it is not
124102077da7SAlexey Zhikhartsev     // updated yet
124202077da7SAlexey Zhikhartsev     if (!isa<SwitchInst>(LastBlock->getTerminator()))
124302077da7SAlexey Zhikhartsev       return;
124402077da7SAlexey Zhikhartsev     SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator());
124502077da7SAlexey Zhikhartsev     BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
124602077da7SAlexey Zhikhartsev 
124702077da7SAlexey Zhikhartsev     std::vector<DominatorTree::UpdateType> DTUpdates;
124802077da7SAlexey Zhikhartsev     SmallPtrSet<BasicBlock *, 4> SuccSet;
124902077da7SAlexey Zhikhartsev     for (BasicBlock *Succ : successors(LastBlock)) {
125002077da7SAlexey Zhikhartsev       if (Succ != NextCase && SuccSet.insert(Succ).second)
125102077da7SAlexey Zhikhartsev         DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ});
125202077da7SAlexey Zhikhartsev     }
125302077da7SAlexey Zhikhartsev 
125402077da7SAlexey Zhikhartsev     Switch->eraseFromParent();
125502077da7SAlexey Zhikhartsev     BranchInst::Create(NextCase, LastBlock);
125602077da7SAlexey Zhikhartsev 
125702077da7SAlexey Zhikhartsev     DTU->applyUpdates(DTUpdates);
125802077da7SAlexey Zhikhartsev   }
125902077da7SAlexey Zhikhartsev 
126002077da7SAlexey Zhikhartsev   /// After cloning blocks, some of the phi nodes have extra incoming values
126102077da7SAlexey Zhikhartsev   /// that are no longer used. This function removes them.
126202077da7SAlexey Zhikhartsev   void cleanPhiNodes(BasicBlock *BB) {
126302077da7SAlexey Zhikhartsev     // If BB is no longer reachable, remove any remaining phi nodes
126402077da7SAlexey Zhikhartsev     if (pred_empty(BB)) {
126502077da7SAlexey Zhikhartsev       std::vector<PHINode *> PhiToRemove;
126602077da7SAlexey Zhikhartsev       for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
126702077da7SAlexey Zhikhartsev         PhiToRemove.push_back(Phi);
126802077da7SAlexey Zhikhartsev       }
126902077da7SAlexey Zhikhartsev       for (PHINode *PN : PhiToRemove) {
127053dc0f10SNuno Lopes         PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
127102077da7SAlexey Zhikhartsev         PN->eraseFromParent();
127202077da7SAlexey Zhikhartsev       }
127302077da7SAlexey Zhikhartsev       return;
127402077da7SAlexey Zhikhartsev     }
127502077da7SAlexey Zhikhartsev 
127602077da7SAlexey Zhikhartsev     // Remove any incoming values that come from an invalid predecessor
127702077da7SAlexey Zhikhartsev     for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
127802077da7SAlexey Zhikhartsev       std::vector<BasicBlock *> BlocksToRemove;
127902077da7SAlexey Zhikhartsev       for (BasicBlock *IncomingBB : Phi->blocks()) {
128002077da7SAlexey Zhikhartsev         if (!isPredecessor(BB, IncomingBB))
128102077da7SAlexey Zhikhartsev           BlocksToRemove.push_back(IncomingBB);
128202077da7SAlexey Zhikhartsev       }
128302077da7SAlexey Zhikhartsev       for (BasicBlock *BB : BlocksToRemove)
128402077da7SAlexey Zhikhartsev         Phi->removeIncomingValue(BB);
128502077da7SAlexey Zhikhartsev     }
128602077da7SAlexey Zhikhartsev   }
128702077da7SAlexey Zhikhartsev 
128802077da7SAlexey Zhikhartsev   /// Checks if BB was already cloned for a particular next state value. If it
128902077da7SAlexey Zhikhartsev   /// was then it returns this cloned block, and otherwise null.
1290019ffbf3SXChy   BasicBlock *getClonedBB(BasicBlock *BB, const APInt &NextState,
129102077da7SAlexey Zhikhartsev                           DuplicateBlockMap &DuplicateMap) {
129202077da7SAlexey Zhikhartsev     CloneList ClonedBBs = DuplicateMap[BB];
129302077da7SAlexey Zhikhartsev 
129402077da7SAlexey Zhikhartsev     // Find an entry in the CloneList with this NextState. If it exists then
129502077da7SAlexey Zhikhartsev     // return the corresponding BB
129602077da7SAlexey Zhikhartsev     auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) {
129702077da7SAlexey Zhikhartsev       return C.State == NextState;
129802077da7SAlexey Zhikhartsev     });
129902077da7SAlexey Zhikhartsev     return It != ClonedBBs.end() ? (*It).BB : nullptr;
130002077da7SAlexey Zhikhartsev   }
130102077da7SAlexey Zhikhartsev 
130202077da7SAlexey Zhikhartsev   /// Helper to get the successor corresponding to a particular case value for
130302077da7SAlexey Zhikhartsev   /// a switch statement.
1304019ffbf3SXChy   BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) {
130502077da7SAlexey Zhikhartsev     BasicBlock *NextCase = nullptr;
130602077da7SAlexey Zhikhartsev     for (auto Case : Switch->cases()) {
1307019ffbf3SXChy       if (Case.getCaseValue()->getValue() == NextState) {
130802077da7SAlexey Zhikhartsev         NextCase = Case.getCaseSuccessor();
130902077da7SAlexey Zhikhartsev         break;
131002077da7SAlexey Zhikhartsev       }
131102077da7SAlexey Zhikhartsev     }
131202077da7SAlexey Zhikhartsev     if (!NextCase)
131302077da7SAlexey Zhikhartsev       NextCase = Switch->getDefaultDest();
131402077da7SAlexey Zhikhartsev     return NextCase;
131502077da7SAlexey Zhikhartsev   }
131602077da7SAlexey Zhikhartsev 
131702077da7SAlexey Zhikhartsev   /// Returns true if IncomingBB is a predecessor of BB.
131802077da7SAlexey Zhikhartsev   bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
1319f83a88a1SKazu Hirata     return llvm::is_contained(predecessors(BB), IncomingBB);
132002077da7SAlexey Zhikhartsev   }
132102077da7SAlexey Zhikhartsev 
132202077da7SAlexey Zhikhartsev   AllSwitchPaths *SwitchPaths;
132302077da7SAlexey Zhikhartsev   DominatorTree *DT;
132402077da7SAlexey Zhikhartsev   AssumptionCache *AC;
132502077da7SAlexey Zhikhartsev   TargetTransformInfo *TTI;
132602077da7SAlexey Zhikhartsev   OptimizationRemarkEmitter *ORE;
132702077da7SAlexey Zhikhartsev   SmallPtrSet<const Value *, 32> EphValues;
132802077da7SAlexey Zhikhartsev   std::vector<ThreadingPath> TPaths;
132902077da7SAlexey Zhikhartsev };
133002077da7SAlexey Zhikhartsev 
133102077da7SAlexey Zhikhartsev bool DFAJumpThreading::run(Function &F) {
133202077da7SAlexey Zhikhartsev   LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");
133302077da7SAlexey Zhikhartsev 
133402077da7SAlexey Zhikhartsev   if (F.hasOptSize()) {
133502077da7SAlexey Zhikhartsev     LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n");
133602077da7SAlexey Zhikhartsev     return false;
133702077da7SAlexey Zhikhartsev   }
133802077da7SAlexey Zhikhartsev 
133902077da7SAlexey Zhikhartsev   if (ClViewCfgBefore)
134002077da7SAlexey Zhikhartsev     F.viewCFG();
134102077da7SAlexey Zhikhartsev 
134202077da7SAlexey Zhikhartsev   SmallVector<AllSwitchPaths, 2> ThreadableLoops;
134302077da7SAlexey Zhikhartsev   bool MadeChanges = false;
1344c229f767SXChy   LoopInfoBroken = false;
134502077da7SAlexey Zhikhartsev 
134602077da7SAlexey Zhikhartsev   for (BasicBlock &BB : F) {
134702077da7SAlexey Zhikhartsev     auto *SI = dyn_cast<SwitchInst>(BB.getTerminator());
134802077da7SAlexey Zhikhartsev     if (!SI)
134902077da7SAlexey Zhikhartsev       continue;
135002077da7SAlexey Zhikhartsev 
135102077da7SAlexey Zhikhartsev     LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
13528b0d7634SAlex Zhikhartsev                       << " is a candidate\n");
13536b53ada6SXChy     MainSwitch Switch(SI, LI, ORE);
135402077da7SAlexey Zhikhartsev 
1355b167ada8SUsman Nadeem     if (!Switch.getInstr()) {
1356b167ada8SUsman Nadeem       LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is not a "
1357b167ada8SUsman Nadeem                         << "candidate for jump threading\n");
135802077da7SAlexey Zhikhartsev       continue;
1359b167ada8SUsman Nadeem     }
136002077da7SAlexey Zhikhartsev 
136102077da7SAlexey Zhikhartsev     LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a "
136202077da7SAlexey Zhikhartsev                       << "candidate for jump threading\n");
136302077da7SAlexey Zhikhartsev     LLVM_DEBUG(SI->dump());
136402077da7SAlexey Zhikhartsev 
136502077da7SAlexey Zhikhartsev     unfoldSelectInstrs(DT, Switch.getSelectInsts());
136602077da7SAlexey Zhikhartsev     if (!Switch.getSelectInsts().empty())
136702077da7SAlexey Zhikhartsev       MadeChanges = true;
136802077da7SAlexey Zhikhartsev 
1369b167ada8SUsman Nadeem     AllSwitchPaths SwitchPaths(&Switch, ORE, LI,
1370b167ada8SUsman Nadeem                                LI->getLoopFor(&BB)->getOutermostLoop());
137102077da7SAlexey Zhikhartsev     SwitchPaths.run();
137202077da7SAlexey Zhikhartsev 
137302077da7SAlexey Zhikhartsev     if (SwitchPaths.getNumThreadingPaths() > 0) {
137402077da7SAlexey Zhikhartsev       ThreadableLoops.push_back(SwitchPaths);
137502077da7SAlexey Zhikhartsev 
137602077da7SAlexey Zhikhartsev       // For the time being limit this optimization to occurring once in a
137702077da7SAlexey Zhikhartsev       // function since it can change the CFG significantly. This is not a
137802077da7SAlexey Zhikhartsev       // strict requirement but it can cause buggy behavior if there is an
137902077da7SAlexey Zhikhartsev       // overlap of blocks in different opportunities. There is a lot of room to
138002077da7SAlexey Zhikhartsev       // experiment with catching more opportunities here.
1381c229f767SXChy       // NOTE: To release this contraint, we must handle LoopInfo invalidation
138202077da7SAlexey Zhikhartsev       break;
138302077da7SAlexey Zhikhartsev     }
138402077da7SAlexey Zhikhartsev   }
138502077da7SAlexey Zhikhartsev 
1386c229f767SXChy #ifdef NDEBUG
1387c229f767SXChy   LI->verify(*DT);
1388c229f767SXChy #endif
1389c229f767SXChy 
139002077da7SAlexey Zhikhartsev   SmallPtrSet<const Value *, 32> EphValues;
139102077da7SAlexey Zhikhartsev   if (ThreadableLoops.size() > 0)
139202077da7SAlexey Zhikhartsev     CodeMetrics::collectEphemeralValues(&F, AC, EphValues);
139302077da7SAlexey Zhikhartsev 
139402077da7SAlexey Zhikhartsev   for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
139502077da7SAlexey Zhikhartsev     TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
139602077da7SAlexey Zhikhartsev     Transform.run();
139702077da7SAlexey Zhikhartsev     MadeChanges = true;
1398c229f767SXChy     LoopInfoBroken = true;
139902077da7SAlexey Zhikhartsev   }
140002077da7SAlexey Zhikhartsev 
140102077da7SAlexey Zhikhartsev #ifdef EXPENSIVE_CHECKS
140202077da7SAlexey Zhikhartsev   assert(DT->verify(DominatorTree::VerificationLevel::Full));
140302077da7SAlexey Zhikhartsev   verifyFunction(F, &dbgs());
140402077da7SAlexey Zhikhartsev #endif
140502077da7SAlexey Zhikhartsev 
140602077da7SAlexey Zhikhartsev   return MadeChanges;
140702077da7SAlexey Zhikhartsev }
140802077da7SAlexey Zhikhartsev 
140902077da7SAlexey Zhikhartsev } // end anonymous namespace
141002077da7SAlexey Zhikhartsev 
141102077da7SAlexey Zhikhartsev /// Integrate with the new Pass Manager
141202077da7SAlexey Zhikhartsev PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
141302077da7SAlexey Zhikhartsev                                             FunctionAnalysisManager &AM) {
141402077da7SAlexey Zhikhartsev   AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
141502077da7SAlexey Zhikhartsev   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
14166b53ada6SXChy   LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
141702077da7SAlexey Zhikhartsev   TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
141802077da7SAlexey Zhikhartsev   OptimizationRemarkEmitter ORE(&F);
1419c229f767SXChy   DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE);
1420c229f767SXChy   if (!ThreadImpl.run(F))
142102077da7SAlexey Zhikhartsev     return PreservedAnalyses::all();
142202077da7SAlexey Zhikhartsev 
142302077da7SAlexey Zhikhartsev   PreservedAnalyses PA;
142402077da7SAlexey Zhikhartsev   PA.preserve<DominatorTreeAnalysis>();
1425c229f767SXChy   if (!ThreadImpl.LoopInfoBroken)
1426c229f767SXChy     PA.preserve<LoopAnalysis>();
142702077da7SAlexey Zhikhartsev   return PA;
142802077da7SAlexey Zhikhartsev }
1429