xref: /llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp (revision d4a38c8ff5c993e14c42895b51a47272fb03a857)
102077da7SAlexey Zhikhartsev //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//
202077da7SAlexey Zhikhartsev //
3c874dd53SChristopher Di Bella // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4c874dd53SChristopher Di Bella // See https://llvm.org/LICENSE.txt for license information.
5c874dd53SChristopher Di Bella // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
602077da7SAlexey Zhikhartsev //
702077da7SAlexey Zhikhartsev //===----------------------------------------------------------------------===//
802077da7SAlexey Zhikhartsev //
902077da7SAlexey Zhikhartsev // Transform each threading path to effectively jump thread the DFA. For
1002077da7SAlexey Zhikhartsev // example, the CFG below could be transformed as follows, where the cloned
1102077da7SAlexey Zhikhartsev // blocks unconditionally branch to the next correct case based on what is
1202077da7SAlexey Zhikhartsev // identified in the analysis.
1302077da7SAlexey Zhikhartsev //
1402077da7SAlexey Zhikhartsev //          sw.bb                        sw.bb
1502077da7SAlexey Zhikhartsev //        /   |   \                    /   |   \
1602077da7SAlexey Zhikhartsev //   case1  case2  case3          case1  case2  case3
1702077da7SAlexey Zhikhartsev //        \   |   /                 |      |      |
1802077da7SAlexey Zhikhartsev //       determinator            det.2   det.3  det.1
1902077da7SAlexey Zhikhartsev //        br sw.bb                /        |        \
2002077da7SAlexey Zhikhartsev //                          sw.bb.2     sw.bb.3     sw.bb.1
2102077da7SAlexey Zhikhartsev //                           br case2    br case3    br case1§
2202077da7SAlexey Zhikhartsev //
2302077da7SAlexey Zhikhartsev // Definitions and Terminology:
2402077da7SAlexey Zhikhartsev //
2502077da7SAlexey Zhikhartsev // * Threading path:
2602077da7SAlexey Zhikhartsev //   a list of basic blocks, the exit state, and the block that determines
2702077da7SAlexey Zhikhartsev //   the next state, for which the following notation will be used:
2802077da7SAlexey Zhikhartsev //   < path of BBs that form a cycle > [ state, determinator ]
2902077da7SAlexey Zhikhartsev //
3002077da7SAlexey Zhikhartsev // * Predictable switch:
3102077da7SAlexey Zhikhartsev //   The switch variable is always a known constant so that all conditional
3202077da7SAlexey Zhikhartsev //   jumps based on switch variable can be converted to unconditional jump.
3302077da7SAlexey Zhikhartsev //
3402077da7SAlexey Zhikhartsev // * Determinator:
3502077da7SAlexey Zhikhartsev //   The basic block that determines the next state of the DFA.
3602077da7SAlexey Zhikhartsev //
3702077da7SAlexey Zhikhartsev // Representing the optimization in C-like pseudocode: the code pattern on the
3802077da7SAlexey Zhikhartsev // left could functionally be transformed to the right pattern if the switch
3902077da7SAlexey Zhikhartsev // condition is predictable.
4002077da7SAlexey Zhikhartsev //
4102077da7SAlexey Zhikhartsev //  X = A                       goto A
4202077da7SAlexey Zhikhartsev //  for (...)                   A:
4302077da7SAlexey Zhikhartsev //    switch (X)                  ...
4402077da7SAlexey Zhikhartsev //      case A                    goto B
4502077da7SAlexey Zhikhartsev //        X = B                 B:
4602077da7SAlexey Zhikhartsev //      case B                    ...
4702077da7SAlexey Zhikhartsev //        X = C                   goto C
4802077da7SAlexey Zhikhartsev //
4902077da7SAlexey Zhikhartsev // The pass first checks that switch variable X is decided by the control flow
5002077da7SAlexey Zhikhartsev // path taken in the loop; for example, in case B, the next value of X is
5102077da7SAlexey Zhikhartsev // decided to be C. It then enumerates through all paths in the loop and labels
5202077da7SAlexey Zhikhartsev // the basic blocks where the next state is decided.
5302077da7SAlexey Zhikhartsev //
5402077da7SAlexey Zhikhartsev // Using this information it creates new paths that unconditionally branch to
5502077da7SAlexey Zhikhartsev // the next case. This involves cloning code, so it only gets triggered if the
5602077da7SAlexey Zhikhartsev // amount of code duplicated is below a threshold.
5702077da7SAlexey Zhikhartsev //
5802077da7SAlexey Zhikhartsev //===----------------------------------------------------------------------===//
5902077da7SAlexey Zhikhartsev 
6002077da7SAlexey Zhikhartsev #include "llvm/Transforms/Scalar/DFAJumpThreading.h"
6102077da7SAlexey Zhikhartsev #include "llvm/ADT/APInt.h"
6202077da7SAlexey Zhikhartsev #include "llvm/ADT/DenseMap.h"
6302077da7SAlexey Zhikhartsev #include "llvm/ADT/SmallSet.h"
6402077da7SAlexey Zhikhartsev #include "llvm/ADT/Statistic.h"
6502077da7SAlexey Zhikhartsev #include "llvm/Analysis/AssumptionCache.h"
6602077da7SAlexey Zhikhartsev #include "llvm/Analysis/CodeMetrics.h"
67a494ae43Sserge-sans-paille #include "llvm/Analysis/DomTreeUpdater.h"
686b53ada6SXChy #include "llvm/Analysis/LoopInfo.h"
6902077da7SAlexey Zhikhartsev #include "llvm/Analysis/OptimizationRemarkEmitter.h"
7002077da7SAlexey Zhikhartsev #include "llvm/Analysis/TargetTransformInfo.h"
7102077da7SAlexey Zhikhartsev #include "llvm/IR/CFG.h"
7202077da7SAlexey Zhikhartsev #include "llvm/IR/Constants.h"
7302077da7SAlexey Zhikhartsev #include "llvm/IR/IntrinsicInst.h"
7402077da7SAlexey Zhikhartsev #include "llvm/Support/CommandLine.h"
7502077da7SAlexey Zhikhartsev #include "llvm/Support/Debug.h"
7602077da7SAlexey Zhikhartsev #include "llvm/Transforms/Utils/Cloning.h"
7702077da7SAlexey Zhikhartsev #include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
7802077da7SAlexey Zhikhartsev #include "llvm/Transforms/Utils/ValueMapper.h"
7902077da7SAlexey Zhikhartsev #include <algorithm>
8002077da7SAlexey Zhikhartsev #include <deque>
8102077da7SAlexey Zhikhartsev 
82f90a66a5Sserge-sans-paille #ifdef EXPENSIVE_CHECKS
83f90a66a5Sserge-sans-paille #include "llvm/IR/Verifier.h"
84f90a66a5Sserge-sans-paille #endif
85f90a66a5Sserge-sans-paille 
8602077da7SAlexey Zhikhartsev using namespace llvm;
8702077da7SAlexey Zhikhartsev 
8802077da7SAlexey Zhikhartsev #define DEBUG_TYPE "dfa-jump-threading"
8902077da7SAlexey Zhikhartsev 
9002077da7SAlexey Zhikhartsev STATISTIC(NumTransforms, "Number of transformations done");
9102077da7SAlexey Zhikhartsev STATISTIC(NumCloned, "Number of blocks cloned");
9202077da7SAlexey Zhikhartsev STATISTIC(NumPaths, "Number of individual paths threaded");
9302077da7SAlexey Zhikhartsev 
9402077da7SAlexey Zhikhartsev static cl::opt<bool>
9502077da7SAlexey Zhikhartsev     ClViewCfgBefore("dfa-jump-view-cfg-before",
9602077da7SAlexey Zhikhartsev                     cl::desc("View the CFG before DFA Jump Threading"),
9702077da7SAlexey Zhikhartsev                     cl::Hidden, cl::init(false));
9802077da7SAlexey Zhikhartsev 
99c9325f8aSUsman Nadeem static cl::opt<bool> EarlyExitHeuristic(
100c9325f8aSUsman Nadeem     "dfa-early-exit-heuristic",
101c9325f8aSUsman Nadeem     cl::desc("Exit early if an unpredictable value come from the same loop"),
102c9325f8aSUsman Nadeem     cl::Hidden, cl::init(true));
103c9325f8aSUsman Nadeem 
10402077da7SAlexey Zhikhartsev static cl::opt<unsigned> MaxPathLength(
10502077da7SAlexey Zhikhartsev     "dfa-max-path-length",
10602077da7SAlexey Zhikhartsev     cl::desc("Max number of blocks searched to find a threading path"),
10702077da7SAlexey Zhikhartsev     cl::Hidden, cl::init(20));
10802077da7SAlexey Zhikhartsev 
109b167ada8SUsman Nadeem static cl::opt<unsigned> MaxNumVisitiedPaths(
110b167ada8SUsman Nadeem     "dfa-max-num-visited-paths",
111b167ada8SUsman Nadeem     cl::desc(
112b167ada8SUsman Nadeem         "Max number of blocks visited while enumerating paths around a switch"),
113b167ada8SUsman Nadeem     cl::Hidden, cl::init(2000));
114b167ada8SUsman Nadeem 
1157fa41d8aSXChy static cl::opt<unsigned>
1167fa41d8aSXChy     MaxNumPaths("dfa-max-num-paths",
1178b0d7634SAlex Zhikhartsev                 cl::desc("Max number of paths enumerated around a switch"),
1188b0d7634SAlex Zhikhartsev                 cl::Hidden, cl::init(200));
1198b0d7634SAlex Zhikhartsev 
12002077da7SAlexey Zhikhartsev static cl::opt<unsigned>
12102077da7SAlexey Zhikhartsev     CostThreshold("dfa-cost-threshold",
12202077da7SAlexey Zhikhartsev                   cl::desc("Maximum cost accepted for the transformation"),
12302077da7SAlexey Zhikhartsev                   cl::Hidden, cl::init(50));
12402077da7SAlexey Zhikhartsev 
12502077da7SAlexey Zhikhartsev namespace {
12602077da7SAlexey Zhikhartsev 
12702077da7SAlexey Zhikhartsev class SelectInstToUnfold {
12802077da7SAlexey Zhikhartsev   SelectInst *SI;
12902077da7SAlexey Zhikhartsev   PHINode *SIUse;
13002077da7SAlexey Zhikhartsev 
13102077da7SAlexey Zhikhartsev public:
13202077da7SAlexey Zhikhartsev   SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {}
13302077da7SAlexey Zhikhartsev 
13402077da7SAlexey Zhikhartsev   SelectInst *getInst() { return SI; }
13502077da7SAlexey Zhikhartsev   PHINode *getUse() { return SIUse; }
13602077da7SAlexey Zhikhartsev 
13702077da7SAlexey Zhikhartsev   explicit operator bool() const { return SI && SIUse; }
13802077da7SAlexey Zhikhartsev };
13902077da7SAlexey Zhikhartsev 
140c229f767SXChy void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
14102077da7SAlexey Zhikhartsev             std::vector<SelectInstToUnfold> *NewSIsToUnfold,
14202077da7SAlexey Zhikhartsev             std::vector<BasicBlock *> *NewBBs);
14302077da7SAlexey Zhikhartsev 
14402077da7SAlexey Zhikhartsev class DFAJumpThreading {
14502077da7SAlexey Zhikhartsev public:
1466b53ada6SXChy   DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
14702077da7SAlexey Zhikhartsev                    TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE)
1486b53ada6SXChy       : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}
14902077da7SAlexey Zhikhartsev 
15002077da7SAlexey Zhikhartsev   bool run(Function &F);
151c229f767SXChy   bool LoopInfoBroken;
15202077da7SAlexey Zhikhartsev 
15302077da7SAlexey Zhikhartsev private:
15402077da7SAlexey Zhikhartsev   void
15502077da7SAlexey Zhikhartsev   unfoldSelectInstrs(DominatorTree *DT,
15602077da7SAlexey Zhikhartsev                      const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
15702077da7SAlexey Zhikhartsev     DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
15834d48279SKazu Hirata     SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts);
15902077da7SAlexey Zhikhartsev 
16002077da7SAlexey Zhikhartsev     while (!Stack.empty()) {
16184b07c9bSKazu Hirata       SelectInstToUnfold SIToUnfold = Stack.pop_back_val();
16202077da7SAlexey Zhikhartsev 
16302077da7SAlexey Zhikhartsev       std::vector<SelectInstToUnfold> NewSIsToUnfold;
16402077da7SAlexey Zhikhartsev       std::vector<BasicBlock *> NewBBs;
165c229f767SXChy       unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs);
16602077da7SAlexey Zhikhartsev 
16702077da7SAlexey Zhikhartsev       // Put newly discovered select instructions into the work list.
16802077da7SAlexey Zhikhartsev       for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold)
16902077da7SAlexey Zhikhartsev         Stack.push_back(NewSIToUnfold);
17002077da7SAlexey Zhikhartsev     }
17102077da7SAlexey Zhikhartsev   }
17202077da7SAlexey Zhikhartsev 
17302077da7SAlexey Zhikhartsev   AssumptionCache *AC;
17402077da7SAlexey Zhikhartsev   DominatorTree *DT;
1756b53ada6SXChy   LoopInfo *LI;
17602077da7SAlexey Zhikhartsev   TargetTransformInfo *TTI;
17702077da7SAlexey Zhikhartsev   OptimizationRemarkEmitter *ORE;
17802077da7SAlexey Zhikhartsev };
17902077da7SAlexey Zhikhartsev 
18002077da7SAlexey Zhikhartsev } // end anonymous namespace
18102077da7SAlexey Zhikhartsev 
18202077da7SAlexey Zhikhartsev namespace {
18302077da7SAlexey Zhikhartsev 
18402077da7SAlexey Zhikhartsev /// Unfold the select instruction held in \p SIToUnfold by replacing it with
18502077da7SAlexey Zhikhartsev /// control flow.
18602077da7SAlexey Zhikhartsev ///
18702077da7SAlexey Zhikhartsev /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly
18802077da7SAlexey Zhikhartsev /// created basic blocks into \p NewBBs.
18902077da7SAlexey Zhikhartsev ///
19002077da7SAlexey Zhikhartsev /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
191c229f767SXChy void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
19202077da7SAlexey Zhikhartsev             std::vector<SelectInstToUnfold> *NewSIsToUnfold,
19302077da7SAlexey Zhikhartsev             std::vector<BasicBlock *> *NewBBs) {
19402077da7SAlexey Zhikhartsev   SelectInst *SI = SIToUnfold.getInst();
19502077da7SAlexey Zhikhartsev   PHINode *SIUse = SIToUnfold.getUse();
19602077da7SAlexey Zhikhartsev   BasicBlock *StartBlock = SI->getParent();
19702077da7SAlexey Zhikhartsev   BranchInst *StartBlockTerm =
19802077da7SAlexey Zhikhartsev       dyn_cast<BranchInst>(StartBlock->getTerminator());
19902077da7SAlexey Zhikhartsev 
200b167ada8SUsman Nadeem   assert(StartBlockTerm);
20102077da7SAlexey Zhikhartsev   assert(SI->hasOneUse());
20202077da7SAlexey Zhikhartsev 
203b167ada8SUsman Nadeem   if (StartBlockTerm->isUnconditional()) {
204*d4a38c8fSUsman Nadeem     BasicBlock *EndBlock = StartBlock->getUniqueSuccessor();
205b167ada8SUsman Nadeem     // Arbitrarily choose the 'false' side for a new input value to the PHI.
206b167ada8SUsman Nadeem     BasicBlock *NewBlock = BasicBlock::Create(
207b167ada8SUsman Nadeem         SI->getContext(), Twine(SI->getName(), ".si.unfold.false"),
20802077da7SAlexey Zhikhartsev         EndBlock->getParent(), EndBlock);
209b167ada8SUsman Nadeem     NewBBs->push_back(NewBlock);
210b167ada8SUsman Nadeem     BranchInst::Create(EndBlock, NewBlock);
211b167ada8SUsman Nadeem     DTU->applyUpdates({{DominatorTree::Insert, NewBlock, EndBlock}});
21202077da7SAlexey Zhikhartsev 
213b167ada8SUsman Nadeem     // StartBlock
214b167ada8SUsman Nadeem     //   |  \
215b167ada8SUsman Nadeem     //   |  NewBlock
216b167ada8SUsman Nadeem     //   |  /
217b167ada8SUsman Nadeem     // EndBlock
21802077da7SAlexey Zhikhartsev     Value *SIOp1 = SI->getTrueValue();
21902077da7SAlexey Zhikhartsev     Value *SIOp2 = SI->getFalseValue();
22002077da7SAlexey Zhikhartsev 
221b167ada8SUsman Nadeem     PHINode *NewPhi = PHINode::Create(SIUse->getType(), 1,
222b167ada8SUsman Nadeem                                       Twine(SIOp2->getName(), ".si.unfold.phi"),
223b167ada8SUsman Nadeem                                       NewBlock->getFirstInsertionPt());
224b167ada8SUsman Nadeem     NewPhi->addIncoming(SIOp2, StartBlock);
225b167ada8SUsman Nadeem 
226*d4a38c8fSUsman Nadeem     // Update any other PHI nodes in EndBlock.
227*d4a38c8fSUsman Nadeem     for (PHINode &Phi : EndBlock->phis()) {
228*d4a38c8fSUsman Nadeem       if (SIUse == &Phi)
229*d4a38c8fSUsman Nadeem         continue;
230*d4a38c8fSUsman Nadeem       Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlock);
231*d4a38c8fSUsman Nadeem     }
232*d4a38c8fSUsman Nadeem 
233*d4a38c8fSUsman Nadeem     // Update the phi node of SI, which is its only use.
234*d4a38c8fSUsman Nadeem     if (EndBlock == SIUse->getParent()) {
235*d4a38c8fSUsman Nadeem       SIUse->addIncoming(NewPhi, NewBlock);
236*d4a38c8fSUsman Nadeem       SIUse->replaceUsesOfWith(SI, SIOp1);
237*d4a38c8fSUsman Nadeem     } else {
238*d4a38c8fSUsman Nadeem       PHINode *EndPhi = PHINode::Create(SIUse->getType(), pred_size(EndBlock),
239*d4a38c8fSUsman Nadeem                                         Twine(SI->getName(), ".si.unfold.phi"),
240*d4a38c8fSUsman Nadeem                                         EndBlock->getFirstInsertionPt());
241*d4a38c8fSUsman Nadeem       for (BasicBlock *Pred : predecessors(EndBlock)) {
242*d4a38c8fSUsman Nadeem         if (Pred != StartBlock && Pred != NewBlock)
243*d4a38c8fSUsman Nadeem           EndPhi->addIncoming(EndPhi, Pred);
244*d4a38c8fSUsman Nadeem       }
245*d4a38c8fSUsman Nadeem 
246*d4a38c8fSUsman Nadeem       EndPhi->addIncoming(SIOp1, StartBlock);
247*d4a38c8fSUsman Nadeem       EndPhi->addIncoming(NewPhi, NewBlock);
248*d4a38c8fSUsman Nadeem       SIUse->replaceUsesOfWith(SI, EndPhi);
249*d4a38c8fSUsman Nadeem       SIUse = EndPhi;
250*d4a38c8fSUsman Nadeem     }
251*d4a38c8fSUsman Nadeem 
252b167ada8SUsman Nadeem     if (auto *OpSi = dyn_cast<SelectInst>(SIOp1))
253b167ada8SUsman Nadeem       NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, SIUse));
254b167ada8SUsman Nadeem     if (auto *OpSi = dyn_cast<SelectInst>(SIOp2))
255b167ada8SUsman Nadeem       NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, NewPhi));
25602077da7SAlexey Zhikhartsev 
257b167ada8SUsman Nadeem     // Insert the real conditional branch based on the original condition.
258*d4a38c8fSUsman Nadeem     StartBlockTerm->eraseFromParent();
259b167ada8SUsman Nadeem     BranchInst::Create(EndBlock, NewBlock, SI->getCondition(), StartBlock);
260b167ada8SUsman Nadeem     DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock},
261b167ada8SUsman Nadeem                        {DominatorTree::Insert, StartBlock, NewBlock}});
262b167ada8SUsman Nadeem   } else {
263*d4a38c8fSUsman Nadeem     BasicBlock *EndBlock = SIUse->getParent();
264b167ada8SUsman Nadeem     BasicBlock *NewBlockT = BasicBlock::Create(
265b167ada8SUsman Nadeem         SI->getContext(), Twine(SI->getName(), ".si.unfold.true"),
266b167ada8SUsman Nadeem         EndBlock->getParent(), EndBlock);
267b167ada8SUsman Nadeem     BasicBlock *NewBlockF = BasicBlock::Create(
268b167ada8SUsman Nadeem         SI->getContext(), Twine(SI->getName(), ".si.unfold.false"),
269b167ada8SUsman Nadeem         EndBlock->getParent(), EndBlock);
270b167ada8SUsman Nadeem 
271b167ada8SUsman Nadeem     NewBBs->push_back(NewBlockT);
272b167ada8SUsman Nadeem     NewBBs->push_back(NewBlockF);
273b167ada8SUsman Nadeem 
274b167ada8SUsman Nadeem     // Def only has one use in EndBlock.
275b167ada8SUsman Nadeem     // Before transformation:
276b167ada8SUsman Nadeem     // StartBlock(Def)
277b167ada8SUsman Nadeem     //   |      \
278b167ada8SUsman Nadeem     // EndBlock  OtherBlock
279b167ada8SUsman Nadeem     //  (Use)
280b167ada8SUsman Nadeem     //
281b167ada8SUsman Nadeem     // After transformation:
282b167ada8SUsman Nadeem     // StartBlock(Def)
283b167ada8SUsman Nadeem     //   |      \
284b167ada8SUsman Nadeem     //   |       OtherBlock
285b167ada8SUsman Nadeem     // NewBlockT
286b167ada8SUsman Nadeem     //   |     \
287b167ada8SUsman Nadeem     //   |   NewBlockF
288b167ada8SUsman Nadeem     //   |      /
289b167ada8SUsman Nadeem     //   |     /
290b167ada8SUsman Nadeem     // EndBlock
291b167ada8SUsman Nadeem     //  (Use)
292b167ada8SUsman Nadeem     BranchInst::Create(EndBlock, NewBlockF);
293b167ada8SUsman Nadeem     // Insert the real conditional branch based on the original condition.
294b167ada8SUsman Nadeem     BranchInst::Create(EndBlock, NewBlockF, SI->getCondition(), NewBlockT);
295b167ada8SUsman Nadeem     DTU->applyUpdates({{DominatorTree::Insert, NewBlockT, NewBlockF},
296b167ada8SUsman Nadeem                        {DominatorTree::Insert, NewBlockT, EndBlock},
297b167ada8SUsman Nadeem                        {DominatorTree::Insert, NewBlockF, EndBlock}});
298b167ada8SUsman Nadeem 
299b167ada8SUsman Nadeem     Value *TrueVal = SI->getTrueValue();
300b167ada8SUsman Nadeem     Value *FalseVal = SI->getFalseValue();
301b167ada8SUsman Nadeem 
302b167ada8SUsman Nadeem     PHINode *NewPhiT = PHINode::Create(
303b167ada8SUsman Nadeem         SIUse->getType(), 1, Twine(TrueVal->getName(), ".si.unfold.phi"),
304b167ada8SUsman Nadeem         NewBlockT->getFirstInsertionPt());
305b167ada8SUsman Nadeem     PHINode *NewPhiF = PHINode::Create(
306b167ada8SUsman Nadeem         SIUse->getType(), 1, Twine(FalseVal->getName(), ".si.unfold.phi"),
307b167ada8SUsman Nadeem         NewBlockF->getFirstInsertionPt());
308b167ada8SUsman Nadeem     NewPhiT->addIncoming(TrueVal, StartBlock);
309b167ada8SUsman Nadeem     NewPhiF->addIncoming(FalseVal, NewBlockT);
310b167ada8SUsman Nadeem 
311b167ada8SUsman Nadeem     if (auto *TrueSI = dyn_cast<SelectInst>(TrueVal))
312b167ada8SUsman Nadeem       NewSIsToUnfold->push_back(SelectInstToUnfold(TrueSI, NewPhiT));
313b167ada8SUsman Nadeem     if (auto *FalseSi = dyn_cast<SelectInst>(FalseVal))
314b167ada8SUsman Nadeem       NewSIsToUnfold->push_back(SelectInstToUnfold(FalseSi, NewPhiF));
315b167ada8SUsman Nadeem 
316b167ada8SUsman Nadeem     SIUse->addIncoming(NewPhiT, NewBlockT);
317b167ada8SUsman Nadeem     SIUse->addIncoming(NewPhiF, NewBlockF);
318b167ada8SUsman Nadeem     SIUse->removeIncomingValue(StartBlock);
319b167ada8SUsman Nadeem 
320b167ada8SUsman Nadeem     // Update any other PHI nodes in EndBlock.
321b167ada8SUsman Nadeem     for (PHINode &Phi : EndBlock->phis()) {
322b167ada8SUsman Nadeem       if (SIUse == &Phi)
323b167ada8SUsman Nadeem         continue;
324b167ada8SUsman Nadeem       Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlockT);
325b167ada8SUsman Nadeem       Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlockF);
326b167ada8SUsman Nadeem       Phi.removeIncomingValue(StartBlock);
327b167ada8SUsman Nadeem     }
328b167ada8SUsman Nadeem 
329b167ada8SUsman Nadeem     // Update the appropriate successor of the start block to point to the new
330b167ada8SUsman Nadeem     // unfolded block.
331b167ada8SUsman Nadeem     unsigned SuccNum = StartBlockTerm->getSuccessor(1) == EndBlock ? 1 : 0;
332b167ada8SUsman Nadeem     StartBlockTerm->setSuccessor(SuccNum, NewBlockT);
333b167ada8SUsman Nadeem     DTU->applyUpdates({{DominatorTree::Delete, StartBlock, EndBlock},
334b167ada8SUsman Nadeem                        {DominatorTree::Insert, StartBlock, NewBlockT}});
335b167ada8SUsman Nadeem   }
33602077da7SAlexey Zhikhartsev 
337c229f767SXChy   // Preserve loop info
338c229f767SXChy   if (Loop *L = LI->getLoopFor(SI->getParent())) {
339c229f767SXChy     for (BasicBlock *NewBB : *NewBBs)
340c229f767SXChy       L->addBasicBlockToLoop(NewBB, *LI);
341c229f767SXChy   }
342c229f767SXChy 
34302077da7SAlexey Zhikhartsev   // The select is now dead.
3447fa41d8aSXChy   assert(SI->use_empty() && "Select must be dead now");
34502077da7SAlexey Zhikhartsev   SI->eraseFromParent();
34602077da7SAlexey Zhikhartsev }
34702077da7SAlexey Zhikhartsev 
34802077da7SAlexey Zhikhartsev struct ClonedBlock {
34902077da7SAlexey Zhikhartsev   BasicBlock *BB;
350019ffbf3SXChy   APInt State; ///< \p State corresponds to the next value of a switch stmnt.
35102077da7SAlexey Zhikhartsev };
35202077da7SAlexey Zhikhartsev 
35302077da7SAlexey Zhikhartsev typedef std::deque<BasicBlock *> PathType;
35402077da7SAlexey Zhikhartsev typedef std::vector<PathType> PathsType;
355380b8a60SNikita Popov typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
35602077da7SAlexey Zhikhartsev typedef std::vector<ClonedBlock> CloneList;
35702077da7SAlexey Zhikhartsev 
35802077da7SAlexey Zhikhartsev // This data structure keeps track of all blocks that have been cloned.  If two
35902077da7SAlexey Zhikhartsev // different ThreadingPaths clone the same block for a certain state it should
36002077da7SAlexey Zhikhartsev // be reused, and it can be looked up in this map.
36102077da7SAlexey Zhikhartsev typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;
36202077da7SAlexey Zhikhartsev 
36302077da7SAlexey Zhikhartsev // This map keeps track of all the new definitions for an instruction. This
36402077da7SAlexey Zhikhartsev // information is needed when restoring SSA form after cloning blocks.
3659d555b4aSOlle Fredriksson typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap;
36602077da7SAlexey Zhikhartsev 
36702077da7SAlexey Zhikhartsev inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
36802077da7SAlexey Zhikhartsev   OS << "< ";
36902077da7SAlexey Zhikhartsev   for (const BasicBlock *BB : Path) {
37002077da7SAlexey Zhikhartsev     std::string BBName;
37102077da7SAlexey Zhikhartsev     if (BB->hasName())
37202077da7SAlexey Zhikhartsev       raw_string_ostream(BBName) << BB->getName();
37302077da7SAlexey Zhikhartsev     else
37402077da7SAlexey Zhikhartsev       raw_string_ostream(BBName) << BB;
37502077da7SAlexey Zhikhartsev     OS << BBName << " ";
37602077da7SAlexey Zhikhartsev   }
37702077da7SAlexey Zhikhartsev   OS << ">";
37802077da7SAlexey Zhikhartsev   return OS;
37902077da7SAlexey Zhikhartsev }
38002077da7SAlexey Zhikhartsev 
38102077da7SAlexey Zhikhartsev /// ThreadingPath is a path in the control flow of a loop that can be threaded
38202077da7SAlexey Zhikhartsev /// by cloning necessary basic blocks and replacing conditional branches with
38302077da7SAlexey Zhikhartsev /// unconditional ones. A threading path includes a list of basic blocks, the
38402077da7SAlexey Zhikhartsev /// exit state, and the block that determines the next state.
38502077da7SAlexey Zhikhartsev struct ThreadingPath {
38602077da7SAlexey Zhikhartsev   /// Exit value is DFA's exit state for the given path.
387019ffbf3SXChy   APInt getExitValue() const { return ExitVal; }
38802077da7SAlexey Zhikhartsev   void setExitValue(const ConstantInt *V) {
389019ffbf3SXChy     ExitVal = V->getValue();
39002077da7SAlexey Zhikhartsev     IsExitValSet = true;
39102077da7SAlexey Zhikhartsev   }
39202077da7SAlexey Zhikhartsev   bool isExitValueSet() const { return IsExitValSet; }
39302077da7SAlexey Zhikhartsev 
39402077da7SAlexey Zhikhartsev   /// Determinator is the basic block that determines the next state of the DFA.
39502077da7SAlexey Zhikhartsev   const BasicBlock *getDeterminatorBB() const { return DBB; }
39602077da7SAlexey Zhikhartsev   void setDeterminator(const BasicBlock *BB) { DBB = BB; }
39702077da7SAlexey Zhikhartsev 
39802077da7SAlexey Zhikhartsev   /// Path is a list of basic blocks.
39902077da7SAlexey Zhikhartsev   const PathType &getPath() const { return Path; }
40002077da7SAlexey Zhikhartsev   void setPath(const PathType &NewPath) { Path = NewPath; }
401b167ada8SUsman Nadeem   void push_back(BasicBlock *BB) { Path.push_back(BB); }
402b167ada8SUsman Nadeem   void push_front(BasicBlock *BB) { Path.push_front(BB); }
403b167ada8SUsman Nadeem   void appendExcludingFirst(const PathType &OtherPath) {
404b167ada8SUsman Nadeem     Path.insert(Path.end(), OtherPath.begin() + 1, OtherPath.end());
405b167ada8SUsman Nadeem   }
40602077da7SAlexey Zhikhartsev 
40702077da7SAlexey Zhikhartsev   void print(raw_ostream &OS) const {
40802077da7SAlexey Zhikhartsev     OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]";
40902077da7SAlexey Zhikhartsev   }
41002077da7SAlexey Zhikhartsev 
41102077da7SAlexey Zhikhartsev private:
41202077da7SAlexey Zhikhartsev   PathType Path;
413019ffbf3SXChy   APInt ExitVal;
41402077da7SAlexey Zhikhartsev   const BasicBlock *DBB = nullptr;
41502077da7SAlexey Zhikhartsev   bool IsExitValSet = false;
41602077da7SAlexey Zhikhartsev };
41702077da7SAlexey Zhikhartsev 
41802077da7SAlexey Zhikhartsev #ifndef NDEBUG
41902077da7SAlexey Zhikhartsev inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
42002077da7SAlexey Zhikhartsev   TPath.print(OS);
42102077da7SAlexey Zhikhartsev   return OS;
42202077da7SAlexey Zhikhartsev }
42302077da7SAlexey Zhikhartsev #endif
42402077da7SAlexey Zhikhartsev 
42502077da7SAlexey Zhikhartsev struct MainSwitch {
4266b53ada6SXChy   MainSwitch(SwitchInst *SI, LoopInfo *LI, OptimizationRemarkEmitter *ORE)
4276b53ada6SXChy       : LI(LI) {
4288b0d7634SAlex Zhikhartsev     if (isCandidate(SI)) {
42902077da7SAlexey Zhikhartsev       Instr = SI;
43002077da7SAlexey Zhikhartsev     } else {
43102077da7SAlexey Zhikhartsev       ORE->emit([&]() {
43202077da7SAlexey Zhikhartsev         return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI)
43302077da7SAlexey Zhikhartsev                << "Switch instruction is not predictable.";
43402077da7SAlexey Zhikhartsev       });
43502077da7SAlexey Zhikhartsev     }
43602077da7SAlexey Zhikhartsev   }
43702077da7SAlexey Zhikhartsev 
43802077da7SAlexey Zhikhartsev   virtual ~MainSwitch() = default;
43902077da7SAlexey Zhikhartsev 
44002077da7SAlexey Zhikhartsev   SwitchInst *getInstr() const { return Instr; }
44102077da7SAlexey Zhikhartsev   const SmallVector<SelectInstToUnfold, 4> getSelectInsts() {
44202077da7SAlexey Zhikhartsev     return SelectInsts;
44302077da7SAlexey Zhikhartsev   }
44402077da7SAlexey Zhikhartsev 
44502077da7SAlexey Zhikhartsev private:
4468b0d7634SAlex Zhikhartsev   /// Do a use-def chain traversal starting from the switch condition to see if
4478b0d7634SAlex Zhikhartsev   /// \p SI is a potential condidate.
4488b0d7634SAlex Zhikhartsev   ///
4498b0d7634SAlex Zhikhartsev   /// Also, collect select instructions to unfold.
4508b0d7634SAlex Zhikhartsev   bool isCandidate(const SwitchInst *SI) {
451c9325f8aSUsman Nadeem     std::deque<std::pair<Value *, BasicBlock *>> Q;
45202077da7SAlexey Zhikhartsev     SmallSet<Value *, 16> SeenValues;
45302077da7SAlexey Zhikhartsev     SelectInsts.clear();
45402077da7SAlexey Zhikhartsev 
4558b0d7634SAlex Zhikhartsev     Value *SICond = SI->getCondition();
4568b0d7634SAlex Zhikhartsev     LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n");
4578b0d7634SAlex Zhikhartsev     if (!isa<PHINode>(SICond))
45802077da7SAlexey Zhikhartsev       return false;
45902077da7SAlexey Zhikhartsev 
4606b53ada6SXChy     // The switch must be in a loop.
461c9325f8aSUsman Nadeem     const Loop *L = LI->getLoopFor(SI->getParent());
462c9325f8aSUsman Nadeem     if (!L)
4636b53ada6SXChy       return false;
4646b53ada6SXChy 
465c9325f8aSUsman Nadeem     addToQueue(SICond, nullptr, Q, SeenValues);
46602077da7SAlexey Zhikhartsev 
46702077da7SAlexey Zhikhartsev     while (!Q.empty()) {
468c9325f8aSUsman Nadeem       Value *Current = Q.front().first;
469c9325f8aSUsman Nadeem       BasicBlock *CurrentIncomingBB = Q.front().second;
47002077da7SAlexey Zhikhartsev       Q.pop_front();
47102077da7SAlexey Zhikhartsev 
47202077da7SAlexey Zhikhartsev       if (auto *Phi = dyn_cast<PHINode>(Current)) {
473c9325f8aSUsman Nadeem         for (BasicBlock *IncomingBB : Phi->blocks()) {
474c9325f8aSUsman Nadeem           Value *Incoming = Phi->getIncomingValueForBlock(IncomingBB);
475c9325f8aSUsman Nadeem           addToQueue(Incoming, IncomingBB, Q, SeenValues);
47602077da7SAlexey Zhikhartsev         }
4778b0d7634SAlex Zhikhartsev         LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");
47802077da7SAlexey Zhikhartsev       } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) {
47902077da7SAlexey Zhikhartsev         if (!isValidSelectInst(SelI))
48002077da7SAlexey Zhikhartsev           return false;
481c9325f8aSUsman Nadeem         addToQueue(SelI->getTrueValue(), CurrentIncomingBB, Q, SeenValues);
482c9325f8aSUsman Nadeem         addToQueue(SelI->getFalseValue(), CurrentIncomingBB, Q, SeenValues);
4838b0d7634SAlex Zhikhartsev         LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");
48402077da7SAlexey Zhikhartsev         if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back()))
48502077da7SAlexey Zhikhartsev           SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));
4868b0d7634SAlex Zhikhartsev       } else if (isa<Constant>(Current)) {
4878b0d7634SAlex Zhikhartsev         LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n");
4888b0d7634SAlex Zhikhartsev         continue;
48902077da7SAlexey Zhikhartsev       } else {
4908b0d7634SAlex Zhikhartsev         LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n");
4918b0d7634SAlex Zhikhartsev         // Allow unpredictable values. The hope is that those will be the
4928b0d7634SAlex Zhikhartsev         // initial switch values that can be ignored (they will hit the
4938b0d7634SAlex Zhikhartsev         // unthreaded switch) but this assumption will get checked later after
4948b0d7634SAlex Zhikhartsev         // paths have been enumerated (in function getStateDefMap).
495c9325f8aSUsman Nadeem 
496c9325f8aSUsman Nadeem         // If the unpredictable value comes from the same inner loop it is
497c9325f8aSUsman Nadeem         // likely that it will also be on the enumerated paths, causing us to
498c9325f8aSUsman Nadeem         // exit after we have enumerated all the paths. This heuristic save
499c9325f8aSUsman Nadeem         // compile time because a search for all the paths can become expensive.
500c9325f8aSUsman Nadeem         if (EarlyExitHeuristic &&
501c9325f8aSUsman Nadeem             L->contains(LI->getLoopFor(CurrentIncomingBB))) {
502c9325f8aSUsman Nadeem           LLVM_DEBUG(dbgs()
503c9325f8aSUsman Nadeem                      << "\tExiting early due to unpredictability heuristic.\n");
504c9325f8aSUsman Nadeem           return false;
505c9325f8aSUsman Nadeem         }
506c9325f8aSUsman Nadeem 
5078b0d7634SAlex Zhikhartsev         continue;
50802077da7SAlexey Zhikhartsev       }
50902077da7SAlexey Zhikhartsev     }
51002077da7SAlexey Zhikhartsev 
51102077da7SAlexey Zhikhartsev     return true;
51202077da7SAlexey Zhikhartsev   }
51302077da7SAlexey Zhikhartsev 
514c9325f8aSUsman Nadeem   void addToQueue(Value *Val, BasicBlock *BB,
515c9325f8aSUsman Nadeem                   std::deque<std::pair<Value *, BasicBlock *>> &Q,
51602077da7SAlexey Zhikhartsev                   SmallSet<Value *, 16> &SeenValues) {
51723a26e71SKazu Hirata     if (SeenValues.insert(Val).second)
518c9325f8aSUsman Nadeem       Q.push_back({Val, BB});
51902077da7SAlexey Zhikhartsev   }
52002077da7SAlexey Zhikhartsev 
52102077da7SAlexey Zhikhartsev   bool isValidSelectInst(SelectInst *SI) {
52202077da7SAlexey Zhikhartsev     if (!SI->hasOneUse())
52302077da7SAlexey Zhikhartsev       return false;
52402077da7SAlexey Zhikhartsev 
52502077da7SAlexey Zhikhartsev     Instruction *SIUse = dyn_cast<Instruction>(SI->user_back());
52602077da7SAlexey Zhikhartsev     // The use of the select inst should be either a phi or another select.
52702077da7SAlexey Zhikhartsev     if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse)))
52802077da7SAlexey Zhikhartsev       return false;
52902077da7SAlexey Zhikhartsev 
53002077da7SAlexey Zhikhartsev     BasicBlock *SIBB = SI->getParent();
53102077da7SAlexey Zhikhartsev 
53202077da7SAlexey Zhikhartsev     // Currently, we can only expand select instructions in basic blocks with
53302077da7SAlexey Zhikhartsev     // one successor.
53402077da7SAlexey Zhikhartsev     BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator());
53502077da7SAlexey Zhikhartsev     if (!SITerm || !SITerm->isUnconditional())
53602077da7SAlexey Zhikhartsev       return false;
53702077da7SAlexey Zhikhartsev 
5387fa41d8aSXChy     // Only fold the select coming from directly where it is defined.
5397fa41d8aSXChy     PHINode *PHIUser = dyn_cast<PHINode>(SIUse);
5407fa41d8aSXChy     if (PHIUser && PHIUser->getIncomingBlock(*SI->use_begin()) != SIBB)
54102077da7SAlexey Zhikhartsev       return false;
54202077da7SAlexey Zhikhartsev 
54302077da7SAlexey Zhikhartsev     // If select will not be sunk during unfolding, and it is in the same basic
54402077da7SAlexey Zhikhartsev     // block as another state defining select, then cannot unfold both.
54502077da7SAlexey Zhikhartsev     for (SelectInstToUnfold SIToUnfold : SelectInsts) {
54602077da7SAlexey Zhikhartsev       SelectInst *PrevSI = SIToUnfold.getInst();
54702077da7SAlexey Zhikhartsev       if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI &&
54802077da7SAlexey Zhikhartsev           PrevSI->getParent() == SI->getParent())
54902077da7SAlexey Zhikhartsev         return false;
55002077da7SAlexey Zhikhartsev     }
55102077da7SAlexey Zhikhartsev 
55202077da7SAlexey Zhikhartsev     return true;
55302077da7SAlexey Zhikhartsev   }
55402077da7SAlexey Zhikhartsev 
5556b53ada6SXChy   LoopInfo *LI;
55602077da7SAlexey Zhikhartsev   SwitchInst *Instr = nullptr;
55702077da7SAlexey Zhikhartsev   SmallVector<SelectInstToUnfold, 4> SelectInsts;
55802077da7SAlexey Zhikhartsev };
55902077da7SAlexey Zhikhartsev 
56002077da7SAlexey Zhikhartsev struct AllSwitchPaths {
561c229f767SXChy   AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE,
562b167ada8SUsman Nadeem                  LoopInfo *LI, Loop *L)
563c229f767SXChy       : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE),
564b167ada8SUsman Nadeem         LI(LI), SwitchOuterLoop(L) {}
56502077da7SAlexey Zhikhartsev 
56602077da7SAlexey Zhikhartsev   std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; }
56702077da7SAlexey Zhikhartsev   unsigned getNumThreadingPaths() { return TPaths.size(); }
56802077da7SAlexey Zhikhartsev   SwitchInst *getSwitchInst() { return Switch; }
56902077da7SAlexey Zhikhartsev   BasicBlock *getSwitchBlock() { return SwitchBlock; }
57002077da7SAlexey Zhikhartsev 
57102077da7SAlexey Zhikhartsev   void run() {
572b167ada8SUsman Nadeem     StateDefMap StateDef = getStateDefMap();
5738b0d7634SAlex Zhikhartsev     if (StateDef.empty()) {
5748b0d7634SAlex Zhikhartsev       ORE->emit([&]() {
5758b0d7634SAlex Zhikhartsev         return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
5768b0d7634SAlex Zhikhartsev                                         Switch)
5778b0d7634SAlex Zhikhartsev                << "Switch instruction is not predictable.";
5788b0d7634SAlex Zhikhartsev       });
5798b0d7634SAlex Zhikhartsev       return;
5808b0d7634SAlex Zhikhartsev     }
58102077da7SAlexey Zhikhartsev 
582b167ada8SUsman Nadeem     auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0));
583b167ada8SUsman Nadeem     auto *SwitchPhiDefBB = SwitchPhi->getParent();
584b167ada8SUsman Nadeem     VisitedBlocks VB;
585b167ada8SUsman Nadeem     // Get paths from the determinator BBs to SwitchPhiDefBB
586b167ada8SUsman Nadeem     std::vector<ThreadingPath> PathsToPhiDef =
587b167ada8SUsman Nadeem         getPathsFromStateDefMap(StateDef, SwitchPhi, VB);
588b167ada8SUsman Nadeem     if (SwitchPhiDefBB == SwitchBlock) {
589b167ada8SUsman Nadeem       TPaths = std::move(PathsToPhiDef);
590b167ada8SUsman Nadeem       return;
59102077da7SAlexey Zhikhartsev     }
59202077da7SAlexey Zhikhartsev 
593b167ada8SUsman Nadeem     // Find and append paths from SwitchPhiDefBB to SwitchBlock.
594b167ada8SUsman Nadeem     PathsType PathsToSwitchBB =
595b167ada8SUsman Nadeem         paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1);
596b167ada8SUsman Nadeem     if (PathsToSwitchBB.empty())
597b167ada8SUsman Nadeem       return;
59802077da7SAlexey Zhikhartsev 
599b167ada8SUsman Nadeem     std::vector<ThreadingPath> TempList;
600b167ada8SUsman Nadeem     for (const ThreadingPath &Path : PathsToPhiDef) {
601b167ada8SUsman Nadeem       for (const PathType &PathToSw : PathsToSwitchBB) {
602b167ada8SUsman Nadeem         ThreadingPath PathCopy(Path);
603b167ada8SUsman Nadeem         PathCopy.appendExcludingFirst(PathToSw);
604b167ada8SUsman Nadeem         TempList.push_back(PathCopy);
60502077da7SAlexey Zhikhartsev       }
60602077da7SAlexey Zhikhartsev     }
607b167ada8SUsman Nadeem     TPaths = std::move(TempList);
60802077da7SAlexey Zhikhartsev   }
60902077da7SAlexey Zhikhartsev 
61002077da7SAlexey Zhikhartsev private:
61102077da7SAlexey Zhikhartsev   // Value: an instruction that defines a switch state;
61202077da7SAlexey Zhikhartsev   // Key: the parent basic block of that instruction.
61302077da7SAlexey Zhikhartsev   typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap;
614b167ada8SUsman Nadeem   std::vector<ThreadingPath> getPathsFromStateDefMap(StateDefMap &StateDef,
615b167ada8SUsman Nadeem                                                      PHINode *Phi,
616b167ada8SUsman Nadeem                                                      VisitedBlocks &VB) {
617b167ada8SUsman Nadeem     std::vector<ThreadingPath> Res;
618b167ada8SUsman Nadeem     auto *PhiBB = Phi->getParent();
619b167ada8SUsman Nadeem     VB.insert(PhiBB);
62002077da7SAlexey Zhikhartsev 
621b167ada8SUsman Nadeem     VisitedBlocks UniqueBlocks;
622b167ada8SUsman Nadeem     for (auto *IncomingBB : Phi->blocks()) {
623b167ada8SUsman Nadeem       if (!UniqueBlocks.insert(IncomingBB).second)
624b167ada8SUsman Nadeem         continue;
625b167ada8SUsman Nadeem       if (!SwitchOuterLoop->contains(IncomingBB))
626b167ada8SUsman Nadeem         continue;
627b167ada8SUsman Nadeem 
628b167ada8SUsman Nadeem       Value *IncomingValue = Phi->getIncomingValueForBlock(IncomingBB);
629b167ada8SUsman Nadeem       // We found the determinator. This is the start of our path.
630b167ada8SUsman Nadeem       if (auto *C = dyn_cast<ConstantInt>(IncomingValue)) {
631b167ada8SUsman Nadeem         // SwitchBlock is the determinator, unsupported unless its also the def.
632b167ada8SUsman Nadeem         if (PhiBB == SwitchBlock &&
633b167ada8SUsman Nadeem             SwitchBlock != cast<PHINode>(Switch->getOperand(0))->getParent())
634b167ada8SUsman Nadeem           continue;
635b167ada8SUsman Nadeem         ThreadingPath NewPath;
636b167ada8SUsman Nadeem         NewPath.setDeterminator(PhiBB);
637b167ada8SUsman Nadeem         NewPath.setExitValue(C);
638b167ada8SUsman Nadeem         // Don't add SwitchBlock at the start, this is handled later.
639b167ada8SUsman Nadeem         if (IncomingBB != SwitchBlock)
640b167ada8SUsman Nadeem           NewPath.push_back(IncomingBB);
641b167ada8SUsman Nadeem         NewPath.push_back(PhiBB);
642b167ada8SUsman Nadeem         Res.push_back(NewPath);
643b167ada8SUsman Nadeem         continue;
644b167ada8SUsman Nadeem       }
645b167ada8SUsman Nadeem       // Don't get into a cycle.
646b167ada8SUsman Nadeem       if (VB.contains(IncomingBB) || IncomingBB == SwitchBlock)
647b167ada8SUsman Nadeem         continue;
648b167ada8SUsman Nadeem       // Recurse up the PHI chain.
649b167ada8SUsman Nadeem       auto *IncomingPhi = dyn_cast<PHINode>(IncomingValue);
650b167ada8SUsman Nadeem       if (!IncomingPhi)
651b167ada8SUsman Nadeem         continue;
652b167ada8SUsman Nadeem       auto *IncomingPhiDefBB = IncomingPhi->getParent();
653b167ada8SUsman Nadeem       if (!StateDef.contains(IncomingPhiDefBB))
654b167ada8SUsman Nadeem         continue;
655b167ada8SUsman Nadeem 
656b167ada8SUsman Nadeem       // Direct predecessor, just add to the path.
657b167ada8SUsman Nadeem       if (IncomingPhiDefBB == IncomingBB) {
658b167ada8SUsman Nadeem         std::vector<ThreadingPath> PredPaths =
659b167ada8SUsman Nadeem             getPathsFromStateDefMap(StateDef, IncomingPhi, VB);
660b167ada8SUsman Nadeem         for (ThreadingPath &Path : PredPaths) {
661b167ada8SUsman Nadeem           Path.push_back(PhiBB);
662b167ada8SUsman Nadeem           Res.push_back(std::move(Path));
663b167ada8SUsman Nadeem         }
664b167ada8SUsman Nadeem         continue;
665b167ada8SUsman Nadeem       }
666b167ada8SUsman Nadeem       // Not a direct predecessor, find intermediate paths to append to the
667b167ada8SUsman Nadeem       // existing path.
668b167ada8SUsman Nadeem       if (VB.contains(IncomingPhiDefBB))
669b167ada8SUsman Nadeem         continue;
670b167ada8SUsman Nadeem 
671b167ada8SUsman Nadeem       PathsType IntermediatePaths;
672b167ada8SUsman Nadeem       IntermediatePaths =
673b167ada8SUsman Nadeem           paths(IncomingPhiDefBB, IncomingBB, VB, /* PathDepth = */ 1);
674b167ada8SUsman Nadeem       if (IntermediatePaths.empty())
675b167ada8SUsman Nadeem         continue;
676b167ada8SUsman Nadeem 
677b167ada8SUsman Nadeem       std::vector<ThreadingPath> PredPaths =
678b167ada8SUsman Nadeem           getPathsFromStateDefMap(StateDef, IncomingPhi, VB);
679b167ada8SUsman Nadeem       for (const ThreadingPath &Path : PredPaths) {
680b167ada8SUsman Nadeem         for (const PathType &IPath : IntermediatePaths) {
681b167ada8SUsman Nadeem           ThreadingPath NewPath(Path);
682b167ada8SUsman Nadeem           NewPath.appendExcludingFirst(IPath);
683b167ada8SUsman Nadeem           NewPath.push_back(PhiBB);
684b167ada8SUsman Nadeem           Res.push_back(NewPath);
685b167ada8SUsman Nadeem         }
686b167ada8SUsman Nadeem       }
687b167ada8SUsman Nadeem     }
688b167ada8SUsman Nadeem     VB.erase(PhiBB);
689b167ada8SUsman Nadeem     return Res;
690b167ada8SUsman Nadeem   }
691b167ada8SUsman Nadeem 
692b167ada8SUsman Nadeem   PathsType paths(BasicBlock *BB, BasicBlock *ToBB, VisitedBlocks &Visited,
693b167ada8SUsman Nadeem                   unsigned PathDepth) {
69402077da7SAlexey Zhikhartsev     PathsType Res;
69502077da7SAlexey Zhikhartsev 
69602077da7SAlexey Zhikhartsev     // Stop exploring paths after visiting MaxPathLength blocks
69702077da7SAlexey Zhikhartsev     if (PathDepth > MaxPathLength) {
69802077da7SAlexey Zhikhartsev       ORE->emit([&]() {
69902077da7SAlexey Zhikhartsev         return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached",
70002077da7SAlexey Zhikhartsev                                           Switch)
70102077da7SAlexey Zhikhartsev                << "Exploration stopped after visiting MaxPathLength="
70202077da7SAlexey Zhikhartsev                << ore::NV("MaxPathLength", MaxPathLength) << " blocks.";
70302077da7SAlexey Zhikhartsev       });
70402077da7SAlexey Zhikhartsev       return Res;
70502077da7SAlexey Zhikhartsev     }
70602077da7SAlexey Zhikhartsev 
70702077da7SAlexey Zhikhartsev     Visited.insert(BB);
708b167ada8SUsman Nadeem     if (++NumVisited > MaxNumVisitiedPaths)
709b167ada8SUsman Nadeem       return Res;
71002077da7SAlexey Zhikhartsev 
711c229f767SXChy     // Stop if we have reached the BB out of loop, since its successors have no
712c229f767SXChy     // impact on the DFA.
713b167ada8SUsman Nadeem     if (!SwitchOuterLoop->contains(BB))
714c229f767SXChy       return Res;
715c229f767SXChy 
71602077da7SAlexey Zhikhartsev     // Some blocks have multiple edges to the same successor, and this set
71702077da7SAlexey Zhikhartsev     // is used to prevent a duplicate path from being generated
71802077da7SAlexey Zhikhartsev     SmallSet<BasicBlock *, 4> Successors;
7193f7aea1aSNikita Popov     for (BasicBlock *Succ : successors(BB)) {
7203f7aea1aSNikita Popov       if (!Successors.insert(Succ).second)
72102077da7SAlexey Zhikhartsev         continue;
72202077da7SAlexey Zhikhartsev 
723b167ada8SUsman Nadeem       // Found a cycle through the final block.
724b167ada8SUsman Nadeem       if (Succ == ToBB) {
725b167ada8SUsman Nadeem         Res.push_back({BB, ToBB});
72602077da7SAlexey Zhikhartsev         continue;
72702077da7SAlexey Zhikhartsev       }
72802077da7SAlexey Zhikhartsev 
72902077da7SAlexey Zhikhartsev       // We have encountered a cycle, do not get caught in it
730380b8a60SNikita Popov       if (Visited.contains(Succ))
73102077da7SAlexey Zhikhartsev         continue;
73202077da7SAlexey Zhikhartsev 
733b167ada8SUsman Nadeem       auto *CurrLoop = LI->getLoopFor(BB);
734b167ada8SUsman Nadeem       // Unlikely to be beneficial.
735b167ada8SUsman Nadeem       if (Succ == CurrLoop->getHeader())
736b167ada8SUsman Nadeem         continue;
737b167ada8SUsman Nadeem       // Skip for now, revisit this condition later to see the impact on
738b167ada8SUsman Nadeem       // coverage and compile time.
739b167ada8SUsman Nadeem       if (LI->getLoopFor(Succ) != CurrLoop)
740b167ada8SUsman Nadeem         continue;
741b167ada8SUsman Nadeem 
742b167ada8SUsman Nadeem       PathsType SuccPaths = paths(Succ, ToBB, Visited, PathDepth + 1);
743b167ada8SUsman Nadeem       for (PathType &Path : SuccPaths) {
744b167ada8SUsman Nadeem         Path.push_front(BB);
745b167ada8SUsman Nadeem         Res.push_back(Path);
7468b0d7634SAlex Zhikhartsev         if (Res.size() >= MaxNumPaths) {
7478b0d7634SAlex Zhikhartsev           return Res;
7488b0d7634SAlex Zhikhartsev         }
74902077da7SAlexey Zhikhartsev       }
75002077da7SAlexey Zhikhartsev     }
75102077da7SAlexey Zhikhartsev     // This block could now be visited again from a different predecessor. Note
75202077da7SAlexey Zhikhartsev     // that this will result in exponential runtime. Subpaths could possibly be
75302077da7SAlexey Zhikhartsev     // cached but it takes a lot of memory to store them.
75402077da7SAlexey Zhikhartsev     Visited.erase(BB);
75502077da7SAlexey Zhikhartsev     return Res;
75602077da7SAlexey Zhikhartsev   }
75702077da7SAlexey Zhikhartsev 
75802077da7SAlexey Zhikhartsev   /// Walk the use-def chain and collect all the state-defining instructions.
7598b0d7634SAlex Zhikhartsev   ///
7608b0d7634SAlex Zhikhartsev   /// Return an empty map if unpredictable values encountered inside the basic
7618b0d7634SAlex Zhikhartsev   /// blocks of \p LoopPaths.
762b167ada8SUsman Nadeem   StateDefMap getStateDefMap() const {
76302077da7SAlexey Zhikhartsev     StateDefMap Res;
76402077da7SAlexey Zhikhartsev     Value *FirstDef = Switch->getOperand(0);
7658b0d7634SAlex Zhikhartsev     assert(isa<PHINode>(FirstDef) && "The first definition must be a phi.");
76602077da7SAlexey Zhikhartsev 
76702077da7SAlexey Zhikhartsev     SmallVector<PHINode *, 8> Stack;
76802077da7SAlexey Zhikhartsev     Stack.push_back(dyn_cast<PHINode>(FirstDef));
76902077da7SAlexey Zhikhartsev     SmallSet<Value *, 16> SeenValues;
77002077da7SAlexey Zhikhartsev 
77102077da7SAlexey Zhikhartsev     while (!Stack.empty()) {
77284b07c9bSKazu Hirata       PHINode *CurPhi = Stack.pop_back_val();
77302077da7SAlexey Zhikhartsev 
77402077da7SAlexey Zhikhartsev       Res[CurPhi->getParent()] = CurPhi;
77502077da7SAlexey Zhikhartsev       SeenValues.insert(CurPhi);
77602077da7SAlexey Zhikhartsev 
7778b0d7634SAlex Zhikhartsev       for (BasicBlock *IncomingBB : CurPhi->blocks()) {
7788b0d7634SAlex Zhikhartsev         Value *Incoming = CurPhi->getIncomingValueForBlock(IncomingBB);
779b167ada8SUsman Nadeem         bool IsOutsideLoops = !SwitchOuterLoop->contains(IncomingBB);
78002077da7SAlexey Zhikhartsev         if (Incoming == FirstDef || isa<ConstantInt>(Incoming) ||
7818b0d7634SAlex Zhikhartsev             SeenValues.contains(Incoming) || IsOutsideLoops) {
78202077da7SAlexey Zhikhartsev           continue;
78302077da7SAlexey Zhikhartsev         }
78402077da7SAlexey Zhikhartsev 
7858b0d7634SAlex Zhikhartsev         // Any unpredictable value inside the loops means we must bail out.
7868b0d7634SAlex Zhikhartsev         if (!isa<PHINode>(Incoming))
7878b0d7634SAlex Zhikhartsev           return StateDefMap();
78802077da7SAlexey Zhikhartsev 
78902077da7SAlexey Zhikhartsev         Stack.push_back(cast<PHINode>(Incoming));
79002077da7SAlexey Zhikhartsev       }
79102077da7SAlexey Zhikhartsev     }
79202077da7SAlexey Zhikhartsev 
79302077da7SAlexey Zhikhartsev     return Res;
79402077da7SAlexey Zhikhartsev   }
79502077da7SAlexey Zhikhartsev 
796b167ada8SUsman Nadeem   unsigned NumVisited = 0;
79702077da7SAlexey Zhikhartsev   SwitchInst *Switch;
79802077da7SAlexey Zhikhartsev   BasicBlock *SwitchBlock;
79902077da7SAlexey Zhikhartsev   OptimizationRemarkEmitter *ORE;
80002077da7SAlexey Zhikhartsev   std::vector<ThreadingPath> TPaths;
801c229f767SXChy   LoopInfo *LI;
802b167ada8SUsman Nadeem   Loop *SwitchOuterLoop;
80302077da7SAlexey Zhikhartsev };
80402077da7SAlexey Zhikhartsev 
80502077da7SAlexey Zhikhartsev struct TransformDFA {
80602077da7SAlexey Zhikhartsev   TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT,
80702077da7SAlexey Zhikhartsev                AssumptionCache *AC, TargetTransformInfo *TTI,
80802077da7SAlexey Zhikhartsev                OptimizationRemarkEmitter *ORE,
80902077da7SAlexey Zhikhartsev                SmallPtrSet<const Value *, 32> EphValues)
81002077da7SAlexey Zhikhartsev       : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
81102077da7SAlexey Zhikhartsev         EphValues(EphValues) {}
81202077da7SAlexey Zhikhartsev 
81302077da7SAlexey Zhikhartsev   void run() {
81402077da7SAlexey Zhikhartsev     if (isLegalAndProfitableToTransform()) {
81502077da7SAlexey Zhikhartsev       createAllExitPaths();
81602077da7SAlexey Zhikhartsev       NumTransforms++;
81702077da7SAlexey Zhikhartsev     }
81802077da7SAlexey Zhikhartsev   }
81902077da7SAlexey Zhikhartsev 
82002077da7SAlexey Zhikhartsev private:
82102077da7SAlexey Zhikhartsev   /// This function performs both a legality check and profitability check at
82202077da7SAlexey Zhikhartsev   /// the same time since it is convenient to do so. It iterates through all
82302077da7SAlexey Zhikhartsev   /// blocks that will be cloned, and keeps track of the duplication cost. It
82402077da7SAlexey Zhikhartsev   /// also returns false if it is illegal to clone some required block.
82502077da7SAlexey Zhikhartsev   bool isLegalAndProfitableToTransform() {
82602077da7SAlexey Zhikhartsev     CodeMetrics Metrics;
82702077da7SAlexey Zhikhartsev     SwitchInst *Switch = SwitchPaths->getSwitchInst();
82802077da7SAlexey Zhikhartsev 
8292fba4694SXChy     // Don't thread switch without multiple successors.
8302fba4694SXChy     if (Switch->getNumSuccessors() <= 1)
8312fba4694SXChy       return false;
8322fba4694SXChy 
83302077da7SAlexey Zhikhartsev     // Note that DuplicateBlockMap is not being used as intended here. It is
83402077da7SAlexey Zhikhartsev     // just being used to ensure (BB, State) pairs are only counted once.
83502077da7SAlexey Zhikhartsev     DuplicateBlockMap DuplicateMap;
83602077da7SAlexey Zhikhartsev 
83702077da7SAlexey Zhikhartsev     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
83802077da7SAlexey Zhikhartsev       PathType PathBBs = TPath.getPath();
839019ffbf3SXChy       APInt NextState = TPath.getExitValue();
84002077da7SAlexey Zhikhartsev       const BasicBlock *Determinator = TPath.getDeterminatorBB();
84102077da7SAlexey Zhikhartsev 
84202077da7SAlexey Zhikhartsev       // Update Metrics for the Switch block, this is always cloned
84302077da7SAlexey Zhikhartsev       BasicBlock *BB = SwitchPaths->getSwitchBlock();
84402077da7SAlexey Zhikhartsev       BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
84502077da7SAlexey Zhikhartsev       if (!VisitedBB) {
84602077da7SAlexey Zhikhartsev         Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
84702077da7SAlexey Zhikhartsev         DuplicateMap[BB].push_back({BB, NextState});
84802077da7SAlexey Zhikhartsev       }
84902077da7SAlexey Zhikhartsev 
85002077da7SAlexey Zhikhartsev       // If the Switch block is the Determinator, then we can continue since
85102077da7SAlexey Zhikhartsev       // this is the only block that is cloned and we already counted for it.
85202077da7SAlexey Zhikhartsev       if (PathBBs.front() == Determinator)
85302077da7SAlexey Zhikhartsev         continue;
85402077da7SAlexey Zhikhartsev 
85502077da7SAlexey Zhikhartsev       // Otherwise update Metrics for all blocks that will be cloned. If any
85602077da7SAlexey Zhikhartsev       // block is already cloned and would be reused, don't double count it.
8575ea31555SKazu Hirata       auto DetIt = llvm::find(PathBBs, Determinator);
85802077da7SAlexey Zhikhartsev       for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
85902077da7SAlexey Zhikhartsev         BB = *BBIt;
86002077da7SAlexey Zhikhartsev         VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
86102077da7SAlexey Zhikhartsev         if (VisitedBB)
86202077da7SAlexey Zhikhartsev           continue;
86302077da7SAlexey Zhikhartsev         Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
86402077da7SAlexey Zhikhartsev         DuplicateMap[BB].push_back({BB, NextState});
86502077da7SAlexey Zhikhartsev       }
86602077da7SAlexey Zhikhartsev 
86702077da7SAlexey Zhikhartsev       if (Metrics.notDuplicatable) {
86802077da7SAlexey Zhikhartsev         LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
86902077da7SAlexey Zhikhartsev                           << "non-duplicatable instructions.\n");
87002077da7SAlexey Zhikhartsev         ORE->emit([&]() {
87102077da7SAlexey Zhikhartsev           return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst",
87202077da7SAlexey Zhikhartsev                                           Switch)
87302077da7SAlexey Zhikhartsev                  << "Contains non-duplicatable instructions.";
87402077da7SAlexey Zhikhartsev         });
87502077da7SAlexey Zhikhartsev         return false;
87602077da7SAlexey Zhikhartsev       }
87702077da7SAlexey Zhikhartsev 
878e0ac087fSSameer Sahasrabuddhe       // FIXME: Allow jump threading with controlled convergence.
879e0ac087fSSameer Sahasrabuddhe       if (Metrics.Convergence != ConvergenceKind::None) {
88002077da7SAlexey Zhikhartsev         LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
88102077da7SAlexey Zhikhartsev                           << "convergent instructions.\n");
88202077da7SAlexey Zhikhartsev         ORE->emit([&]() {
88302077da7SAlexey Zhikhartsev           return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
88402077da7SAlexey Zhikhartsev                  << "Contains convergent instructions.";
88502077da7SAlexey Zhikhartsev         });
88602077da7SAlexey Zhikhartsev         return false;
88702077da7SAlexey Zhikhartsev       }
888f85c5079SPhilip Reames 
889f85c5079SPhilip Reames       if (!Metrics.NumInsts.isValid()) {
890f85c5079SPhilip Reames         LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
891f85c5079SPhilip Reames                           << "instructions with invalid cost.\n");
892f85c5079SPhilip Reames         ORE->emit([&]() {
893f85c5079SPhilip Reames           return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
894f85c5079SPhilip Reames                  << "Contains instructions with invalid cost.";
895f85c5079SPhilip Reames         });
896f85c5079SPhilip Reames         return false;
897f85c5079SPhilip Reames       }
89802077da7SAlexey Zhikhartsev     }
89902077da7SAlexey Zhikhartsev 
9009c710ebbSDaniil Fukalov     InstructionCost DuplicationCost = 0;
90102077da7SAlexey Zhikhartsev 
90202077da7SAlexey Zhikhartsev     unsigned JumpTableSize = 0;
90302077da7SAlexey Zhikhartsev     TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr,
90402077da7SAlexey Zhikhartsev                                           nullptr);
90502077da7SAlexey Zhikhartsev     if (JumpTableSize == 0) {
90602077da7SAlexey Zhikhartsev       // Factor in the number of conditional branches reduced from jump
90702077da7SAlexey Zhikhartsev       // threading. Assume that lowering the switch block is implemented by
90802077da7SAlexey Zhikhartsev       // using binary search, hence the LogBase2().
90902077da7SAlexey Zhikhartsev       unsigned CondBranches =
91002077da7SAlexey Zhikhartsev           APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
9112fba4694SXChy       assert(CondBranches > 0 &&
9122fba4694SXChy              "The threaded switch must have multiple branches");
9139c710ebbSDaniil Fukalov       DuplicationCost = Metrics.NumInsts / CondBranches;
91402077da7SAlexey Zhikhartsev     } else {
91502077da7SAlexey Zhikhartsev       // Compared with jump tables, the DFA optimizer removes an indirect branch
91602077da7SAlexey Zhikhartsev       // on each loop iteration, thus making branch prediction more precise. The
91702077da7SAlexey Zhikhartsev       // more branch targets there are, the more likely it is for the branch
91802077da7SAlexey Zhikhartsev       // predictor to make a mistake, and the more benefit there is in the DFA
91902077da7SAlexey Zhikhartsev       // optimizer. Thus, the more branch targets there are, the lower is the
92002077da7SAlexey Zhikhartsev       // cost of the DFA opt.
9219c710ebbSDaniil Fukalov       DuplicationCost = Metrics.NumInsts / JumpTableSize;
92202077da7SAlexey Zhikhartsev     }
92302077da7SAlexey Zhikhartsev 
92402077da7SAlexey Zhikhartsev     LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
92502077da7SAlexey Zhikhartsev                       << SwitchPaths->getSwitchBlock()->getName()
92602077da7SAlexey Zhikhartsev                       << " is: " << DuplicationCost << "\n\n");
92702077da7SAlexey Zhikhartsev 
92802077da7SAlexey Zhikhartsev     if (DuplicationCost > CostThreshold) {
92902077da7SAlexey Zhikhartsev       LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the "
93002077da7SAlexey Zhikhartsev                         << "cost threshold.\n");
93102077da7SAlexey Zhikhartsev       ORE->emit([&]() {
93202077da7SAlexey Zhikhartsev         return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
93302077da7SAlexey Zhikhartsev                << "Duplication cost exceeds the cost threshold (cost="
93402077da7SAlexey Zhikhartsev                << ore::NV("Cost", DuplicationCost)
93502077da7SAlexey Zhikhartsev                << ", threshold=" << ore::NV("Threshold", CostThreshold) << ").";
93602077da7SAlexey Zhikhartsev       });
93702077da7SAlexey Zhikhartsev       return false;
93802077da7SAlexey Zhikhartsev     }
93902077da7SAlexey Zhikhartsev 
94002077da7SAlexey Zhikhartsev     ORE->emit([&]() {
94102077da7SAlexey Zhikhartsev       return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch)
94202077da7SAlexey Zhikhartsev              << "Switch statement jump-threaded.";
94302077da7SAlexey Zhikhartsev     });
94402077da7SAlexey Zhikhartsev 
94502077da7SAlexey Zhikhartsev     return true;
94602077da7SAlexey Zhikhartsev   }
94702077da7SAlexey Zhikhartsev 
94802077da7SAlexey Zhikhartsev   /// Transform each threading path to effectively jump thread the DFA.
94902077da7SAlexey Zhikhartsev   void createAllExitPaths() {
95002077da7SAlexey Zhikhartsev     DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager);
95102077da7SAlexey Zhikhartsev 
95202077da7SAlexey Zhikhartsev     // Move the switch block to the end of the path, since it will be duplicated
95302077da7SAlexey Zhikhartsev     BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
95402077da7SAlexey Zhikhartsev     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
95502077da7SAlexey Zhikhartsev       LLVM_DEBUG(dbgs() << TPath << "\n");
956b167ada8SUsman Nadeem       // TODO: Fix exit path creation logic so that we dont need this
957b167ada8SUsman Nadeem       // placeholder.
958b167ada8SUsman Nadeem       TPath.push_front(SwitchBlock);
95902077da7SAlexey Zhikhartsev     }
96002077da7SAlexey Zhikhartsev 
96102077da7SAlexey Zhikhartsev     // Transform the ThreadingPaths and keep track of the cloned values
96202077da7SAlexey Zhikhartsev     DuplicateBlockMap DuplicateMap;
96302077da7SAlexey Zhikhartsev     DefMap NewDefs;
96402077da7SAlexey Zhikhartsev 
96502077da7SAlexey Zhikhartsev     SmallSet<BasicBlock *, 16> BlocksToClean;
96602077da7SAlexey Zhikhartsev     for (BasicBlock *BB : successors(SwitchBlock))
96702077da7SAlexey Zhikhartsev       BlocksToClean.insert(BB);
96802077da7SAlexey Zhikhartsev 
96902077da7SAlexey Zhikhartsev     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
97002077da7SAlexey Zhikhartsev       createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
97102077da7SAlexey Zhikhartsev       NumPaths++;
97202077da7SAlexey Zhikhartsev     }
97302077da7SAlexey Zhikhartsev 
97402077da7SAlexey Zhikhartsev     // After all paths are cloned, now update the last successor of the cloned
97502077da7SAlexey Zhikhartsev     // path so it skips over the switch statement
97602077da7SAlexey Zhikhartsev     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
97702077da7SAlexey Zhikhartsev       updateLastSuccessor(TPath, DuplicateMap, &DTU);
97802077da7SAlexey Zhikhartsev 
97902077da7SAlexey Zhikhartsev     // For each instruction that was cloned and used outside, update its uses
98002077da7SAlexey Zhikhartsev     updateSSA(NewDefs);
98102077da7SAlexey Zhikhartsev 
98202077da7SAlexey Zhikhartsev     // Clean PHI Nodes for the newly created blocks
98302077da7SAlexey Zhikhartsev     for (BasicBlock *BB : BlocksToClean)
98402077da7SAlexey Zhikhartsev       cleanPhiNodes(BB);
98502077da7SAlexey Zhikhartsev   }
98602077da7SAlexey Zhikhartsev 
98702077da7SAlexey Zhikhartsev   /// For a specific ThreadingPath \p Path, create an exit path starting from
98802077da7SAlexey Zhikhartsev   /// the determinator block.
98902077da7SAlexey Zhikhartsev   ///
99002077da7SAlexey Zhikhartsev   /// To remember the correct destination, we have to duplicate blocks
99102077da7SAlexey Zhikhartsev   /// corresponding to each state. Also update the terminating instruction of
99202077da7SAlexey Zhikhartsev   /// the predecessors, and phis in the successor blocks.
99302077da7SAlexey Zhikhartsev   void createExitPath(DefMap &NewDefs, ThreadingPath &Path,
99402077da7SAlexey Zhikhartsev                       DuplicateBlockMap &DuplicateMap,
99502077da7SAlexey Zhikhartsev                       SmallSet<BasicBlock *, 16> &BlocksToClean,
99602077da7SAlexey Zhikhartsev                       DomTreeUpdater *DTU) {
997019ffbf3SXChy     APInt NextState = Path.getExitValue();
99802077da7SAlexey Zhikhartsev     const BasicBlock *Determinator = Path.getDeterminatorBB();
99902077da7SAlexey Zhikhartsev     PathType PathBBs = Path.getPath();
100002077da7SAlexey Zhikhartsev 
100102077da7SAlexey Zhikhartsev     // Don't select the placeholder block in front
100202077da7SAlexey Zhikhartsev     if (PathBBs.front() == Determinator)
100302077da7SAlexey Zhikhartsev       PathBBs.pop_front();
100402077da7SAlexey Zhikhartsev 
10055ea31555SKazu Hirata     auto DetIt = llvm::find(PathBBs, Determinator);
10062c0fc0f3SXChy     // When there is only one BB in PathBBs, the determinator takes itself as a
10072c0fc0f3SXChy     // direct predecessor.
10082c0fc0f3SXChy     BasicBlock *PrevBB = PathBBs.size() == 1 ? *DetIt : *std::prev(DetIt);
100902077da7SAlexey Zhikhartsev     for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
101002077da7SAlexey Zhikhartsev       BasicBlock *BB = *BBIt;
101102077da7SAlexey Zhikhartsev       BlocksToClean.insert(BB);
101202077da7SAlexey Zhikhartsev 
101302077da7SAlexey Zhikhartsev       // We already cloned BB for this NextState, now just update the branch
101402077da7SAlexey Zhikhartsev       // and continue.
101502077da7SAlexey Zhikhartsev       BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap);
101602077da7SAlexey Zhikhartsev       if (NextBB) {
101702077da7SAlexey Zhikhartsev         updatePredecessor(PrevBB, BB, NextBB, DTU);
101802077da7SAlexey Zhikhartsev         PrevBB = NextBB;
101902077da7SAlexey Zhikhartsev         continue;
102002077da7SAlexey Zhikhartsev       }
102102077da7SAlexey Zhikhartsev 
102202077da7SAlexey Zhikhartsev       // Clone the BB and update the successor of Prev to jump to the new block
102302077da7SAlexey Zhikhartsev       BasicBlock *NewBB = cloneBlockAndUpdatePredecessor(
102402077da7SAlexey Zhikhartsev           BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU);
102502077da7SAlexey Zhikhartsev       DuplicateMap[BB].push_back({NewBB, NextState});
102602077da7SAlexey Zhikhartsev       BlocksToClean.insert(NewBB);
102702077da7SAlexey Zhikhartsev       PrevBB = NewBB;
102802077da7SAlexey Zhikhartsev     }
102902077da7SAlexey Zhikhartsev   }
103002077da7SAlexey Zhikhartsev 
103102077da7SAlexey Zhikhartsev   /// Restore SSA form after cloning blocks.
103202077da7SAlexey Zhikhartsev   ///
103302077da7SAlexey Zhikhartsev   /// Each cloned block creates new defs for a variable, and the uses need to be
103402077da7SAlexey Zhikhartsev   /// updated to reflect this. The uses may be replaced with a cloned value, or
103502077da7SAlexey Zhikhartsev   /// some derived phi instruction. Note that all uses of a value defined in the
103602077da7SAlexey Zhikhartsev   /// same block were already remapped when cloning the block.
103702077da7SAlexey Zhikhartsev   void updateSSA(DefMap &NewDefs) {
103802077da7SAlexey Zhikhartsev     SSAUpdaterBulk SSAUpdate;
103902077da7SAlexey Zhikhartsev     SmallVector<Use *, 16> UsesToRename;
104002077da7SAlexey Zhikhartsev 
1041c83c4b58SKazu Hirata     for (const auto &KV : NewDefs) {
104202077da7SAlexey Zhikhartsev       Instruction *I = KV.first;
104302077da7SAlexey Zhikhartsev       BasicBlock *BB = I->getParent();
104402077da7SAlexey Zhikhartsev       std::vector<Instruction *> Cloned = KV.second;
104502077da7SAlexey Zhikhartsev 
104602077da7SAlexey Zhikhartsev       // Scan all uses of this instruction to see if it is used outside of its
104702077da7SAlexey Zhikhartsev       // block, and if so, record them in UsesToRename.
104802077da7SAlexey Zhikhartsev       for (Use &U : I->uses()) {
104902077da7SAlexey Zhikhartsev         Instruction *User = cast<Instruction>(U.getUser());
105002077da7SAlexey Zhikhartsev         if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
105102077da7SAlexey Zhikhartsev           if (UserPN->getIncomingBlock(U) == BB)
105202077da7SAlexey Zhikhartsev             continue;
105302077da7SAlexey Zhikhartsev         } else if (User->getParent() == BB) {
105402077da7SAlexey Zhikhartsev           continue;
105502077da7SAlexey Zhikhartsev         }
105602077da7SAlexey Zhikhartsev 
105702077da7SAlexey Zhikhartsev         UsesToRename.push_back(&U);
105802077da7SAlexey Zhikhartsev       }
105902077da7SAlexey Zhikhartsev 
106002077da7SAlexey Zhikhartsev       // If there are no uses outside the block, we're done with this
106102077da7SAlexey Zhikhartsev       // instruction.
106202077da7SAlexey Zhikhartsev       if (UsesToRename.empty())
106302077da7SAlexey Zhikhartsev         continue;
106402077da7SAlexey Zhikhartsev       LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I
106502077da7SAlexey Zhikhartsev                         << "\n");
106602077da7SAlexey Zhikhartsev 
106702077da7SAlexey Zhikhartsev       // We found a use of I outside of BB.  Rename all uses of I that are
106802077da7SAlexey Zhikhartsev       // outside its block to be uses of the appropriate PHI node etc.  See
106902077da7SAlexey Zhikhartsev       // ValuesInBlocks with the values we know.
107002077da7SAlexey Zhikhartsev       unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType());
107102077da7SAlexey Zhikhartsev       SSAUpdate.AddAvailableValue(VarNum, BB, I);
107202077da7SAlexey Zhikhartsev       for (Instruction *New : Cloned)
107302077da7SAlexey Zhikhartsev         SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New);
107402077da7SAlexey Zhikhartsev 
107502077da7SAlexey Zhikhartsev       while (!UsesToRename.empty())
107602077da7SAlexey Zhikhartsev         SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val());
107702077da7SAlexey Zhikhartsev 
107802077da7SAlexey Zhikhartsev       LLVM_DEBUG(dbgs() << "\n");
107902077da7SAlexey Zhikhartsev     }
108002077da7SAlexey Zhikhartsev     // SSAUpdater handles phi placement and renaming uses with the appropriate
108102077da7SAlexey Zhikhartsev     // value.
108202077da7SAlexey Zhikhartsev     SSAUpdate.RewriteAllUses(DT);
108302077da7SAlexey Zhikhartsev   }
108402077da7SAlexey Zhikhartsev 
108502077da7SAlexey Zhikhartsev   /// Clones a basic block, and adds it to the CFG.
108602077da7SAlexey Zhikhartsev   ///
108702077da7SAlexey Zhikhartsev   /// This function also includes updating phi nodes in the successors of the
108802077da7SAlexey Zhikhartsev   /// BB, and remapping uses that were defined locally in the cloned BB.
108902077da7SAlexey Zhikhartsev   BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB,
1090019ffbf3SXChy                                              const APInt &NextState,
109102077da7SAlexey Zhikhartsev                                              DuplicateBlockMap &DuplicateMap,
109202077da7SAlexey Zhikhartsev                                              DefMap &NewDefs,
109302077da7SAlexey Zhikhartsev                                              DomTreeUpdater *DTU) {
109402077da7SAlexey Zhikhartsev     ValueToValueMapTy VMap;
109502077da7SAlexey Zhikhartsev     BasicBlock *NewBB = CloneBasicBlock(
1096019ffbf3SXChy         BB, VMap, ".jt" + std::to_string(NextState.getLimitedValue()),
1097019ffbf3SXChy         BB->getParent());
109802077da7SAlexey Zhikhartsev     NewBB->moveAfter(BB);
109902077da7SAlexey Zhikhartsev     NumCloned++;
110002077da7SAlexey Zhikhartsev 
110102077da7SAlexey Zhikhartsev     for (Instruction &I : *NewBB) {
110202077da7SAlexey Zhikhartsev       // Do not remap operands of PHINode in case a definition in BB is an
110302077da7SAlexey Zhikhartsev       // incoming value to a phi in the same block. This incoming value will
110402077da7SAlexey Zhikhartsev       // be renamed later while restoring SSA.
110502077da7SAlexey Zhikhartsev       if (isa<PHINode>(&I))
110602077da7SAlexey Zhikhartsev         continue;
110702077da7SAlexey Zhikhartsev       RemapInstruction(&I, VMap,
110802077da7SAlexey Zhikhartsev                        RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);
110902077da7SAlexey Zhikhartsev       if (AssumeInst *II = dyn_cast<AssumeInst>(&I))
111002077da7SAlexey Zhikhartsev         AC->registerAssumption(II);
111102077da7SAlexey Zhikhartsev     }
111202077da7SAlexey Zhikhartsev 
111302077da7SAlexey Zhikhartsev     updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap);
111402077da7SAlexey Zhikhartsev     updatePredecessor(PrevBB, BB, NewBB, DTU);
111502077da7SAlexey Zhikhartsev     updateDefMap(NewDefs, VMap);
111602077da7SAlexey Zhikhartsev 
111702077da7SAlexey Zhikhartsev     // Add all successors to the DominatorTree
111802077da7SAlexey Zhikhartsev     SmallPtrSet<BasicBlock *, 4> SuccSet;
111902077da7SAlexey Zhikhartsev     for (auto *SuccBB : successors(NewBB)) {
112002077da7SAlexey Zhikhartsev       if (SuccSet.insert(SuccBB).second)
112102077da7SAlexey Zhikhartsev         DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}});
112202077da7SAlexey Zhikhartsev     }
112302077da7SAlexey Zhikhartsev     SuccSet.clear();
112402077da7SAlexey Zhikhartsev     return NewBB;
112502077da7SAlexey Zhikhartsev   }
112602077da7SAlexey Zhikhartsev 
112702077da7SAlexey Zhikhartsev   /// Update the phi nodes in BB's successors.
112802077da7SAlexey Zhikhartsev   ///
112902077da7SAlexey Zhikhartsev   /// This means creating a new incoming value from NewBB with the new
113002077da7SAlexey Zhikhartsev   /// instruction wherever there is an incoming value from BB.
113102077da7SAlexey Zhikhartsev   void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB,
1132019ffbf3SXChy                            const APInt &NextState, ValueToValueMapTy &VMap,
113302077da7SAlexey Zhikhartsev                            DuplicateBlockMap &DuplicateMap) {
113402077da7SAlexey Zhikhartsev     std::vector<BasicBlock *> BlocksToUpdate;
113502077da7SAlexey Zhikhartsev 
113602077da7SAlexey Zhikhartsev     // If BB is the last block in the path, we can simply update the one case
113702077da7SAlexey Zhikhartsev     // successor that will be reached.
113802077da7SAlexey Zhikhartsev     if (BB == SwitchPaths->getSwitchBlock()) {
113902077da7SAlexey Zhikhartsev       SwitchInst *Switch = SwitchPaths->getSwitchInst();
114002077da7SAlexey Zhikhartsev       BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
114102077da7SAlexey Zhikhartsev       BlocksToUpdate.push_back(NextCase);
114202077da7SAlexey Zhikhartsev       BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap);
114302077da7SAlexey Zhikhartsev       if (ClonedSucc)
114402077da7SAlexey Zhikhartsev         BlocksToUpdate.push_back(ClonedSucc);
114502077da7SAlexey Zhikhartsev     }
114602077da7SAlexey Zhikhartsev     // Otherwise update phis in all successors.
114702077da7SAlexey Zhikhartsev     else {
114802077da7SAlexey Zhikhartsev       for (BasicBlock *Succ : successors(BB)) {
114902077da7SAlexey Zhikhartsev         BlocksToUpdate.push_back(Succ);
115002077da7SAlexey Zhikhartsev 
115102077da7SAlexey Zhikhartsev         // Check if a successor has already been cloned for the particular exit
115202077da7SAlexey Zhikhartsev         // value. In this case if a successor was already cloned, the phi nodes
115302077da7SAlexey Zhikhartsev         // in the cloned block should be updated directly.
115402077da7SAlexey Zhikhartsev         BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap);
115502077da7SAlexey Zhikhartsev         if (ClonedSucc)
115602077da7SAlexey Zhikhartsev           BlocksToUpdate.push_back(ClonedSucc);
115702077da7SAlexey Zhikhartsev       }
115802077da7SAlexey Zhikhartsev     }
115902077da7SAlexey Zhikhartsev 
116002077da7SAlexey Zhikhartsev     // If there is a phi with an incoming value from BB, create a new incoming
116102077da7SAlexey Zhikhartsev     // value for the new predecessor ClonedBB. The value will either be the same
116202077da7SAlexey Zhikhartsev     // value from BB or a cloned value.
116302077da7SAlexey Zhikhartsev     for (BasicBlock *Succ : BlocksToUpdate) {
116402077da7SAlexey Zhikhartsev       for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II);
116502077da7SAlexey Zhikhartsev            ++II) {
116602077da7SAlexey Zhikhartsev         Value *Incoming = Phi->getIncomingValueForBlock(BB);
116702077da7SAlexey Zhikhartsev         if (Incoming) {
116802077da7SAlexey Zhikhartsev           if (isa<Constant>(Incoming)) {
116902077da7SAlexey Zhikhartsev             Phi->addIncoming(Incoming, ClonedBB);
117002077da7SAlexey Zhikhartsev             continue;
117102077da7SAlexey Zhikhartsev           }
117202077da7SAlexey Zhikhartsev           Value *ClonedVal = VMap[Incoming];
117302077da7SAlexey Zhikhartsev           if (ClonedVal)
117402077da7SAlexey Zhikhartsev             Phi->addIncoming(ClonedVal, ClonedBB);
117502077da7SAlexey Zhikhartsev           else
117602077da7SAlexey Zhikhartsev             Phi->addIncoming(Incoming, ClonedBB);
117702077da7SAlexey Zhikhartsev         }
117802077da7SAlexey Zhikhartsev       }
117902077da7SAlexey Zhikhartsev     }
118002077da7SAlexey Zhikhartsev   }
118102077da7SAlexey Zhikhartsev 
118202077da7SAlexey Zhikhartsev   /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all
118302077da7SAlexey Zhikhartsev   /// other successors are kept as well.
118402077da7SAlexey Zhikhartsev   void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB,
118502077da7SAlexey Zhikhartsev                          BasicBlock *NewBB, DomTreeUpdater *DTU) {
118602077da7SAlexey Zhikhartsev     // When a path is reused, there is a chance that predecessors were already
118702077da7SAlexey Zhikhartsev     // updated before. Check if the predecessor needs to be updated first.
118802077da7SAlexey Zhikhartsev     if (!isPredecessor(OldBB, PrevBB))
118902077da7SAlexey Zhikhartsev       return;
119002077da7SAlexey Zhikhartsev 
119102077da7SAlexey Zhikhartsev     Instruction *PrevTerm = PrevBB->getTerminator();
119202077da7SAlexey Zhikhartsev     for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) {
119302077da7SAlexey Zhikhartsev       if (PrevTerm->getSuccessor(Idx) == OldBB) {
119402077da7SAlexey Zhikhartsev         OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true);
119502077da7SAlexey Zhikhartsev         PrevTerm->setSuccessor(Idx, NewBB);
119602077da7SAlexey Zhikhartsev       }
119702077da7SAlexey Zhikhartsev     }
119802077da7SAlexey Zhikhartsev     DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB},
119902077da7SAlexey Zhikhartsev                        {DominatorTree::Insert, PrevBB, NewBB}});
120002077da7SAlexey Zhikhartsev   }
120102077da7SAlexey Zhikhartsev 
120202077da7SAlexey Zhikhartsev   /// Add new value mappings to the DefMap to keep track of all new definitions
120302077da7SAlexey Zhikhartsev   /// for a particular instruction. These will be used while updating SSA form.
120402077da7SAlexey Zhikhartsev   void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {
12059d555b4aSOlle Fredriksson     SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector;
12069d555b4aSOlle Fredriksson     NewDefsVector.reserve(VMap.size());
12079d555b4aSOlle Fredriksson 
120802077da7SAlexey Zhikhartsev     for (auto Entry : VMap) {
120902077da7SAlexey Zhikhartsev       Instruction *Inst =
121002077da7SAlexey Zhikhartsev           dyn_cast<Instruction>(const_cast<Value *>(Entry.first));
121102077da7SAlexey Zhikhartsev       if (!Inst || !Entry.second || isa<BranchInst>(Inst) ||
121202077da7SAlexey Zhikhartsev           isa<SwitchInst>(Inst)) {
121302077da7SAlexey Zhikhartsev         continue;
121402077da7SAlexey Zhikhartsev       }
121502077da7SAlexey Zhikhartsev 
121602077da7SAlexey Zhikhartsev       Instruction *Cloned = dyn_cast<Instruction>(Entry.second);
121702077da7SAlexey Zhikhartsev       if (!Cloned)
121802077da7SAlexey Zhikhartsev         continue;
121902077da7SAlexey Zhikhartsev 
12209d555b4aSOlle Fredriksson       NewDefsVector.push_back({Inst, Cloned});
122102077da7SAlexey Zhikhartsev     }
12229d555b4aSOlle Fredriksson 
12239d555b4aSOlle Fredriksson     // Sort the defs to get deterministic insertion order into NewDefs.
12249d555b4aSOlle Fredriksson     sort(NewDefsVector, [](const auto &LHS, const auto &RHS) {
12259d555b4aSOlle Fredriksson       if (LHS.first == RHS.first)
12269d555b4aSOlle Fredriksson         return LHS.second->comesBefore(RHS.second);
12279d555b4aSOlle Fredriksson       return LHS.first->comesBefore(RHS.first);
12289d555b4aSOlle Fredriksson     });
12299d555b4aSOlle Fredriksson 
12309d555b4aSOlle Fredriksson     for (const auto &KV : NewDefsVector)
12319d555b4aSOlle Fredriksson       NewDefs[KV.first].push_back(KV.second);
123202077da7SAlexey Zhikhartsev   }
123302077da7SAlexey Zhikhartsev 
123402077da7SAlexey Zhikhartsev   /// Update the last branch of a particular cloned path to point to the correct
123502077da7SAlexey Zhikhartsev   /// case successor.
123602077da7SAlexey Zhikhartsev   ///
123702077da7SAlexey Zhikhartsev   /// Note that this is an optional step and would have been done in later
123802077da7SAlexey Zhikhartsev   /// optimizations, but it makes the CFG significantly easier to work with.
123902077da7SAlexey Zhikhartsev   void updateLastSuccessor(ThreadingPath &TPath,
124002077da7SAlexey Zhikhartsev                            DuplicateBlockMap &DuplicateMap,
124102077da7SAlexey Zhikhartsev                            DomTreeUpdater *DTU) {
1242019ffbf3SXChy     APInt NextState = TPath.getExitValue();
124302077da7SAlexey Zhikhartsev     BasicBlock *BB = TPath.getPath().back();
124402077da7SAlexey Zhikhartsev     BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap);
124502077da7SAlexey Zhikhartsev 
124602077da7SAlexey Zhikhartsev     // Note multiple paths can end at the same block so check that it is not
124702077da7SAlexey Zhikhartsev     // updated yet
124802077da7SAlexey Zhikhartsev     if (!isa<SwitchInst>(LastBlock->getTerminator()))
124902077da7SAlexey Zhikhartsev       return;
125002077da7SAlexey Zhikhartsev     SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator());
125102077da7SAlexey Zhikhartsev     BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
125202077da7SAlexey Zhikhartsev 
125302077da7SAlexey Zhikhartsev     std::vector<DominatorTree::UpdateType> DTUpdates;
125402077da7SAlexey Zhikhartsev     SmallPtrSet<BasicBlock *, 4> SuccSet;
125502077da7SAlexey Zhikhartsev     for (BasicBlock *Succ : successors(LastBlock)) {
125602077da7SAlexey Zhikhartsev       if (Succ != NextCase && SuccSet.insert(Succ).second)
125702077da7SAlexey Zhikhartsev         DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ});
125802077da7SAlexey Zhikhartsev     }
125902077da7SAlexey Zhikhartsev 
126002077da7SAlexey Zhikhartsev     Switch->eraseFromParent();
126102077da7SAlexey Zhikhartsev     BranchInst::Create(NextCase, LastBlock);
126202077da7SAlexey Zhikhartsev 
126302077da7SAlexey Zhikhartsev     DTU->applyUpdates(DTUpdates);
126402077da7SAlexey Zhikhartsev   }
126502077da7SAlexey Zhikhartsev 
126602077da7SAlexey Zhikhartsev   /// After cloning blocks, some of the phi nodes have extra incoming values
126702077da7SAlexey Zhikhartsev   /// that are no longer used. This function removes them.
126802077da7SAlexey Zhikhartsev   void cleanPhiNodes(BasicBlock *BB) {
126902077da7SAlexey Zhikhartsev     // If BB is no longer reachable, remove any remaining phi nodes
127002077da7SAlexey Zhikhartsev     if (pred_empty(BB)) {
127102077da7SAlexey Zhikhartsev       std::vector<PHINode *> PhiToRemove;
127202077da7SAlexey Zhikhartsev       for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
127302077da7SAlexey Zhikhartsev         PhiToRemove.push_back(Phi);
127402077da7SAlexey Zhikhartsev       }
127502077da7SAlexey Zhikhartsev       for (PHINode *PN : PhiToRemove) {
127653dc0f10SNuno Lopes         PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
127702077da7SAlexey Zhikhartsev         PN->eraseFromParent();
127802077da7SAlexey Zhikhartsev       }
127902077da7SAlexey Zhikhartsev       return;
128002077da7SAlexey Zhikhartsev     }
128102077da7SAlexey Zhikhartsev 
128202077da7SAlexey Zhikhartsev     // Remove any incoming values that come from an invalid predecessor
128302077da7SAlexey Zhikhartsev     for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
128402077da7SAlexey Zhikhartsev       std::vector<BasicBlock *> BlocksToRemove;
128502077da7SAlexey Zhikhartsev       for (BasicBlock *IncomingBB : Phi->blocks()) {
128602077da7SAlexey Zhikhartsev         if (!isPredecessor(BB, IncomingBB))
128702077da7SAlexey Zhikhartsev           BlocksToRemove.push_back(IncomingBB);
128802077da7SAlexey Zhikhartsev       }
128902077da7SAlexey Zhikhartsev       for (BasicBlock *BB : BlocksToRemove)
129002077da7SAlexey Zhikhartsev         Phi->removeIncomingValue(BB);
129102077da7SAlexey Zhikhartsev     }
129202077da7SAlexey Zhikhartsev   }
129302077da7SAlexey Zhikhartsev 
129402077da7SAlexey Zhikhartsev   /// Checks if BB was already cloned for a particular next state value. If it
129502077da7SAlexey Zhikhartsev   /// was then it returns this cloned block, and otherwise null.
1296019ffbf3SXChy   BasicBlock *getClonedBB(BasicBlock *BB, const APInt &NextState,
129702077da7SAlexey Zhikhartsev                           DuplicateBlockMap &DuplicateMap) {
129802077da7SAlexey Zhikhartsev     CloneList ClonedBBs = DuplicateMap[BB];
129902077da7SAlexey Zhikhartsev 
130002077da7SAlexey Zhikhartsev     // Find an entry in the CloneList with this NextState. If it exists then
130102077da7SAlexey Zhikhartsev     // return the corresponding BB
130202077da7SAlexey Zhikhartsev     auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) {
130302077da7SAlexey Zhikhartsev       return C.State == NextState;
130402077da7SAlexey Zhikhartsev     });
130502077da7SAlexey Zhikhartsev     return It != ClonedBBs.end() ? (*It).BB : nullptr;
130602077da7SAlexey Zhikhartsev   }
130702077da7SAlexey Zhikhartsev 
130802077da7SAlexey Zhikhartsev   /// Helper to get the successor corresponding to a particular case value for
130902077da7SAlexey Zhikhartsev   /// a switch statement.
1310019ffbf3SXChy   BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) {
131102077da7SAlexey Zhikhartsev     BasicBlock *NextCase = nullptr;
131202077da7SAlexey Zhikhartsev     for (auto Case : Switch->cases()) {
1313019ffbf3SXChy       if (Case.getCaseValue()->getValue() == NextState) {
131402077da7SAlexey Zhikhartsev         NextCase = Case.getCaseSuccessor();
131502077da7SAlexey Zhikhartsev         break;
131602077da7SAlexey Zhikhartsev       }
131702077da7SAlexey Zhikhartsev     }
131802077da7SAlexey Zhikhartsev     if (!NextCase)
131902077da7SAlexey Zhikhartsev       NextCase = Switch->getDefaultDest();
132002077da7SAlexey Zhikhartsev     return NextCase;
132102077da7SAlexey Zhikhartsev   }
132202077da7SAlexey Zhikhartsev 
132302077da7SAlexey Zhikhartsev   /// Returns true if IncomingBB is a predecessor of BB.
132402077da7SAlexey Zhikhartsev   bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
1325f83a88a1SKazu Hirata     return llvm::is_contained(predecessors(BB), IncomingBB);
132602077da7SAlexey Zhikhartsev   }
132702077da7SAlexey Zhikhartsev 
132802077da7SAlexey Zhikhartsev   AllSwitchPaths *SwitchPaths;
132902077da7SAlexey Zhikhartsev   DominatorTree *DT;
133002077da7SAlexey Zhikhartsev   AssumptionCache *AC;
133102077da7SAlexey Zhikhartsev   TargetTransformInfo *TTI;
133202077da7SAlexey Zhikhartsev   OptimizationRemarkEmitter *ORE;
133302077da7SAlexey Zhikhartsev   SmallPtrSet<const Value *, 32> EphValues;
133402077da7SAlexey Zhikhartsev   std::vector<ThreadingPath> TPaths;
133502077da7SAlexey Zhikhartsev };
133602077da7SAlexey Zhikhartsev 
133702077da7SAlexey Zhikhartsev bool DFAJumpThreading::run(Function &F) {
133802077da7SAlexey Zhikhartsev   LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");
133902077da7SAlexey Zhikhartsev 
134002077da7SAlexey Zhikhartsev   if (F.hasOptSize()) {
134102077da7SAlexey Zhikhartsev     LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n");
134202077da7SAlexey Zhikhartsev     return false;
134302077da7SAlexey Zhikhartsev   }
134402077da7SAlexey Zhikhartsev 
134502077da7SAlexey Zhikhartsev   if (ClViewCfgBefore)
134602077da7SAlexey Zhikhartsev     F.viewCFG();
134702077da7SAlexey Zhikhartsev 
134802077da7SAlexey Zhikhartsev   SmallVector<AllSwitchPaths, 2> ThreadableLoops;
134902077da7SAlexey Zhikhartsev   bool MadeChanges = false;
1350c229f767SXChy   LoopInfoBroken = false;
135102077da7SAlexey Zhikhartsev 
135202077da7SAlexey Zhikhartsev   for (BasicBlock &BB : F) {
135302077da7SAlexey Zhikhartsev     auto *SI = dyn_cast<SwitchInst>(BB.getTerminator());
135402077da7SAlexey Zhikhartsev     if (!SI)
135502077da7SAlexey Zhikhartsev       continue;
135602077da7SAlexey Zhikhartsev 
135702077da7SAlexey Zhikhartsev     LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
13588b0d7634SAlex Zhikhartsev                       << " is a candidate\n");
13596b53ada6SXChy     MainSwitch Switch(SI, LI, ORE);
136002077da7SAlexey Zhikhartsev 
1361b167ada8SUsman Nadeem     if (!Switch.getInstr()) {
1362b167ada8SUsman Nadeem       LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is not a "
1363b167ada8SUsman Nadeem                         << "candidate for jump threading\n");
136402077da7SAlexey Zhikhartsev       continue;
1365b167ada8SUsman Nadeem     }
136602077da7SAlexey Zhikhartsev 
136702077da7SAlexey Zhikhartsev     LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a "
136802077da7SAlexey Zhikhartsev                       << "candidate for jump threading\n");
136902077da7SAlexey Zhikhartsev     LLVM_DEBUG(SI->dump());
137002077da7SAlexey Zhikhartsev 
137102077da7SAlexey Zhikhartsev     unfoldSelectInstrs(DT, Switch.getSelectInsts());
137202077da7SAlexey Zhikhartsev     if (!Switch.getSelectInsts().empty())
137302077da7SAlexey Zhikhartsev       MadeChanges = true;
137402077da7SAlexey Zhikhartsev 
1375b167ada8SUsman Nadeem     AllSwitchPaths SwitchPaths(&Switch, ORE, LI,
1376b167ada8SUsman Nadeem                                LI->getLoopFor(&BB)->getOutermostLoop());
137702077da7SAlexey Zhikhartsev     SwitchPaths.run();
137802077da7SAlexey Zhikhartsev 
137902077da7SAlexey Zhikhartsev     if (SwitchPaths.getNumThreadingPaths() > 0) {
138002077da7SAlexey Zhikhartsev       ThreadableLoops.push_back(SwitchPaths);
138102077da7SAlexey Zhikhartsev 
138202077da7SAlexey Zhikhartsev       // For the time being limit this optimization to occurring once in a
138302077da7SAlexey Zhikhartsev       // function since it can change the CFG significantly. This is not a
138402077da7SAlexey Zhikhartsev       // strict requirement but it can cause buggy behavior if there is an
138502077da7SAlexey Zhikhartsev       // overlap of blocks in different opportunities. There is a lot of room to
138602077da7SAlexey Zhikhartsev       // experiment with catching more opportunities here.
1387c229f767SXChy       // NOTE: To release this contraint, we must handle LoopInfo invalidation
138802077da7SAlexey Zhikhartsev       break;
138902077da7SAlexey Zhikhartsev     }
139002077da7SAlexey Zhikhartsev   }
139102077da7SAlexey Zhikhartsev 
1392c229f767SXChy #ifdef NDEBUG
1393c229f767SXChy   LI->verify(*DT);
1394c229f767SXChy #endif
1395c229f767SXChy 
139602077da7SAlexey Zhikhartsev   SmallPtrSet<const Value *, 32> EphValues;
139702077da7SAlexey Zhikhartsev   if (ThreadableLoops.size() > 0)
139802077da7SAlexey Zhikhartsev     CodeMetrics::collectEphemeralValues(&F, AC, EphValues);
139902077da7SAlexey Zhikhartsev 
140002077da7SAlexey Zhikhartsev   for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
140102077da7SAlexey Zhikhartsev     TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
140202077da7SAlexey Zhikhartsev     Transform.run();
140302077da7SAlexey Zhikhartsev     MadeChanges = true;
1404c229f767SXChy     LoopInfoBroken = true;
140502077da7SAlexey Zhikhartsev   }
140602077da7SAlexey Zhikhartsev 
140702077da7SAlexey Zhikhartsev #ifdef EXPENSIVE_CHECKS
140802077da7SAlexey Zhikhartsev   assert(DT->verify(DominatorTree::VerificationLevel::Full));
140902077da7SAlexey Zhikhartsev   verifyFunction(F, &dbgs());
141002077da7SAlexey Zhikhartsev #endif
141102077da7SAlexey Zhikhartsev 
141202077da7SAlexey Zhikhartsev   return MadeChanges;
141302077da7SAlexey Zhikhartsev }
141402077da7SAlexey Zhikhartsev 
141502077da7SAlexey Zhikhartsev } // end anonymous namespace
141602077da7SAlexey Zhikhartsev 
141702077da7SAlexey Zhikhartsev /// Integrate with the new Pass Manager
141802077da7SAlexey Zhikhartsev PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
141902077da7SAlexey Zhikhartsev                                             FunctionAnalysisManager &AM) {
142002077da7SAlexey Zhikhartsev   AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
142102077da7SAlexey Zhikhartsev   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
14226b53ada6SXChy   LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
142302077da7SAlexey Zhikhartsev   TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
142402077da7SAlexey Zhikhartsev   OptimizationRemarkEmitter ORE(&F);
1425c229f767SXChy   DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE);
1426c229f767SXChy   if (!ThreadImpl.run(F))
142702077da7SAlexey Zhikhartsev     return PreservedAnalyses::all();
142802077da7SAlexey Zhikhartsev 
142902077da7SAlexey Zhikhartsev   PreservedAnalyses PA;
143002077da7SAlexey Zhikhartsev   PA.preserve<DominatorTreeAnalysis>();
1431c229f767SXChy   if (!ThreadImpl.LoopInfoBroken)
1432c229f767SXChy     PA.preserve<LoopAnalysis>();
143302077da7SAlexey Zhikhartsev   return PA;
143402077da7SAlexey Zhikhartsev }
1435