xref: /llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp (revision f209649dfcfef475de740a4755a6c480db360a6f)
1 //===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
11 #include "llvm/ADT/DenseMap.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/Sequence.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/ADT/SmallPtrSet.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/Analysis/AssumptionCache.h"
20 #include "llvm/Analysis/CFG.h"
21 #include "llvm/Analysis/CodeMetrics.h"
22 #include "llvm/Analysis/LoopAnalysisManager.h"
23 #include "llvm/Analysis/LoopInfo.h"
24 #include "llvm/Analysis/LoopIterator.h"
25 #include "llvm/Analysis/LoopPass.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/Constant.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/InstrTypes.h"
32 #include "llvm/IR/Instruction.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/IntrinsicInst.h"
35 #include "llvm/IR/Use.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Pass.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/GenericDomTree.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
44 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
45 #include "llvm/Transforms/Utils/Cloning.h"
46 #include "llvm/Transforms/Utils/LoopUtils.h"
47 #include "llvm/Transforms/Utils/ValueMapper.h"
48 #include <algorithm>
49 #include <cassert>
50 #include <iterator>
51 #include <numeric>
52 #include <utility>
53 
54 #define DEBUG_TYPE "simple-loop-unswitch"
55 
56 using namespace llvm;
57 
58 STATISTIC(NumBranches, "Number of branches unswitched");
59 STATISTIC(NumSwitches, "Number of switches unswitched");
60 STATISTIC(NumTrivial, "Number of unswitches that are trivial");
61 
62 static cl::opt<bool> EnableNonTrivialUnswitch(
63     "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
64     cl::desc("Forcibly enables non-trivial loop unswitching rather than "
65              "following the configuration passed into the pass."));
66 
67 static cl::opt<int>
68     UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
69                       cl::desc("The cost threshold for unswitching a loop."));
70 
71 static void replaceLoopUsesWithConstant(Loop &L, Value &LIC,
72                                         Constant &Replacement) {
73   assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
74 
75   // Replace uses of LIC in the loop with the given constant.
76   for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) {
77     // Grab the use and walk past it so we can clobber it in the use list.
78     Use *U = &*UI++;
79     Instruction *UserI = dyn_cast<Instruction>(U->getUser());
80     if (!UserI || !L.contains(UserI))
81       continue;
82 
83     // Replace this use within the loop body.
84     *U = &Replacement;
85   }
86 }
87 
88 /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
89 /// incoming values along this edge.
90 static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
91                                          BasicBlock &ExitBB) {
92   for (Instruction &I : ExitBB) {
93     auto *PN = dyn_cast<PHINode>(&I);
94     if (!PN)
95       // No more PHIs to check.
96       return true;
97 
98     // If the incoming value for this edge isn't loop invariant the unswitch
99     // won't be trivial.
100     if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
101       return false;
102   }
103   llvm_unreachable("Basic blocks should never be empty!");
104 }
105 
106 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
107 ///
108 /// Requires that the loop exit and unswitched basic block are the same, and
109 /// that the exiting block was a unique predecessor of that block. Rewrites the
110 /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
111 /// PHI nodes from the old preheader that now contains the unswitched
112 /// terminator.
113 static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
114                                                   BasicBlock &OldExitingBB,
115                                                   BasicBlock &OldPH) {
116   for (PHINode &PN : UnswitchedBB.phis()) {
117     // When the loop exit is directly unswitched we just need to update the
118     // incoming basic block. We loop to handle weird cases with repeated
119     // incoming blocks, but expect to typically only have one operand here.
120     for (auto i : seq<int>(0, PN.getNumOperands())) {
121       assert(PN.getIncomingBlock(i) == &OldExitingBB &&
122              "Found incoming block different from unique predecessor!");
123       PN.setIncomingBlock(i, &OldPH);
124     }
125   }
126 }
127 
128 /// Rewrite the PHI nodes in the loop exit basic block and the split off
129 /// unswitched block.
130 ///
131 /// Because the exit block remains an exit from the loop, this rewrites the
132 /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
133 /// nodes into the unswitched basic block to select between the value in the
134 /// old preheader and the loop exit.
135 static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
136                                                       BasicBlock &UnswitchedBB,
137                                                       BasicBlock &OldExitingBB,
138                                                       BasicBlock &OldPH) {
139   assert(&ExitBB != &UnswitchedBB &&
140          "Must have different loop exit and unswitched blocks!");
141   Instruction *InsertPt = &*UnswitchedBB.begin();
142   for (PHINode &PN : ExitBB.phis()) {
143     auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
144                                   PN.getName() + ".split", InsertPt);
145 
146     // Walk backwards over the old PHI node's inputs to minimize the cost of
147     // removing each one. We have to do this weird loop manually so that we
148     // create the same number of new incoming edges in the new PHI as we expect
149     // each case-based edge to be included in the unswitched switch in some
150     // cases.
151     // FIXME: This is really, really gross. It would be much cleaner if LLVM
152     // allowed us to create a single entry for a predecessor block without
153     // having separate entries for each "edge" even though these edges are
154     // required to produce identical results.
155     for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
156       if (PN.getIncomingBlock(i) != &OldExitingBB)
157         continue;
158 
159       Value *Incoming = PN.removeIncomingValue(i);
160       NewPN->addIncoming(Incoming, &OldPH);
161     }
162 
163     // Now replace the old PHI with the new one and wire the old one in as an
164     // input to the new one.
165     PN.replaceAllUsesWith(NewPN);
166     NewPN->addIncoming(&PN, &ExitBB);
167   }
168 }
169 
170 /// Unswitch a trivial branch if the condition is loop invariant.
171 ///
172 /// This routine should only be called when loop code leading to the branch has
173 /// been validated as trivial (no side effects). This routine checks if the
174 /// condition is invariant and one of the successors is a loop exit. This
175 /// allows us to unswitch without duplicating the loop, making it trivial.
176 ///
177 /// If this routine fails to unswitch the branch it returns false.
178 ///
179 /// If the branch can be unswitched, this routine splits the preheader and
180 /// hoists the branch above that split. Preserves loop simplified form
181 /// (splitting the exit block as necessary). It simplifies the branch within
182 /// the loop to an unconditional branch but doesn't remove it entirely. Further
183 /// cleanup can be done with some simplify-cfg like pass.
184 static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
185                                   LoopInfo &LI) {
186   assert(BI.isConditional() && "Can only unswitch a conditional branch!");
187   LLVM_DEBUG(dbgs() << "  Trying to unswitch branch: " << BI << "\n");
188 
189   Value *LoopCond = BI.getCondition();
190 
191   // Need a trivial loop condition to unswitch.
192   if (!L.isLoopInvariant(LoopCond))
193     return false;
194 
195   // Check to see if a successor of the branch is guaranteed to
196   // exit through a unique exit block without having any
197   // side-effects.  If so, determine the value of Cond that causes
198   // it to do this.
199   ConstantInt *CondVal = ConstantInt::getTrue(BI.getContext());
200   ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext());
201   int LoopExitSuccIdx = 0;
202   auto *LoopExitBB = BI.getSuccessor(0);
203   if (L.contains(LoopExitBB)) {
204     std::swap(CondVal, Replacement);
205     LoopExitSuccIdx = 1;
206     LoopExitBB = BI.getSuccessor(1);
207     if (L.contains(LoopExitBB))
208       return false;
209   }
210   auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
211   auto *ParentBB = BI.getParent();
212   if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB))
213     return false;
214 
215   LLVM_DEBUG(dbgs() << "    unswitching trivial branch when: " << CondVal
216                     << " == " << LoopCond << "\n");
217 
218   // Split the preheader, so that we know that there is a safe place to insert
219   // the conditional branch. We will change the preheader to have a conditional
220   // branch on LoopCond.
221   BasicBlock *OldPH = L.getLoopPreheader();
222   BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
223 
224   // Now that we have a place to insert the conditional branch, create a place
225   // to branch to: this is the exit block out of the loop that we are
226   // unswitching. We need to split this if there are other loop predecessors.
227   // Because the loop is in simplified form, *any* other predecessor is enough.
228   BasicBlock *UnswitchedBB;
229   if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) {
230     (void)PredBB;
231     assert(PredBB == BI.getParent() &&
232            "A branch's parent isn't a predecessor!");
233     UnswitchedBB = LoopExitBB;
234   } else {
235     UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI);
236   }
237 
238   // Now splice the branch to gate reaching the new preheader and re-point its
239   // successors.
240   OldPH->getInstList().splice(std::prev(OldPH->end()),
241                               BI.getParent()->getInstList(), BI);
242   OldPH->getTerminator()->eraseFromParent();
243   BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
244   BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
245 
246   // Create a new unconditional branch that will continue the loop as a new
247   // terminator.
248   BranchInst::Create(ContinueBB, ParentBB);
249 
250   // Rewrite the relevant PHI nodes.
251   if (UnswitchedBB == LoopExitBB)
252     rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
253   else
254     rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
255                                               *ParentBB, *OldPH);
256 
257   // Now we need to update the dominator tree.
258   DT.applyUpdates(
259       {{DT.Delete, ParentBB, UnswitchedBB}, {DT.Insert, OldPH, UnswitchedBB}});
260 
261   // Since this is an i1 condition we can also trivially replace uses of it
262   // within the loop with a constant.
263   replaceLoopUsesWithConstant(L, *LoopCond, *Replacement);
264 
265   ++NumTrivial;
266   ++NumBranches;
267   return true;
268 }
269 
270 /// Unswitch a trivial switch if the condition is loop invariant.
271 ///
272 /// This routine should only be called when loop code leading to the switch has
273 /// been validated as trivial (no side effects). This routine checks if the
274 /// condition is invariant and that at least one of the successors is a loop
275 /// exit. This allows us to unswitch without duplicating the loop, making it
276 /// trivial.
277 ///
278 /// If this routine fails to unswitch the switch it returns false.
279 ///
280 /// If the switch can be unswitched, this routine splits the preheader and
281 /// copies the switch above that split. If the default case is one of the
282 /// exiting cases, it copies the non-exiting cases and points them at the new
283 /// preheader. If the default case is not exiting, it copies the exiting cases
284 /// and points the default at the preheader. It preserves loop simplified form
285 /// (splitting the exit blocks as necessary). It simplifies the switch within
286 /// the loop by removing now-dead cases. If the default case is one of those
287 /// unswitched, it replaces its destination with a new basic block containing
288 /// only unreachable. Such basic blocks, while technically loop exits, are not
289 /// considered for unswitching so this is a stable transform and the same
290 /// switch will not be revisited. If after unswitching there is only a single
291 /// in-loop successor, the switch is further simplified to an unconditional
292 /// branch. Still more cleanup can be done with some simplify-cfg like pass.
293 static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
294                                   LoopInfo &LI) {
295   LLVM_DEBUG(dbgs() << "  Trying to unswitch switch: " << SI << "\n");
296   Value *LoopCond = SI.getCondition();
297 
298   // If this isn't switching on an invariant condition, we can't unswitch it.
299   if (!L.isLoopInvariant(LoopCond))
300     return false;
301 
302   auto *ParentBB = SI.getParent();
303 
304   SmallVector<int, 4> ExitCaseIndices;
305   for (auto Case : SI.cases()) {
306     auto *SuccBB = Case.getCaseSuccessor();
307     if (!L.contains(SuccBB) &&
308         areLoopExitPHIsLoopInvariant(L, *ParentBB, *SuccBB))
309       ExitCaseIndices.push_back(Case.getCaseIndex());
310   }
311   BasicBlock *DefaultExitBB = nullptr;
312   if (!L.contains(SI.getDefaultDest()) &&
313       areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) &&
314       !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
315     DefaultExitBB = SI.getDefaultDest();
316   else if (ExitCaseIndices.empty())
317     return false;
318 
319   LLVM_DEBUG(dbgs() << "    unswitching trivial cases...\n");
320 
321   SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases;
322   ExitCases.reserve(ExitCaseIndices.size());
323   // We walk the case indices backwards so that we remove the last case first
324   // and don't disrupt the earlier indices.
325   for (unsigned Index : reverse(ExitCaseIndices)) {
326     auto CaseI = SI.case_begin() + Index;
327     // Save the value of this case.
328     ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
329     // Delete the unswitched cases.
330     SI.removeCase(CaseI);
331   }
332 
333   // Check if after this all of the remaining cases point at the same
334   // successor.
335   BasicBlock *CommonSuccBB = nullptr;
336   if (SI.getNumCases() > 0 &&
337       std::all_of(std::next(SI.case_begin()), SI.case_end(),
338                   [&SI](const SwitchInst::CaseHandle &Case) {
339                     return Case.getCaseSuccessor() ==
340                            SI.case_begin()->getCaseSuccessor();
341                   }))
342     CommonSuccBB = SI.case_begin()->getCaseSuccessor();
343 
344   if (DefaultExitBB) {
345     // We can't remove the default edge so replace it with an edge to either
346     // the single common remaining successor (if we have one) or an unreachable
347     // block.
348     if (CommonSuccBB) {
349       SI.setDefaultDest(CommonSuccBB);
350     } else {
351       BasicBlock *UnreachableBB = BasicBlock::Create(
352           ParentBB->getContext(),
353           Twine(ParentBB->getName()) + ".unreachable_default",
354           ParentBB->getParent());
355       new UnreachableInst(ParentBB->getContext(), UnreachableBB);
356       SI.setDefaultDest(UnreachableBB);
357       DT.addNewBlock(UnreachableBB, ParentBB);
358     }
359   } else {
360     // If we're not unswitching the default, we need it to match any cases to
361     // have a common successor or if we have no cases it is the common
362     // successor.
363     if (SI.getNumCases() == 0)
364       CommonSuccBB = SI.getDefaultDest();
365     else if (SI.getDefaultDest() != CommonSuccBB)
366       CommonSuccBB = nullptr;
367   }
368 
369   // Split the preheader, so that we know that there is a safe place to insert
370   // the switch.
371   BasicBlock *OldPH = L.getLoopPreheader();
372   BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
373   OldPH->getTerminator()->eraseFromParent();
374 
375   // Now add the unswitched switch.
376   auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
377 
378   // Rewrite the IR for the unswitched basic blocks. This requires two steps.
379   // First, we split any exit blocks with remaining in-loop predecessors. Then
380   // we update the PHIs in one of two ways depending on if there was a split.
381   // We walk in reverse so that we split in the same order as the cases
382   // appeared. This is purely for convenience of reading the resulting IR, but
383   // it doesn't cost anything really.
384   SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
385   SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
386   // Handle the default exit if necessary.
387   // FIXME: It'd be great if we could merge this with the loop below but LLVM's
388   // ranges aren't quite powerful enough yet.
389   if (DefaultExitBB) {
390     if (pred_empty(DefaultExitBB)) {
391       UnswitchedExitBBs.insert(DefaultExitBB);
392       rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
393     } else {
394       auto *SplitBB =
395           SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
396       rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
397                                                 *ParentBB, *OldPH);
398       DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
399     }
400   }
401   // Note that we must use a reference in the for loop so that we update the
402   // container.
403   for (auto &CasePair : reverse(ExitCases)) {
404     // Grab a reference to the exit block in the pair so that we can update it.
405     BasicBlock *ExitBB = CasePair.second;
406 
407     // If this case is the last edge into the exit block, we can simply reuse it
408     // as it will no longer be a loop exit. No mapping necessary.
409     if (pred_empty(ExitBB)) {
410       // Only rewrite once.
411       if (UnswitchedExitBBs.insert(ExitBB).second)
412         rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
413       continue;
414     }
415 
416     // Otherwise we need to split the exit block so that we retain an exit
417     // block from the loop and a target for the unswitched condition.
418     BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
419     if (!SplitExitBB) {
420       // If this is the first time we see this, do the split and remember it.
421       SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
422       rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
423                                                 *ParentBB, *OldPH);
424     }
425     // Update the case pair to point to the split block.
426     CasePair.second = SplitExitBB;
427   }
428 
429   // Now add the unswitched cases. We do this in reverse order as we built them
430   // in reverse order.
431   for (auto CasePair : reverse(ExitCases)) {
432     ConstantInt *CaseVal = CasePair.first;
433     BasicBlock *UnswitchedBB = CasePair.second;
434 
435     NewSI->addCase(CaseVal, UnswitchedBB);
436   }
437 
438   // If the default was unswitched, re-point it and add explicit cases for
439   // entering the loop.
440   if (DefaultExitBB) {
441     NewSI->setDefaultDest(DefaultExitBB);
442 
443     // We removed all the exit cases, so we just copy the cases to the
444     // unswitched switch.
445     for (auto Case : SI.cases())
446       NewSI->addCase(Case.getCaseValue(), NewPH);
447   }
448 
449   // If we ended up with a common successor for every path through the switch
450   // after unswitching, rewrite it to an unconditional branch to make it easy
451   // to recognize. Otherwise we potentially have to recognize the default case
452   // pointing at unreachable and other complexity.
453   if (CommonSuccBB) {
454     BasicBlock *BB = SI.getParent();
455     SI.eraseFromParent();
456     BranchInst::Create(CommonSuccBB, BB);
457   }
458 
459   // Walk the unswitched exit blocks and the unswitched split blocks and update
460   // the dominator tree based on the CFG edits. While we are walking unordered
461   // containers here, the API for applyUpdates takes an unordered list of
462   // updates and requires them to not contain duplicates.
463   SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
464   for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
465     DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
466     DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
467   }
468   for (auto SplitUnswitchedPair : SplitExitBBMap) {
469     auto *UnswitchedBB = SplitUnswitchedPair.second;
470     DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedBB});
471     DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB});
472   }
473   DT.applyUpdates(DTUpdates);
474 
475   assert(DT.verify(DominatorTree::VerificationLevel::Fast));
476   ++NumTrivial;
477   ++NumSwitches;
478   return true;
479 }
480 
481 /// This routine scans the loop to find a branch or switch which occurs before
482 /// any side effects occur. These can potentially be unswitched without
483 /// duplicating the loop. If a branch or switch is successfully unswitched the
484 /// scanning continues to see if subsequent branches or switches have become
485 /// trivial. Once all trivial candidates have been unswitched, this routine
486 /// returns.
487 ///
488 /// The return value indicates whether anything was unswitched (and therefore
489 /// changed).
490 static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
491                                          LoopInfo &LI) {
492   bool Changed = false;
493 
494   // If loop header has only one reachable successor we should keep looking for
495   // trivial condition candidates in the successor as well. An alternative is
496   // to constant fold conditions and merge successors into loop header (then we
497   // only need to check header's terminator). The reason for not doing this in
498   // LoopUnswitch pass is that it could potentially break LoopPassManager's
499   // invariants. Folding dead branches could either eliminate the current loop
500   // or make other loops unreachable. LCSSA form might also not be preserved
501   // after deleting branches. The following code keeps traversing loop header's
502   // successors until it finds the trivial condition candidate (condition that
503   // is not a constant). Since unswitching generates branches with constant
504   // conditions, this scenario could be very common in practice.
505   BasicBlock *CurrentBB = L.getHeader();
506   SmallPtrSet<BasicBlock *, 8> Visited;
507   Visited.insert(CurrentBB);
508   do {
509     // Check if there are any side-effecting instructions (e.g. stores, calls,
510     // volatile loads) in the part of the loop that the code *would* execute
511     // without unswitching.
512     if (llvm::any_of(*CurrentBB,
513                      [](Instruction &I) { return I.mayHaveSideEffects(); }))
514       return Changed;
515 
516     TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
517 
518     if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
519       // Don't bother trying to unswitch past a switch with a constant
520       // condition. This should be removed prior to running this pass by
521       // simplify-cfg.
522       if (isa<Constant>(SI->getCondition()))
523         return Changed;
524 
525       if (!unswitchTrivialSwitch(L, *SI, DT, LI))
526         // Couldn't unswitch this one so we're done.
527         return Changed;
528 
529       // Mark that we managed to unswitch something.
530       Changed = true;
531 
532       // If unswitching turned the terminator into an unconditional branch then
533       // we can continue. The unswitching logic specifically works to fold any
534       // cases it can into an unconditional branch to make it easier to
535       // recognize here.
536       auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
537       if (!BI || BI->isConditional())
538         return Changed;
539 
540       CurrentBB = BI->getSuccessor(0);
541       continue;
542     }
543 
544     auto *BI = dyn_cast<BranchInst>(CurrentTerm);
545     if (!BI)
546       // We do not understand other terminator instructions.
547       return Changed;
548 
549     // Don't bother trying to unswitch past an unconditional branch or a branch
550     // with a constant value. These should be removed by simplify-cfg prior to
551     // running this pass.
552     if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
553       return Changed;
554 
555     // Found a trivial condition candidate: non-foldable conditional branch. If
556     // we fail to unswitch this, we can't do anything else that is trivial.
557     if (!unswitchTrivialBranch(L, *BI, DT, LI))
558       return Changed;
559 
560     // Mark that we managed to unswitch something.
561     Changed = true;
562 
563     // We unswitched the branch. This should always leave us with an
564     // unconditional branch that we can follow now.
565     BI = cast<BranchInst>(CurrentBB->getTerminator());
566     assert(!BI->isConditional() &&
567            "Cannot form a conditional branch by unswitching1");
568     CurrentBB = BI->getSuccessor(0);
569 
570     // When continuing, if we exit the loop or reach a previous visited block,
571     // then we can not reach any trivial condition candidates (unfoldable
572     // branch instructions or switch instructions) and no unswitch can happen.
573   } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
574 
575   return Changed;
576 }
577 
578 /// Build the cloned blocks for an unswitched copy of the given loop.
579 ///
580 /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
581 /// after the split block (`SplitBB`) that will be used to select between the
582 /// cloned and original loop.
583 ///
584 /// This routine handles cloning all of the necessary loop blocks and exit
585 /// blocks including rewriting their instructions and the relevant PHI nodes.
586 /// It skips loop and exit blocks that are not necessary based on the provided
587 /// set. It also correctly creates the unconditional branch in the cloned
588 /// unswitched parent block to only point at the unswitched successor.
589 ///
590 /// This does not handle most of the necessary updates to `LoopInfo`. Only exit
591 /// block splitting is correctly reflected in `LoopInfo`, essentially all of
592 /// the cloned blocks (and their loops) are left without full `LoopInfo`
593 /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
594 /// blocks to them but doesn't create the cloned `DominatorTree` structure and
595 /// instead the caller must recompute an accurate DT. It *does* correctly
596 /// update the `AssumptionCache` provided in `AC`.
597 static BasicBlock *buildClonedLoopBlocks(
598     Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
599     ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
600     BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
601     const SmallPtrSetImpl<BasicBlock *> &SkippedLoopAndExitBlocks,
602     ValueToValueMapTy &VMap,
603     SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
604     DominatorTree &DT, LoopInfo &LI) {
605   SmallVector<BasicBlock *, 4> NewBlocks;
606   NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
607 
608   // We will need to clone a bunch of blocks, wrap up the clone operation in
609   // a helper.
610   auto CloneBlock = [&](BasicBlock *OldBB) {
611     // Clone the basic block and insert it before the new preheader.
612     BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
613     NewBB->moveBefore(LoopPH);
614 
615     // Record this block and the mapping.
616     NewBlocks.push_back(NewBB);
617     VMap[OldBB] = NewBB;
618 
619     return NewBB;
620   };
621 
622   // First, clone the preheader.
623   auto *ClonedPH = CloneBlock(LoopPH);
624 
625   // Then clone all the loop blocks, skipping the ones that aren't necessary.
626   for (auto *LoopBB : L.blocks())
627     if (!SkippedLoopAndExitBlocks.count(LoopBB))
628       CloneBlock(LoopBB);
629 
630   // Split all the loop exit edges so that when we clone the exit blocks, if
631   // any of the exit blocks are *also* a preheader for some other loop, we
632   // don't create multiple predecessors entering the loop header.
633   for (auto *ExitBB : ExitBlocks) {
634     if (SkippedLoopAndExitBlocks.count(ExitBB))
635       continue;
636 
637     // When we are going to clone an exit, we don't need to clone all the
638     // instructions in the exit block and we want to ensure we have an easy
639     // place to merge the CFG, so split the exit first. This is always safe to
640     // do because there cannot be any non-loop predecessors of a loop exit in
641     // loop simplified form.
642     auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
643 
644     // Rearrange the names to make it easier to write test cases by having the
645     // exit block carry the suffix rather than the merge block carrying the
646     // suffix.
647     MergeBB->takeName(ExitBB);
648     ExitBB->setName(Twine(MergeBB->getName()) + ".split");
649 
650     // Now clone the original exit block.
651     auto *ClonedExitBB = CloneBlock(ExitBB);
652     assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
653            "Exit block should have been split to have one successor!");
654     assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
655            "Cloned exit block has the wrong successor!");
656 
657     // Remap any cloned instructions and create a merge phi node for them.
658     for (auto ZippedInsts : llvm::zip_first(
659              llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
660              llvm::make_range(ClonedExitBB->begin(),
661                               std::prev(ClonedExitBB->end())))) {
662       Instruction &I = std::get<0>(ZippedInsts);
663       Instruction &ClonedI = std::get<1>(ZippedInsts);
664 
665       // The only instructions in the exit block should be PHI nodes and
666       // potentially a landing pad.
667       assert(
668           (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
669           "Bad instruction in exit block!");
670       // We should have a value map between the instruction and its clone.
671       assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
672 
673       auto *MergePN =
674           PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
675                           &*MergeBB->getFirstInsertionPt());
676       I.replaceAllUsesWith(MergePN);
677       MergePN->addIncoming(&I, ExitBB);
678       MergePN->addIncoming(&ClonedI, ClonedExitBB);
679     }
680   }
681 
682   // Rewrite the instructions in the cloned blocks to refer to the instructions
683   // in the cloned blocks. We have to do this as a second pass so that we have
684   // everything available. Also, we have inserted new instructions which may
685   // include assume intrinsics, so we update the assumption cache while
686   // processing this.
687   for (auto *ClonedBB : NewBlocks)
688     for (Instruction &I : *ClonedBB) {
689       RemapInstruction(&I, VMap,
690                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
691       if (auto *II = dyn_cast<IntrinsicInst>(&I))
692         if (II->getIntrinsicID() == Intrinsic::assume)
693           AC.registerAssumption(II);
694     }
695 
696   // Remove the cloned parent as a predecessor of the cloned continue successor
697   // if we did in fact clone it.
698   auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
699   if (auto *ClonedContinueSuccBB =
700           cast_or_null<BasicBlock>(VMap.lookup(ContinueSuccBB)))
701     ClonedContinueSuccBB->removePredecessor(ClonedParentBB,
702                                             /*DontDeleteUselessPHIs*/ true);
703   // Replace the cloned branch with an unconditional branch to the cloned
704   // unswitched successor.
705   auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
706   ClonedParentBB->getTerminator()->eraseFromParent();
707   BranchInst::Create(ClonedSuccBB, ClonedParentBB);
708 
709   // Update any PHI nodes in the cloned successors of the skipped blocks to not
710   // have spurious incoming values.
711   for (auto *LoopBB : L.blocks())
712     if (SkippedLoopAndExitBlocks.count(LoopBB))
713       for (auto *SuccBB : successors(LoopBB))
714         if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
715           for (PHINode &PN : ClonedSuccBB->phis())
716             PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
717 
718   // Record the domtree updates for the new blocks.
719   SmallPtrSet<BasicBlock *, 4> SuccSet;
720   for (auto *ClonedBB : NewBlocks) {
721     for (auto *SuccBB : successors(ClonedBB))
722       if (SuccSet.insert(SuccBB).second)
723         DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
724     SuccSet.clear();
725   }
726 
727   return ClonedPH;
728 }
729 
730 /// Recursively clone the specified loop and all of its children.
731 ///
732 /// The target parent loop for the clone should be provided, or can be null if
733 /// the clone is a top-level loop. While cloning, all the blocks are mapped
734 /// with the provided value map. The entire original loop must be present in
735 /// the value map. The cloned loop is returned.
736 static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
737                            const ValueToValueMapTy &VMap, LoopInfo &LI) {
738   auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
739     assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
740     ClonedL.reserveBlocks(OrigL.getNumBlocks());
741     for (auto *BB : OrigL.blocks()) {
742       auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
743       ClonedL.addBlockEntry(ClonedBB);
744       if (LI.getLoopFor(BB) == &OrigL)
745         LI.changeLoopFor(ClonedBB, &ClonedL);
746     }
747   };
748 
749   // We specially handle the first loop because it may get cloned into
750   // a different parent and because we most commonly are cloning leaf loops.
751   Loop *ClonedRootL = LI.AllocateLoop();
752   if (RootParentL)
753     RootParentL->addChildLoop(ClonedRootL);
754   else
755     LI.addTopLevelLoop(ClonedRootL);
756   AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
757 
758   if (OrigRootL.empty())
759     return ClonedRootL;
760 
761   // If we have a nest, we can quickly clone the entire loop nest using an
762   // iterative approach because it is a tree. We keep the cloned parent in the
763   // data structure to avoid repeatedly querying through a map to find it.
764   SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
765   // Build up the loops to clone in reverse order as we'll clone them from the
766   // back.
767   for (Loop *ChildL : llvm::reverse(OrigRootL))
768     LoopsToClone.push_back({ClonedRootL, ChildL});
769   do {
770     Loop *ClonedParentL, *L;
771     std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
772     Loop *ClonedL = LI.AllocateLoop();
773     ClonedParentL->addChildLoop(ClonedL);
774     AddClonedBlocksToLoop(*L, *ClonedL);
775     for (Loop *ChildL : llvm::reverse(*L))
776       LoopsToClone.push_back({ClonedL, ChildL});
777   } while (!LoopsToClone.empty());
778 
779   return ClonedRootL;
780 }
781 
782 /// Build the cloned loops of an original loop from unswitching.
783 ///
784 /// Because unswitching simplifies the CFG of the loop, this isn't a trivial
785 /// operation. We need to re-verify that there even is a loop (as the backedge
786 /// may not have been cloned), and even if there are remaining backedges the
787 /// backedge set may be different. However, we know that each child loop is
788 /// undisturbed, we only need to find where to place each child loop within
789 /// either any parent loop or within a cloned version of the original loop.
790 ///
791 /// Because child loops may end up cloned outside of any cloned version of the
792 /// original loop, multiple cloned sibling loops may be created. All of them
793 /// are returned so that the newly introduced loop nest roots can be
794 /// identified.
795 static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
796                              const ValueToValueMapTy &VMap, LoopInfo &LI,
797                              SmallVectorImpl<Loop *> &NonChildClonedLoops) {
798   Loop *ClonedL = nullptr;
799 
800   auto *OrigPH = OrigL.getLoopPreheader();
801   auto *OrigHeader = OrigL.getHeader();
802 
803   auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
804   auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
805 
806   // We need to know the loops of the cloned exit blocks to even compute the
807   // accurate parent loop. If we only clone exits to some parent of the
808   // original parent, we want to clone into that outer loop. We also keep track
809   // of the loops that our cloned exit blocks participate in.
810   Loop *ParentL = nullptr;
811   SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
812   SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
813   ClonedExitsInLoops.reserve(ExitBlocks.size());
814   for (auto *ExitBB : ExitBlocks)
815     if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
816       if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
817         ExitLoopMap[ClonedExitBB] = ExitL;
818         ClonedExitsInLoops.push_back(ClonedExitBB);
819         if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
820           ParentL = ExitL;
821       }
822   assert((!ParentL || ParentL == OrigL.getParentLoop() ||
823           ParentL->contains(OrigL.getParentLoop())) &&
824          "The computed parent loop should always contain (or be) the parent of "
825          "the original loop.");
826 
827   // We build the set of blocks dominated by the cloned header from the set of
828   // cloned blocks out of the original loop. While not all of these will
829   // necessarily be in the cloned loop, it is enough to establish that they
830   // aren't in unreachable cycles, etc.
831   SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
832   for (auto *BB : OrigL.blocks())
833     if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
834       ClonedLoopBlocks.insert(ClonedBB);
835 
836   // Rebuild the set of blocks that will end up in the cloned loop. We may have
837   // skipped cloning some region of this loop which can in turn skip some of
838   // the backedges so we have to rebuild the blocks in the loop based on the
839   // backedges that remain after cloning.
840   SmallVector<BasicBlock *, 16> Worklist;
841   SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
842   for (auto *Pred : predecessors(ClonedHeader)) {
843     // The only possible non-loop header predecessor is the preheader because
844     // we know we cloned the loop in simplified form.
845     if (Pred == ClonedPH)
846       continue;
847 
848     // Because the loop was in simplified form, the only non-loop predecessor
849     // should be the preheader.
850     assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
851                                            "header other than the preheader "
852                                            "that is not part of the loop!");
853 
854     // Insert this block into the loop set and on the first visit (and if it
855     // isn't the header we're currently walking) put it into the worklist to
856     // recurse through.
857     if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
858       Worklist.push_back(Pred);
859   }
860 
861   // If we had any backedges then there *is* a cloned loop. Put the header into
862   // the loop set and then walk the worklist backwards to find all the blocks
863   // that remain within the loop after cloning.
864   if (!BlocksInClonedLoop.empty()) {
865     BlocksInClonedLoop.insert(ClonedHeader);
866 
867     while (!Worklist.empty()) {
868       BasicBlock *BB = Worklist.pop_back_val();
869       assert(BlocksInClonedLoop.count(BB) &&
870              "Didn't put block into the loop set!");
871 
872       // Insert any predecessors that are in the possible set into the cloned
873       // set, and if the insert is successful, add them to the worklist. Note
874       // that we filter on the blocks that are definitely reachable via the
875       // backedge to the loop header so we may prune out dead code within the
876       // cloned loop.
877       for (auto *Pred : predecessors(BB))
878         if (ClonedLoopBlocks.count(Pred) &&
879             BlocksInClonedLoop.insert(Pred).second)
880           Worklist.push_back(Pred);
881     }
882 
883     ClonedL = LI.AllocateLoop();
884     if (ParentL) {
885       ParentL->addBasicBlockToLoop(ClonedPH, LI);
886       ParentL->addChildLoop(ClonedL);
887     } else {
888       LI.addTopLevelLoop(ClonedL);
889     }
890     NonChildClonedLoops.push_back(ClonedL);
891 
892     ClonedL->reserveBlocks(BlocksInClonedLoop.size());
893     // We don't want to just add the cloned loop blocks based on how we
894     // discovered them. The original order of blocks was carefully built in
895     // a way that doesn't rely on predecessor ordering. Rather than re-invent
896     // that logic, we just re-walk the original blocks (and those of the child
897     // loops) and filter them as we add them into the cloned loop.
898     for (auto *BB : OrigL.blocks()) {
899       auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
900       if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
901         continue;
902 
903       // Directly add the blocks that are only in this loop.
904       if (LI.getLoopFor(BB) == &OrigL) {
905         ClonedL->addBasicBlockToLoop(ClonedBB, LI);
906         continue;
907       }
908 
909       // We want to manually add it to this loop and parents.
910       // Registering it with LoopInfo will happen when we clone the top
911       // loop for this block.
912       for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
913         PL->addBlockEntry(ClonedBB);
914     }
915 
916     // Now add each child loop whose header remains within the cloned loop. All
917     // of the blocks within the loop must satisfy the same constraints as the
918     // header so once we pass the header checks we can just clone the entire
919     // child loop nest.
920     for (Loop *ChildL : OrigL) {
921       auto *ClonedChildHeader =
922           cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
923       if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
924         continue;
925 
926 #ifndef NDEBUG
927       // We should never have a cloned child loop header but fail to have
928       // all of the blocks for that child loop.
929       for (auto *ChildLoopBB : ChildL->blocks())
930         assert(BlocksInClonedLoop.count(
931                    cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
932                "Child cloned loop has a header within the cloned outer "
933                "loop but not all of its blocks!");
934 #endif
935 
936       cloneLoopNest(*ChildL, ClonedL, VMap, LI);
937     }
938   }
939 
940   // Now that we've handled all the components of the original loop that were
941   // cloned into a new loop, we still need to handle anything from the original
942   // loop that wasn't in a cloned loop.
943 
944   // Figure out what blocks are left to place within any loop nest containing
945   // the unswitched loop. If we never formed a loop, the cloned PH is one of
946   // them.
947   SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
948   if (BlocksInClonedLoop.empty())
949     UnloopedBlockSet.insert(ClonedPH);
950   for (auto *ClonedBB : ClonedLoopBlocks)
951     if (!BlocksInClonedLoop.count(ClonedBB))
952       UnloopedBlockSet.insert(ClonedBB);
953 
954   // Copy the cloned exits and sort them in ascending loop depth, we'll work
955   // backwards across these to process them inside out. The order shouldn't
956   // matter as we're just trying to build up the map from inside-out; we use
957   // the map in a more stably ordered way below.
958   auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
959   llvm::sort(OrderedClonedExitsInLoops.begin(),
960              OrderedClonedExitsInLoops.end(),
961              [&](BasicBlock *LHS, BasicBlock *RHS) {
962                return ExitLoopMap.lookup(LHS)->getLoopDepth() <
963                       ExitLoopMap.lookup(RHS)->getLoopDepth();
964              });
965 
966   // Populate the existing ExitLoopMap with everything reachable from each
967   // exit, starting from the inner most exit.
968   while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
969     assert(Worklist.empty() && "Didn't clear worklist!");
970 
971     BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
972     Loop *ExitL = ExitLoopMap.lookup(ExitBB);
973 
974     // Walk the CFG back until we hit the cloned PH adding everything reachable
975     // and in the unlooped set to this exit block's loop.
976     Worklist.push_back(ExitBB);
977     do {
978       BasicBlock *BB = Worklist.pop_back_val();
979       // We can stop recursing at the cloned preheader (if we get there).
980       if (BB == ClonedPH)
981         continue;
982 
983       for (BasicBlock *PredBB : predecessors(BB)) {
984         // If this pred has already been moved to our set or is part of some
985         // (inner) loop, no update needed.
986         if (!UnloopedBlockSet.erase(PredBB)) {
987           assert(
988               (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
989               "Predecessor not mapped to a loop!");
990           continue;
991         }
992 
993         // We just insert into the loop set here. We'll add these blocks to the
994         // exit loop after we build up the set in an order that doesn't rely on
995         // predecessor order (which in turn relies on use list order).
996         bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
997         (void)Inserted;
998         assert(Inserted && "Should only visit an unlooped block once!");
999 
1000         // And recurse through to its predecessors.
1001         Worklist.push_back(PredBB);
1002       }
1003     } while (!Worklist.empty());
1004   }
1005 
1006   // Now that the ExitLoopMap gives as  mapping for all the non-looping cloned
1007   // blocks to their outer loops, walk the cloned blocks and the cloned exits
1008   // in their original order adding them to the correct loop.
1009 
1010   // We need a stable insertion order. We use the order of the original loop
1011   // order and map into the correct parent loop.
1012   for (auto *BB : llvm::concat<BasicBlock *const>(
1013            makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1014     if (Loop *OuterL = ExitLoopMap.lookup(BB))
1015       OuterL->addBasicBlockToLoop(BB, LI);
1016 
1017 #ifndef NDEBUG
1018   for (auto &BBAndL : ExitLoopMap) {
1019     auto *BB = BBAndL.first;
1020     auto *OuterL = BBAndL.second;
1021     assert(LI.getLoopFor(BB) == OuterL &&
1022            "Failed to put all blocks into outer loops!");
1023   }
1024 #endif
1025 
1026   // Now that all the blocks are placed into the correct containing loop in the
1027   // absence of child loops, find all the potentially cloned child loops and
1028   // clone them into whatever outer loop we placed their header into.
1029   for (Loop *ChildL : OrigL) {
1030     auto *ClonedChildHeader =
1031         cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1032     if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1033       continue;
1034 
1035 #ifndef NDEBUG
1036     for (auto *ChildLoopBB : ChildL->blocks())
1037       assert(VMap.count(ChildLoopBB) &&
1038              "Cloned a child loop header but not all of that loops blocks!");
1039 #endif
1040 
1041     NonChildClonedLoops.push_back(cloneLoopNest(
1042         *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1043   }
1044 }
1045 
1046 static void
1047 deleteDeadBlocksFromLoop(Loop &L,
1048                          const SmallVectorImpl<BasicBlock *> &DeadBlocks,
1049                          SmallVectorImpl<BasicBlock *> &ExitBlocks,
1050                          DominatorTree &DT, LoopInfo &LI) {
1051   SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(),
1052                                              DeadBlocks.end());
1053 
1054   // Filter out the dead blocks from the exit blocks list so that it can be
1055   // used in the caller.
1056   llvm::erase_if(ExitBlocks,
1057                  [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1058 
1059   // Remove these blocks from their successors.
1060   for (auto *BB : DeadBlocks)
1061     for (BasicBlock *SuccBB : successors(BB))
1062       SuccBB->removePredecessor(BB, /*DontDeleteUselessPHIs*/ true);
1063 
1064   // Walk from this loop up through its parents removing all of the dead blocks.
1065   for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1066     for (auto *BB : DeadBlocks)
1067       ParentL->getBlocksSet().erase(BB);
1068     llvm::erase_if(ParentL->getBlocksVector(),
1069                    [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1070   }
1071 
1072   // Now delete the dead child loops. This raw delete will clear them
1073   // recursively.
1074   llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1075     if (!DeadBlockSet.count(ChildL->getHeader()))
1076       return false;
1077 
1078     assert(llvm::all_of(ChildL->blocks(),
1079                         [&](BasicBlock *ChildBB) {
1080                           return DeadBlockSet.count(ChildBB);
1081                         }) &&
1082            "If the child loop header is dead all blocks in the child loop must "
1083            "be dead as well!");
1084     LI.destroy(ChildL);
1085     return true;
1086   });
1087 
1088   // Remove the loop mappings for the dead blocks and drop all the references
1089   // from these blocks to others to handle cyclic references as we start
1090   // deleting the blocks themselves.
1091   for (auto *BB : DeadBlocks) {
1092     // Check that the dominator tree has already been updated.
1093     assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1094     LI.changeLoopFor(BB, nullptr);
1095     BB->dropAllReferences();
1096   }
1097 
1098   // Actually delete the blocks now that they've been fully unhooked from the
1099   // IR.
1100   for (auto *BB : DeadBlocks)
1101     BB->eraseFromParent();
1102 }
1103 
1104 /// Recompute the set of blocks in a loop after unswitching.
1105 ///
1106 /// This walks from the original headers predecessors to rebuild the loop. We
1107 /// take advantage of the fact that new blocks can't have been added, and so we
1108 /// filter by the original loop's blocks. This also handles potentially
1109 /// unreachable code that we don't want to explore but might be found examining
1110 /// the predecessors of the header.
1111 ///
1112 /// If the original loop is no longer a loop, this will return an empty set. If
1113 /// it remains a loop, all the blocks within it will be added to the set
1114 /// (including those blocks in inner loops).
1115 static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
1116                                                                  LoopInfo &LI) {
1117   SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
1118 
1119   auto *PH = L.getLoopPreheader();
1120   auto *Header = L.getHeader();
1121 
1122   // A worklist to use while walking backwards from the header.
1123   SmallVector<BasicBlock *, 16> Worklist;
1124 
1125   // First walk the predecessors of the header to find the backedges. This will
1126   // form the basis of our walk.
1127   for (auto *Pred : predecessors(Header)) {
1128     // Skip the preheader.
1129     if (Pred == PH)
1130       continue;
1131 
1132     // Because the loop was in simplified form, the only non-loop predecessor
1133     // is the preheader.
1134     assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1135                                "than the preheader that is not part of the "
1136                                "loop!");
1137 
1138     // Insert this block into the loop set and on the first visit and, if it
1139     // isn't the header we're currently walking, put it into the worklist to
1140     // recurse through.
1141     if (LoopBlockSet.insert(Pred).second && Pred != Header)
1142       Worklist.push_back(Pred);
1143   }
1144 
1145   // If no backedges were found, we're done.
1146   if (LoopBlockSet.empty())
1147     return LoopBlockSet;
1148 
1149   // We found backedges, recurse through them to identify the loop blocks.
1150   while (!Worklist.empty()) {
1151     BasicBlock *BB = Worklist.pop_back_val();
1152     assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1153 
1154     // No need to walk past the header.
1155     if (BB == Header)
1156       continue;
1157 
1158     // Because we know the inner loop structure remains valid we can use the
1159     // loop structure to jump immediately across the entire nested loop.
1160     // Further, because it is in loop simplified form, we can directly jump
1161     // to its preheader afterward.
1162     if (Loop *InnerL = LI.getLoopFor(BB))
1163       if (InnerL != &L) {
1164         assert(L.contains(InnerL) &&
1165                "Should not reach a loop *outside* this loop!");
1166         // The preheader is the only possible predecessor of the loop so
1167         // insert it into the set and check whether it was already handled.
1168         auto *InnerPH = InnerL->getLoopPreheader();
1169         assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1170                                       "but not contain the inner loop "
1171                                       "preheader!");
1172         if (!LoopBlockSet.insert(InnerPH).second)
1173           // The only way to reach the preheader is through the loop body
1174           // itself so if it has been visited the loop is already handled.
1175           continue;
1176 
1177         // Insert all of the blocks (other than those already present) into
1178         // the loop set. We expect at least the block that led us to find the
1179         // inner loop to be in the block set, but we may also have other loop
1180         // blocks if they were already enqueued as predecessors of some other
1181         // outer loop block.
1182         for (auto *InnerBB : InnerL->blocks()) {
1183           if (InnerBB == BB) {
1184             assert(LoopBlockSet.count(InnerBB) &&
1185                    "Block should already be in the set!");
1186             continue;
1187           }
1188 
1189           LoopBlockSet.insert(InnerBB);
1190         }
1191 
1192         // Add the preheader to the worklist so we will continue past the
1193         // loop body.
1194         Worklist.push_back(InnerPH);
1195         continue;
1196       }
1197 
1198     // Insert any predecessors that were in the original loop into the new
1199     // set, and if the insert is successful, add them to the worklist.
1200     for (auto *Pred : predecessors(BB))
1201       if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1202         Worklist.push_back(Pred);
1203   }
1204 
1205   assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1206 
1207   // We've found all the blocks participating in the loop, return our completed
1208   // set.
1209   return LoopBlockSet;
1210 }
1211 
1212 /// Rebuild a loop after unswitching removes some subset of blocks and edges.
1213 ///
1214 /// The removal may have removed some child loops entirely but cannot have
1215 /// disturbed any remaining child loops. However, they may need to be hoisted
1216 /// to the parent loop (or to be top-level loops). The original loop may be
1217 /// completely removed.
1218 ///
1219 /// The sibling loops resulting from this update are returned. If the original
1220 /// loop remains a valid loop, it will be the first entry in this list with all
1221 /// of the newly sibling loops following it.
1222 ///
1223 /// Returns true if the loop remains a loop after unswitching, and false if it
1224 /// is no longer a loop after unswitching (and should not continue to be
1225 /// referenced).
1226 static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1227                                      LoopInfo &LI,
1228                                      SmallVectorImpl<Loop *> &HoistedLoops) {
1229   auto *PH = L.getLoopPreheader();
1230 
1231   // Compute the actual parent loop from the exit blocks. Because we may have
1232   // pruned some exits the loop may be different from the original parent.
1233   Loop *ParentL = nullptr;
1234   SmallVector<Loop *, 4> ExitLoops;
1235   SmallVector<BasicBlock *, 4> ExitsInLoops;
1236   ExitsInLoops.reserve(ExitBlocks.size());
1237   for (auto *ExitBB : ExitBlocks)
1238     if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1239       ExitLoops.push_back(ExitL);
1240       ExitsInLoops.push_back(ExitBB);
1241       if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1242         ParentL = ExitL;
1243     }
1244 
1245   // Recompute the blocks participating in this loop. This may be empty if it
1246   // is no longer a loop.
1247   auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1248 
1249   // If we still have a loop, we need to re-set the loop's parent as the exit
1250   // block set changing may have moved it within the loop nest. Note that this
1251   // can only happen when this loop has a parent as it can only hoist the loop
1252   // *up* the nest.
1253   if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1254     // Remove this loop's (original) blocks from all of the intervening loops.
1255     for (Loop *IL = L.getParentLoop(); IL != ParentL;
1256          IL = IL->getParentLoop()) {
1257       IL->getBlocksSet().erase(PH);
1258       for (auto *BB : L.blocks())
1259         IL->getBlocksSet().erase(BB);
1260       llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1261         return BB == PH || L.contains(BB);
1262       });
1263     }
1264 
1265     LI.changeLoopFor(PH, ParentL);
1266     L.getParentLoop()->removeChildLoop(&L);
1267     if (ParentL)
1268       ParentL->addChildLoop(&L);
1269     else
1270       LI.addTopLevelLoop(&L);
1271   }
1272 
1273   // Now we update all the blocks which are no longer within the loop.
1274   auto &Blocks = L.getBlocksVector();
1275   auto BlocksSplitI =
1276       LoopBlockSet.empty()
1277           ? Blocks.begin()
1278           : std::stable_partition(
1279                 Blocks.begin(), Blocks.end(),
1280                 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1281 
1282   // Before we erase the list of unlooped blocks, build a set of them.
1283   SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1284   if (LoopBlockSet.empty())
1285     UnloopedBlocks.insert(PH);
1286 
1287   // Now erase these blocks from the loop.
1288   for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1289     L.getBlocksSet().erase(BB);
1290   Blocks.erase(BlocksSplitI, Blocks.end());
1291 
1292   // Sort the exits in ascending loop depth, we'll work backwards across these
1293   // to process them inside out.
1294   std::stable_sort(ExitsInLoops.begin(), ExitsInLoops.end(),
1295                    [&](BasicBlock *LHS, BasicBlock *RHS) {
1296                      return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1297                    });
1298 
1299   // We'll build up a set for each exit loop.
1300   SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1301   Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1302 
1303   auto RemoveUnloopedBlocksFromLoop =
1304       [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1305         for (auto *BB : UnloopedBlocks)
1306           L.getBlocksSet().erase(BB);
1307         llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1308           return UnloopedBlocks.count(BB);
1309         });
1310       };
1311 
1312   SmallVector<BasicBlock *, 16> Worklist;
1313   while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1314     assert(Worklist.empty() && "Didn't clear worklist!");
1315     assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1316 
1317     // Grab the next exit block, in decreasing loop depth order.
1318     BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1319     Loop &ExitL = *LI.getLoopFor(ExitBB);
1320     assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1321 
1322     // Erase all of the unlooped blocks from the loops between the previous
1323     // exit loop and this exit loop. This works because the ExitInLoops list is
1324     // sorted in increasing order of loop depth and thus we visit loops in
1325     // decreasing order of loop depth.
1326     for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1327       RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1328 
1329     // Walk the CFG back until we hit the cloned PH adding everything reachable
1330     // and in the unlooped set to this exit block's loop.
1331     Worklist.push_back(ExitBB);
1332     do {
1333       BasicBlock *BB = Worklist.pop_back_val();
1334       // We can stop recursing at the cloned preheader (if we get there).
1335       if (BB == PH)
1336         continue;
1337 
1338       for (BasicBlock *PredBB : predecessors(BB)) {
1339         // If this pred has already been moved to our set or is part of some
1340         // (inner) loop, no update needed.
1341         if (!UnloopedBlocks.erase(PredBB)) {
1342           assert((NewExitLoopBlocks.count(PredBB) ||
1343                   ExitL.contains(LI.getLoopFor(PredBB))) &&
1344                  "Predecessor not in a nested loop (or already visited)!");
1345           continue;
1346         }
1347 
1348         // We just insert into the loop set here. We'll add these blocks to the
1349         // exit loop after we build up the set in a deterministic order rather
1350         // than the predecessor-influenced visit order.
1351         bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
1352         (void)Inserted;
1353         assert(Inserted && "Should only visit an unlooped block once!");
1354 
1355         // And recurse through to its predecessors.
1356         Worklist.push_back(PredBB);
1357       }
1358     } while (!Worklist.empty());
1359 
1360     // If blocks in this exit loop were directly part of the original loop (as
1361     // opposed to a child loop) update the map to point to this exit loop. This
1362     // just updates a map and so the fact that the order is unstable is fine.
1363     for (auto *BB : NewExitLoopBlocks)
1364       if (Loop *BBL = LI.getLoopFor(BB))
1365         if (BBL == &L || !L.contains(BBL))
1366           LI.changeLoopFor(BB, &ExitL);
1367 
1368     // We will remove the remaining unlooped blocks from this loop in the next
1369     // iteration or below.
1370     NewExitLoopBlocks.clear();
1371   }
1372 
1373   // Any remaining unlooped blocks are no longer part of any loop unless they
1374   // are part of some child loop.
1375   for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
1376     RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1377   for (auto *BB : UnloopedBlocks)
1378     if (Loop *BBL = LI.getLoopFor(BB))
1379       if (BBL == &L || !L.contains(BBL))
1380         LI.changeLoopFor(BB, nullptr);
1381 
1382   // Sink all the child loops whose headers are no longer in the loop set to
1383   // the parent (or to be top level loops). We reach into the loop and directly
1384   // update its subloop vector to make this batch update efficient.
1385   auto &SubLoops = L.getSubLoopsVector();
1386   auto SubLoopsSplitI =
1387       LoopBlockSet.empty()
1388           ? SubLoops.begin()
1389           : std::stable_partition(
1390                 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
1391                   return LoopBlockSet.count(SubL->getHeader());
1392                 });
1393   for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
1394     HoistedLoops.push_back(HoistedL);
1395     HoistedL->setParentLoop(nullptr);
1396 
1397     // To compute the new parent of this hoisted loop we look at where we
1398     // placed the preheader above. We can't lookup the header itself because we
1399     // retained the mapping from the header to the hoisted loop. But the
1400     // preheader and header should have the exact same new parent computed
1401     // based on the set of exit blocks from the original loop as the preheader
1402     // is a predecessor of the header and so reached in the reverse walk. And
1403     // because the loops were all in simplified form the preheader of the
1404     // hoisted loop can't be part of some *other* loop.
1405     if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
1406       NewParentL->addChildLoop(HoistedL);
1407     else
1408       LI.addTopLevelLoop(HoistedL);
1409   }
1410   SubLoops.erase(SubLoopsSplitI, SubLoops.end());
1411 
1412   // Actually delete the loop if nothing remained within it.
1413   if (Blocks.empty()) {
1414     assert(SubLoops.empty() &&
1415            "Failed to remove all subloops from the original loop!");
1416     if (Loop *ParentL = L.getParentLoop())
1417       ParentL->removeChildLoop(llvm::find(*ParentL, &L));
1418     else
1419       LI.removeLoop(llvm::find(LI, &L));
1420     LI.destroy(&L);
1421     return false;
1422   }
1423 
1424   return true;
1425 }
1426 
1427 /// Helper to visit a dominator subtree, invoking a callable on each node.
1428 ///
1429 /// Returning false at any point will stop walking past that node of the tree.
1430 template <typename CallableT>
1431 void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
1432   SmallVector<DomTreeNode *, 4> DomWorklist;
1433   DomWorklist.push_back(DT[BB]);
1434 #ifndef NDEBUG
1435   SmallPtrSet<DomTreeNode *, 4> Visited;
1436   Visited.insert(DT[BB]);
1437 #endif
1438   do {
1439     DomTreeNode *N = DomWorklist.pop_back_val();
1440 
1441     // Visit this node.
1442     if (!Callable(N->getBlock()))
1443       continue;
1444 
1445     // Accumulate the child nodes.
1446     for (DomTreeNode *ChildN : *N) {
1447       assert(Visited.insert(ChildN).second &&
1448              "Cannot visit a node twice when walking a tree!");
1449       DomWorklist.push_back(ChildN);
1450     }
1451   } while (!DomWorklist.empty());
1452 }
1453 
1454 /// Take an invariant branch that has been determined to be safe and worthwhile
1455 /// to unswitch despite being non-trivial to do so and perform the unswitch.
1456 ///
1457 /// This directly updates the CFG to hoist the predicate out of the loop, and
1458 /// clone the necessary parts of the loop to maintain behavior.
1459 ///
1460 /// It also updates both dominator tree and loopinfo based on the unswitching.
1461 ///
1462 /// Once unswitching has been performed it runs the provided callback to report
1463 /// the new loops and no-longer valid loops to the caller.
1464 static bool unswitchInvariantBranch(
1465     Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI,
1466     AssumptionCache &AC,
1467     function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
1468   assert(BI.isConditional() && "Can only unswitch a conditional branch!");
1469   assert(L.isLoopInvariant(BI.getCondition()) &&
1470          "Can only unswitch an invariant branch condition!");
1471 
1472   // Constant and BBs tracking the cloned and continuing successor.
1473   const int ClonedSucc = 0;
1474   auto *ParentBB = BI.getParent();
1475   auto *UnswitchedSuccBB = BI.getSuccessor(ClonedSucc);
1476   auto *ContinueSuccBB = BI.getSuccessor(1 - ClonedSucc);
1477 
1478   assert(UnswitchedSuccBB != ContinueSuccBB &&
1479          "Should not unswitch a branch that always goes to the same place!");
1480 
1481   // The branch should be in this exact loop. Any inner loop's invariant branch
1482   // should be handled by unswitching that inner loop. The caller of this
1483   // routine should filter out any candidates that remain (but were skipped for
1484   // whatever reason).
1485   assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
1486 
1487   SmallVector<BasicBlock *, 4> ExitBlocks;
1488   L.getUniqueExitBlocks(ExitBlocks);
1489 
1490   // We cannot unswitch if exit blocks contain a cleanuppad instruction as we
1491   // don't know how to split those exit blocks.
1492   // FIXME: We should teach SplitBlock to handle this and remove this
1493   // restriction.
1494   for (auto *ExitBB : ExitBlocks)
1495     if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI()))
1496       return false;
1497 
1498   SmallPtrSet<BasicBlock *, 4> ExitBlockSet(ExitBlocks.begin(),
1499                                             ExitBlocks.end());
1500 
1501   // Compute the parent loop now before we start hacking on things.
1502   Loop *ParentL = L.getParentLoop();
1503 
1504   // Compute the outer-most loop containing one of our exit blocks. This is the
1505   // furthest up our loopnest which can be mutated, which we will use below to
1506   // update things.
1507   Loop *OuterExitL = &L;
1508   for (auto *ExitBB : ExitBlocks) {
1509     Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
1510     if (!NewOuterExitL) {
1511       // We exited the entire nest with this block, so we're done.
1512       OuterExitL = nullptr;
1513       break;
1514     }
1515     if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
1516       OuterExitL = NewOuterExitL;
1517   }
1518 
1519   // If the edge we *aren't* cloning in the unswitch (the continuing edge)
1520   // dominates its target, we can skip cloning the dominated region of the loop
1521   // and its exits. We compute this as a set of nodes to be skipped.
1522   SmallPtrSet<BasicBlock *, 4> SkippedLoopAndExitBlocks;
1523   if (ContinueSuccBB->getUniquePredecessor() ||
1524       llvm::all_of(predecessors(ContinueSuccBB), [&](BasicBlock *PredBB) {
1525         return PredBB == ParentBB || DT.dominates(ContinueSuccBB, PredBB);
1526       })) {
1527     visitDomSubTree(DT, ContinueSuccBB, [&](BasicBlock *BB) {
1528       SkippedLoopAndExitBlocks.insert(BB);
1529       return true;
1530     });
1531   }
1532   // Similarly, if the edge we *are* cloning in the unswitch (the unswitched
1533   // edge) dominates its target, we will end up with dead nodes in the original
1534   // loop and its exits that will need to be deleted. Here, we just retain that
1535   // the property holds and will compute the deleted set later.
1536   bool DeleteUnswitchedSucc =
1537       UnswitchedSuccBB->getUniquePredecessor() ||
1538       llvm::all_of(predecessors(UnswitchedSuccBB), [&](BasicBlock *PredBB) {
1539         return PredBB == ParentBB || DT.dominates(UnswitchedSuccBB, PredBB);
1540       });
1541 
1542   // Split the preheader, so that we know that there is a safe place to insert
1543   // the conditional branch. We will change the preheader to have a conditional
1544   // branch on LoopCond. The original preheader will become the split point
1545   // between the unswitched versions, and we will have a new preheader for the
1546   // original loop.
1547   BasicBlock *SplitBB = L.getLoopPreheader();
1548   BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI);
1549 
1550   // Keep a mapping for the cloned values.
1551   ValueToValueMapTy VMap;
1552 
1553   // Keep track of the dominator tree updates needed.
1554   SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
1555 
1556   // Build the cloned blocks from the loop.
1557   auto *ClonedPH = buildClonedLoopBlocks(
1558       L, LoopPH, SplitBB, ExitBlocks, ParentBB, UnswitchedSuccBB,
1559       ContinueSuccBB, SkippedLoopAndExitBlocks, VMap, DTUpdates, AC, DT, LI);
1560 
1561   // Remove the parent as a predecessor of the unswitched successor.
1562   UnswitchedSuccBB->removePredecessor(ParentBB, /*DontDeleteUselessPHIs*/ true);
1563 
1564   // Now splice the branch from the original loop and use it to select between
1565   // the two loops.
1566   SplitBB->getTerminator()->eraseFromParent();
1567   SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), BI);
1568   BI.setSuccessor(ClonedSucc, ClonedPH);
1569   BI.setSuccessor(1 - ClonedSucc, LoopPH);
1570 
1571   // Create a new unconditional branch to the continuing block (as opposed to
1572   // the one cloned).
1573   BranchInst::Create(ContinueSuccBB, ParentBB);
1574 
1575   // Before we update the dominator tree, collect the dead blocks if we're going
1576   // to end up deleting the unswitched successor.
1577   SmallVector<BasicBlock *, 16> DeadBlocks;
1578   if (DeleteUnswitchedSucc) {
1579     DeadBlocks.push_back(UnswitchedSuccBB);
1580     for (int i = 0; i < (int)DeadBlocks.size(); ++i) {
1581       // If we reach an exit block, stop recursing as the unswitched loop will
1582       // end up reaching the merge block which we make the successor of the
1583       // exit.
1584       if (ExitBlockSet.count(DeadBlocks[i]))
1585         continue;
1586 
1587       // Insert the children that are within the loop or exit block set. Other
1588       // children may reach out of the loop. While we don't expect these to be
1589       // dead (as the unswitched clone should reach them) we don't try to prove
1590       // that here.
1591       for (DomTreeNode *ChildN : *DT[DeadBlocks[i]])
1592         if (L.contains(ChildN->getBlock()) ||
1593             ExitBlockSet.count(ChildN->getBlock()))
1594           DeadBlocks.push_back(ChildN->getBlock());
1595     }
1596   }
1597 
1598   // Add the remaining edges to our updates and apply them to get an up-to-date
1599   // dominator tree. Note that this will cause the dead blocks above to be
1600   // unreachable and no longer in the dominator tree.
1601   DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
1602   DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
1603   DT.applyUpdates(DTUpdates);
1604 
1605   // Build the cloned loop structure itself. This may be substantially
1606   // different from the original structure due to the simplified CFG. This also
1607   // handles inserting all the cloned blocks into the correct loops.
1608   SmallVector<Loop *, 4> NonChildClonedLoops;
1609   buildClonedLoops(L, ExitBlocks, VMap, LI, NonChildClonedLoops);
1610 
1611   // Delete anything that was made dead in the original loop due to
1612   // unswitching.
1613   if (!DeadBlocks.empty())
1614     deleteDeadBlocksFromLoop(L, DeadBlocks, ExitBlocks, DT, LI);
1615 
1616   SmallVector<Loop *, 4> HoistedLoops;
1617   bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
1618 
1619   // This transformation has a high risk of corrupting the dominator tree, and
1620   // the below steps to rebuild loop structures will result in hard to debug
1621   // errors in that case so verify that the dominator tree is sane first.
1622   // FIXME: Remove this when the bugs stop showing up and rely on existing
1623   // verification steps.
1624   assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1625 
1626   // We can change which blocks are exit blocks of all the cloned sibling
1627   // loops, the current loop, and any parent loops which shared exit blocks
1628   // with the current loop. As a consequence, we need to re-form LCSSA for
1629   // them. But we shouldn't need to re-form LCSSA for any child loops.
1630   // FIXME: This could be made more efficient by tracking which exit blocks are
1631   // new, and focusing on them, but that isn't likely to be necessary.
1632   //
1633   // In order to reasonably rebuild LCSSA we need to walk inside-out across the
1634   // loop nest and update every loop that could have had its exits changed. We
1635   // also need to cover any intervening loops. We add all of these loops to
1636   // a list and sort them by loop depth to achieve this without updating
1637   // unnecessary loops.
1638   auto UpdateLoop = [&](Loop &UpdateL) {
1639 #ifndef NDEBUG
1640     UpdateL.verifyLoop();
1641     for (Loop *ChildL : UpdateL) {
1642       ChildL->verifyLoop();
1643       assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
1644              "Perturbed a child loop's LCSSA form!");
1645     }
1646 #endif
1647     // First build LCSSA for this loop so that we can preserve it when
1648     // forming dedicated exits. We don't want to perturb some other loop's
1649     // LCSSA while doing that CFG edit.
1650     formLCSSA(UpdateL, DT, &LI, nullptr);
1651 
1652     // For loops reached by this loop's original exit blocks we may
1653     // introduced new, non-dedicated exits. At least try to re-form dedicated
1654     // exits for these loops. This may fail if they couldn't have dedicated
1655     // exits to start with.
1656     formDedicatedExitBlocks(&UpdateL, &DT, &LI, /*PreserveLCSSA*/ true);
1657   };
1658 
1659   // For non-child cloned loops and hoisted loops, we just need to update LCSSA
1660   // and we can do it in any order as they don't nest relative to each other.
1661   //
1662   // Also check if any of the loops we have updated have become top-level loops
1663   // as that will necessitate widening the outer loop scope.
1664   for (Loop *UpdatedL :
1665        llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
1666     UpdateLoop(*UpdatedL);
1667     if (!UpdatedL->getParentLoop())
1668       OuterExitL = nullptr;
1669   }
1670   if (IsStillLoop) {
1671     UpdateLoop(L);
1672     if (!L.getParentLoop())
1673       OuterExitL = nullptr;
1674   }
1675 
1676   // If the original loop had exit blocks, walk up through the outer most loop
1677   // of those exit blocks to update LCSSA and form updated dedicated exits.
1678   if (OuterExitL != &L)
1679     for (Loop *OuterL = ParentL; OuterL != OuterExitL;
1680          OuterL = OuterL->getParentLoop())
1681       UpdateLoop(*OuterL);
1682 
1683 #ifndef NDEBUG
1684   // Verify the entire loop structure to catch any incorrect updates before we
1685   // progress in the pass pipeline.
1686   LI.verify(DT);
1687 #endif
1688 
1689   // Now that we've unswitched something, make callbacks to report the changes.
1690   // For that we need to merge together the updated loops and the cloned loops
1691   // and check whether the original loop survived.
1692   SmallVector<Loop *, 4> SibLoops;
1693   for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
1694     if (UpdatedL->getParentLoop() == ParentL)
1695       SibLoops.push_back(UpdatedL);
1696   UnswitchCB(IsStillLoop, SibLoops);
1697 
1698   ++NumBranches;
1699   return true;
1700 }
1701 
1702 /// Recursively compute the cost of a dominator subtree based on the per-block
1703 /// cost map provided.
1704 ///
1705 /// The recursive computation is memozied into the provided DT-indexed cost map
1706 /// to allow querying it for most nodes in the domtree without it becoming
1707 /// quadratic.
1708 static int
1709 computeDomSubtreeCost(DomTreeNode &N,
1710                       const SmallDenseMap<BasicBlock *, int, 4> &BBCostMap,
1711                       SmallDenseMap<DomTreeNode *, int, 4> &DTCostMap) {
1712   // Don't accumulate cost (or recurse through) blocks not in our block cost
1713   // map and thus not part of the duplication cost being considered.
1714   auto BBCostIt = BBCostMap.find(N.getBlock());
1715   if (BBCostIt == BBCostMap.end())
1716     return 0;
1717 
1718   // Lookup this node to see if we already computed its cost.
1719   auto DTCostIt = DTCostMap.find(&N);
1720   if (DTCostIt != DTCostMap.end())
1721     return DTCostIt->second;
1722 
1723   // If not, we have to compute it. We can't use insert above and update
1724   // because computing the cost may insert more things into the map.
1725   int Cost = std::accumulate(
1726       N.begin(), N.end(), BBCostIt->second, [&](int Sum, DomTreeNode *ChildN) {
1727         return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
1728       });
1729   bool Inserted = DTCostMap.insert({&N, Cost}).second;
1730   (void)Inserted;
1731   assert(Inserted && "Should not insert a node while visiting children!");
1732   return Cost;
1733 }
1734 
1735 /// Unswitch control flow predicated on loop invariant conditions.
1736 ///
1737 /// This first hoists all branches or switches which are trivial (IE, do not
1738 /// require duplicating any part of the loop) out of the loop body. It then
1739 /// looks at other loop invariant control flows and tries to unswitch those as
1740 /// well by cloning the loop if the result is small enough.
1741 static bool
1742 unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
1743              TargetTransformInfo &TTI, bool NonTrivial,
1744              function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
1745   assert(L.isRecursivelyLCSSAForm(DT, LI) &&
1746          "Loops must be in LCSSA form before unswitching.");
1747 
1748   // Must be in loop simplified form: we need a preheader and dedicated exits.
1749   if (!L.isLoopSimplifyForm())
1750     return false;
1751 
1752   // Try trivial unswitch first before loop over other basic blocks in the loop.
1753   if (unswitchAllTrivialConditions(L, DT, LI)) {
1754     // If we unswitched successfully we will want to clean up the loop before
1755     // processing it further so just mark it as unswitched and return.
1756     UnswitchCB(/*CurrentLoopValid*/ true, {});
1757     return true;
1758   }
1759 
1760   // If we're not doing non-trivial unswitching, we're done. We both accept
1761   // a parameter but also check a local flag that can be used for testing
1762   // a debugging.
1763   if (!NonTrivial && !EnableNonTrivialUnswitch)
1764     return false;
1765 
1766   // Collect all remaining invariant branch conditions within this loop (as
1767   // opposed to an inner loop which would be handled when visiting that inner
1768   // loop).
1769   SmallVector<TerminatorInst *, 4> UnswitchCandidates;
1770   for (auto *BB : L.blocks())
1771     if (LI.getLoopFor(BB) == &L)
1772       if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator()))
1773         if (BI->isConditional() && L.isLoopInvariant(BI->getCondition()) &&
1774             BI->getSuccessor(0) != BI->getSuccessor(1))
1775           UnswitchCandidates.push_back(BI);
1776 
1777   // If we didn't find any candidates, we're done.
1778   if (UnswitchCandidates.empty())
1779     return false;
1780 
1781   // Check if there are irreducible CFG cycles in this loop. If so, we cannot
1782   // easily unswitch non-trivial edges out of the loop. Doing so might turn the
1783   // irreducible control flow into reducible control flow and introduce new
1784   // loops "out of thin air". If we ever discover important use cases for doing
1785   // this, we can add support to loop unswitch, but it is a lot of complexity
1786   // for what seems little or no real world benefit.
1787   LoopBlocksRPO RPOT(&L);
1788   RPOT.perform(&LI);
1789   if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
1790     return false;
1791 
1792   LLVM_DEBUG(
1793       dbgs() << "Considering " << UnswitchCandidates.size()
1794              << " non-trivial loop invariant conditions for unswitching.\n");
1795 
1796   // Given that unswitching these terminators will require duplicating parts of
1797   // the loop, so we need to be able to model that cost. Compute the ephemeral
1798   // values and set up a data structure to hold per-BB costs. We cache each
1799   // block's cost so that we don't recompute this when considering different
1800   // subsets of the loop for duplication during unswitching.
1801   SmallPtrSet<const Value *, 4> EphValues;
1802   CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
1803   SmallDenseMap<BasicBlock *, int, 4> BBCostMap;
1804 
1805   // Compute the cost of each block, as well as the total loop cost. Also, bail
1806   // out if we see instructions which are incompatible with loop unswitching
1807   // (convergent, noduplicate, or cross-basic-block tokens).
1808   // FIXME: We might be able to safely handle some of these in non-duplicated
1809   // regions.
1810   int LoopCost = 0;
1811   for (auto *BB : L.blocks()) {
1812     int Cost = 0;
1813     for (auto &I : *BB) {
1814       if (EphValues.count(&I))
1815         continue;
1816 
1817       if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
1818         return false;
1819       if (auto CS = CallSite(&I))
1820         if (CS.isConvergent() || CS.cannotDuplicate())
1821           return false;
1822 
1823       Cost += TTI.getUserCost(&I);
1824     }
1825     assert(Cost >= 0 && "Must not have negative costs!");
1826     LoopCost += Cost;
1827     assert(LoopCost >= 0 && "Must not have negative loop costs!");
1828     BBCostMap[BB] = Cost;
1829   }
1830   LLVM_DEBUG(dbgs() << "  Total loop cost: " << LoopCost << "\n");
1831 
1832   // Now we find the best candidate by searching for the one with the following
1833   // properties in order:
1834   //
1835   // 1) An unswitching cost below the threshold
1836   // 2) The smallest number of duplicated unswitch candidates (to avoid
1837   //    creating redundant subsequent unswitching)
1838   // 3) The smallest cost after unswitching.
1839   //
1840   // We prioritize reducing fanout of unswitch candidates provided the cost
1841   // remains below the threshold because this has a multiplicative effect.
1842   //
1843   // This requires memoizing each dominator subtree to avoid redundant work.
1844   //
1845   // FIXME: Need to actually do the number of candidates part above.
1846   SmallDenseMap<DomTreeNode *, int, 4> DTCostMap;
1847   // Given a terminator which might be unswitched, computes the non-duplicated
1848   // cost for that terminator.
1849   auto ComputeUnswitchedCost = [&](TerminatorInst *TI) {
1850     BasicBlock &BB = *TI->getParent();
1851     SmallPtrSet<BasicBlock *, 4> Visited;
1852 
1853     int Cost = LoopCost;
1854     for (BasicBlock *SuccBB : successors(&BB)) {
1855       // Don't count successors more than once.
1856       if (!Visited.insert(SuccBB).second)
1857         continue;
1858 
1859       // This successor's domtree will not need to be duplicated after
1860       // unswitching if the edge to the successor dominates it (and thus the
1861       // entire tree). This essentially means there is no other path into this
1862       // subtree and so it will end up live in only one clone of the loop.
1863       if (SuccBB->getUniquePredecessor() ||
1864           llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
1865             return PredBB == &BB || DT.dominates(SuccBB, PredBB);
1866           })) {
1867         Cost -= computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
1868         assert(Cost >= 0 &&
1869                "Non-duplicated cost should never exceed total loop cost!");
1870       }
1871     }
1872 
1873     // Now scale the cost by the number of unique successors minus one. We
1874     // subtract one because there is already at least one copy of the entire
1875     // loop. This is computing the new cost of unswitching a condition.
1876     assert(Visited.size() > 1 &&
1877            "Cannot unswitch a condition without multiple distinct successors!");
1878     return Cost * (Visited.size() - 1);
1879   };
1880   TerminatorInst *BestUnswitchTI = nullptr;
1881   int BestUnswitchCost;
1882   for (TerminatorInst *CandidateTI : UnswitchCandidates) {
1883     int CandidateCost = ComputeUnswitchedCost(CandidateTI);
1884     LLVM_DEBUG(dbgs() << "  Computed cost of " << CandidateCost
1885                       << " for unswitch candidate: " << *CandidateTI << "\n");
1886     if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
1887       BestUnswitchTI = CandidateTI;
1888       BestUnswitchCost = CandidateCost;
1889     }
1890   }
1891 
1892   if (BestUnswitchCost >= UnswitchThreshold) {
1893     LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "
1894                       << BestUnswitchCost << "\n");
1895     return false;
1896   }
1897 
1898   LLVM_DEBUG(dbgs() << "  Trying to unswitch non-trivial (cost = "
1899                     << BestUnswitchCost << ") branch: " << *BestUnswitchTI
1900                     << "\n");
1901   return unswitchInvariantBranch(L, cast<BranchInst>(*BestUnswitchTI), DT, LI,
1902                                  AC, UnswitchCB);
1903 }
1904 
1905 PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
1906                                               LoopStandardAnalysisResults &AR,
1907                                               LPMUpdater &U) {
1908   Function &F = *L.getHeader()->getParent();
1909   (void)F;
1910 
1911   LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
1912                     << "\n");
1913 
1914   // Save the current loop name in a variable so that we can report it even
1915   // after it has been deleted.
1916   std::string LoopName = L.getName();
1917 
1918   auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
1919                                         ArrayRef<Loop *> NewLoops) {
1920     // If we did a non-trivial unswitch, we have added new (cloned) loops.
1921     if (!NewLoops.empty())
1922       U.addSiblingLoops(NewLoops);
1923 
1924     // If the current loop remains valid, we should revisit it to catch any
1925     // other unswitch opportunities. Otherwise, we need to mark it as deleted.
1926     if (CurrentLoopValid)
1927       U.revisitCurrentLoop();
1928     else
1929       U.markLoopAsDeleted(L, LoopName);
1930   };
1931 
1932   if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial,
1933                     UnswitchCB))
1934     return PreservedAnalyses::all();
1935 
1936   // Historically this pass has had issues with the dominator tree so verify it
1937   // in asserts builds.
1938   assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
1939   return getLoopPassPreservedAnalyses();
1940 }
1941 
1942 namespace {
1943 
1944 class SimpleLoopUnswitchLegacyPass : public LoopPass {
1945   bool NonTrivial;
1946 
1947 public:
1948   static char ID; // Pass ID, replacement for typeid
1949 
1950   explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
1951       : LoopPass(ID), NonTrivial(NonTrivial) {
1952     initializeSimpleLoopUnswitchLegacyPassPass(
1953         *PassRegistry::getPassRegistry());
1954   }
1955 
1956   bool runOnLoop(Loop *L, LPPassManager &LPM) override;
1957 
1958   void getAnalysisUsage(AnalysisUsage &AU) const override {
1959     AU.addRequired<AssumptionCacheTracker>();
1960     AU.addRequired<TargetTransformInfoWrapperPass>();
1961     getLoopAnalysisUsage(AU);
1962   }
1963 };
1964 
1965 } // end anonymous namespace
1966 
1967 bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
1968   if (skipLoop(L))
1969     return false;
1970 
1971   Function &F = *L->getHeader()->getParent();
1972 
1973   LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
1974                     << "\n");
1975 
1976   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1977   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1978   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1979   auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1980 
1981   auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid,
1982                                ArrayRef<Loop *> NewLoops) {
1983     // If we did a non-trivial unswitch, we have added new (cloned) loops.
1984     for (auto *NewL : NewLoops)
1985       LPM.addLoop(*NewL);
1986 
1987     // If the current loop remains valid, re-add it to the queue. This is
1988     // a little wasteful as we'll finish processing the current loop as well,
1989     // but it is the best we can do in the old PM.
1990     if (CurrentLoopValid)
1991       LPM.addLoop(*L);
1992     else
1993       LPM.markLoopAsDeleted(*L);
1994   };
1995 
1996   bool Changed =
1997       unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB);
1998 
1999   // If anything was unswitched, also clear any cached information about this
2000   // loop.
2001   LPM.deleteSimpleAnalysisLoop(L);
2002 
2003   // Historically this pass has had issues with the dominator tree so verify it
2004   // in asserts builds.
2005   assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2006 
2007   return Changed;
2008 }
2009 
2010 char SimpleLoopUnswitchLegacyPass::ID = 0;
2011 INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
2012                       "Simple unswitch loops", false, false)
2013 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
2014 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2015 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2016 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2017 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
2018 INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
2019                     "Simple unswitch loops", false, false)
2020 
2021 Pass *llvm::createSimpleLoopUnswitchLegacyPass(bool NonTrivial) {
2022   return new SimpleLoopUnswitchLegacyPass(NonTrivial);
2023 }
2024