//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
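//      For example (an illustrative sketch, not a specific test): given
//        'for (i = 0; i != n; ++i) sum += a;  use(sum);'
//      the out-of-loop use can be rewritten to 'use(a * n)', after which
//      the loop computes nothing observable and becomes dead.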
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting the condition from
  /// signed to unsigned or by rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
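/// For example (illustrative): 42.0 converts exactly to 42, while 0.5 or a
/// value outside the signed 64-bit range fails the exactness check and
/// returns false.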
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values when we know
// the loop will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop-invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
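/// For example (an illustrative sketch): in
///   'for (;;) { if (inv_cond) break; ... }'
/// where 'inv_cond' is loop-invariant and evaluated on every iteration, the
/// break can only ever be taken on the first iteration, so a header phi's
/// value along that exit edge must equal its preheader incoming value.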
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
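/// For example (illustrative): if a 32-bit IV is used by both
/// 'sext i32 %iv to i64' and 'zext i32 %iv to i64' and i64 is a legal
/// integer type, the recorded WidestNativeType is i64 and IsSigned ends up
/// true regardless of visitation order.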
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed'
  // if there are multiple users with both sign- and zero extensions,
  // in order not to introduce nondeterministic behaviour based on the
  // unspecified order of a PHI node's users-iterator.
  WI.IsSigned |= IsSigned;
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
          Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so.  Otherwise, return null.  Note
/// that this is less general than SCEV's AddRec checking.
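/// For example (illustrative IR): given
///   '%iv.next = add i32 %iv, 1'
/// where '%iv' is a phi in the loop header and the step is loop-invariant,
/// this returns '%iv'.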
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    LLVM_FALLTHROUGH;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value.  Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
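/// For example (illustrative IR): a latch test of the form
///   '%cmp = icmp ne i32 %iv.next, %n'
/// with a loop-invariant '%n' and a simple counter '%iv' is already
/// canonical, so this returns false; an 'icmp slt' based test returns true.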
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test.  This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.  Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison.  This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist.  Note that a false result conveys no information.
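/// For example (an illustrative sketch): if Root feeds the divisor operand
/// of a 'udiv' that dominates OnPathTo, a poison Root would guarantee UB at
/// that division, so this returns true.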
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users.  Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example, Arguments
  // may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
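/// For example (illustrative): 'add i32 4, 8' is concrete, while anything
/// derived from a function Argument or a load is conservatively not, since
/// either may be undef.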
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has no uses other than the (soon to be rewritten)
/// loop exit test; that is, if it is almost dead.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L.  A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1.  Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec with a step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
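/// For example (an illustrative sketch): given two equally wide counters
/// '{0,+,1}' and '{%base,+,1}', the zero-based one is preferred as the more
/// canonical form.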
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
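/// For a unit-stride counter '{Start,+,1}' the result is 'Start + ExitCount'
/// (or 'Start + ExitCount + 1' when comparing against the post-incremented
/// value), computed with 2's complement overflow semantics.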
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Zero extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
      IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
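/// For example (illustrative IR): a latch test
///   'icmp slt i32 %iv, %n'
/// in a loop with a computable exit count is rewritten to
///   'icmp ne i32 %iv.next, %limit'
/// where '%limit' is a loop-invariant value expanded by SCEVExpander.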
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds, which requires us not
    // to add a potentially UB-introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth; we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to clean up loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
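/// For example (an illustrative sketch): a multiply computed in the
/// preheader whose only users are after the loop can be moved into the exit
/// block, shortening its live range across the loop body.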
1200 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1201   BasicBlock *ExitBlock = L->getExitBlock();
1202   if (!ExitBlock) return false;
1203 
1204   BasicBlock *Preheader = L->getLoopPreheader();
1205   if (!Preheader) return false;
1206 
1207   bool MadeAnyChanges = false;
1208   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1209   BasicBlock::iterator I(Preheader->getTerminator());
1210   while (I != Preheader->begin()) {
1211     --I;
1212     // New instructions were inserted at the end of the preheader.
1213     if (isa<PHINode>(I))
1214       break;
1215 
1216     // Don't move instructions which might have side effects, since the side
1217     // effects need to complete before instructions inside the loop.  Also don't
1218     // move instructions which might read memory, since the loop may modify
1219     // memory. Note that it's okay if the instruction might have undefined
1220     // behavior: LoopSimplify guarantees that the preheader dominates the exit
1221     // block.
1222     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1223       continue;
1224 
1225     // Skip debug info intrinsics.
1226     if (isa<DbgInfoIntrinsic>(I))
1227       continue;
1228 
1229     // Skip eh pad instructions.
1230     if (I->isEHPad())
1231       continue;
1232 
1233     // Don't sink alloca: we never want to sink static alloca's out of the
1234     // entry block, and correctly sinking dynamic alloca's requires
1235     // checks for stacksave/stackrestore intrinsics.
1236     // FIXME: Refactor this check somehow?
1237     if (isa<AllocaInst>(I))
1238       continue;
1239 
1240     // Determine if there is a use in or before the loop (direct or
1241     // otherwise).
1242     bool UsedInLoop = false;
1243     for (Use &U : I->uses()) {
1244       Instruction *User = cast<Instruction>(U.getUser());
1245       BasicBlock *UseBB = User->getParent();
1246       if (PHINode *P = dyn_cast<PHINode>(User)) {
1247         unsigned i =
1248           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1249         UseBB = P->getIncomingBlock(i);
1250       }
1251       if (UseBB == Preheader || L->contains(UseBB)) {
1252         UsedInLoop = true;
1253         break;
1254       }
1255     }
1256 
1257     // If there is, the def must remain in the preheader.
1258     if (UsedInLoop)
1259       continue;
1260 
1261     // Otherwise, sink it to the exit block.
1262     Instruction *ToMove = &*I;
1263     bool Done = false;
1264 
1265     if (I != Preheader->begin()) {
1266       // Skip debug info intrinsics.
1267       do {
1268         --I;
1269       } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1270 
1271       if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1272         Done = true;
1273     } else {
1274       Done = true;
1275     }
1276 
1277     MadeAnyChanges = true;
1278     ToMove->moveBefore(*ExitBlock, InsertPt);
1279     if (Done) break;
1280     InsertPt = ToMove->getIterator();
1281   }
1282 
1283   return MadeAnyChanges;
1284 }
1285 
1286 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1287                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1288   auto *OldCond = BI->getCondition();
1289   BI->setCondition(NewCond);
1290   if (OldCond->use_empty())
1291     DeadInsts.emplace_back(OldCond);
1292 }
1293 
1294 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1295                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1296   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1297   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1298   auto *OldCond = BI->getCondition();
1299   auto *NewCond =
1300       ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
1301   replaceExitCond(BI, NewCond, DeadInsts);
1302 }
1303 
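/// If the backedge is known never to be taken, each header phi produces its
/// preheader incoming value on the only reachable iteration, so its uses can
/// be replaced with that value.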
1304 static void replaceLoopPHINodesWithPreheaderValues(
1305     Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1306   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1307   auto *LoopPreheader = L->getLoopPreheader();
1308   auto *LoopHeader = L->getHeader();
1309   for (auto &PN : LoopHeader->phis()) {
1310     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1311     PN.replaceAllUsesWith(PreheaderIncoming);
1312     DeadInsts.emplace_back(&PN);
1313   }
1314 }
1315 
1316 static void replaceWithInvariantCond(
1317     const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
1318     const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
1319     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1320   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1321   Rewriter.setInsertPoint(BI);
1322   auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
1323   auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
1324   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1325   if (ExitIfTrue)
1326     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1327   IRBuilder<> Builder(BI);
1328   auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1329                                      BI->getCondition()->getName());
1330   replaceExitCond(BI, NewCond, DeadInsts);
1331 }
1332 
1333 static bool optimizeLoopExitWithUnknownExitCount(
1334     const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
1335     const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1336     ScalarEvolution *SE, SCEVExpander &Rewriter,
1337     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1338   ICmpInst::Predicate Pred;
1339   Value *LHS, *RHS;
1340   BasicBlock *TrueSucc, *FalseSucc;
1341   if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
1342                       m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
1343     return false;
1344 
1345   assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
1346          "Not a loop exit!");
1347 
  // 'LHS pred RHS' should now mean that we stay in the loop.
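  // E.g. (illustrative): for "br (icmp eq %i, %n), label %exit, label %body",
  // the false successor stays in the loop, so eq is inverted to ne and
  // "stay in loop" becomes %i ne %n.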
1349   if (L->contains(FalseSucc))
1350     Pred = CmpInst::getInversePredicate(Pred);
1351 
1352   // If we are proving loop exit, invert the predicate.
1353   if (Inverted)
1354     Pred = CmpInst::getInversePredicate(Pred);
1355 
1356   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1357   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1358   // Can we prove it to be trivially true?
1359   if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
1360     foldExit(L, ExitingBB, Inverted, DeadInsts);
1361     return true;
1362   }
  // The logic below works only for the non-inverted condition.
1364   if (Inverted)
1365     return false;
1366 
1367   auto *ARTy = LHSS->getType();
1368   auto *MaxIterTy = MaxIter->getType();
1369   // If possible, adjust types.
1370   if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1371     MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1372   else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1373     const SCEV *MinusOne = SE->getMinusOne(ARTy);
1374     auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1375     if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1376       MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1377   }
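  // E.g. (illustrative): with a 32-bit AR type and a 64-bit MaxIter that is
  // known at the branch to be <= 0xFFFFFFFF, MaxIter can safely be truncated
  // to 32 bits.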
1378 
1379   if (SkipLastIter) {
1380     const SCEV *One = SE->getOne(MaxIter->getType());
1381     MaxIter = SE->getMinusSCEV(MaxIter, One);
1382   }
1383 
1384   // Check if there is a loop-invariant predicate equivalent to our check.
1385   auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1386                                                                L, BI, MaxIter);
1387   if (!LIP)
1388     return false;
1389 
1390   // Can we prove it to be trivially true?
1391   if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1392     foldExit(L, ExitingBB, Inverted, DeadInsts);
1393   else
1394     replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
1395                              Rewriter, DeadInsts);
1396 
1397   return true;
1398 }
1399 
1400 bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This duplicates a particular part of SimplifyIndVars' reasoning.
  // We need to duplicate it because, given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags.  The alternative would be to extend the set of
  // "interesting" IV users to include the icmp, but doing that regresses
  // results in practice: it queries SCEVs before the trip counts which rely
  // on them, causing SCEV to cache sub-optimal answers.  That concern about
  // caching sub-optimal results is why we only query SCEVs of the loop
  // invariant RHS here.
1410   SmallVector<BasicBlock*, 16> ExitingBlocks;
1411   L->getExitingBlocks(ExitingBlocks);
1412   bool Changed = false;
1413   for (auto *ExitingBB : ExitingBlocks) {
1414     auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1415     if (!BI)
1416       continue;
1417     assert(BI->isConditional() && "exit branch must be conditional");
1418 
1419     auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1420     if (!ICmp || !ICmp->hasOneUse())
1421       continue;
1422 
1423     auto *LHS = ICmp->getOperand(0);
1424     auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning the cache with sub-optimal results.  For the must-execute
    // case, this is a necessary precondition for correctness.
1428     if (!L->isLoopInvariant(RHS)) {
1429       if (!L->isLoopInvariant(LHS))
1430         continue;
1431       // Same logic applies for the inverse case
1432       std::swap(LHS, RHS);
1433     }
1434 
1435     // Match (icmp signed-cond zext, RHS)
1436     Value *LHSOp = nullptr;
1437     if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1438       continue;
1439 
1440     const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1441     const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1442     const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1443     auto FullCR = ConstantRange::getFull(InnerBitWidth);
1444     FullCR = FullCR.zeroExtend(OuterBitWidth);
1445     auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1446     if (FullCR.contains(RHSCR)) {
1447       // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1448       // replace the signed condition with the unsigned version.
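      // E.g. (illustrative): if RHS is known to fit in 16 bits, then
      //   icmp slt (zext i16 %x to i32), %rhs  ==>  icmp ult ...
      // since both operands are then non-negative in the wider type.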
1449       ICmp->setPredicate(ICmp->getUnsignedPredicate());
1450       Changed = true;
1451       // Note: No SCEV invalidation needed.  We've changed the predicate, but
1452       // have not changed exit counts, or the values produced by the compare.
1453       continue;
1454     }
1455   }
1456 
1457   // Now that we've canonicalized the condition to match the extend,
1458   // see if we can rotate the extend out of the loop.
1459   for (auto *ExitingBB : ExitingBlocks) {
1460     auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1461     if (!BI)
1462       continue;
1463     assert(BI->isConditional() && "exit branch must be conditional");
1464 
1465     auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1466     if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1467       continue;
1468 
1469     bool Swapped = false;
1470     auto *LHS = ICmp->getOperand(0);
1471     auto *RHS = ICmp->getOperand(1);
1472     if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
1473       // Nothing to rotate
1474       continue;
1475     if (L->isLoopInvariant(LHS)) {
1476       // Same logic applies for the inverse case until we actually pick
1477       // which operand of the compare to update.
1478       Swapped = true;
1479       std::swap(LHS, RHS);
1480     }
1481     assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1482 
1483     // Match (icmp unsigned-cond zext, RHS)
1484     // TODO: Extend to handle corresponding sext/signed-cmp case
1485     // TODO: Extend to other invertible functions
1486     Value *LHSOp = nullptr;
1487     if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1488       continue;
1489 
1490     // In general, we only rotate if we can do so without increasing the number
    // of instructions.  The exception is when we have a zext(add-rec).  The
1492     // reason for allowing this exception is that we know we need to get rid
1493     // of the zext for SCEV to be able to compute a trip count for said loops;
1494     // we consider the new trip count valuable enough to increase instruction
1495     // count by one.
1496     if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1497       continue;
1498 
    // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS,
    // replace it with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when the zext is loop varying and RHS is loop invariant.  This converts
    // loop varying work to loop-invariant work.
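    // E.g. (illustrative):
    //   icmp ult (zext i32 %iv to i64), %n
    //     ==>  icmp ult i32 %iv, (trunc i64 %n to i32)
    // with the trunc materialized in the preheader.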
1503     auto doRotateTransform = [&]() {
1504       assert(ICmp->isUnsigned() && "must have proven unsigned already");
1505       auto *NewRHS =
1506         CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
1507                          L->getLoopPreheader()->getTerminator());
1508       ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
1509       ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
1510       if (LHS->use_empty())
1511         DeadInsts.push_back(LHS);
1512     };
1513 
1515     const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1516     const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1517     const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1518     auto FullCR = ConstantRange::getFull(InnerBitWidth);
1519     FullCR = FullCR.zeroExtend(OuterBitWidth);
1520     auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1521     if (FullCR.contains(RHSCR)) {
1522       doRotateTransform();
1523       Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise state here,
      // as rotation tends to reveal trip count information that was not
      // previously visible.
1527       continue;
1528     }
1529   }
1530 
1531   return Changed;
1532 }
1533 
1534 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1535   SmallVector<BasicBlock*, 16> ExitingBlocks;
1536   L->getExitingBlocks(ExitingBlocks);
1537 
  // Remove all exits which aren't both rewriteable and guaranteed to execute
  // on every iteration.
1540   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
1542     // innermost one.  Otherwise, we're changing how many times the innermost
1543     // loop runs before it exits.
1544     if (LI->getLoopFor(ExitingBB) != L)
1545       return true;
1546 
1547     // Can't rewrite non-branch yet.
1548     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1549     if (!BI)
1550       return true;
1551 
1552     // If already constant, nothing to do.
1553     if (isa<Constant>(BI->getCondition()))
1554       return true;
1555 
    // The exiting BB must dominate the loop latch; otherwise the exit is not
    // guaranteed to execute on every iteration.
1557     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1558       return true;
1559 
1560     return false;
1561   });
1562 
1563   if (ExitingBlocks.empty())
1564     return false;
1565 
1566   // Get a symbolic upper bound on the loop backedge taken count.
1567   const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
1568   if (isa<SCEVCouldNotCompute>(MaxExitCount))
1569     return false;
1570 
  // Visit our exit blocks in order of dominance. Since all exits must
  // dominate the latch, there is a total dominance order between them.
1574   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1575                // std::sort sorts in ascending order, so we want the inverse of
1576                // the normal dominance relation.
1577                if (A == B) return false;
1578                if (DT->properlyDominates(A, B))
1579                  return true;
1580                else {
1581                  assert(DT->properlyDominates(B, A) &&
1582                         "expected total dominance order!");
1583                  return false;
1584                }
1585   });
#ifndef NDEBUG
1587   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1588     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1589   }
1590 #endif
1591 
1592   bool Changed = false;
1593   bool SkipLastIter = false;
1594   SmallSet<const SCEV*, 8> DominatingExitCounts;
1595   for (BasicBlock *ExitingBB : ExitingBlocks) {
1596     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1597     if (isa<SCEVCouldNotCompute>(ExitCount)) {
1598       // Okay, we do not know the exit count here. Can we at least prove that it
      // will remain the same within the iteration space?
1600       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1601       auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
1602         return optimizeLoopExitWithUnknownExitCount(
1603             L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
1604             Rewriter, DeadInsts);
1605       };
1606 
1607       // TODO: We might have proved that we can skip the last iteration for
1608       // this check. In this case, we only want to check the condition on the
1609       // pre-last iteration (MaxExitCount - 1). However, there is a nasty
1610       // corner case:
1611       //
1612       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1613       //
1614       // If we could not prove that len != 0, then we also could not prove that
      // (len - 1) is not UINT_MAX. If we simply query (len - 1), then
1616       // OptimizeCond will likely not prove anything for it, even if it could
1617       // prove the same fact for len.
1618       //
1619       // As a temporary solution, we query both last and pre-last iterations in
1620       // hope that we will be able to prove triviality for at least one of
1621       // them. We can stop querying MaxExitCount for this case once SCEV
1622       // understands that (MaxExitCount - 1) will not overflow here.
1623       if (OptimizeCond(false, false) || OptimizeCond(true, false))
1624         Changed = true;
1625       else if (SkipLastIter)
1626         if (OptimizeCond(false, true) || OptimizeCond(true, true))
1627           Changed = true;
1628       continue;
1629     }
1630 
1631     if (MaxExitCount == ExitCount)
      // If the loop has more than 1 iteration, all further checks will be
      // executed one iteration fewer.
1634       SkipLastIter = true;
1635 
1636     // If we know we'd exit on the first iteration, rewrite the exit to
1637     // reflect this.  This does not imply the loop must exit through this
1638     // exit; there may be an earlier one taken on the first iteration.
1639     // We know that the backedge can't be taken, so we replace all
1640     // the header PHIs with values coming from the preheader.
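    // E.g. (illustrative): for "for (i = 0; i != n; ++i)" dominated by a
    // guard proving n == 0, the exit count folds to zero.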
1641     if (ExitCount->isZero()) {
1642       foldExit(L, ExitingBB, true, DeadInsts);
1643       replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
1644       Changed = true;
1645       continue;
1646     }
1647 
1648     assert(ExitCount->getType()->isIntegerTy() &&
1649            MaxExitCount->getType()->isIntegerTy() &&
1650            "Exit counts must be integers");
1651 
1652     Type *WiderType =
1653       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
1654     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
1655     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
1656     assert(MaxExitCount->getType() == ExitCount->getType());
1657 
1658     // Can we prove that some other exit must be taken strictly before this
1659     // one?
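    // E.g. (illustrative): with one exit taken on iteration 10 and another
    // on iteration 20, the later exit can never be reached and folds away.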
1660     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
1661                                      MaxExitCount, ExitCount)) {
1662       foldExit(L, ExitingBB, false, DeadInsts);
1663       Changed = true;
1664       continue;
1665     }
1666 
    // As we run, keep track of which exit counts we've encountered.  If we
    // find a duplicate, we've found an exit which would have exited on the
    // same iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
1671     if (!DominatingExitCounts.insert(ExitCount).second) {
1672       foldExit(L, ExitingBB, false, DeadInsts);
1673       Changed = true;
1674       continue;
1675     }
1676 
    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here.  If we kept track of the min of the dominating exit counts so
    // far, we could discharge exits with EC >= MDEC. This is less powerful
    // than the existing transform (since later exits aren't considered), but
    // potentially more powerful for any case where SCEV can prove a >=u b,
    // but neither a == b nor a >u b.  Such a case is not currently known.
1683   }
1684   return Changed;
1685 }
1686 
1687 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1688   SmallVector<BasicBlock*, 16> ExitingBlocks;
1689   L->getExitingBlocks(ExitingBlocks);
1690 
  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration.  Note that this
  // neither a) tells us the loop exits on the first iteration (unless *all*
  // exits are predicateable) nor b) tells us *which* exit might be taken.
  // This transformation looks a lot like dead loop elimination, but it is
  // restricted to read-only loops and doesn't necessarily need to kill the
  // loop entirely.
1700   if (!LoopPredication)
1701     return false;
1702 
1703   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1704   // through *explicit* control flow.  We have to eliminate the possibility of
1705   // implicit exits (see below) before we know it's truly exact.
1706   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1707   if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
1708     return false;
1709 
1710   assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1711   assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1712 
1713   auto BadExit = [&](BasicBlock *ExitingBB) {
1714     // If our exiting block exits multiple loops, we can only rewrite the
1715     // innermost one.  Otherwise, we're changing how many times the innermost
1716     // loop runs before it exits.
1717     if (LI->getLoopFor(ExitingBB) != L)
1718       return true;
1719 
1720     // Can't rewrite non-branch yet.
1721     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1722     if (!BI)
1723       return true;
1724 
1725     // If already constant, nothing to do.
1726     if (isa<Constant>(BI->getCondition()))
1727       return true;
1728 
1729     // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivial LCSSA phis
1731     // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1734     if (!ExitBlock->phis().empty())
1735       return true;
1736 
1737     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1738     if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
1739       return true;
1740 
1741     assert(SE->isLoopInvariant(ExitCount, L) &&
1742            "Exit count must be loop invariant");
1743     assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1744     return false;
1745   };
1746 
  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).  Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known unequal.
1754   llvm::sort(ExitingBlocks,
1755             [&](BasicBlock *A, BasicBlock *B) {
1756               // std::sort sorts in ascending order, so we want the inverse of
1757               // the normal dominance relation, plus a tie breaker for blocks
1758               // unordered by dominance.
1759               if (DT->properlyDominates(A, B)) return true;
1760               if (DT->properlyDominates(B, A)) return false;
1761               return A->getName() < B->getName();
1762             });
1763   // Check to see if our exit blocks are a total order (i.e. a linear chain of
1764   // exits before the backedge).  If they aren't, reasoning about reachability
1765   // is complicated and we choose not to for now.
1766   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1767     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1768       return false;
1769 
1770   // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
1772   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1773     if (BadExit(ExitingBlocks[i])) {
1774       ExitingBlocks.resize(i);
1775       break;
1776     }
1777 
1778   if (ExitingBlocks.empty())
1779     return false;
1780 
  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
1785   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1786         return DT->dominates(ExitingBB, L->getLoopLatch());
1787       }));
1788 
1789   // At this point, ExitingBlocks consists of only those blocks which are
1790   // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
1792   // implicit exits (because then our exact BTC isn't actually exact).
1793   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
1794   // suggestions on how to improve this?  I can obviously bail out for outer
1795   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
1796   // is that enough for *all* side effects?
1797   for (BasicBlock *BB : L->blocks())
1798     for (auto &I : *BB)
1799       // TODO:isGuaranteedToTransfer
1800       if (I.mayHaveSideEffects())
1801         return false;
1802 
1803   bool Changed = false;
1804   // Finally, do the actual predication for all predicatable blocks.  A couple
1805   // of notes here:
1806   // 1) We don't bother to constant fold dominated exits with identical exit
1807   //    counts; that's simply a form of CSE/equality propagation and we leave
1808   //    it for dedicated passes.
1809   // 2) We insert the comparison at the branch.  Hoisting introduces additional
1810   //    legality constraints and we leave that to dedicated logic.  We want to
1811   //    predicate even if we can't insert a loop invariant expression as
1812   //    peeling or unrolling will likely reduce the cost of the otherwise loop
1813   //    varying check.
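  // E.g. (illustrative): an exit "br (icmp ult %iv, %len), %body, %exit"
  // becomes "br (icmp ne %ec, %exact_btc), %body, %exit", where %ec and
  // %exact_btc are the expanded, loop-invariant exit count and exact
  // backedge-taken count (names here are made up).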
1814   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1815   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1816   Value *ExactBTCV = nullptr; // Lazily generated if needed.
1817   for (BasicBlock *ExitingBB : ExitingBlocks) {
1818     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1819 
1820     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1821     Value *NewCond;
1822     if (ExitCount == ExactBTC) {
1823       NewCond = L->contains(BI->getSuccessor(0)) ?
1824         B.getFalse() : B.getTrue();
1825     } else {
1826       Value *ECV = Rewriter.expandCodeFor(ExitCount);
1827       if (!ExactBTCV)
1828         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1829       Value *RHS = ExactBTCV;
1830       if (ECV->getType() != RHS->getType()) {
1831         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1832         ECV = B.CreateZExt(ECV, WiderTy);
1833         RHS = B.CreateZExt(RHS, WiderTy);
1834       }
1835       auto Pred = L->contains(BI->getSuccessor(0)) ?
1836         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1837       NewCond = B.CreateICmp(Pred, ECV, RHS);
1838     }
1839     Value *OldCond = BI->getCondition();
1840     BI->setCondition(NewCond);
1841     if (OldCond->use_empty())
1842       DeadInsts.emplace_back(OldCond);
1843     Changed = true;
1844   }
1845 
1846   return Changed;
1847 }
1848 
1849 //===----------------------------------------------------------------------===//
1850 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1851 //===----------------------------------------------------------------------===//
1852 
1853 bool IndVarSimplify::run(Loop *L) {
1854   // We need (and expect!) the incoming loop to be in LCSSA.
1855   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1856          "LCSSA required to run indvars!");
1857 
1858   // If LoopSimplify form is not available, stay out of trouble. Some notes:
1859   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
1860   //    canonicalization can be a pessimization without LSR to "clean up"
1861   //    afterwards.
1862   //  - We depend on having a preheader; in particular,
1863   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
1864   //    and we're in trouble if we can't find the induction variable even when
1865   //    we've manually inserted one.
1866   //  - LFTR relies on having a single backedge.
1867   if (!L->isLoopSimplifyForm())
1868     return false;
1869 
1870 #ifndef NDEBUG
1871   // Used below for a consistency check only
1872   // Note: Since the result returned by ScalarEvolution may depend on the order
1873   // in which previous results are added to its cache, the call to
1874   // getBackedgeTakenCount() may change following SCEV queries.
1875   const SCEV *BackedgeTakenCount;
1876   if (VerifyIndvars)
1877     BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1878 #endif
1879 
1880   bool Changed = false;
1881   // If there are any floating-point recurrences, attempt to
1882   // transform them to use integer recurrences.
1883   Changed |= rewriteNonIntegerIVs(L);
1884 
1885   // Create a rewriter object which we'll use to transform the code with.
1886   SCEVExpander Rewriter(*SE, DL, "indvars");
1887 #ifndef NDEBUG
1888   Rewriter.setDebugType(DEBUG_TYPE);
1889 #endif
1890 
1891   // Eliminate redundant IV users.
1892   //
1893   // Simplification works best when run before other consumers of SCEV. We
1894   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1895   // other expressions involving loop IVs have been evaluated. This helps SCEV
1896   // set no-wrap flags before normalizing sign/zero extension.
1897   Rewriter.disableCanonicalMode();
1898   Changed |= simplifyAndExtend(L, Rewriter, LI);
1899 
1900   // Check to see if we can compute the final value of any expressions
1901   // that are recurrent in the loop, and substitute the exit values from the
1902   // loop into any instructions outside of the loop that use the final values
1903   // of the current expressions.
1904   if (ReplaceExitValue != NeverRepl) {
1905     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1906                                              ReplaceExitValue, DeadInsts)) {
1907       NumReplaced += Rewrites;
1908       Changed = true;
1909     }
1910   }
1911 
1912   // Eliminate redundant IV cycles.
1913   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1914 
1915   // Try to convert exit conditions to unsigned and rotate computation
1916   // out of the loop.  Note: Handles invalidation internally if needed.
1917   Changed |= canonicalizeExitCondition(L);
1918 
1919   // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter)) {
1921     Changed = true;
1922     // Given we've changed exit counts, notify SCEV
1923     // Some nested loops may share same folded exit basic block,
1924     // thus we need to notify top most loop.
1925     SE->forgetTopmostLoop(L);
1926   }
1927 
1928   // Try to form loop invariant tests for loop exits by changing how many
1929   // iterations of the loop run when that is unobservable.
1930   if (predicateLoopExits(L, Rewriter)) {
1931     Changed = true;
1932     // Given we've changed exit counts, notify SCEV
1933     SE->forgetLoop(L);
1934   }
1935 
1936   // If we have a trip count expression, rewrite the loop's exit condition
1937   // using it.
1938   if (!DisableLFTR) {
1939     BasicBlock *PreHeader = L->getLoopPreheader();
1940 
1941     SmallVector<BasicBlock*, 16> ExitingBlocks;
1942     L->getExitingBlocks(ExitingBlocks);
1943     for (BasicBlock *ExitingBB : ExitingBlocks) {
1944       // Can't rewrite non-branch yet.
1945       if (!isa<BranchInst>(ExitingBB->getTerminator()))
1946         continue;
1947 
      // If our exiting block exits multiple loops, we can only rewrite the
1949       // innermost one.  Otherwise, we're changing how many times the innermost
1950       // loop runs before it exits.
1951       if (LI->getLoopFor(ExitingBB) != L)
1952         continue;
1953 
1954       if (!needsLFTR(L, ExitingBB))
1955         continue;
1956 
1957       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1958       if (isa<SCEVCouldNotCompute>(ExitCount))
1959         continue;
1960 
1961       // This was handled above, but as we form SCEVs, we can sometimes refine
1962       // existing ones; this allows exit counts to be folded to zero which
1963       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
1964       // until stable to handle cases like this better.
1965       if (ExitCount->isZero())
1966         continue;
1967 
1968       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
1969       if (!IndVar)
1970         continue;
1971 
1972       // Avoid high cost expansions.  Note: This heuristic is questionable in
1973       // that our definition of "high cost" is not exactly principled.
1974       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
1975                                        TTI, PreHeader->getTerminator()))
1976         continue;
1977 
1978       // Check preconditions for proper SCEVExpander operation. SCEV does not
1979       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
1980       // any pass that uses the SCEVExpander must do it. This does not work
1981       // well for loop passes because SCEVExpander makes assumptions about
1982       // all loops, while LoopPassManager only forces the current loop to be
1983       // simplified.
1984       //
1985       // FIXME: SCEV expansion has no way to bail out, so the caller must
1986       // explicitly check any assumptions made by SCEV. Brittle.
1987       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
1988       if (!AR || AR->getLoop()->getLoopPreheader())
1989         Changed |= linearFunctionTestReplace(L, ExitingBB,
1990                                              ExitCount, IndVar,
1991                                              Rewriter);
1992     }
1993   }
1994   // Clear the rewriter cache, because values that are in the rewriter's cache
1995   // can be deleted in the loop below, causing the AssertingVH in the cache to
1996   // trigger.
1997   Rewriter.clear();
1998 
1999   // Now that we're done iterating through lists, clean up any instructions
2000   // which are now dead.
2001   while (!DeadInsts.empty()) {
2002     Value *V = DeadInsts.pop_back_val();
2003 
2004     if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2005       Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2006     else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2007       Changed |=
2008           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2009   }
2010 
2011   // The Rewriter may not be used from this point on.
2012 
2013   // Loop-invariant instructions in the preheader that aren't used in the
2014   // loop may be sunk below the loop to reduce register pressure.
2015   Changed |= sinkUnusedInvariants(L);
2016 
2017   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2018   // trip count and therefore can further simplify exit values in addition to
2019   // rewriteLoopExitValues.
2020   Changed |= rewriteFirstIterationLoopExitValues(L);
2021 
2022   // Clean up dead instructions.
2023   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2024 
2025   // Check a post-condition.
2026   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2027          "Indvars did not preserve LCSSA!");
2028 
  // Verify that LFTR and any other changes have not interfered with SCEV's
2030   // ability to compute trip count.  We may have *changed* the exit count, but
2031   // only by reducing it.
2032 #ifndef NDEBUG
2033   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2034     SE->forgetLoop(L);
2035     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2036     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2037         SE->getTypeSizeInBits(NewBECount->getType()))
2038       NewBECount = SE->getTruncateOrNoop(NewBECount,
2039                                          BackedgeTakenCount->getType());
2040     else
2041       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2042                                                  NewBECount->getType());
2043     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2044                                  NewBECount) && "indvars must preserve SCEV");
2045   }
2046   if (VerifyMemorySSA && MSSAU)
2047     MSSAU->getMemorySSA()->verifyMemorySSA();
2048 #endif
2049 
2050   return Changed;
2051 }
2052 
2053 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2054                                           LoopStandardAnalysisResults &AR,
2055                                           LPMUpdater &) {
2056   Function *F = L.getHeader()->getParent();
2057   const DataLayout &DL = F->getParent()->getDataLayout();
2058 
2059   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2060                      WidenIndVars && AllowIVWidening);
2061   if (!IVS.run(&L))
2062     return PreservedAnalyses::all();
2063 
2064   auto PA = getLoopPassPreservedAnalyses();
2065   PA.preserveSet<CFGAnalyses>();
2066   if (AR.MSSA)
2067     PA.preserve<MemorySSAAnalysis>();
2068   return PA;
2069 }
2070 
2071 namespace {
2072 
2073 struct IndVarSimplifyLegacyPass : public LoopPass {
2074   static char ID; // Pass identification, replacement for typeid
2075 
2076   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2077     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2078   }
2079 
2080   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2081     if (skipLoop(L))
2082       return false;
2083 
2084     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2085     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2086     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2087     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2088     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2089     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2090     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2091     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2092     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2093     MemorySSA *MSSA = nullptr;
2094     if (MSSAAnalysis)
2095       MSSA = &MSSAAnalysis->getMSSA();
2096 
2097     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
2098     return IVS.run(L);
2099   }
2100 
2101   void getAnalysisUsage(AnalysisUsage &AU) const override {
2102     AU.setPreservesCFG();
2103     AU.addPreserved<MemorySSAWrapperPass>();
2104     getLoopAnalysisUsage(AU);
2105   }
2106 };
2107 
2108 } // end anonymous namespace
2109 
2110 char IndVarSimplifyLegacyPass::ID = 0;
2111 
2112 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2113                       "Induction Variable Simplification", false, false)
2114 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2115 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2116                     "Induction Variable Simplification", false, false)
2117 
2118 Pass *llvm::createIndVarSimplifyPass() {
2119   return new IndVarSimplifyLegacyPass();
2120 }
2121