xref: /llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision 7ff943a9ed878e3b8ffe162b2af41a81da1a11a2)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/MemorySSA.h"
42 #include "llvm/Analysis/MemorySSAUpdater.h"
43 #include "llvm/Analysis/ScalarEvolution.h"
44 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
45 #include "llvm/Analysis/TargetLibraryInfo.h"
46 #include "llvm/Analysis/TargetTransformInfo.h"
47 #include "llvm/Analysis/ValueTracking.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/ConstantRange.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Operator.h"
64 #include "llvm/IR/PassManager.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/Use.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/IR/ValueHandle.h"
71 #include "llvm/InitializePasses.h"
72 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/CommandLine.h"
75 #include "llvm/Support/Compiler.h"
76 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/ErrorHandling.h"
78 #include "llvm/Support/MathExtras.h"
79 #include "llvm/Support/raw_ostream.h"
80 #include "llvm/Transforms/Scalar.h"
81 #include "llvm/Transforms/Scalar/LoopPassManager.h"
82 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
83 #include "llvm/Transforms/Utils/Local.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
86 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
87 #include <cassert>
88 #include <cstdint>
89 #include <utility>
90 
91 using namespace llvm;
92 using namespace PatternMatch;
93 
94 #define DEBUG_TYPE "indvars"
95 
96 STATISTIC(NumWidened     , "Number of indvars widened");
97 STATISTIC(NumReplaced    , "Number of exit values replaced");
98 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
99 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
100 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
101 
102 // Trip count verification can be enabled by default under NDEBUG if we
103 // implement a strong expression equivalence checker in SCEV. Until then, we
104 // use the verify-indvars flag, which may assert in some cases.
105 static cl::opt<bool> VerifyIndvars(
106     "verify-indvars", cl::Hidden,
107     cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
108              "effect in release builds. (Note: this adds additional SCEV "
109              "queries potentially changing the analysis result)"));
110 
111 static cl::opt<ReplaceExitVal> ReplaceExitValue(
112     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
113     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
114     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
115                clEnumValN(OnlyCheapRepl, "cheap",
116                           "only replace exit value when the cost is cheap"),
117                clEnumValN(NoHardUse, "noharduse",
118                           "only replace exit values when loop def likely dead"),
119                clEnumValN(AlwaysRepl, "always",
120                           "always replace exit value whenever possible")));
121 
122 static cl::opt<bool> UsePostIncrementRanges(
123   "indvars-post-increment-ranges", cl::Hidden,
124   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
125   cl::init(true));
126 
127 static cl::opt<bool>
128 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
129             cl::desc("Disable Linear Function Test Replace optimization"));
130 
131 static cl::opt<bool>
132 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
133                 cl::desc("Predicate conditions in read only loops"));
134 
135 static cl::opt<bool>
136 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
137                 cl::desc("Allow widening of indvars to eliminate s/zext"));
138 
139 namespace {
140 
141 struct RewritePhi;
142 
143 class IndVarSimplify {
144   LoopInfo *LI;
145   ScalarEvolution *SE;
146   DominatorTree *DT;
147   const DataLayout &DL;
148   TargetLibraryInfo *TLI;
149   const TargetTransformInfo *TTI;
150   std::unique_ptr<MemorySSAUpdater> MSSAU;
151 
152   SmallVector<WeakTrackingVH, 16> DeadInsts;
153   bool WidenIndVars;
154 
155   bool handleFloatingPointIV(Loop *L, PHINode *PH);
156   bool rewriteNonIntegerIVs(Loop *L);
157 
158   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
159   /// Try to improve our exit conditions by converting condition from signed
160   /// to unsigned or rotating computation out of the loop.
161   /// (See inline comment about why this is duplicated from simplifyAndExtend)
162   bool canonicalizeExitCondition(Loop *L);
163   /// Try to eliminate loop exits based on analyzeable exit counts
164   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
165   /// Try to form loop invariant tests for loop exits by changing how many
166   /// iterations of the loop run when that is unobservable.
167   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
168 
169   bool rewriteFirstIterationLoopExitValues(Loop *L);
170 
171   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
172                                  const SCEV *ExitCount,
173                                  PHINode *IndVar, SCEVExpander &Rewriter);
174 
175   bool sinkUnusedInvariants(Loop *L);
176 
177 public:
178   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
179                  const DataLayout &DL, TargetLibraryInfo *TLI,
180                  TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
181       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
182         WidenIndVars(WidenIndVars) {
183     if (MSSA)
184       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
185   }
186 
187   bool run(Loop *L);
188 };
189 
190 } // end anonymous namespace
191 
192 //===----------------------------------------------------------------------===//
193 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
194 //===----------------------------------------------------------------------===//
195 
196 /// Convert APF to an integer, if possible.
197 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
198   bool isExact = false;
199   // See if we can convert this to an int64_t
200   uint64_t UIntVal;
201   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
202                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
203       !isExact)
204     return false;
205   IntVal = UIntVal;
206   return true;
207 }
208 
209 /// If the loop has floating induction variable then insert corresponding
210 /// integer induction variable if possible.
211 /// For example,
212 /// for(double i = 0; i < 10000; ++i)
213 ///   bar(i)
214 /// is converted into
215 /// for(int i = 0; i < 10000; ++i)
216 ///   bar((double)i);
217 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
218   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
219   unsigned BackEdge     = IncomingEdge^1;
220 
221   // Check incoming value.
222   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
223 
224   int64_t InitValue;
225   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
226     return false;
227 
228   // Check IV increment. Reject this PN if increment operation is not
229   // an add or increment value can not be represented by an integer.
230   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
231   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
232 
233   // If this is not an add of the PHI with a constantfp, or if the constant fp
234   // is not an integer, bail out.
235   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
236   int64_t IncValue;
237   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
238       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
239     return false;
240 
241   // Check Incr uses. One user is PN and the other user is an exit condition
242   // used by the conditional terminator.
243   Value::user_iterator IncrUse = Incr->user_begin();
244   Instruction *U1 = cast<Instruction>(*IncrUse++);
245   if (IncrUse == Incr->user_end()) return false;
246   Instruction *U2 = cast<Instruction>(*IncrUse++);
247   if (IncrUse != Incr->user_end()) return false;
248 
249   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
250   // only used by a branch, we can't transform it.
251   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
252   if (!Compare)
253     Compare = dyn_cast<FCmpInst>(U2);
254   if (!Compare || !Compare->hasOneUse() ||
255       !isa<BranchInst>(Compare->user_back()))
256     return false;
257 
258   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
259 
260   // We need to verify that the branch actually controls the iteration count
261   // of the loop.  If not, the new IV can overflow and no one will notice.
262   // The branch block must be in the loop and one of the successors must be out
263   // of the loop.
264   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
265   if (!L->contains(TheBr->getParent()) ||
266       (L->contains(TheBr->getSuccessor(0)) &&
267        L->contains(TheBr->getSuccessor(1))))
268     return false;
269 
270   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
271   // transform it.
272   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
273   int64_t ExitValue;
274   if (ExitValueVal == nullptr ||
275       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
276     return false;
277 
278   // Find new predicate for integer comparison.
279   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
280   switch (Compare->getPredicate()) {
281   default: return false;  // Unknown comparison.
282   case CmpInst::FCMP_OEQ:
283   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
284   case CmpInst::FCMP_ONE:
285   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
286   case CmpInst::FCMP_OGT:
287   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
288   case CmpInst::FCMP_OGE:
289   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
290   case CmpInst::FCMP_OLT:
291   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
292   case CmpInst::FCMP_OLE:
293   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
294   }
295 
296   // We convert the floating point induction variable to a signed i32 value if
297   // we can.  This is only safe if the comparison will not overflow in a way
298   // that won't be trapped by the integer equivalent operations.  Check for this
299   // now.
300   // TODO: We could use i64 if it is native and the range requires it.
301 
302   // The start/stride/exit values must all fit in signed i32.
303   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
304     return false;
305 
306   // If not actually striding (add x, 0.0), avoid touching the code.
307   if (IncValue == 0)
308     return false;
309 
310   // Positive and negative strides have different safety conditions.
311   if (IncValue > 0) {
312     // If we have a positive stride, we require the init to be less than the
313     // exit value.
314     if (InitValue >= ExitValue)
315       return false;
316 
317     uint32_t Range = uint32_t(ExitValue-InitValue);
318     // Check for infinite loop, either:
319     // while (i <= Exit) or until (i > Exit)
320     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
321       if (++Range == 0) return false;  // Range overflows.
322     }
323 
324     unsigned Leftover = Range % uint32_t(IncValue);
325 
326     // If this is an equality comparison, we require that the strided value
327     // exactly land on the exit value, otherwise the IV condition will wrap
328     // around and do things the fp IV wouldn't.
329     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
330         Leftover != 0)
331       return false;
332 
333     // If the stride would wrap around the i32 before exiting, we can't
334     // transform the IV.
335     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
336       return false;
337   } else {
338     // If we have a negative stride, we require the init to be greater than the
339     // exit value.
340     if (InitValue <= ExitValue)
341       return false;
342 
343     uint32_t Range = uint32_t(InitValue-ExitValue);
344     // Check for infinite loop, either:
345     // while (i >= Exit) or until (i < Exit)
346     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
347       if (++Range == 0) return false;  // Range overflows.
348     }
349 
350     unsigned Leftover = Range % uint32_t(-IncValue);
351 
352     // If this is an equality comparison, we require that the strided value
353     // exactly land on the exit value, otherwise the IV condition will wrap
354     // around and do things the fp IV wouldn't.
355     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
356         Leftover != 0)
357       return false;
358 
359     // If the stride would wrap around the i32 before exiting, we can't
360     // transform the IV.
361     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
362       return false;
363   }
364 
365   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
366 
367   // Insert new integer induction variable.
368   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
369   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
370                       PN->getIncomingBlock(IncomingEdge));
371 
372   Value *NewAdd =
373     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
374                               Incr->getName()+".int", Incr);
375   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
376 
377   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
378                                       ConstantInt::get(Int32Ty, ExitValue),
379                                       Compare->getName());
380 
381   // In the following deletions, PN may become dead and may be deleted.
382   // Use a WeakTrackingVH to observe whether this happens.
383   WeakTrackingVH WeakPH = PN;
384 
385   // Delete the old floating point exit comparison.  The branch starts using the
386   // new comparison.
387   NewCompare->takeName(Compare);
388   Compare->replaceAllUsesWith(NewCompare);
389   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
390 
391   // Delete the old floating point increment.
392   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
393   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
394 
395   // If the FP induction variable still has uses, this is because something else
396   // in the loop uses its value.  In order to canonicalize the induction
397   // variable, we chose to eliminate the IV and rewrite it in terms of an
398   // int->fp cast.
399   //
400   // We give preference to sitofp over uitofp because it is faster on most
401   // platforms.
402   if (WeakPH) {
403     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
404                                  &*PN->getParent()->getFirstInsertionPt());
405     PN->replaceAllUsesWith(Conv);
406     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
407   }
408   return true;
409 }
410 
411 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
412   // First step.  Check to see if there are any floating-point recurrences.
413   // If there are, change them into integer recurrences, permitting analysis by
414   // the SCEV routines.
415   BasicBlock *Header = L->getHeader();
416 
417   SmallVector<WeakTrackingVH, 8> PHIs;
418   for (PHINode &PN : Header->phis())
419     PHIs.push_back(&PN);
420 
421   bool Changed = false;
422   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
423     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
424       Changed |= handleFloatingPointIV(L, PN);
425 
426   // If the loop previously had floating-point IV, ScalarEvolution
427   // may not have been able to compute a trip count. Now that we've done some
428   // re-writing, the trip count may be computable.
429   if (Changed)
430     SE->forgetLoop(L);
431   return Changed;
432 }
433 
434 //===---------------------------------------------------------------------===//
435 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
436 // they will exit at the first iteration.
437 //===---------------------------------------------------------------------===//
438 
439 /// Check to see if this loop has loop invariant conditions which lead to loop
440 /// exits. If so, we know that if the exit path is taken, it is at the first
441 /// loop iteration. This lets us predict exit values of PHI nodes that live in
442 /// loop header.
443 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
444   // Verify the input to the pass is already in LCSSA form.
445   assert(L->isLCSSAForm(*DT));
446 
447   SmallVector<BasicBlock *, 8> ExitBlocks;
448   L->getUniqueExitBlocks(ExitBlocks);
449 
450   bool MadeAnyChanges = false;
451   for (auto *ExitBB : ExitBlocks) {
452     // If there are no more PHI nodes in this exit block, then no more
453     // values defined inside the loop are used on this path.
454     for (PHINode &PN : ExitBB->phis()) {
455       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
456            IncomingValIdx != E; ++IncomingValIdx) {
457         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
458 
459         // Can we prove that the exit must run on the first iteration if it
460         // runs at all?  (i.e. early exits are fine for our purposes, but
461         // traces which lead to this exit being taken on the 2nd iteration
462         // aren't.)  Note that this is about whether the exit branch is
463         // executed, not about whether it is taken.
464         if (!L->getLoopLatch() ||
465             !DT->dominates(IncomingBB, L->getLoopLatch()))
466           continue;
467 
468         // Get condition that leads to the exit path.
469         auto *TermInst = IncomingBB->getTerminator();
470 
471         Value *Cond = nullptr;
472         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
473           // Must be a conditional branch, otherwise the block
474           // should not be in the loop.
475           Cond = BI->getCondition();
476         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
477           Cond = SI->getCondition();
478         else
479           continue;
480 
481         if (!L->isLoopInvariant(Cond))
482           continue;
483 
484         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
485 
486         // Only deal with PHIs in the loop header.
487         if (!ExitVal || ExitVal->getParent() != L->getHeader())
488           continue;
489 
490         // If ExitVal is a PHI on the loop header, then we know its
491         // value along this exit because the exit can only be taken
492         // on the first iteration.
493         auto *LoopPreheader = L->getLoopPreheader();
494         assert(LoopPreheader && "Invalid loop");
495         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
496         if (PreheaderIdx != -1) {
497           assert(ExitVal->getParent() == L->getHeader() &&
498                  "ExitVal must be in loop header");
499           MadeAnyChanges = true;
500           PN.setIncomingValue(IncomingValIdx,
501                               ExitVal->getIncomingValue(PreheaderIdx));
502           SE->forgetValue(&PN);
503         }
504       }
505     }
506   }
507   return MadeAnyChanges;
508 }
509 
510 //===----------------------------------------------------------------------===//
511 //  IV Widening - Extend the width of an IV to cover its widest uses.
512 //===----------------------------------------------------------------------===//
513 
514 /// Update information about the induction variable that is extended by this
515 /// sign or zero extend operation. This is used to determine the final width of
516 /// the IV before actually widening it.
517 static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
518                         ScalarEvolution *SE,
519                         const TargetTransformInfo *TTI) {
520   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
521   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
522     return;
523 
524   Type *Ty = Cast->getType();
525   uint64_t Width = SE->getTypeSizeInBits(Ty);
526   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
527     return;
528 
529   // Check that `Cast` actually extends the induction variable (we rely on this
530   // later).  This takes care of cases where `Cast` is extending a truncation of
531   // the narrow induction variable, and thus can end up being narrower than the
532   // "narrow" induction variable.
533   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
534   if (NarrowIVWidth >= Width)
535     return;
536 
537   // Cast is either an sext or zext up to this point.
538   // We should not widen an indvar if arithmetics on the wider indvar are more
539   // expensive than those on the narrower indvar. We check only the cost of ADD
540   // because at least an ADD is required to increment the induction variable. We
541   // could compute more comprehensively the cost of all instructions on the
542   // induction variable when necessary.
543   if (TTI &&
544       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
545           TTI->getArithmeticInstrCost(Instruction::Add,
546                                       Cast->getOperand(0)->getType())) {
547     return;
548   }
549 
550   if (!WI.WidestNativeType) {
551     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
552     WI.IsSigned = IsSigned;
553     return;
554   }
555 
556   // We extend the IV to satisfy the sign of its first user, arbitrarily.
557   if (WI.IsSigned != IsSigned)
558     return;
559 
560   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
561     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
562 }
563 
564 //===----------------------------------------------------------------------===//
565 //  Live IV Reduction - Minimize IVs live across the loop.
566 //===----------------------------------------------------------------------===//
567 
568 //===----------------------------------------------------------------------===//
569 //  Simplification of IV users based on SCEV evaluation.
570 //===----------------------------------------------------------------------===//
571 
572 namespace {
573 
574 class IndVarSimplifyVisitor : public IVVisitor {
575   ScalarEvolution *SE;
576   const TargetTransformInfo *TTI;
577   PHINode *IVPhi;
578 
579 public:
580   WideIVInfo WI;
581 
582   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
583                         const TargetTransformInfo *TTI,
584                         const DominatorTree *DTree)
585     : SE(SCEV), TTI(TTI), IVPhi(IV) {
586     DT = DTree;
587     WI.NarrowIV = IVPhi;
588   }
589 
590   // Implement the interface used by simplifyUsersOfIV.
591   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
592 };
593 
594 } // end anonymous namespace
595 
596 /// Iteratively perform simplification on a worklist of IV users. Each
597 /// successive simplification may push more users which may themselves be
598 /// candidates for simplification.
599 ///
600 /// Sign/Zero extend elimination is interleaved with IV simplification.
601 bool IndVarSimplify::simplifyAndExtend(Loop *L,
602                                        SCEVExpander &Rewriter,
603                                        LoopInfo *LI) {
604   SmallVector<WideIVInfo, 8> WideIVs;
605 
606   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
607           Intrinsic::getName(Intrinsic::experimental_guard));
608   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
609 
610   SmallVector<PHINode*, 8> LoopPhis;
611   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
612     LoopPhis.push_back(cast<PHINode>(I));
613   }
614   // Each round of simplification iterates through the SimplifyIVUsers worklist
615   // for all current phis, then determines whether any IVs can be
616   // widened. Widening adds new phis to LoopPhis, inducing another round of
617   // simplification on the wide IVs.
618   bool Changed = false;
619   while (!LoopPhis.empty()) {
620     // Evaluate as many IV expressions as possible before widening any IVs. This
621     // forces SCEV to set no-wrap flags before evaluating sign/zero
622     // extension. The first time SCEV attempts to normalize sign/zero extension,
623     // the result becomes final. So for the most predictable results, we delay
624     // evaluation of sign/zero extend evaluation until needed, and avoid running
625     // other SCEV based analysis prior to simplifyAndExtend.
626     do {
627       PHINode *CurrIV = LoopPhis.pop_back_val();
628 
629       // Information about sign/zero extensions of CurrIV.
630       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
631 
632       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
633                                    &Visitor);
634 
635       if (Visitor.WI.WidestNativeType) {
636         WideIVs.push_back(Visitor.WI);
637       }
638     } while(!LoopPhis.empty());
639 
640     // Continue if we disallowed widening.
641     if (!WidenIndVars)
642       continue;
643 
644     for (; !WideIVs.empty(); WideIVs.pop_back()) {
645       unsigned ElimExt;
646       unsigned Widened;
647       if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
648                                           DT, DeadInsts, ElimExt, Widened,
649                                           HasGuards, UsePostIncrementRanges)) {
650         NumElimExt += ElimExt;
651         NumWidened += Widened;
652         Changed = true;
653         LoopPhis.push_back(WidePhi);
654       }
655     }
656   }
657   return Changed;
658 }
659 
660 //===----------------------------------------------------------------------===//
661 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
662 //===----------------------------------------------------------------------===//
663 
664 /// Given an Value which is hoped to be part of an add recurance in the given
665 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
666 /// that this is less general than SCEVs AddRec checking.
667 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
668   Instruction *IncI = dyn_cast<Instruction>(IncV);
669   if (!IncI)
670     return nullptr;
671 
672   switch (IncI->getOpcode()) {
673   case Instruction::Add:
674   case Instruction::Sub:
675     break;
676   case Instruction::GetElementPtr:
677     // An IV counter must preserve its type.
678     if (IncI->getNumOperands() == 2)
679       break;
680     LLVM_FALLTHROUGH;
681   default:
682     return nullptr;
683   }
684 
685   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
686   if (Phi && Phi->getParent() == L->getHeader()) {
687     if (L->isLoopInvariant(IncI->getOperand(1)))
688       return Phi;
689     return nullptr;
690   }
691   if (IncI->getOpcode() == Instruction::GetElementPtr)
692     return nullptr;
693 
694   // Allow add/sub to be commuted.
695   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
696   if (Phi && Phi->getParent() == L->getHeader()) {
697     if (L->isLoopInvariant(IncI->getOperand(0)))
698       return Phi;
699   }
700   return nullptr;
701 }
702 
703 /// Whether the current loop exit test is based on this value.  Currently this
704 /// is limited to a direct use in the loop condition.
705 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
706   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
707   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
708   // TODO: Allow non-icmp loop test.
709   if (!ICmp)
710     return false;
711 
712   // TODO: Allow indirect use.
713   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
714 }
715 
716 /// linearFunctionTestReplace policy. Return true unless we can show that the
717 /// current exit test is already sufficiently canonical.
718 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
719   assert(L->getLoopLatch() && "Must be in simplified form");
720 
721   // Avoid converting a constant or loop invariant test back to a runtime
722   // test.  This is critical for when SCEV's cached ExitCount is less precise
723   // than the current IR (such as after we've proven a particular exit is
724   // actually dead and thus the BE count never reaches our ExitCount.)
725   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
726   if (L->isLoopInvariant(BI->getCondition()))
727     return false;
728 
729   // Do LFTR to simplify the exit condition to an ICMP.
730   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
731   if (!Cond)
732     return true;
733 
734   // Do LFTR to simplify the exit ICMP to EQ/NE
735   ICmpInst::Predicate Pred = Cond->getPredicate();
736   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
737     return true;
738 
739   // Look for a loop invariant RHS
740   Value *LHS = Cond->getOperand(0);
741   Value *RHS = Cond->getOperand(1);
742   if (!L->isLoopInvariant(RHS)) {
743     if (!L->isLoopInvariant(LHS))
744       return true;
745     std::swap(LHS, RHS);
746   }
747   // Look for a simple IV counter LHS
748   PHINode *Phi = dyn_cast<PHINode>(LHS);
749   if (!Phi)
750     Phi = getLoopPhiForCounter(LHS, L);
751 
752   if (!Phi)
753     return true;
754 
755   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
756   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
757   if (Idx < 0)
758     return true;
759 
760   // Do LFTR if the exit condition's IV is *not* a simple counter.
761   Value *IncV = Phi->getIncomingValue(Idx);
762   return Phi != getLoopPhiForCounter(IncV, L);
763 }
764 
765 /// Return true if undefined behavior would provable be executed on the path to
766 /// OnPathTo if Root produced a posion result.  Note that this doesn't say
767 /// anything about whether OnPathTo is actually executed or whether Root is
768 /// actually poison.  This can be used to assess whether a new use of Root can
769 /// be added at a location which is control equivalent with OnPathTo (such as
770 /// immediately before it) without introducing UB which didn't previously
771 /// exist.  Note that a false result conveys no information.
772 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
773                                           Instruction *OnPathTo,
774                                           DominatorTree *DT) {
775   // Basic approach is to assume Root is poison, propagate poison forward
776   // through all users we can easily track, and then check whether any of those
777   // users are provable UB and must execute before out exiting block might
778   // exit.
779 
780   // The set of all recursive users we've visited (which are assumed to all be
781   // poison because of said visit)
782   SmallSet<const Value *, 16> KnownPoison;
783   SmallVector<const Instruction*, 16> Worklist;
784   Worklist.push_back(Root);
785   while (!Worklist.empty()) {
786     const Instruction *I = Worklist.pop_back_val();
787 
788     // If we know this must trigger UB on a path leading our target.
789     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
790       return true;
791 
792     // If we can't analyze propagation through this instruction, just skip it
793     // and transitive users.  Safe as false is a conservative result.
794     if (!propagatesPoison(cast<Operator>(I)) && I != Root)
795       continue;
796 
797     if (KnownPoison.insert(I).second)
798       for (const User *User : I->users())
799         Worklist.push_back(cast<Instruction>(User));
800   }
801 
802   // Might be non-UB, or might have a path we couldn't prove must execute on
803   // way to exiting bb.
804   return false;
805 }
806 
807 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
808 /// down to checking that all operands are constant and listing instructions
809 /// that may hide undef.
810 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
811                                unsigned Depth) {
812   if (isa<Constant>(V))
813     return !isa<UndefValue>(V);
814 
815   if (Depth >= 6)
816     return false;
817 
818   // Conservatively handle non-constant non-instructions. For example, Arguments
819   // may be undef.
820   Instruction *I = dyn_cast<Instruction>(V);
821   if (!I)
822     return false;
823 
824   // Load and return values may be undef.
825   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
826     return false;
827 
828   // Optimistically handle other instructions.
829   for (Value *Op : I->operands()) {
830     if (!Visited.insert(Op).second)
831       continue;
832     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
833       return false;
834   }
835   return true;
836 }
837 
838 /// Return true if the given value is concrete. We must prove that undef can
839 /// never reach it.
840 ///
841 /// TODO: If we decide that this is a good approach to checking for undef, we
842 /// may factor it into a common location.
843 static bool hasConcreteDef(Value *V) {
844   SmallPtrSet<Value*, 8> Visited;
845   Visited.insert(V);
846   return hasConcreteDefImpl(V, Visited, 0);
847 }
848 
849 /// Return true if this IV has any uses other than the (soon to be rewritten)
850 /// loop exit test.
851 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
852   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
853   Value *IncV = Phi->getIncomingValue(LatchIdx);
854 
855   for (User *U : Phi->users())
856     if (U != Cond && U != IncV) return false;
857 
858   for (User *U : IncV->users())
859     if (U != Cond && U != Phi) return false;
860   return true;
861 }
862 
863 /// Return true if the given phi is a "counter" in L.  A counter is an
864 /// add recurance (of integer or pointer type) with an arbitrary start, and a
865 /// step of 1.  Note that L must have exactly one latch.
866 static bool isLoopCounter(PHINode* Phi, Loop *L,
867                           ScalarEvolution *SE) {
868   assert(Phi->getParent() == L->getHeader());
869   assert(L->getLoopLatch());
870 
871   if (!SE->isSCEVable(Phi->getType()))
872     return false;
873 
874   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
875   if (!AR || AR->getLoop() != L || !AR->isAffine())
876     return false;
877 
878   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
879   if (!Step || !Step->isOne())
880     return false;
881 
882   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
883   Value *IncV = Phi->getIncomingValue(LatchIdx);
884   return (getLoopPhiForCounter(IncV, L) == Phi &&
885           isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
886 }
887 
888 /// Search the loop header for a loop counter (anadd rec w/step of one)
889 /// suitable for use by LFTR.  If multiple counters are available, select the
890 /// "best" one based profitable heuristics.
891 ///
892 /// BECount may be an i8* pointer type. The pointer difference is already
893 /// valid count without scaling the address stride, so it remains a pointer
894 /// expression as far as SCEV is concerned.
895 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
896                                 const SCEV *BECount,
897                                 ScalarEvolution *SE, DominatorTree *DT) {
898   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
899 
900   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
901 
902   // Loop over all of the PHI nodes, looking for a simple counter.
903   PHINode *BestPhi = nullptr;
904   const SCEV *BestInit = nullptr;
905   BasicBlock *LatchBlock = L->getLoopLatch();
906   assert(LatchBlock && "Must be in simplified form");
907   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
908 
909   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
910     PHINode *Phi = cast<PHINode>(I);
911     if (!isLoopCounter(Phi, L, SE))
912       continue;
913 
914     // Avoid comparing an integer IV against a pointer Limit.
915     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
916       continue;
917 
918     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
919 
920     // AR may be a pointer type, while BECount is an integer type.
921     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
922     // AR may not be a narrower type, or we may never exit.
923     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
924     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
925       continue;
926 
927     // Avoid reusing a potentially undef value to compute other values that may
928     // have originally had a concrete definition.
929     if (!hasConcreteDef(Phi)) {
930       // We explicitly allow unknown phis as long as they are already used by
931       // the loop exit test.  This is legal since performing LFTR could not
932       // increase the number of undef users.
933       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
934       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
935           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
936         continue;
937     }
938 
939     // Avoid introducing undefined behavior due to poison which didn't exist in
940     // the original program.  (Annoyingly, the rules for poison and undef
941     // propagation are distinct, so this does NOT cover the undef case above.)
942     // We have to ensure that we don't introduce UB by introducing a use on an
943     // iteration where said IV produces poison.  Our strategy here differs for
944     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
945     // see code in linearFunctionTestReplace.  For pointers, we restrict
946     // transforms as there is no good way to reinfer inbounds once lost.
947     if (!Phi->getType()->isIntegerTy() &&
948         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
949       continue;
950 
951     const SCEV *Init = AR->getStart();
952 
953     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
954       // Don't force a live loop counter if another IV can be used.
955       if (AlmostDeadIV(Phi, LatchBlock, Cond))
956         continue;
957 
958       // Prefer to count-from-zero. This is a more "canonical" counter form. It
959       // also prefers integer to pointer IVs.
960       if (BestInit->isZero() != Init->isZero()) {
961         if (BestInit->isZero())
962           continue;
963       }
964       // If two IVs both count from zero or both count from nonzero then the
965       // narrower is likely a dead phi that has been widened. Use the wider phi
966       // to allow the other to be eliminated.
967       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
968         continue;
969     }
970     BestPhi = Phi;
971     BestInit = Init;
972   }
973   return BestPhi;
974 }
975 
976 /// Insert an IR expression which computes the value held by the IV IndVar
977 /// (which must be an loop counter w/unit stride) after the backedge of loop L
978 /// is taken ExitCount times.
979 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
980                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
981                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
982   assert(isLoopCounter(IndVar, L, SE));
983   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
984   const SCEV *IVInit = AR->getStart();
985 
986   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
987   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
988   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
989   // the existing GEPs whenever possible.
990   if (IndVar->getType()->isPointerTy() &&
991       !ExitCount->getType()->isPointerTy()) {
992     // IVOffset will be the new GEP offset that is interpreted by GEP as a
993     // signed value. ExitCount on the other hand represents the loop trip count,
994     // which is an unsigned value. FindLoopCounter only allows induction
995     // variables that have a positive unit stride of one. This means we don't
996     // have to handle the case of negative offsets (yet) and just need to zero
997     // extend ExitCount.
998     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
999     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
1000     if (UsePostInc)
1001       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
1002 
1003     // Expand the code for the iteration count.
1004     assert(SE->isLoopInvariant(IVOffset, L) &&
1005            "Computed iteration count is not loop invariant!");
1006 
1007     // We could handle pointer IVs other than i8*, but we need to compensate for
1008     // gep index scaling.
1009     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
1010                              cast<PointerType>(IndVar->getType())
1011                                  ->getElementType())->isOne() &&
1012            "unit stride pointer IV must be i8*");
1013 
1014     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
1015     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1016     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
1017   } else {
1018     // In any other case, convert both IVInit and ExitCount to integers before
1019     // comparing. This may result in SCEV expansion of pointers, but in practice
1020     // SCEV will fold the pointer arithmetic away as such:
1021     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
1022     //
1023     // Valid Cases: (1) both integers is most common; (2) both may be pointers
1024     // for simple memset-style loops.
1025     //
1026     // IVInit integer and ExitCount pointer would only occur if a canonical IV
1027     // were generated on top of case #2, which is not expected.
1028 
1029     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
1030     // For unit stride, IVCount = Start + ExitCount with 2's complement
1031     // overflow.
1032 
1033     // For integer IVs, truncate the IV before computing IVInit + BECount,
1034     // unless we know apriori that the limit must be a constant when evaluated
1035     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
1036     // of the IV in the loop over a (potentially) expensive expansion of the
1037     // widened exit count add(zext(add)) expression.
1038     if (SE->getTypeSizeInBits(IVInit->getType())
1039         > SE->getTypeSizeInBits(ExitCount->getType())) {
1040       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
1041         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
1042       else
1043         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
1044     }
1045 
1046     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
1047 
1048     if (UsePostInc)
1049       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
1050 
1051     // Expand the code for the iteration count.
1052     assert(SE->isLoopInvariant(IVLimit, L) &&
1053            "Computed iteration count is not loop invariant!");
1054     // Ensure that we generate the same type as IndVar, or a smaller integer
1055     // type. In the presence of null pointer values, we have an integer type
1056     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
1057     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
1058       IndVar->getType() : ExitCount->getType();
1059     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1060     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
1061   }
1062 }
1063 
1064 /// This method rewrites the exit condition of the loop to be a canonical !=
1065 /// comparison against the incremented loop induction variable.  This pass is
1066 /// able to rewrite the exit tests of any loop where the SCEV analysis can
1067 /// determine a loop-invariant trip count of the loop, which is actually a much
1068 /// broader range than just linear tests.
1069 bool IndVarSimplify::
1070 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
1071                           const SCEV *ExitCount,
1072                           PHINode *IndVar, SCEVExpander &Rewriter) {
1073   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
1074   assert(isLoopCounter(IndVar, L, SE));
1075   Instruction * const IncVar =
1076     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
1077 
1078   // Initialize CmpIndVar to the preincremented IV.
1079   Value *CmpIndVar = IndVar;
1080   bool UsePostInc = false;
1081 
1082   // If the exiting block is the same as the backedge block, we prefer to
1083   // compare against the post-incremented value, otherwise we must compare
1084   // against the preincremented value.
1085   if (ExitingBB == L->getLoopLatch()) {
1086     // For pointer IVs, we chose to not strip inbounds which requires us not
1087     // to add a potentially UB introducing use.  We need to either a) show
1088     // the loop test we're modifying is already in post-inc form, or b) show
1089     // that adding a use must not introduce UB.
1090     bool SafeToPostInc =
1091         IndVar->getType()->isIntegerTy() ||
1092         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
1093         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
1094     if (SafeToPostInc) {
1095       UsePostInc = true;
1096       CmpIndVar = IncVar;
1097     }
1098   }
1099 
1100   // It may be necessary to drop nowrap flags on the incrementing instruction
1101   // if either LFTR moves from a pre-inc check to a post-inc check (in which
1102   // case the increment might have previously been poison on the last iteration
1103   // only) or if LFTR switches to a different IV that was previously dynamically
1104   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
1105   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
1106   // check), because the pre-inc addrec flags may be adopted from the original
1107   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
1108   // TODO: This handling is inaccurate for one case: If we switch to a
1109   // dynamically dead IV that wraps on the first loop iteration only, which is
1110   // not covered by the post-inc addrec. (If the new IV was not dynamically
1111   // dead, it could not be poison on the first iteration in the first place.)
1112   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
1113     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
1114     if (BO->hasNoUnsignedWrap())
1115       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
1116     if (BO->hasNoSignedWrap())
1117       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
1118   }
1119 
1120   Value *ExitCnt = genLoopLimit(
1121       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
1122   assert(ExitCnt->getType()->isPointerTy() ==
1123              IndVar->getType()->isPointerTy() &&
1124          "genLoopLimit missed a cast");
1125 
1126   // Insert a new icmp_ne or icmp_eq instruction before the branch.
1127   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1128   ICmpInst::Predicate P;
1129   if (L->contains(BI->getSuccessor(0)))
1130     P = ICmpInst::ICMP_NE;
1131   else
1132     P = ICmpInst::ICMP_EQ;
1133 
1134   IRBuilder<> Builder(BI);
1135 
1136   // The new loop exit condition should reuse the debug location of the
1137   // original loop exit condition.
1138   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1139     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1140 
1141   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1142   // avoid the expensive expansion of the limit expression in the wider type,
1143   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
1144   // since we know (from the exit count bitwidth), that we can't self-wrap in
1145   // the narrower type.
1146   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1147   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1148   if (CmpIndVarSize > ExitCntSize) {
1149     assert(!CmpIndVar->getType()->isPointerTy() &&
1150            !ExitCnt->getType()->isPointerTy());
1151 
1152     // Before resorting to actually inserting the truncate, use the same
1153     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1154     // the other side of the comparison instead.  We still evaluate the limit
1155     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1156     // a truncate within in.
1157     bool Extended = false;
1158     const SCEV *IV = SE->getSCEV(CmpIndVar);
1159     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
1160                                                   ExitCnt->getType());
1161     const SCEV *ZExtTrunc =
1162       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1163 
1164     if (ZExtTrunc == IV) {
1165       Extended = true;
1166       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1167                                    "wide.trip.count");
1168     } else {
1169       const SCEV *SExtTrunc =
1170         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1171       if (SExtTrunc == IV) {
1172         Extended = true;
1173         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1174                                      "wide.trip.count");
1175       }
1176     }
1177 
1178     if (Extended) {
1179       bool Discard;
1180       L->makeLoopInvariant(ExitCnt, Discard);
1181     } else
1182       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1183                                       "lftr.wideiv");
1184   }
1185   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1186                     << "      LHS:" << *CmpIndVar << '\n'
1187                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1188                     << "\n"
1189                     << "      RHS:\t" << *ExitCnt << "\n"
1190                     << "ExitCount:\t" << *ExitCount << "\n"
1191                     << "  was: " << *BI->getCondition() << "\n");
1192 
1193   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1194   Value *OrigCond = BI->getCondition();
1195   // It's tempting to use replaceAllUsesWith here to fully replace the old
1196   // comparison, but that's not immediately safe, since users of the old
1197   // comparison may not be dominated by the new comparison. Instead, just
1198   // update the branch to use the new comparison; in the common case this
1199   // will make old comparison dead.
1200   BI->setCondition(Cond);
1201   DeadInsts.emplace_back(OrigCond);
1202 
1203   ++NumLFTR;
1204   return true;
1205 }
1206 
1207 //===----------------------------------------------------------------------===//
1208 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1209 //===----------------------------------------------------------------------===//
1210 
1211 /// If there's a single exit block, sink any loop-invariant values that
1212 /// were defined in the preheader but not used inside the loop into the
1213 /// exit block to reduce register pressure in the loop.
1214 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1215   BasicBlock *ExitBlock = L->getExitBlock();
1216   if (!ExitBlock) return false;
1217 
1218   BasicBlock *Preheader = L->getLoopPreheader();
1219   if (!Preheader) return false;
1220 
1221   bool MadeAnyChanges = false;
1222   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1223   BasicBlock::iterator I(Preheader->getTerminator());
1224   while (I != Preheader->begin()) {
1225     --I;
1226     // New instructions were inserted at the end of the preheader.
1227     if (isa<PHINode>(I))
1228       break;
1229 
1230     // Don't move instructions which might have side effects, since the side
1231     // effects need to complete before instructions inside the loop.  Also don't
1232     // move instructions which might read memory, since the loop may modify
1233     // memory. Note that it's okay if the instruction might have undefined
1234     // behavior: LoopSimplify guarantees that the preheader dominates the exit
1235     // block.
1236     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1237       continue;
1238 
1239     // Skip debug info intrinsics.
1240     if (isa<DbgInfoIntrinsic>(I))
1241       continue;
1242 
1243     // Skip eh pad instructions.
1244     if (I->isEHPad())
1245       continue;
1246 
1247     // Don't sink alloca: we never want to sink static alloca's out of the
1248     // entry block, and correctly sinking dynamic alloca's requires
1249     // checks for stacksave/stackrestore intrinsics.
1250     // FIXME: Refactor this check somehow?
1251     if (isa<AllocaInst>(I))
1252       continue;
1253 
1254     // Determine if there is a use in or before the loop (direct or
1255     // otherwise).
1256     bool UsedInLoop = false;
1257     for (Use &U : I->uses()) {
1258       Instruction *User = cast<Instruction>(U.getUser());
1259       BasicBlock *UseBB = User->getParent();
1260       if (PHINode *P = dyn_cast<PHINode>(User)) {
1261         unsigned i =
1262           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1263         UseBB = P->getIncomingBlock(i);
1264       }
1265       if (UseBB == Preheader || L->contains(UseBB)) {
1266         UsedInLoop = true;
1267         break;
1268       }
1269     }
1270 
1271     // If there is, the def must remain in the preheader.
1272     if (UsedInLoop)
1273       continue;
1274 
1275     // Otherwise, sink it to the exit block.
1276     Instruction *ToMove = &*I;
1277     bool Done = false;
1278 
1279     if (I != Preheader->begin()) {
1280       // Skip debug info intrinsics.
1281       do {
1282         --I;
1283       } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1284 
1285       if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1286         Done = true;
1287     } else {
1288       Done = true;
1289     }
1290 
1291     MadeAnyChanges = true;
1292     ToMove->moveBefore(*ExitBlock, InsertPt);
1293     if (Done) break;
1294     InsertPt = ToMove->getIterator();
1295   }
1296 
1297   return MadeAnyChanges;
1298 }
1299 
1300 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1301                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1302   auto *OldCond = BI->getCondition();
1303   BI->setCondition(NewCond);
1304   if (OldCond->use_empty())
1305     DeadInsts.emplace_back(OldCond);
1306 }
1307 
1308 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1309                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1310   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1311   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1312   auto *OldCond = BI->getCondition();
1313   auto *NewCond =
1314       ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
1315   replaceExitCond(BI, NewCond, DeadInsts);
1316 }
1317 
1318 static void replaceLoopPHINodesWithPreheaderValues(
1319     Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1320   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1321   auto *LoopPreheader = L->getLoopPreheader();
1322   auto *LoopHeader = L->getHeader();
1323   for (auto &PN : LoopHeader->phis()) {
1324     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1325     PN.replaceAllUsesWith(PreheaderIncoming);
1326     DeadInsts.emplace_back(&PN);
1327   }
1328 }
1329 
1330 static void replaceWithInvariantCond(
1331     const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
1332     const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
1333     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1334   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1335   Rewriter.setInsertPoint(BI);
1336   auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
1337   auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
1338   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1339   if (ExitIfTrue)
1340     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1341   IRBuilder<> Builder(BI);
1342   auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1343                                      BI->getCondition()->getName());
1344   replaceExitCond(BI, NewCond, DeadInsts);
1345 }
1346 
1347 static bool optimizeLoopExitWithUnknownExitCount(
1348     const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
1349     const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1350     ScalarEvolution *SE, SCEVExpander &Rewriter,
1351     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1352   ICmpInst::Predicate Pred;
1353   Value *LHS, *RHS;
1354   BasicBlock *TrueSucc, *FalseSucc;
1355   if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
1356                       m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
1357     return false;
1358 
1359   assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
1360          "Not a loop exit!");
1361 
1362   // 'LHS pred RHS' should now mean that we stay in loop.
1363   if (L->contains(FalseSucc))
1364     Pred = CmpInst::getInversePredicate(Pred);
1365 
1366   // If we are proving loop exit, invert the predicate.
1367   if (Inverted)
1368     Pred = CmpInst::getInversePredicate(Pred);
1369 
1370   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1371   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1372   // Can we prove it to be trivially true?
1373   if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
1374     foldExit(L, ExitingBB, Inverted, DeadInsts);
1375     return true;
1376   }
1377   // Further logic works for non-inverted condition only.
1378   if (Inverted)
1379     return false;
1380 
1381   auto *ARTy = LHSS->getType();
1382   auto *MaxIterTy = MaxIter->getType();
1383   // If possible, adjust types.
1384   if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1385     MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1386   else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1387     const SCEV *MinusOne = SE->getMinusOne(ARTy);
1388     auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1389     if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1390       MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1391   }
1392 
1393   if (SkipLastIter) {
1394     const SCEV *One = SE->getOne(MaxIter->getType());
1395     MaxIter = SE->getMinusSCEV(MaxIter, One);
1396   }
1397 
1398   // Check if there is a loop-invariant predicate equivalent to our check.
1399   auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1400                                                                L, BI, MaxIter);
1401   if (!LIP)
1402     return false;
1403 
1404   // Can we prove it to be trivially true?
1405   if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1406     foldExit(L, ExitingBB, Inverted, DeadInsts);
1407   else
1408     replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
1409                              Rewriter, DeadInsts);
1410 
1411   return true;
1412 }
1413 
1414 bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1415   // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1416   // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1417   // never reaches the icmp since the zext doesn't fold to an AddRec unless
1418   // it already has flags.  The alternative to this would be to extending the
1419   // set of "interesting" IV users to include the icmp, but doing that
1420   // regresses results in practice by querying SCEVs before trip counts which
1421   // rely on them which results in SCEV caching sub-optimal answers.  The
1422   // concern about caching sub-optimal results is why we only query SCEVs of
1423   // the loop invariant RHS here.
1424   SmallVector<BasicBlock*, 16> ExitingBlocks;
1425   L->getExitingBlocks(ExitingBlocks);
1426   bool Changed = false;
1427   for (auto *ExitingBB : ExitingBlocks) {
1428     auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1429     if (!BI)
1430       continue;
1431     assert(BI->isConditional() && "exit branch must be conditional");
1432 
1433     auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1434     if (!ICmp || !ICmp->hasOneUse() || ICmp->isUnsigned())
1435       continue;
1436 
1437     auto *LHS = ICmp->getOperand(0);
1438     auto *RHS = ICmp->getOperand(1);
1439     // For the range reasoning, avoid computing SCEVs in the loop to avoid
1440     // poisoning cache with sub-optimal results.  For the must-execute case,
1441     // this is a neccessary precondition for correctness.
1442     if (!L->isLoopInvariant(RHS))
1443       continue;
1444 
1445     // Match (icmp signed-cond zext, RHS)
1446     Value *LHSOp = nullptr;
1447     if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1448       continue;
1449 
1450     const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1451     const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1452     const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1453     auto FullCR = ConstantRange::getFull(InnerBitWidth);
1454     FullCR = FullCR.zeroExtend(OuterBitWidth);
1455     if (FullCR.contains(SE->getUnsignedRange(SE->getSCEV(RHS)))) {
1456       // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1457       // replace the signed condition with the unsigned version.
1458       ICmp->setPredicate(ICmp->getUnsignedPredicate());
1459       Changed = true;
1460       // Note: No SCEV invalidation needed.  We've changed the predicate, but
1461       // have not changed exit counts, or the values produced by the compare.
1462       continue;
1463     }
1464 
1465     // If we have a loop which would be undefined if infinite, and it has at
1466     // most one possible dynamic exit, then we can conclude that exit must
1467     // be taken.  If that exit must be taken, and we know the LHS can only
1468     // take values in the positive domain, then we can conclude RHS must
1469     // also be in that same range, and replace a signed compare with an
1470     // unsigned one.
1471     // If the exit might not be taken in a well defined program.
1472     if (ExitingBlocks.size() == 1 && SE->loopHasNoAbnormalExits(L) &&
1473         SE->loopIsFiniteByAssumption(L)) {
1474       // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1475       // replace the signed condition with the unsigned version.
1476       ICmp->setPredicate(ICmp->getUnsignedPredicate());
1477       Changed = true;
1478 
1479       // Given we've changed exit counts, notify SCEV.
1480       // Some nested loops may share same folded exit basic block,
1481       // thus we need to notify top most loop.
1482       SE->forgetTopmostLoop(L);
1483       continue;
1484     }
1485   }
1486 
1487   // Now that we've canonicalized the condition to match the extend,
1488   // see if we can rotate the extend out of the loop.
1489   for (auto *ExitingBB : ExitingBlocks) {
1490     auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1491     if (!BI)
1492       continue;
1493     assert(BI->isConditional() && "exit branch must be conditional");
1494 
1495     auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1496     if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1497       continue;
1498 
1499     auto *LHS = ICmp->getOperand(0);
1500     auto *RHS = ICmp->getOperand(1);
1501     if (L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS))
1502       // Nothing to rotate
1503       continue;
1504 
1505     if (!LHS->hasOneUse())
1506       // Can't rotate without increasing instruction count
1507       continue;
1508 
1509     // Match (icmp unsigned-cond zext, RHS)
1510     // TODO: Extend to handle corresponding sext/signed-cmp case
1511     // TODO: Extend to other invertible functions
1512     Value *LHSOp = nullptr;
1513     if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1514       continue;
1515 
1516     // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1517     // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1518     // when zext is loop varying and RHS is loop invariant.  This converts
1519     // loop varying work to loop-invariant work.
1520     auto doRotateTransform = [&]() {
1521       assert(ICmp->isUnsigned() && "must have proven unsigned already");
1522       auto *NewRHS =
1523         CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
1524                          L->getLoopPreheader()->getTerminator());
1525       ICmp->setOperand(0, LHSOp);
1526       ICmp->setOperand(1, NewRHS);
1527       if (LHS->use_empty())
1528         DeadInsts.push_back(LHS);
1529     };
1530 
1531 
1532     const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1533     const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1534     const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1535     auto FullCR = ConstantRange::getFull(InnerBitWidth);
1536     FullCR = FullCR.zeroExtend(OuterBitWidth);
1537     if (FullCR.contains(SE->getUnsignedRange(SE->getSCEV(RHS)))) {
1538       doRotateTransform();
1539       Changed = true;
1540       // Note, we are leaving SCEV in an unfortunately imprecise case here
1541       // as rotation tends to reveal information about trip counts not
1542       // previously visible.
1543       continue;
1544     }
1545 
1546     // If we have a loop which would be undefined if infinite, and it has at
1547     // most one possible dynamic exit, then we can conclude that exit must
1548     // be taken.  If that exit must be taken, and we know the LHS can only
1549     // take values in the positive domain, then we can conclude RHS must
1550     // also be in that same range.
1551     if (ExitingBlocks.size() == 1 && SE->loopHasNoAbnormalExits(L) &&
1552         SE->loopIsFiniteByAssumption(L)) {
1553       doRotateTransform();
1554       Changed = true;
1555       // Given we've changed exit counts, notify SCEV.
1556       // Some nested loops may share same folded exit basic block,
1557       // thus we need to notify top most loop.
1558       SE->forgetTopmostLoop(L);
1559       continue;
1560     }
1561   }
1562 
1563   return Changed;
1564 }
1565 
1566 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1567   SmallVector<BasicBlock*, 16> ExitingBlocks;
1568   L->getExitingBlocks(ExitingBlocks);
1569 
1570   // Remove all exits which aren't both rewriteable and execute on every
1571   // iteration.
1572   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1573     // If our exitting block exits multiple loops, we can only rewrite the
1574     // innermost one.  Otherwise, we're changing how many times the innermost
1575     // loop runs before it exits.
1576     if (LI->getLoopFor(ExitingBB) != L)
1577       return true;
1578 
1579     // Can't rewrite non-branch yet.
1580     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1581     if (!BI)
1582       return true;
1583 
1584     // If already constant, nothing to do.
1585     if (isa<Constant>(BI->getCondition()))
1586       return true;
1587 
1588     // Likewise, the loop latch must be dominated by the exiting BB.
1589     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1590       return true;
1591 
1592     return false;
1593   });
1594 
1595   if (ExitingBlocks.empty())
1596     return false;
1597 
1598   // Get a symbolic upper bound on the loop backedge taken count.
1599   const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
1600   if (isa<SCEVCouldNotCompute>(MaxExitCount))
1601     return false;
1602 
1603   // Visit our exit blocks in order of dominance. We know from the fact that
1604   // all exits must dominate the latch, so there is a total dominance order
1605   // between them.
1606   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1607                // std::sort sorts in ascending order, so we want the inverse of
1608                // the normal dominance relation.
1609                if (A == B) return false;
1610                if (DT->properlyDominates(A, B))
1611                  return true;
1612                else {
1613                  assert(DT->properlyDominates(B, A) &&
1614                         "expected total dominance order!");
1615                  return false;
1616                }
1617   });
1618 #ifdef ASSERT
1619   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1620     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1621   }
1622 #endif
1623 
1624   bool Changed = false;
1625   bool SkipLastIter = false;
1626   SmallSet<const SCEV*, 8> DominatingExitCounts;
1627   for (BasicBlock *ExitingBB : ExitingBlocks) {
1628     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1629     if (isa<SCEVCouldNotCompute>(ExitCount)) {
1630       // Okay, we do not know the exit count here. Can we at least prove that it
1631       // will remain the same within iteration space?
1632       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1633       auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
1634         return optimizeLoopExitWithUnknownExitCount(
1635             L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
1636             Rewriter, DeadInsts);
1637       };
1638 
1639       // TODO: We might have proved that we can skip the last iteration for
1640       // this check. In this case, we only want to check the condition on the
1641       // pre-last iteration (MaxExitCount - 1). However, there is a nasty
1642       // corner case:
1643       //
1644       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1645       //
1646       // If we could not prove that len != 0, then we also could not prove that
1647       // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1648       // OptimizeCond will likely not prove anything for it, even if it could
1649       // prove the same fact for len.
1650       //
1651       // As a temporary solution, we query both last and pre-last iterations in
1652       // hope that we will be able to prove triviality for at least one of
1653       // them. We can stop querying MaxExitCount for this case once SCEV
1654       // understands that (MaxExitCount - 1) will not overflow here.
1655       if (OptimizeCond(false, false) || OptimizeCond(true, false))
1656         Changed = true;
1657       else if (SkipLastIter)
1658         if (OptimizeCond(false, true) || OptimizeCond(true, true))
1659           Changed = true;
1660       continue;
1661     }
1662 
1663     if (MaxExitCount == ExitCount)
1664       // If the loop has more than 1 iteration, all further checks will be
1665       // executed 1 iteration less.
1666       SkipLastIter = true;
1667 
1668     // If we know we'd exit on the first iteration, rewrite the exit to
1669     // reflect this.  This does not imply the loop must exit through this
1670     // exit; there may be an earlier one taken on the first iteration.
1671     // We know that the backedge can't be taken, so we replace all
1672     // the header PHIs with values coming from the preheader.
1673     if (ExitCount->isZero()) {
1674       foldExit(L, ExitingBB, true, DeadInsts);
1675       replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
1676       Changed = true;
1677       continue;
1678     }
1679 
1680     assert(ExitCount->getType()->isIntegerTy() &&
1681            MaxExitCount->getType()->isIntegerTy() &&
1682            "Exit counts must be integers");
1683 
1684     Type *WiderType =
1685       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
1686     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
1687     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
1688     assert(MaxExitCount->getType() == ExitCount->getType());
1689 
1690     // Can we prove that some other exit must be taken strictly before this
1691     // one?
1692     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
1693                                      MaxExitCount, ExitCount)) {
1694       foldExit(L, ExitingBB, false, DeadInsts);
1695       Changed = true;
1696       continue;
1697     }
1698 
1699     // As we run, keep track of which exit counts we've encountered.  If we
1700     // find a duplicate, we've found an exit which would have exited on the
1701     // exiting iteration, but (from the visit order) strictly follows another
1702     // which does the same and is thus dead.
1703     if (!DominatingExitCounts.insert(ExitCount).second) {
1704       foldExit(L, ExitingBB, false, DeadInsts);
1705       Changed = true;
1706       continue;
1707     }
1708 
1709     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1710     // here.  If we kept track of the min of dominanting exits so far, we could
1711     // discharge exits with EC >= MDEC. This is less powerful than the existing
1712     // transform (since later exits aren't considered), but potentially more
1713     // powerful for any case where SCEV can prove a >=u b, but neither a == b
1714     // or a >u b.  Such a case is not currently known.
1715   }
1716   return Changed;
1717 }
1718 
1719 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1720   SmallVector<BasicBlock*, 16> ExitingBlocks;
1721   L->getExitingBlocks(ExitingBlocks);
1722 
1723   // Finally, see if we can rewrite our exit conditions into a loop invariant
1724   // form. If we have a read-only loop, and we can tell that we must exit down
1725   // a path which does not need any of the values computed within the loop, we
1726   // can rewrite the loop to exit on the first iteration.  Note that this
1727   // doesn't either a) tell us the loop exits on the first iteration (unless
1728   // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1729   // This transformation looks a lot like a restricted form of dead loop
1730   // elimination, but restricted to read-only loops and without neccesssarily
1731   // needing to kill the loop entirely.
1732   if (!LoopPredication)
1733     return false;
1734 
1735   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1736   // through *explicit* control flow.  We have to eliminate the possibility of
1737   // implicit exits (see below) before we know it's truly exact.
1738   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1739   if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
1740     return false;
1741 
1742   assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1743   assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1744 
1745   auto BadExit = [&](BasicBlock *ExitingBB) {
1746     // If our exiting block exits multiple loops, we can only rewrite the
1747     // innermost one.  Otherwise, we're changing how many times the innermost
1748     // loop runs before it exits.
1749     if (LI->getLoopFor(ExitingBB) != L)
1750       return true;
1751 
1752     // Can't rewrite non-branch yet.
1753     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1754     if (!BI)
1755       return true;
1756 
1757     // If already constant, nothing to do.
1758     if (isa<Constant>(BI->getCondition()))
1759       return true;
1760 
1761     // If the exit block has phis, we need to be able to compute the values
1762     // within the loop which contains them.  This assumes trivially lcssa phis
1763     // have already been removed; TODO: generalize
1764     BasicBlock *ExitBlock =
1765     BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1766     if (!ExitBlock->phis().empty())
1767       return true;
1768 
1769     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1770     if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
1771       return true;
1772 
1773     assert(SE->isLoopInvariant(ExitCount, L) &&
1774            "Exit count must be loop invariant");
1775     assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1776     return false;
1777   };
1778 
1779   // If we have any exits which can't be predicated themselves, than we can't
1780   // predicate any exit which isn't guaranteed to execute before it.  Consider
1781   // two exits (a) and (b) which would both exit on the same iteration.  If we
1782   // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1783   // we could convert a loop from exiting through (a) to one exiting through
1784   // (b).  Note that this problem exists only for exits with the same exit
1785   // count, and we could be more aggressive when exit counts are known inequal.
1786   llvm::sort(ExitingBlocks,
1787             [&](BasicBlock *A, BasicBlock *B) {
1788               // std::sort sorts in ascending order, so we want the inverse of
1789               // the normal dominance relation, plus a tie breaker for blocks
1790               // unordered by dominance.
1791               if (DT->properlyDominates(A, B)) return true;
1792               if (DT->properlyDominates(B, A)) return false;
1793               return A->getName() < B->getName();
1794             });
1795   // Check to see if our exit blocks are a total order (i.e. a linear chain of
1796   // exits before the backedge).  If they aren't, reasoning about reachability
1797   // is complicated and we choose not to for now.
1798   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1799     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1800       return false;
1801 
1802   // Given our sorted total order, we know that exit[j] must be evaluated
1803   // after all exit[i] such j > i.
1804   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1805     if (BadExit(ExitingBlocks[i])) {
1806       ExitingBlocks.resize(i);
1807       break;
1808     }
1809 
1810   if (ExitingBlocks.empty())
1811     return false;
1812 
1813   // We rely on not being able to reach an exiting block on a later iteration
1814   // then it's statically compute exit count.  The implementaton of
1815   // getExitCount currently has this invariant, but assert it here so that
1816   // breakage is obvious if this ever changes..
1817   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1818         return DT->dominates(ExitingBB, L->getLoopLatch());
1819       }));
1820 
1821   // At this point, ExitingBlocks consists of only those blocks which are
1822   // predicatable.  Given that, we know we have at least one exit we can
1823   // predicate if the loop is doesn't have side effects and doesn't have any
1824   // implicit exits (because then our exact BTC isn't actually exact).
1825   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
1826   // suggestions on how to improve this?  I can obviously bail out for outer
1827   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
1828   // is that enough for *all* side effects?
1829   for (BasicBlock *BB : L->blocks())
1830     for (auto &I : *BB)
1831       // TODO:isGuaranteedToTransfer
1832       if (I.mayHaveSideEffects())
1833         return false;
1834 
1835   bool Changed = false;
1836   // Finally, do the actual predication for all predicatable blocks.  A couple
1837   // of notes here:
1838   // 1) We don't bother to constant fold dominated exits with identical exit
1839   //    counts; that's simply a form of CSE/equality propagation and we leave
1840   //    it for dedicated passes.
1841   // 2) We insert the comparison at the branch.  Hoisting introduces additional
1842   //    legality constraints and we leave that to dedicated logic.  We want to
1843   //    predicate even if we can't insert a loop invariant expression as
1844   //    peeling or unrolling will likely reduce the cost of the otherwise loop
1845   //    varying check.
1846   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1847   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1848   Value *ExactBTCV = nullptr; // Lazily generated if needed.
1849   for (BasicBlock *ExitingBB : ExitingBlocks) {
1850     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1851 
1852     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1853     Value *NewCond;
1854     if (ExitCount == ExactBTC) {
1855       NewCond = L->contains(BI->getSuccessor(0)) ?
1856         B.getFalse() : B.getTrue();
1857     } else {
1858       Value *ECV = Rewriter.expandCodeFor(ExitCount);
1859       if (!ExactBTCV)
1860         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1861       Value *RHS = ExactBTCV;
1862       if (ECV->getType() != RHS->getType()) {
1863         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1864         ECV = B.CreateZExt(ECV, WiderTy);
1865         RHS = B.CreateZExt(RHS, WiderTy);
1866       }
1867       auto Pred = L->contains(BI->getSuccessor(0)) ?
1868         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1869       NewCond = B.CreateICmp(Pred, ECV, RHS);
1870     }
1871     Value *OldCond = BI->getCondition();
1872     BI->setCondition(NewCond);
1873     if (OldCond->use_empty())
1874       DeadInsts.emplace_back(OldCond);
1875     Changed = true;
1876   }
1877 
1878   return Changed;
1879 }
1880 
1881 //===----------------------------------------------------------------------===//
1882 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1883 //===----------------------------------------------------------------------===//
1884 
1885 bool IndVarSimplify::run(Loop *L) {
1886   // We need (and expect!) the incoming loop to be in LCSSA.
1887   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1888          "LCSSA required to run indvars!");
1889 
1890   // If LoopSimplify form is not available, stay out of trouble. Some notes:
1891   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
1892   //    canonicalization can be a pessimization without LSR to "clean up"
1893   //    afterwards.
1894   //  - We depend on having a preheader; in particular,
1895   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
1896   //    and we're in trouble if we can't find the induction variable even when
1897   //    we've manually inserted one.
1898   //  - LFTR relies on having a single backedge.
1899   if (!L->isLoopSimplifyForm())
1900     return false;
1901 
1902 #ifndef NDEBUG
1903   // Used below for a consistency check only
1904   // Note: Since the result returned by ScalarEvolution may depend on the order
1905   // in which previous results are added to its cache, the call to
1906   // getBackedgeTakenCount() may change following SCEV queries.
1907   const SCEV *BackedgeTakenCount;
1908   if (VerifyIndvars)
1909     BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1910 #endif
1911 
1912   bool Changed = false;
1913   // If there are any floating-point recurrences, attempt to
1914   // transform them to use integer recurrences.
1915   Changed |= rewriteNonIntegerIVs(L);
1916 
1917   // Create a rewriter object which we'll use to transform the code with.
1918   SCEVExpander Rewriter(*SE, DL, "indvars");
1919 #ifndef NDEBUG
1920   Rewriter.setDebugType(DEBUG_TYPE);
1921 #endif
1922 
1923   // Eliminate redundant IV users.
1924   //
1925   // Simplification works best when run before other consumers of SCEV. We
1926   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1927   // other expressions involving loop IVs have been evaluated. This helps SCEV
1928   // set no-wrap flags before normalizing sign/zero extension.
1929   Rewriter.disableCanonicalMode();
1930   Changed |= simplifyAndExtend(L, Rewriter, LI);
1931 
1932   // Check to see if we can compute the final value of any expressions
1933   // that are recurrent in the loop, and substitute the exit values from the
1934   // loop into any instructions outside of the loop that use the final values
1935   // of the current expressions.
1936   if (ReplaceExitValue != NeverRepl) {
1937     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1938                                              ReplaceExitValue, DeadInsts)) {
1939       NumReplaced += Rewrites;
1940       Changed = true;
1941     }
1942   }
1943 
1944   // Eliminate redundant IV cycles.
1945   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
1946 
1947   // Try to convert exit conditions to unsigned and rotate computation
1948   // out of the loop.  Note: Handles invalidation internally if needed.
1949   Changed |= canonicalizeExitCondition(L);
1950 
1951   // Try to eliminate loop exits based on analyzeable exit counts
1952   if (optimizeLoopExits(L, Rewriter))  {
1953     Changed = true;
1954     // Given we've changed exit counts, notify SCEV
1955     // Some nested loops may share same folded exit basic block,
1956     // thus we need to notify top most loop.
1957     SE->forgetTopmostLoop(L);
1958   }
1959 
1960   // Try to form loop invariant tests for loop exits by changing how many
1961   // iterations of the loop run when that is unobservable.
1962   if (predicateLoopExits(L, Rewriter)) {
1963     Changed = true;
1964     // Given we've changed exit counts, notify SCEV
1965     SE->forgetLoop(L);
1966   }
1967 
1968   // If we have a trip count expression, rewrite the loop's exit condition
1969   // using it.
1970   if (!DisableLFTR) {
1971     BasicBlock *PreHeader = L->getLoopPreheader();
1972     BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
1973 
1974     SmallVector<BasicBlock*, 16> ExitingBlocks;
1975     L->getExitingBlocks(ExitingBlocks);
1976     for (BasicBlock *ExitingBB : ExitingBlocks) {
1977       // Can't rewrite non-branch yet.
1978       if (!isa<BranchInst>(ExitingBB->getTerminator()))
1979         continue;
1980 
1981       // If our exitting block exits multiple loops, we can only rewrite the
1982       // innermost one.  Otherwise, we're changing how many times the innermost
1983       // loop runs before it exits.
1984       if (LI->getLoopFor(ExitingBB) != L)
1985         continue;
1986 
1987       if (!needsLFTR(L, ExitingBB))
1988         continue;
1989 
1990       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1991       if (isa<SCEVCouldNotCompute>(ExitCount))
1992         continue;
1993 
1994       // This was handled above, but as we form SCEVs, we can sometimes refine
1995       // existing ones; this allows exit counts to be folded to zero which
1996       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
1997       // until stable to handle cases like this better.
1998       if (ExitCount->isZero())
1999         continue;
2000 
2001       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2002       if (!IndVar)
2003         continue;
2004 
2005       // Avoid high cost expansions.  Note: This heuristic is questionable in
2006       // that our definition of "high cost" is not exactly principled.
2007       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2008                                        TTI, PreHeaderBR))
2009         continue;
2010 
2011       // Check preconditions for proper SCEVExpander operation. SCEV does not
2012       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2013       // any pass that uses the SCEVExpander must do it. This does not work
2014       // well for loop passes because SCEVExpander makes assumptions about
2015       // all loops, while LoopPassManager only forces the current loop to be
2016       // simplified.
2017       //
2018       // FIXME: SCEV expansion has no way to bail out, so the caller must
2019       // explicitly check any assumptions made by SCEV. Brittle.
2020       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2021       if (!AR || AR->getLoop()->getLoopPreheader())
2022         Changed |= linearFunctionTestReplace(L, ExitingBB,
2023                                              ExitCount, IndVar,
2024                                              Rewriter);
2025     }
2026   }
2027   // Clear the rewriter cache, because values that are in the rewriter's cache
2028   // can be deleted in the loop below, causing the AssertingVH in the cache to
2029   // trigger.
2030   Rewriter.clear();
2031 
2032   // Now that we're done iterating through lists, clean up any instructions
2033   // which are now dead.
2034   while (!DeadInsts.empty()) {
2035     Value *V = DeadInsts.pop_back_val();
2036 
2037     if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2038       Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2039     else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2040       Changed |=
2041           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2042   }
2043 
2044   // The Rewriter may not be used from this point on.
2045 
2046   // Loop-invariant instructions in the preheader that aren't used in the
2047   // loop may be sunk below the loop to reduce register pressure.
2048   Changed |= sinkUnusedInvariants(L);
2049 
2050   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2051   // trip count and therefore can further simplify exit values in addition to
2052   // rewriteLoopExitValues.
2053   Changed |= rewriteFirstIterationLoopExitValues(L);
2054 
2055   // Clean up dead instructions.
2056   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2057 
2058   // Check a post-condition.
2059   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2060          "Indvars did not preserve LCSSA!");
2061 
2062   // Verify that LFTR, and any other change have not interfered with SCEV's
2063   // ability to compute trip count.  We may have *changed* the exit count, but
2064   // only by reducing it.
2065 #ifndef NDEBUG
2066   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2067     SE->forgetLoop(L);
2068     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2069     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2070         SE->getTypeSizeInBits(NewBECount->getType()))
2071       NewBECount = SE->getTruncateOrNoop(NewBECount,
2072                                          BackedgeTakenCount->getType());
2073     else
2074       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2075                                                  NewBECount->getType());
2076     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2077                                  NewBECount) && "indvars must preserve SCEV");
2078   }
2079   if (VerifyMemorySSA && MSSAU)
2080     MSSAU->getMemorySSA()->verifyMemorySSA();
2081 #endif
2082 
2083   return Changed;
2084 }
2085 
2086 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2087                                           LoopStandardAnalysisResults &AR,
2088                                           LPMUpdater &) {
2089   Function *F = L.getHeader()->getParent();
2090   const DataLayout &DL = F->getParent()->getDataLayout();
2091 
2092   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2093                      WidenIndVars && AllowIVWidening);
2094   if (!IVS.run(&L))
2095     return PreservedAnalyses::all();
2096 
2097   auto PA = getLoopPassPreservedAnalyses();
2098   PA.preserveSet<CFGAnalyses>();
2099   if (AR.MSSA)
2100     PA.preserve<MemorySSAAnalysis>();
2101   return PA;
2102 }
2103 
2104 namespace {
2105 
2106 struct IndVarSimplifyLegacyPass : public LoopPass {
2107   static char ID; // Pass identification, replacement for typeid
2108 
2109   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2110     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2111   }
2112 
2113   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2114     if (skipLoop(L))
2115       return false;
2116 
2117     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2118     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2119     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2120     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2121     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2122     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2123     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2124     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2125     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2126     MemorySSA *MSSA = nullptr;
2127     if (MSSAAnalysis)
2128       MSSA = &MSSAAnalysis->getMSSA();
2129 
2130     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
2131     return IVS.run(L);
2132   }
2133 
2134   void getAnalysisUsage(AnalysisUsage &AU) const override {
2135     AU.setPreservesCFG();
2136     AU.addPreserved<MemorySSAWrapperPass>();
2137     getLoopAnalysisUsage(AU);
2138   }
2139 };
2140 
2141 } // end anonymous namespace
2142 
2143 char IndVarSimplifyLegacyPass::ID = 0;
2144 
2145 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2146                       "Induction Variable Simplification", false, false)
2147 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2148 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2149                     "Induction Variable Simplification", false, false)
2150 
2151 Pass *llvm::createIndVarSimplifyPass() {
2152   return new IndVarSimplifyLegacyPass();
2153 }
2154