//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting the condition from
  /// signed to unsigned, or by rotating the computation out of the loop.
  /// (See the inline comment about why this is duplicated from
  /// simplifyAndExtend.)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}
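
// For example (values illustrative, not from any test): 10.0 converts exactly
// to the integer 10, while 0.5 is inexact and a value outside the signed
// 64-bit range fails outright; in both failing cases IntVal is left untouched
// and the caller bails out.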

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a ConstantFP, or if the constant FP
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }
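
  // For example (illustrative IR): an exit test 'fcmp olt double %i, 1.0e4'
  // selects ICMP_SLT for the integer replacement built below.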

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }
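
  // Worked example (numbers illustrative): Init=0, Inc=3, Exit=10 gives
  // Range=10 and Leftover=1, so an EQ/NE form is rejected (the IV would step
  // over the exit value), while an SLT form with the same values is accepted.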

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop-invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}
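
// For example (illustrative IR): given a header phi
// '%i = phi i32 [ 0, %preheader ], [ %i.next, %latch ]' feeding an exit phi
// through an early exit whose condition is loop-invariant, that exit can only
// be taken on the first iteration, so the exit phi's incoming value can be
// replaced with the constant 0.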

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute the cost of all instructions on the induction variable more
  // comprehensively when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed' if there
  // are multiple users with both sign- and zero-extensions, in order not to
  // introduce nondeterministic behaviour based on the unspecified order of a
  // PHI node's users-iterator.
  WI.IsSigned |= IsSigned;
}
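
// Illustrative example (not from the source): on a 64-bit target, a narrow
// i32 IV whose value feeds both 'zext i32 %iv to i64' and 'sext i32 %iv to
// i64' ends up with WidestNativeType i64 and IsSigned true, regardless of
// which cast the users-iterator happens to visit first.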

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
          Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extensions until needed, and avoid running
    // other SCEV-based analyses prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated PHI node if so; otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    [[fallthrough]];
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}
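
// Illustrative example (hypothetical IR): for '%iv.next = add i32 %iv, %step'
// where '%iv' is a header phi and '%step' is loop-invariant, this returns
// '%iv'; the commuted form 'add i32 %step, %iv' is matched as well.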

/// Whether the current loop exit test is based on this value.  Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test.  This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}
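
// For example (illustrative): a latch test 'icmp slt i32 %iv, %n' needs LFTR
// because of the non-equality predicate, whereas 'icmp ne i32 %iv.next,
// %limit' against a simple counter is already canonical and is left alone.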

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.  Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison.  This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist.  Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit).
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users.  Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // way to exiting bb.
  return false;
}
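
// Illustrative example: if Root (assumed poison) feeds the divisor of a
// 'udiv' that dominates OnPathTo, executing that udiv on poison is immediate
// UB, so the function returns true and a new use of Root may safely be added.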

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example, Arguments
  // may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has no uses other than the (soon to be rewritten)
/// loop exit test and the IV increment itself.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L.  A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1.  Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}
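
// For example (illustrative IR): '%iv = phi i32 [ 0, %preheader ],
// [ %iv.next, %latch ]' with '%iv.next = add i32 %iv, 1' qualifies as a
// counter; a phi stepped by 2, or by a loop-variant amount, does not.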

/// Search the loop header for a loop counter (an add rec with a step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter with unit stride) after the backedge of loop
/// L is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Zero extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
      IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}
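
// Worked example (numbers illustrative): if IVInit is 7 and ExitCount is 18,
// IVLimit is 7 + 18 = 25 for a pre-increment comparison, or 26 when
// UsePostInc compares against the incremented IV.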

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as in SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
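
// Illustrative before/after (hypothetical IR): a latch branch on
// 'icmp slt i32 %iv, %n' becomes a branch on 'icmp ne i32 %iv.next, %limit'
// (named "exitcond" above), where %limit is the expanded loop-invariant
// IVLimit produced by genLoopLimit.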

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    SE->forgetValue(ToMove);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}
1290 
1291 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1292                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1293   auto *OldCond = BI->getCondition();
1294   LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1295                     << " with " << *NewCond << "\n");
1296   BI->setCondition(NewCond);
1297   if (OldCond->use_empty())
1298     DeadInsts.emplace_back(OldCond);
1299 }
1300 
1301 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1302                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1303   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1304   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1305   auto *OldCond = BI->getCondition();
1306   auto *NewCond =
1307       ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
1308   replaceExitCond(BI, NewCond, DeadInsts);
1309 }
1310 
1311 static void replaceLoopPHINodesWithPreheaderValues(
1312     LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1313     ScalarEvolution &SE) {
1314   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1315   auto *LoopPreheader = L->getLoopPreheader();
1316   auto *LoopHeader = L->getHeader();
1317   SmallVector<Instruction *> Worklist;
1318   for (auto &PN : LoopHeader->phis()) {
1319     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1320     for (User *U : PN.users())
1321       Worklist.push_back(cast<Instruction>(U));
1322     SE.forgetValue(&PN);
1323     PN.replaceAllUsesWith(PreheaderIncoming);
1324     DeadInsts.emplace_back(&PN);
1325   }
1326 
1327   // Replacing with the preheader value will often allow IV users to simplify
1328   // (especially if the preheader value is a constant).
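  // For instance (illustrative): if the preheader feeds 0 into a header phi,
  // a user such as 'add i32 %phi, %x' becomes 'add i32 0, %x', which
  // simplifyInstruction folds to %x, possibly cascading to its own users.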
1329   SmallPtrSet<Instruction *, 16> Visited;
1330   while (!Worklist.empty()) {
1331     Instruction *I = Worklist.pop_back_val();
1332     if (!Visited.insert(I).second)
1333       continue;
1334 
1335     // Don't simplify instructions outside the loop.
1336     if (!L->contains(I))
1337       continue;
1338 
1339     Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
1340     if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1341       for (User *U : I->users())
1342         Worklist.push_back(cast<Instruction>(U));
1343       I->replaceAllUsesWith(Res);
1344       DeadInsts.emplace_back(I);
1345     }
1346   }
1347 }
1348 
1349 static void replaceWithInvariantCond(
1350     const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
1351     const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
1352     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1353   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1354   Rewriter.setInsertPoint(BI);
1355   auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
1356   auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
1357   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1358   if (ExitIfTrue)
1359     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1360   IRBuilder<> Builder(BI);
1361   auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1362                                      BI->getCondition()->getName());
1363   replaceExitCond(BI, NewCond, DeadInsts);
1364 }
1365 
1366 static bool optimizeLoopExitWithUnknownExitCount(
1367     const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
1368     const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1369     ScalarEvolution *SE, SCEVExpander &Rewriter,
1370     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1371   ICmpInst::Predicate Pred;
1372   Value *LHS, *RHS;
1373   BasicBlock *TrueSucc, *FalseSucc;
1374   if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
1375                       m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
1376     return false;
1377 
1378   assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
1379          "Not a loop exit!");
1380 
1381   // 'LHS pred RHS' should now mean that we stay in the loop.
1382   if (L->contains(FalseSucc))
1383     Pred = CmpInst::getInversePredicate(Pred);
1384 
1385   // If we are proving loop exit, invert the predicate.
1386   if (Inverted)
1387     Pred = CmpInst::getInversePredicate(Pred);
1388 
1389   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1390   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1391   // Can we prove it to be trivially true?
1392   if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
1393     foldExit(L, ExitingBB, Inverted, DeadInsts);
1394     return true;
1395   }
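  // For example (illustrative): for a staying condition 'i u< len' where the
  // loop guards already establish that 'len' exceeds every value 'i' can
  // take, the condition holds on all iterations and the exit is folded to a
  // constant branch.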
1396   // The logic below works only for the non-inverted condition.
1397   if (Inverted)
1398     return false;
1399 
1400   auto *ARTy = LHSS->getType();
1401   auto *MaxIterTy = MaxIter->getType();
1402   // If possible, adjust types: widen MaxIter, or truncate it if it provably fits.
1403   if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1404     MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1405   else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1406     const SCEV *MinusOne = SE->getMinusOne(ARTy);
1407     auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1408     if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1409       MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1410   }
1411 
1412   if (SkipLastIter) {
1413     const SCEV *One = SE->getOne(MaxIter->getType());
1414     MaxIter = SE->getMinusSCEV(MaxIter, One);
1415   }
1416 
1417   // Check if there is a loop-invariant predicate equivalent to our check.
1418   auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1419                                                                L, BI, MaxIter);
1420   if (!LIP)
1421     return false;
1422 
1423   // Can we prove it to be trivially true?
1424   if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1425     foldExit(L, ExitingBB, Inverted, DeadInsts);
1426   else
1427     replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
1428                              Rewriter, DeadInsts);
1429 
1430   return true;
1431 }
1432 
1433 bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1434   // Note: This duplicates a particular part of SimplifyIndVars' reasoning.
1435   // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1436   // never reaches the icmp, since the zext doesn't fold to an AddRec unless
1437   // it already has flags.  The alternative would be to extend the set of
1438   // "interesting" IV users to include the icmp, but doing that regresses
1439   // results in practice: it queries SCEVs before the trip counts that rely
1440   // on them, which causes SCEV to cache sub-optimal answers.  That concern
1441   // about caching sub-optimal results is why we only query SCEVs of the
1442   // loop-invariant RHS here.
1443   SmallVector<BasicBlock*, 16> ExitingBlocks;
1444   L->getExitingBlocks(ExitingBlocks);
1445   bool Changed = false;
1446   for (auto *ExitingBB : ExitingBlocks) {
1447     auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1448     if (!BI)
1449       continue;
1450     assert(BI->isConditional() && "exit branch must be conditional");
1451 
1452     auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1453     if (!ICmp || !ICmp->hasOneUse())
1454       continue;
1455 
1456     auto *LHS = ICmp->getOperand(0);
1457     auto *RHS = ICmp->getOperand(1);
1458     // For the range reasoning, avoid computing SCEVs in the loop to avoid
1459     // poisoning the cache with sub-optimal results.  For the must-execute
1460     // case, this is a necessary precondition for correctness.
1461     if (!L->isLoopInvariant(RHS)) {
1462       if (!L->isLoopInvariant(LHS))
1463         continue;
1464       // Same logic applies for the inverse case
1465       std::swap(LHS, RHS);
1466     }
1467 
1468     // Match (icmp signed-cond zext, RHS)
1469     Value *LHSOp = nullptr;
1470     if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1471       continue;
1472 
1473     const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1474     const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1475     const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1476     auto FullCR = ConstantRange::getFull(InnerBitWidth);
1477     FullCR = FullCR.zeroExtend(OuterBitWidth);
1478     auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1479     if (FullCR.contains(RHSCR)) {
1480       // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1481       // replace the signed condition with the unsigned version.
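      // For example (illustrative), with i8 inner and i32 outer types:
      //   icmp slt i32 (zext i8 %x to i32), %len
      // becomes
      //   icmp ult i32 (zext i8 %x to i32), %len
      // once %len is known to lie in [0, 256): both operands are then
      // non-negative, so the signed and unsigned orders agree.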
1482       ICmp->setPredicate(ICmp->getUnsignedPredicate());
1483       Changed = true;
1484       // Note: No SCEV invalidation needed.  We've changed the predicate, but
1485       // have not changed exit counts, or the values produced by the compare.
1486       continue;
1487     }
1488   }
1489 
1490   // Now that we've canonicalized the condition to match the extend,
1491   // see if we can rotate the extend out of the loop.
1492   for (auto *ExitingBB : ExitingBlocks) {
1493     auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1494     if (!BI)
1495       continue;
1496     assert(BI->isConditional() && "exit branch must be conditional");
1497 
1498     auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1499     if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1500       continue;
1501 
1502     bool Swapped = false;
1503     auto *LHS = ICmp->getOperand(0);
1504     auto *RHS = ICmp->getOperand(1);
1505     if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
1506       // Nothing to rotate
1507       continue;
1508     if (L->isLoopInvariant(LHS)) {
1509       // Same logic applies for the inverse case until we actually pick
1510       // which operand of the compare to update.
1511       Swapped = true;
1512       std::swap(LHS, RHS);
1513     }
1514     assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1515 
1516     // Match (icmp unsigned-cond zext, RHS)
1517     // TODO: Extend to handle corresponding sext/signed-cmp case
1518     // TODO: Extend to other invertible functions
1519     Value *LHSOp = nullptr;
1520     if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1521       continue;
1522 
1523     // In general, we only rotate if we can do so without increasing the number
1524     // of instructions.  The exception is when we have a zext(add-rec).  The
1525     // reason for allowing this exception is that we know we need to get rid
1526     // of the zext for SCEV to be able to compute a trip count for said loops;
1527     // we consider the new trip count valuable enough to increase instruction
1528     // count by one.
1529     if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1530       continue;
1531 
1532     // Given an icmp unsigned-cond zext(Op), where zext(trunc(RHS)) == RHS,
1533     // replace it with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1534     // when the zext is loop-varying and RHS is loop-invariant.  This converts
1535     // loop-varying work to loop-invariant work.
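    // For example (illustrative), with an i8 IV and an i32 bound:
    //   %wide = zext i8 %iv to i32
    //   %cmp  = icmp ult i32 %wide, %len
    // becomes
    //   %t    = trunc i32 %len to i8    ; emitted in the preheader
    //   %cmp  = icmp ult i8 %iv, %t
    // which is valid whenever %len is known to lie in [0, 256).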
1536     auto doRotateTransform = [&]() {
1537       assert(ICmp->isUnsigned() && "must have proven unsigned already");
1538       auto *NewRHS =
1539         CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
1540                          L->getLoopPreheader()->getTerminator());
1541       ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
1542       ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
1543       if (LHS->use_empty())
1544         DeadInsts.push_back(LHS);
1545     };
1546 
1548     const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1549     const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1550     const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1551     auto FullCR = ConstantRange::getFull(InnerBitWidth);
1552     FullCR = FullCR.zeroExtend(OuterBitWidth);
1553     auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1554     if (FullCR.contains(RHSCR)) {
1555       doRotateTransform();
1556       Changed = true;
1557       // Note: we are leaving SCEV unfortunately imprecise here, as rotation
1558       // tends to reveal information about trip counts that was not
1559       // previously visible.
1560       continue;
1561     }
1562   }
1563 
1564   return Changed;
1565 }
1566 
1567 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1568   SmallVector<BasicBlock*, 16> ExitingBlocks;
1569   L->getExitingBlocks(ExitingBlocks);
1570 
1571   // Remove all exits which aren't both rewriteable and execute on every
1572   // iteration.
1573   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1574     // If our exiting block exits multiple loops, we can only rewrite the
1575     // innermost one.  Otherwise, we're changing how many times the innermost
1576     // loop runs before it exits.
1577     if (LI->getLoopFor(ExitingBB) != L)
1578       return true;
1579 
1580     // Can't rewrite non-branch yet.
1581     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1582     if (!BI)
1583       return true;
1584 
1585     // Likewise, the loop latch must be dominated by the exiting BB.
1586     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1587       return true;
1588 
1589     if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1590       // If already constant, nothing to do. However, if this is an
1591       // unconditional exit, we can still replace header phis with their
1592       // preheader value.
1593       if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1594         replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1595       return true;
1596     }
1597 
1598     return false;
1599   });
1600 
1601   if (ExitingBlocks.empty())
1602     return false;
1603 
1604   // Get a symbolic upper bound on the loop backedge taken count.
1605   const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1606   if (isa<SCEVCouldNotCompute>(MaxBECount))
1607     return false;
1608 
1609   // Visit our exit blocks in order of dominance.  Since all exits must
1610   // dominate the latch, we know there is a total dominance order
1611   // between them.
1612   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1613                // std::sort sorts in ascending order, so we want the inverse of
1614                // the normal dominance relation.
1615                if (A == B) return false;
1616                if (DT->properlyDominates(A, B))
1617                  return true;
1618                else {
1619                  assert(DT->properlyDominates(B, A) &&
1620                         "expected total dominance order!");
1621                  return false;
1622                }
1623   });
1624 #ifndef NDEBUG
1625   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1626     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1627   }
1628 #endif
1629 
1630   bool Changed = false;
1631   bool SkipLastIter = false;
1632   SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1633   for (BasicBlock *ExitingBB : ExitingBlocks) {
1634     const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1635     const SCEV *MaxExitCount = SE->getExitCount(
1636         L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1637     if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1638       // Okay, we do not know the exit count here. Can we at least prove that it
1639       // will remain the same within iteration space?
1640       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1641       auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
1642         return optimizeLoopExitWithUnknownExitCount(
1643             L, BI, ExitingBB, MaxBECount, Inverted, SkipLastIter, SE, Rewriter,
1644             DeadInsts);
1645       };
1646 
1647       // TODO: We might have proved that we can skip the last iteration for
1648       // this check. In this case, we only want to check the condition on the
1649       // pre-last iteration (MaxBECount - 1). However, there is a nasty
1650       // corner case:
1651       //
1652       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1653       //
1654       // If we could not prove that len != 0, then we also could not prove that
1655     // (len - 1) is not UINT_MAX. If we simply query (len - 1), then
1656       // OptimizeCond will likely not prove anything for it, even if it could
1657       // prove the same fact for len.
1658       //
1659       // As a temporary solution, we query both last and pre-last iterations in
1660       // hope that we will be able to prove triviality for at least one of
1661       // them. We can stop querying MaxBECount for this case once SCEV
1662       // understands that (MaxBECount - 1) will not overflow here.
1663       if (OptimizeCond(false, false) || OptimizeCond(true, false))
1664         Changed = true;
1665       else if (SkipLastIter)
1666         if (OptimizeCond(false, true) || OptimizeCond(true, true))
1667           Changed = true;
1668       if (MaxBECount == MaxExitCount)
1669         // If the loop has more than 1 iteration, all further checks will be
1670         // executed 1 iteration less.
1671         SkipLastIter = true;
1672       continue;
1673     }
1674 
1675     if (MaxBECount == MaxExitCount)
1676       // If the loop has more than 1 iteration, all further checks will be
1677       // executed 1 iteration less.
1678       SkipLastIter = true;
1679 
1680     // If we know we'd exit on the first iteration, rewrite the exit to
1681     // reflect this.  This does not imply the loop must exit through this
1682     // exit; there may be an earlier one taken on the first iteration.
1683     // We know that the backedge can't be taken, so we replace all
1684     // the header PHIs with values coming from the preheader.
1685     if (ExactExitCount->isZero()) {
1686       foldExit(L, ExitingBB, true, DeadInsts);
1687       replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1688       Changed = true;
1689       continue;
1690     }
1691 
1692     assert(ExactExitCount->getType()->isIntegerTy() &&
1693            MaxBECount->getType()->isIntegerTy() &&
1694            "Exit counts must be integers");
1695 
1696     Type *WiderType =
1697         SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1698     ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1699     MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1700     assert(MaxBECount->getType() == ExactExitCount->getType());
1701 
1702     // Can we prove that some other exit must be taken strictly before this
1703     // one?
1704     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1705                                      ExactExitCount)) {
1706       foldExit(L, ExitingBB, false, DeadInsts);
1707       Changed = true;
1708       continue;
1709     }
1710 
1711     // As we run, keep track of which exit counts we've encountered.  If we
1712     // find a duplicate, we've found an exit which would have exited on the
1713     // same iteration, but (from the visit order) strictly follows another
1714     // exit with the same count, and is thus dead.
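    // For example (illustrative): if two exits both test 'i == n' and the
    // first dominates the second, the second can never be the exit actually
    // taken, so its branch is folded to stay in the loop.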
1715     if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1716       foldExit(L, ExitingBB, false, DeadInsts);
1717       Changed = true;
1718       continue;
1719     }
1720 
1721     // TODO: There might be another opportunity to leverage SCEV's reasoning
1722     // here.  If we kept track of the min of dominating exits so far, we could
1723     // discharge exits with EC >= MDEC. This is less powerful than the existing
1724     // transform (since later exits aren't considered), but potentially more
1725     // powerful for any case where SCEV can prove a >=u b, but neither a == b
1726     // nor a >u b.  Such a case is not currently known.
1727   }
1728   return Changed;
1729 }
1730 
1731 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1732   SmallVector<BasicBlock*, 16> ExitingBlocks;
1733   L->getExitingBlocks(ExitingBlocks);
1734 
1735   // Finally, see if we can rewrite our exit conditions into a loop invariant
1736   // form. If we have a read-only loop, and we can tell that we must exit down
1737   // a path which does not need any of the values computed within the loop, we
1738   // can rewrite the loop to exit on the first iteration.  Note that this
1739   // neither a) tells us the loop exits on the first iteration (unless *all*
1740   // exits are predicatable) nor b) tells us *which* exit might be taken.
1741   // This transformation looks a lot like a restricted form of dead loop
1742   // elimination, but is restricted to read-only loops and does not
1743   // necessarily need to kill the loop entirely.
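  //
  // For example (illustrative): in a read-only loop such as
  //   for (i = 0; i != n; i++)
  //     if (i == m) break;
  // both exit counts are computable, so the in-loop test 'i == m' can be
  // replaced by a loop-invariant comparison of exit counts computed in the
  // preheader; since the loop is read-only, any change in how many
  // iterations run is unobservable.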
1744   if (!LoopPredication)
1745     return false;
1746 
1747   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1748   // through *explicit* control flow.  We have to eliminate the possibility of
1749   // implicit exits (see below) before we know it's truly exact.
1750   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1751   if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1752     return false;
1753 
1754   assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1755   assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1756 
1757   auto BadExit = [&](BasicBlock *ExitingBB) {
1758     // If our exiting block exits multiple loops, we can only rewrite the
1759     // innermost one.  Otherwise, we're changing how many times the innermost
1760     // loop runs before it exits.
1761     if (LI->getLoopFor(ExitingBB) != L)
1762       return true;
1763 
1764     // Can't rewrite non-branch yet.
1765     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1766     if (!BI)
1767       return true;
1768 
1769     // If already constant, nothing to do.
1770     if (isa<Constant>(BI->getCondition()))
1771       return true;
1772 
1773     // If the exit block has phis, we would need to compute the values they
1774     // take from within the loop.  This assumes trivially-LCSSA phis have
1775     // already been removed; TODO: generalize.
1776     BasicBlock *ExitBlock =
1777         BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1778     if (!ExitBlock->phis().empty())
1779       return true;
1780 
1781     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1782     if (isa<SCEVCouldNotCompute>(ExitCount) ||
1783         !Rewriter.isSafeToExpand(ExitCount))
1784       return true;
1785 
1786     assert(SE->isLoopInvariant(ExitCount, L) &&
1787            "Exit count must be loop invariant");
1788     assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1789     return false;
1790   };
1791 
1792   // If we have any exits which can't be predicated themselves, than we can't
1793   // predicate any exit which isn't guaranteed to execute before it.  Consider
1794   // two exits (a) and (b) which would both exit on the same iteration.  If we
1795   // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1796   // we could convert a loop from exiting through (a) to one exiting through
1797   // (b).  Note that this problem exists only for exits with the same exit
1798   // count, and we could be more aggressive when exit counts are known inequal.
1799   llvm::sort(ExitingBlocks,
1800             [&](BasicBlock *A, BasicBlock *B) {
1801               // std::sort sorts in ascending order, so we want the inverse of
1802               // the normal dominance relation, plus a tie breaker for blocks
1803               // unordered by dominance.
1804               if (DT->properlyDominates(A, B)) return true;
1805               if (DT->properlyDominates(B, A)) return false;
1806               return A->getName() < B->getName();
1807             });
1808   // Check to see if our exit blocks are a total order (i.e. a linear chain of
1809   // exits before the backedge).  If they aren't, reasoning about reachability
1810   // is complicated and we choose not to for now.
1811   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1812     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1813       return false;
1814 
1815   // Given our sorted total order, we know that exit[j] must be evaluated
1816   // after all exit[i] such that j > i.
1817   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1818     if (BadExit(ExitingBlocks[i])) {
1819       ExitingBlocks.resize(i);
1820       break;
1821     }
1822 
1823   if (ExitingBlocks.empty())
1824     return false;
1825 
1826   // We rely on not being able to reach an exiting block on a later iteration
1827   // than its statically computed exit count.  The implementation of
1828   // getExitCount currently has this invariant, but assert it here so that
1829   // breakage is obvious if this ever changes.
1830   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1831         return DT->dominates(ExitingBB, L->getLoopLatch());
1832       }));
1833 
1834   // At this point, ExitingBlocks consists of only those blocks which are
1835   // predicatable.  Given that, we know we have at least one exit we can
1836   // predicate if the loop doesn't have side effects and doesn't have any
1837   // implicit exits (because then our exact BTC isn't actually exact).
1838   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
1839   // suggestions on how to improve this?  I can obviously bail out for outer
1840   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
1841   // is that enough for *all* side effects?
1842   for (BasicBlock *BB : L->blocks())
1843     for (auto &I : *BB)
1844       // TODO: use isGuaranteedToTransferExecutionToSuccessor.
1845       if (I.mayHaveSideEffects())
1846         return false;
1847 
1848   bool Changed = false;
1849   // Finally, do the actual predication for all predicatable blocks.  A couple
1850   // of notes here:
1851   // 1) We don't bother to constant fold dominated exits with identical exit
1852   //    counts; that's simply a form of CSE/equality propagation and we leave
1853   //    it for dedicated passes.
1854   // 2) We insert the comparison at the branch.  Hoisting introduces additional
1855   //    legality constraints and we leave that to dedicated logic.  We want to
1856   //    predicate even if we can't insert a loop invariant expression as
1857   //    peeling or unrolling will likely reduce the cost of the otherwise loop
1858   //    varying check.
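  //
  // Concretely (illustrative): an exit whose computed count is structurally
  // identical to ExactBTC folds to a constant branch; any other exit is
  // rewritten to branch on a comparison of its exit count against ExactBTC,
  // with the SCEV expansions emitted once in the preheader.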
1859   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1860   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1861   Value *ExactBTCV = nullptr; // Lazily generated if needed.
1862   for (BasicBlock *ExitingBB : ExitingBlocks) {
1863     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1864 
1865     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1866     Value *NewCond;
1867     if (ExitCount == ExactBTC) {
1868       NewCond = L->contains(BI->getSuccessor(0)) ?
1869         B.getFalse() : B.getTrue();
1870     } else {
1871       Value *ECV = Rewriter.expandCodeFor(ExitCount);
1872       if (!ExactBTCV)
1873         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1874       Value *RHS = ExactBTCV;
1875       if (ECV->getType() != RHS->getType()) {
1876         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1877         ECV = B.CreateZExt(ECV, WiderTy);
1878         RHS = B.CreateZExt(RHS, WiderTy);
1879       }
1880       auto Pred = L->contains(BI->getSuccessor(0)) ?
1881         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1882       NewCond = B.CreateICmp(Pred, ECV, RHS);
1883     }
1884     Value *OldCond = BI->getCondition();
1885     BI->setCondition(NewCond);
1886     if (OldCond->use_empty())
1887       DeadInsts.emplace_back(OldCond);
1888     Changed = true;
1889   }
1890 
1891   return Changed;
1892 }
1893 
1894 //===----------------------------------------------------------------------===//
1895 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1896 //===----------------------------------------------------------------------===//
1897 
1898 bool IndVarSimplify::run(Loop *L) {
1899   // We need (and expect!) the incoming loop to be in LCSSA.
1900   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1901          "LCSSA required to run indvars!");
1902 
1903   // If LoopSimplify form is not available, stay out of trouble. Some notes:
1904   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
1905   //    canonicalization can be a pessimization without LSR to "clean up"
1906   //    afterwards.
1907   //  - We depend on having a preheader; in particular,
1908   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
1909   //    and we're in trouble if we can't find the induction variable even when
1910   //    we've manually inserted one.
1911   //  - LFTR relies on having a single backedge.
1912   if (!L->isLoopSimplifyForm())
1913     return false;
1914 
1915 #ifndef NDEBUG
1916   // Used below for a consistency check only.
1917   // Note: Since the result returned by ScalarEvolution may depend on the
1918   // order in which previous results are added to its cache, the call to
1919   // getBackedgeTakenCount() may change the results of subsequent SCEV queries.
1920   const SCEV *BackedgeTakenCount;
1921   if (VerifyIndvars)
1922     BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1923 #endif
1924 
1925   bool Changed = false;
1926   // If there are any floating-point recurrences, attempt to
1927   // transform them to use integer recurrences.
1928   Changed |= rewriteNonIntegerIVs(L);
1929 
1930   // Create a rewriter object which we'll use to transform the code with.
1931   SCEVExpander Rewriter(*SE, DL, "indvars");
1932 #ifndef NDEBUG
1933   Rewriter.setDebugType(DEBUG_TYPE);
1934 #endif
1935 
1936   // Eliminate redundant IV users.
1937   //
1938   // Simplification works best when run before other consumers of SCEV. We
1939   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1940   // other expressions involving loop IVs have been evaluated. This helps SCEV
1941   // set no-wrap flags before normalizing sign/zero extension.
1942   Rewriter.disableCanonicalMode();
1943   Changed |= simplifyAndExtend(L, Rewriter, LI);
1944 
1945   // Check to see if we can compute the final value of any expressions
1946   // that are recurrent in the loop, and substitute the exit values from the
1947   // loop into any instructions outside of the loop that use the final values
1948   // of the current expressions.
1949   if (ReplaceExitValue != NeverRepl) {
1950     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1951                                              ReplaceExitValue, DeadInsts)) {
1952       NumReplaced += Rewrites;
1953       Changed = true;
1954     }
1955   }
1956 
1957   // Eliminate redundant IV cycles.
1958   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1959 
1960   // Try to convert exit conditions to unsigned and rotate computation
1961   // out of the loop.  Note: Handles invalidation internally if needed.
1962   Changed |= canonicalizeExitCondition(L);
1963 
1964   // Try to eliminate loop exits based on analyzeable exit counts
1965   if (optimizeLoopExits(L, Rewriter))  {
1966     Changed = true;
1967     // Given we've changed exit counts, notify SCEV
1968     // Some nested loops may share the same folded exit basic block,
1969     // so we need to notify the topmost loop.
1970     SE->forgetTopmostLoop(L);
1971   }
1972 
1973   // Try to form loop invariant tests for loop exits by changing how many
1974   // iterations of the loop run when that is unobservable.
1975   if (predicateLoopExits(L, Rewriter)) {
1976     Changed = true;
1977     // Given we've changed exit counts, notify SCEV
1978     SE->forgetLoop(L);
1979   }
1980 
1981   // If we have a trip count expression, rewrite the loop's exit condition
1982   // using it.
1983   if (!DisableLFTR) {
1984     BasicBlock *PreHeader = L->getLoopPreheader();
1985 
1986     SmallVector<BasicBlock*, 16> ExitingBlocks;
1987     L->getExitingBlocks(ExitingBlocks);
1988     for (BasicBlock *ExitingBB : ExitingBlocks) {
1989       // Can't rewrite non-branch yet.
1990       if (!isa<BranchInst>(ExitingBB->getTerminator()))
1991         continue;
1992 
1993       // If our exiting block exits multiple loops, we can only rewrite the
1994       // innermost one.  Otherwise, we're changing how many times the innermost
1995       // loop runs before it exits.
1996       if (LI->getLoopFor(ExitingBB) != L)
1997         continue;
1998 
1999       if (!needsLFTR(L, ExitingBB))
2000         continue;
2001 
2002       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2003       if (isa<SCEVCouldNotCompute>(ExitCount))
2004         continue;
2005 
2006       // This was handled above, but as we form SCEVs, we can sometimes refine
2007       // existing ones; this allows exit counts to be folded to zero which
2008       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
2009       // until stable to handle cases like this better.
2010       if (ExitCount->isZero())
2011         continue;
2012 
2013       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2014       if (!IndVar)
2015         continue;
2016 
2017       // Avoid high cost expansions.  Note: This heuristic is questionable in
2018       // that our definition of "high cost" is not exactly principled.
2019       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2020                                        TTI, PreHeader->getTerminator()))
2021         continue;
2022 
2023       // Check preconditions for proper SCEVExpander operation. SCEV does not
2024       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2025       // any pass that uses the SCEVExpander must do it. This does not work
2026       // well for loop passes because SCEVExpander makes assumptions about
2027       // all loops, while LoopPassManager only forces the current loop to be
2028       // simplified.
2029       //
2030       // FIXME: SCEV expansion has no way to bail out, so the caller must
2031       // explicitly check any assumptions made by SCEV. Brittle.
2032       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2033       if (!AR || AR->getLoop()->getLoopPreheader())
2034         Changed |= linearFunctionTestReplace(L, ExitingBB,
2035                                              ExitCount, IndVar,
2036                                              Rewriter);
2037     }
2038   }
2039   // Clear the rewriter cache, because values that are in the rewriter's cache
2040   // can be deleted in the loop below, causing the AssertingVH in the cache to
2041   // trigger.
2042   Rewriter.clear();
2043 
2044   // Now that we're done iterating through lists, clean up any instructions
2045   // which are now dead.
2046   while (!DeadInsts.empty()) {
2047     Value *V = DeadInsts.pop_back_val();
2048 
2049     if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2050       Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2051     else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2052       Changed |=
2053           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2054   }
2055 
2056   // The Rewriter may not be used from this point on.
2057 
2058   // Loop-invariant instructions in the preheader that aren't used in the
2059   // loop may be sunk below the loop to reduce register pressure.
2060   Changed |= sinkUnusedInvariants(L);
2061 
2062   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2063   // trip count and therefore can further simplify exit values in addition to
2064   // rewriteLoopExitValues.
2065   Changed |= rewriteFirstIterationLoopExitValues(L);
2066 
2067   // Clean up dead instructions.
2068   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2069 
2070   // Check a post-condition.
2071   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2072          "Indvars did not preserve LCSSA!");
2073 
2074   // Verify that LFTR and any other changes have not interfered with SCEV's
2075   // ability to compute trip count.  We may have *changed* the exit count, but
2076   // only by reducing it.
2077 #ifndef NDEBUG
2078   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2079     SE->forgetLoop(L);
2080     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2081     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2082         SE->getTypeSizeInBits(NewBECount->getType()))
2083       NewBECount = SE->getTruncateOrNoop(NewBECount,
2084                                          BackedgeTakenCount->getType());
2085     else
2086       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2087                                                  NewBECount->getType());
2088     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2089                                  NewBECount) && "indvars must preserve SCEV");
2090   }
2091   if (VerifyMemorySSA && MSSAU)
2092     MSSAU->getMemorySSA()->verifyMemorySSA();
2093 #endif
2094 
2095   return Changed;
2096 }
2097 
2098 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2099                                           LoopStandardAnalysisResults &AR,
2100                                           LPMUpdater &) {
2101   Function *F = L.getHeader()->getParent();
2102   const DataLayout &DL = F->getParent()->getDataLayout();
2103 
2104   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2105                      WidenIndVars && AllowIVWidening);
2106   if (!IVS.run(&L))
2107     return PreservedAnalyses::all();
2108 
2109   auto PA = getLoopPassPreservedAnalyses();
2110   PA.preserveSet<CFGAnalyses>();
2111   if (AR.MSSA)
2112     PA.preserve<MemorySSAAnalysis>();
2113   return PA;
2114 }
2115 
2116 namespace {
2117 
2118 struct IndVarSimplifyLegacyPass : public LoopPass {
2119   static char ID; // Pass identification, replacement for typeid
2120 
2121   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2122     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2123   }
2124 
2125   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2126     if (skipLoop(L))
2127       return false;
2128 
2129     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2130     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2131     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2132     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2133     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2134     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2135     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2136     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2137     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2138     MemorySSA *MSSA = nullptr;
2139     if (MSSAAnalysis)
2140       MSSA = &MSSAAnalysis->getMSSA();
2141 
2142     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
2143     return IVS.run(L);
2144   }
2145 
2146   void getAnalysisUsage(AnalysisUsage &AU) const override {
2147     AU.setPreservesCFG();
2148     AU.addPreserved<MemorySSAWrapperPass>();
2149     getLoopAnalysisUsage(AU);
2150   }
2151 };
2152 
2153 } // end anonymous namespace
2154 
2155 char IndVarSimplifyLegacyPass::ID = 0;
2156 
2157 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2158                       "Induction Variable Simplification", false, false)
2159 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2160 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2161                     "Induction Variable Simplification", false, false)
2162 
2163 Pass *llvm::createIndVarSimplifyPass() {
2164   return new IndVarSimplifyLegacyPass();
2165 }
2166