xref: /llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision 2a3ac7fd0c9e25ecad272243dabc155e348f2910)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/Optional.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/iterator_range.h"
36 #include "llvm/Analysis/LoopInfo.h"
37 #include "llvm/Analysis/LoopPass.h"
38 #include "llvm/Analysis/MemorySSA.h"
39 #include "llvm/Analysis/MemorySSAUpdater.h"
40 #include "llvm/Analysis/ScalarEvolution.h"
41 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
42 #include "llvm/Analysis/TargetLibraryInfo.h"
43 #include "llvm/Analysis/TargetTransformInfo.h"
44 #include "llvm/Analysis/ValueTracking.h"
45 #include "llvm/IR/BasicBlock.h"
46 #include "llvm/IR/Constant.h"
47 #include "llvm/IR/ConstantRange.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/DataLayout.h"
50 #include "llvm/IR/DerivedTypes.h"
51 #include "llvm/IR/Dominators.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/IRBuilder.h"
54 #include "llvm/IR/InstrTypes.h"
55 #include "llvm/IR/Instruction.h"
56 #include "llvm/IR/Instructions.h"
57 #include "llvm/IR/IntrinsicInst.h"
58 #include "llvm/IR/Intrinsics.h"
59 #include "llvm/IR/Module.h"
60 #include "llvm/IR/Operator.h"
61 #include "llvm/IR/PassManager.h"
62 #include "llvm/IR/PatternMatch.h"
63 #include "llvm/IR/Type.h"
64 #include "llvm/IR/Use.h"
65 #include "llvm/IR/User.h"
66 #include "llvm/IR/Value.h"
67 #include "llvm/IR/ValueHandle.h"
68 #include "llvm/InitializePasses.h"
69 #include "llvm/Pass.h"
70 #include "llvm/Support/Casting.h"
71 #include "llvm/Support/CommandLine.h"
72 #include "llvm/Support/Compiler.h"
73 #include "llvm/Support/Debug.h"
74 #include "llvm/Support/MathExtras.h"
75 #include "llvm/Support/raw_ostream.h"
76 #include "llvm/Transforms/Scalar.h"
77 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
78 #include "llvm/Transforms/Utils/Local.h"
79 #include "llvm/Transforms/Utils/LoopUtils.h"
80 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
81 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
82 #include <cassert>
83 #include <cstdint>
84 #include <utility>
85 
86 using namespace llvm;
87 using namespace PatternMatch;
88 
#define DEBUG_TYPE "indvars"

// Pass statistics, reported with -stats.
STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

// Strategy used when replacing a loop exit value with an equivalent
// loop-invariant computation; defaults to replacing only when cheap.
static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));
138 
139 namespace {
140 
/// Implementation of the induction-variable simplification transform; see the
/// file header for the overall description.  One instance is created per loop
/// invocation and driven through run().
class IndVarSimplify {
  // Required analyses, owned by the caller.
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  // Non-null only when a MemorySSA analysis was provided to the constructor;
  // kept up to date as instructions are deleted.
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  // Instructions made dead by the transforms, tracked via weak handles so
  // intermediate deletions are observed safely.
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  // Whether IV widening is permitted for this run.
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzeable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  /// Run all simplifications on loop L; returns true if any change was made.
  bool run(Loop *L);
};
187 
188 } // end anonymous namespace
189 
190 //===----------------------------------------------------------------------===//
191 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
192 //===----------------------------------------------------------------------===//
193 
194 /// Convert APF to an integer, if possible.
195 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
196   bool isExact = false;
197   // See if we can convert this to an int64_t
198   uint64_t UIntVal;
199   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
200                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
201       !isExact)
202     return false;
203   IntVal = UIntVal;
204   return true;
205 }
206 
207 /// If the loop has floating induction variable then insert corresponding
208 /// integer induction variable if possible.
209 /// For example,
210 /// for(double i = 0; i < 10000; ++i)
211 ///   bar(i)
212 /// is converted into
213 /// for(int i = 0; i < 10000; ++i)
214 ///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  // L->contains(...) yields 0/1, so one index is the preheader edge and the
  // XOR'd index is the backedge.
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if increment operation is not
  // an add or increment value can not be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.  (The iterator dance below verifies
  // that Incr has exactly two users.)
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.  Signed predicates are used
  // because the integer IV is created as a signed i32.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}
408 
409 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
410   // First step.  Check to see if there are any floating-point recurrences.
411   // If there are, change them into integer recurrences, permitting analysis by
412   // the SCEV routines.
413   BasicBlock *Header = L->getHeader();
414 
415   SmallVector<WeakTrackingVH, 8> PHIs;
416   for (PHINode &PN : Header->phis())
417     PHIs.push_back(&PN);
418 
419   bool Changed = false;
420   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
421     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
422       Changed |= handleFloatingPointIV(L, PN);
423 
424   // If the loop previously had floating-point IV, ScalarEvolution
425   // may not have been able to compute a trip count. Now that we've done some
426   // re-writing, the trip count may be computable.
427   if (Changed)
428     SE->forgetLoop(L);
429   return Changed;
430 }
431 
432 //===---------------------------------------------------------------------===//
433 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
434 // they will exit at the first iteration.
435 //===---------------------------------------------------------------------===//
436 
437 /// Check to see if this loop has loop invariant conditions which lead to loop
438 /// exits. If so, we know that if the exit path is taken, it is at the first
439 /// loop iteration. This lets us predict exit values of PHI nodes that live in
440 /// loop header.
441 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
442   // Verify the input to the pass is already in LCSSA form.
443   assert(L->isLCSSAForm(*DT));
444 
445   SmallVector<BasicBlock *, 8> ExitBlocks;
446   L->getUniqueExitBlocks(ExitBlocks);
447 
448   bool MadeAnyChanges = false;
449   for (auto *ExitBB : ExitBlocks) {
450     // If there are no more PHI nodes in this exit block, then no more
451     // values defined inside the loop are used on this path.
452     for (PHINode &PN : ExitBB->phis()) {
453       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
454            IncomingValIdx != E; ++IncomingValIdx) {
455         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
456 
457         // Can we prove that the exit must run on the first iteration if it
458         // runs at all?  (i.e. early exits are fine for our purposes, but
459         // traces which lead to this exit being taken on the 2nd iteration
460         // aren't.)  Note that this is about whether the exit branch is
461         // executed, not about whether it is taken.
462         if (!L->getLoopLatch() ||
463             !DT->dominates(IncomingBB, L->getLoopLatch()))
464           continue;
465 
466         // Get condition that leads to the exit path.
467         auto *TermInst = IncomingBB->getTerminator();
468 
469         Value *Cond = nullptr;
470         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
471           // Must be a conditional branch, otherwise the block
472           // should not be in the loop.
473           Cond = BI->getCondition();
474         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
475           Cond = SI->getCondition();
476         else
477           continue;
478 
479         if (!L->isLoopInvariant(Cond))
480           continue;
481 
482         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
483 
484         // Only deal with PHIs in the loop header.
485         if (!ExitVal || ExitVal->getParent() != L->getHeader())
486           continue;
487 
488         // If ExitVal is a PHI on the loop header, then we know its
489         // value along this exit because the exit can only be taken
490         // on the first iteration.
491         auto *LoopPreheader = L->getLoopPreheader();
492         assert(LoopPreheader && "Invalid loop");
493         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
494         if (PreheaderIdx != -1) {
495           assert(ExitVal->getParent() == L->getHeader() &&
496                  "ExitVal must be in loop header");
497           MadeAnyChanges = true;
498           PN.setIncomingValue(IncomingValIdx,
499                               ExitVal->getIncomingValue(PreheaderIdx));
500           SE->forgetValue(&PN);
501         }
502       }
503     }
504   }
505   return MadeAnyChanges;
506 }
507 
508 //===----------------------------------------------------------------------===//
509 //  IV Widening - Extend the width of an IV to cover its widest uses.
510 //===----------------------------------------------------------------------===//
511 
512 /// Update information about the induction variable that is extended by this
513 /// sign or zero extend operation. This is used to determine the final width of
514 /// the IV before actually widening it.
515 static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
516                         ScalarEvolution *SE,
517                         const TargetTransformInfo *TTI) {
518   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
519   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
520     return;
521 
522   Type *Ty = Cast->getType();
523   uint64_t Width = SE->getTypeSizeInBits(Ty);
524   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
525     return;
526 
527   // Check that `Cast` actually extends the induction variable (we rely on this
528   // later).  This takes care of cases where `Cast` is extending a truncation of
529   // the narrow induction variable, and thus can end up being narrower than the
530   // "narrow" induction variable.
531   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
532   if (NarrowIVWidth >= Width)
533     return;
534 
535   // Cast is either an sext or zext up to this point.
536   // We should not widen an indvar if arithmetics on the wider indvar are more
537   // expensive than those on the narrower indvar. We check only the cost of ADD
538   // because at least an ADD is required to increment the induction variable. We
539   // could compute more comprehensively the cost of all instructions on the
540   // induction variable when necessary.
541   if (TTI &&
542       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
543           TTI->getArithmeticInstrCost(Instruction::Add,
544                                       Cast->getOperand(0)->getType())) {
545     return;
546   }
547 
548   if (!WI.WidestNativeType ||
549       Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
550     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
551     WI.IsSigned = IsSigned;
552     return;
553   }
554 
555   // We extend the IV to satisfy the sign of its user(s), or 'signed'
556   // if there are multiple users with both sign- and zero extensions,
557   // in order not to introduce nondeterministic behaviour based on the
558   // unspecified order of a PHI nodes' users-iterator.
559   WI.IsSigned |= IsSigned;
560 }
561 
562 //===----------------------------------------------------------------------===//
563 //  Live IV Reduction - Minimize IVs live across the loop.
564 //===----------------------------------------------------------------------===//
565 
566 //===----------------------------------------------------------------------===//
567 //  Simplification of IV users based on SCEV evaluation.
568 //===----------------------------------------------------------------------===//
569 
570 namespace {
571 
/// IVVisitor implementation that records the sign/zero extensions of an IV
/// phi (via visitIVCast) so the pass can later decide whether and how to
/// widen it.
class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  // The narrow induction phi under consideration.
  PHINode *IVPhi;

public:
  // Widening candidate info accumulated across visited casts.
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;  // DT lives in the IVVisitor base.
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};
591 
592 } // end anonymous namespace
593 
594 /// Iteratively perform simplification on a worklist of IV users. Each
595 /// successive simplification may push more users which may themselves be
596 /// candidates for simplification.
597 ///
598 /// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  // Whether guard intrinsics appear in this module; the flag is forwarded to
  // createWideIV below.
  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
          Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  // Seed the worklist with all header phis.
  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      // Record this phi as a widening candidate if the visitor saw a suitable
      // extension of it.
      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while(!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    // Try to widen each candidate; each successful widening yields a new wide
    // phi that is queued for another simplification round.
    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}
657 
658 //===----------------------------------------------------------------------===//
659 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
660 //===----------------------------------------------------------------------===//
661 
662 /// Given an Value which is hoped to be part of an add recurance in the given
663 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
664 /// that this is less general than SCEVs AddRec checking.
665 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
666   Instruction *IncI = dyn_cast<Instruction>(IncV);
667   if (!IncI)
668     return nullptr;
669 
670   switch (IncI->getOpcode()) {
671   case Instruction::Add:
672   case Instruction::Sub:
673     break;
674   case Instruction::GetElementPtr:
675     // An IV counter must preserve its type.
676     if (IncI->getNumOperands() == 2)
677       break;
678     [[fallthrough]];
679   default:
680     return nullptr;
681   }
682 
683   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
684   if (Phi && Phi->getParent() == L->getHeader()) {
685     if (L->isLoopInvariant(IncI->getOperand(1)))
686       return Phi;
687     return nullptr;
688   }
689   if (IncI->getOpcode() == Instruction::GetElementPtr)
690     return nullptr;
691 
692   // Allow add/sub to be commuted.
693   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
694   if (Phi && Phi->getParent() == L->getHeader()) {
695     if (L->isLoopInvariant(IncI->getOperand(0)))
696       return Phi;
697   }
698   return nullptr;
699 }
700 
701 /// Whether the current loop exit test is based on this value.  Currently this
702 /// is limited to a direct use in the loop condition.
703 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
704   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
705   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
706   // TODO: Allow non-icmp loop test.
707   if (!ICmp)
708     return false;
709 
710   // TODO: Allow indirect use.
711   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
712 }
713 
714 /// linearFunctionTestReplace policy. Return true unless we can show that the
715 /// current exit test is already sufficiently canonical.
716 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
717   assert(L->getLoopLatch() && "Must be in simplified form");
718 
719   // Avoid converting a constant or loop invariant test back to a runtime
720   // test.  This is critical for when SCEV's cached ExitCount is less precise
721   // than the current IR (such as after we've proven a particular exit is
722   // actually dead and thus the BE count never reaches our ExitCount.)
723   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
724   if (L->isLoopInvariant(BI->getCondition()))
725     return false;
726 
727   // Do LFTR to simplify the exit condition to an ICMP.
728   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
729   if (!Cond)
730     return true;
731 
732   // Do LFTR to simplify the exit ICMP to EQ/NE
733   ICmpInst::Predicate Pred = Cond->getPredicate();
734   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
735     return true;
736 
737   // Look for a loop invariant RHS
738   Value *LHS = Cond->getOperand(0);
739   Value *RHS = Cond->getOperand(1);
740   if (!L->isLoopInvariant(RHS)) {
741     if (!L->isLoopInvariant(LHS))
742       return true;
743     std::swap(LHS, RHS);
744   }
745   // Look for a simple IV counter LHS
746   PHINode *Phi = dyn_cast<PHINode>(LHS);
747   if (!Phi)
748     Phi = getLoopPhiForCounter(LHS, L);
749 
750   if (!Phi)
751     return true;
752 
753   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
754   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
755   if (Idx < 0)
756     return true;
757 
758   // Do LFTR if the exit condition's IV is *not* a simple counter.
759   Value *IncV = Phi->getIncomingValue(Idx);
760   return Phi != getLoopPhiForCounter(IncV, L);
761 }
762 
/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.  Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison.  This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist.  Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users.  Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    // The insert doubles as the visited check, so each instruction's users
    // are queued at most once.
    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // way to exiting bb.
  return false;
}
804 
805 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
806 /// down to checking that all operands are constant and listing instructions
807 /// that may hide undef.
808 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
809                                unsigned Depth) {
810   if (isa<Constant>(V))
811     return !isa<UndefValue>(V);
812 
813   if (Depth >= 6)
814     return false;
815 
816   // Conservatively handle non-constant non-instructions. For example, Arguments
817   // may be undef.
818   Instruction *I = dyn_cast<Instruction>(V);
819   if (!I)
820     return false;
821 
822   // Load and return values may be undef.
823   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
824     return false;
825 
826   // Optimistically handle other instructions.
827   for (Value *Op : I->operands()) {
828     if (!Visited.insert(Op).second)
829       continue;
830     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
831       return false;
832   }
833   return true;
834 }
835 
836 /// Return true if the given value is concrete. We must prove that undef can
837 /// never reach it.
838 ///
839 /// TODO: If we decide that this is a good approach to checking for undef, we
840 /// may factor it into a common location.
841 static bool hasConcreteDef(Value *V) {
842   SmallPtrSet<Value*, 8> Visited;
843   Visited.insert(V);
844   return hasConcreteDefImpl(V, Visited, 0);
845 }
846 
847 /// Return true if this IV has any uses other than the (soon to be rewritten)
848 /// loop exit test.
849 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
850   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
851   Value *IncV = Phi->getIncomingValue(LatchIdx);
852 
853   for (User *U : Phi->users())
854     if (U != Cond && U != IncV) return false;
855 
856   for (User *U : IncV->users())
857     if (U != Cond && U != Phi) return false;
858   return true;
859 }
860 
/// Return true if the given phi is a "counter" in L.  A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1.  Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  // Must be an affine add recurrence over this loop...
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  // ...with a constant step of exactly one.
  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  // The latch increment must be a simple update of Phi (as recognized by
  // getLoopPhiForCounter) that SCEV can itself analyze as an add recurrence.
  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}
885 
/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitable heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
973 
/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    // When testing against the post-incremented value, the limit is one past
    // the exit count.
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know apriori that the limit must be a constant when evaluated
    // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    // When testing against the post-incremented value, the limit is one past
    // the exit count.
    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
      IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}
1054 
/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  // The increment of the counter in the latch block.
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  // Pick the predicate so that the 'stay in loop' successor is unchanged.
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within in.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
1197 
1198 //===----------------------------------------------------------------------===//
1199 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1200 //===----------------------------------------------------------------------===//
1201 
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  // Walk the preheader backwards from the terminator so that sinking an
  // instruction cannot invalidate the position we resume scanning from.
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // A phi use counts as occurring in the predecessor block it flows in
      // from, not in the phi's own block.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    // Advance the iterator past the instruction we're about to move (and any
    // debug/pseudo instructions) before moving it, since moving invalidates
    // the position.
    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      // Nothing left to scan before the moved instruction.
      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    SE->forgetValue(ToMove);
    if (Done) break;
    // Keep later-sunk instructions ordered before earlier-sunk ones by
    // inserting subsequent instructions in front of this one.
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}
1291 
1292 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1293                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1294   auto *OldCond = BI->getCondition();
1295   LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1296                     << " with " << *NewCond << "\n");
1297   BI->setCondition(NewCond);
1298   if (OldCond->use_empty())
1299     DeadInsts.emplace_back(OldCond);
1300 }
1301 
1302 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1303                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1304   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1305   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1306   auto *OldCond = BI->getCondition();
1307   auto *NewCond =
1308       ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
1309   replaceExitCond(BI, NewCond, DeadInsts);
1310 }
1311 
1312 static void replaceLoopPHINodesWithPreheaderValues(
1313     LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1314     ScalarEvolution &SE) {
1315   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1316   auto *LoopPreheader = L->getLoopPreheader();
1317   auto *LoopHeader = L->getHeader();
1318   SmallVector<Instruction *> Worklist;
1319   for (auto &PN : LoopHeader->phis()) {
1320     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1321     for (User *U : PN.users())
1322       Worklist.push_back(cast<Instruction>(U));
1323     SE.forgetValue(&PN);
1324     PN.replaceAllUsesWith(PreheaderIncoming);
1325     DeadInsts.emplace_back(&PN);
1326   }
1327 
1328   // Replacing with the preheader value will often allow IV users to simplify
1329   // (especially if the preheader value is a constant).
1330   SmallPtrSet<Instruction *, 16> Visited;
1331   while (!Worklist.empty()) {
1332     auto *I = cast<Instruction>(Worklist.pop_back_val());
1333     if (!Visited.insert(I).second)
1334       continue;
1335 
1336     // Don't simplify instructions outside the loop.
1337     if (!L->contains(I))
1338       continue;
1339 
1340     Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
1341     if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1342       for (User *U : I->users())
1343         Worklist.push_back(cast<Instruction>(U));
1344       I->replaceAllUsesWith(Res);
1345       DeadInsts.emplace_back(I);
1346     }
1347   }
1348 }
1349 
1350 static void replaceWithInvariantCond(
1351     const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
1352     const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
1353     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1354   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1355   Rewriter.setInsertPoint(BI);
1356   auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
1357   auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
1358   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1359   if (ExitIfTrue)
1360     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1361   IRBuilder<> Builder(BI);
1362   auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1363                                      BI->getCondition()->getName());
1364   replaceExitCond(BI, NewCond, DeadInsts);
1365 }
1366 
/// Try to simplify the exit test of ExitingBB (whose exact exit count SCEV
/// could not compute) using the fact that the loop executes at most MaxIter
/// iterations.  If Inverted, we try to prove the exit *is* taken rather than
/// not taken; if SkipLastIter, the last iteration is excluded from the range
/// considered (MaxIter - 1).  Returns true if the exit condition was folded
/// to a constant or replaced with a loop-invariant condition.
static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
    const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
    ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  // Only handle a conditional branch on an icmp.
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  BasicBlock *TrueSucc, *FalseSucc;
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return false;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving loop exit, invert the predicate.
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
    foldExit(L, ExitingBB, Inverted, DeadInsts);
    return true;
  }
  // Further logic works for non-inverted condition only.
  if (Inverted)
    return false;

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    // Truncating MaxIter is only safe if it provably fits in the narrower
    // type; check against the unsigned max of ARTy.
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    const SCEV *One = SE->getOne(MaxIter->getType());
    MaxIter = SE->getMinusSCEV(MaxIter, One);
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return false;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    foldExit(L, ExitingBB, Inverted, DeadInsts);
  else
    replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
                             Rewriter, DeadInsts);

  return true;
}
1433 
/// Canonicalize exit comparisons involving zero-extends so that later trip
/// count reasoning (and LFTR) can see through them.  Two steps:
///   1) Turn "icmp signed-pred zext(X), RHS" into the equivalent unsigned
///      compare when RHS provably fits in the zero-extended range of X's
///      narrow type (the sign bit of both wide operands is then zero).
///   2) "Rotate" the extend out of the loop: rewrite
///      "icmp unsigned-pred zext(X), RHS" as "icmp unsigned-pred X, trunc(RHS)"
///      when the truncation is provably lossless, converting loop-varying
///      extension work into loop-invariant truncation work.
/// Returns true if any compare was rewritten.
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags.  The alternative to this would be extending the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers.  The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    // Only touch compares whose sole use is this exit branch; otherwise we
    // might change the value observed by another user.
    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results.  For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    // FullCR is the set of values a zext of the narrow type can produce; if
    // RHS's unsigned range is contained in it, both wide operands have a
    // clear sign bit and signed vs unsigned comparison is equivalent.
    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed.  We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions.  The exception is when we have an zext(add-rec).  The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant.  This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      // The trunc is inserted in the preheader so it is computed once.
      auto *NewRHS =
        CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
                         L->getLoopPreheader()->getTerminator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };


    // As above: the rotation is lossless exactly when RHS's unsigned range
    // fits in the zero-extension image of the narrow type.
    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}
1567 
1568 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1569   SmallVector<BasicBlock*, 16> ExitingBlocks;
1570   L->getExitingBlocks(ExitingBlocks);
1571 
1572   // Remove all exits which aren't both rewriteable and execute on every
1573   // iteration.
1574   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1575     // If our exitting block exits multiple loops, we can only rewrite the
1576     // innermost one.  Otherwise, we're changing how many times the innermost
1577     // loop runs before it exits.
1578     if (LI->getLoopFor(ExitingBB) != L)
1579       return true;
1580 
1581     // Can't rewrite non-branch yet.
1582     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1583     if (!BI)
1584       return true;
1585 
1586     // Likewise, the loop latch must be dominated by the exiting BB.
1587     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1588       return true;
1589 
1590     if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1591       // If already constant, nothing to do. However, if this is an
1592       // unconditional exit, we can still replace header phis with their
1593       // preheader value.
1594       if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1595         replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1596       return true;
1597     }
1598 
1599     return false;
1600   });
1601 
1602   if (ExitingBlocks.empty())
1603     return false;
1604 
1605   // Get a symbolic upper bound on the loop backedge taken count.
1606   const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
1607   if (isa<SCEVCouldNotCompute>(MaxExitCount))
1608     return false;
1609 
1610   // Visit our exit blocks in order of dominance. We know from the fact that
1611   // all exits must dominate the latch, so there is a total dominance order
1612   // between them.
1613   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1614                // std::sort sorts in ascending order, so we want the inverse of
1615                // the normal dominance relation.
1616                if (A == B) return false;
1617                if (DT->properlyDominates(A, B))
1618                  return true;
1619                else {
1620                  assert(DT->properlyDominates(B, A) &&
1621                         "expected total dominance order!");
1622                  return false;
1623                }
1624   });
1625 #ifdef ASSERT
1626   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1627     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1628   }
1629 #endif
1630 
1631   bool Changed = false;
1632   bool SkipLastIter = false;
1633   SmallSet<const SCEV*, 8> DominatingExitCounts;
1634   for (BasicBlock *ExitingBB : ExitingBlocks) {
1635     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1636     if (isa<SCEVCouldNotCompute>(ExitCount)) {
1637       // Okay, we do not know the exit count here. Can we at least prove that it
1638       // will remain the same within iteration space?
1639       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1640       auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
1641         return optimizeLoopExitWithUnknownExitCount(
1642             L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
1643             Rewriter, DeadInsts);
1644       };
1645 
1646       // TODO: We might have proved that we can skip the last iteration for
1647       // this check. In this case, we only want to check the condition on the
1648       // pre-last iteration (MaxExitCount - 1). However, there is a nasty
1649       // corner case:
1650       //
1651       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1652       //
1653       // If we could not prove that len != 0, then we also could not prove that
1654       // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1655       // OptimizeCond will likely not prove anything for it, even if it could
1656       // prove the same fact for len.
1657       //
1658       // As a temporary solution, we query both last and pre-last iterations in
1659       // hope that we will be able to prove triviality for at least one of
1660       // them. We can stop querying MaxExitCount for this case once SCEV
1661       // understands that (MaxExitCount - 1) will not overflow here.
1662       if (OptimizeCond(false, false) || OptimizeCond(true, false))
1663         Changed = true;
1664       else if (SkipLastIter)
1665         if (OptimizeCond(false, true) || OptimizeCond(true, true))
1666           Changed = true;
1667       continue;
1668     }
1669 
1670     if (MaxExitCount == ExitCount)
1671       // If the loop has more than 1 iteration, all further checks will be
1672       // executed 1 iteration less.
1673       SkipLastIter = true;
1674 
1675     // If we know we'd exit on the first iteration, rewrite the exit to
1676     // reflect this.  This does not imply the loop must exit through this
1677     // exit; there may be an earlier one taken on the first iteration.
1678     // We know that the backedge can't be taken, so we replace all
1679     // the header PHIs with values coming from the preheader.
1680     if (ExitCount->isZero()) {
1681       foldExit(L, ExitingBB, true, DeadInsts);
1682       replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1683       Changed = true;
1684       continue;
1685     }
1686 
1687     assert(ExitCount->getType()->isIntegerTy() &&
1688            MaxExitCount->getType()->isIntegerTy() &&
1689            "Exit counts must be integers");
1690 
1691     Type *WiderType =
1692       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
1693     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
1694     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
1695     assert(MaxExitCount->getType() == ExitCount->getType());
1696 
1697     // Can we prove that some other exit must be taken strictly before this
1698     // one?
1699     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
1700                                      MaxExitCount, ExitCount)) {
1701       foldExit(L, ExitingBB, false, DeadInsts);
1702       Changed = true;
1703       continue;
1704     }
1705 
1706     // As we run, keep track of which exit counts we've encountered.  If we
1707     // find a duplicate, we've found an exit which would have exited on the
1708     // exiting iteration, but (from the visit order) strictly follows another
1709     // which does the same and is thus dead.
1710     if (!DominatingExitCounts.insert(ExitCount).second) {
1711       foldExit(L, ExitingBB, false, DeadInsts);
1712       Changed = true;
1713       continue;
1714     }
1715 
1716     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1717     // here.  If we kept track of the min of dominanting exits so far, we could
1718     // discharge exits with EC >= MDEC. This is less powerful than the existing
1719     // transform (since later exits aren't considered), but potentially more
1720     // powerful for any case where SCEV can prove a >=u b, but neither a == b
1721     // or a >u b.  Such a case is not currently known.
1722   }
1723   return Changed;
1724 }
1725 
/// For read-only loops with a computable exact backedge-taken count, rewrite
/// each predicatable exit condition into a loop-invariant comparison of that
/// exit's count against the exact BTC.  This may change *which* exit is taken
/// and how many iterations run, which is only legal because the loop has no
/// observable side effects.  Returns true if any exit branch was rewritten.
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration.  Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  // Returns true if this exit cannot be predicated: wrong loop level,
  // non-branch terminator, already-constant condition, values flowing out
  // through exit phis, or an unexpandable exit count.
  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
    BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).  Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known inequal.
  llvm::sort(ExitingBlocks,
            [&](BasicBlock *A, BasicBlock *B) {
              // std::sort sorts in ascending order, so we want the inverse of
              // the normal dominance relation, plus a tie breaker for blocks
              // unordered by dominance.
              if (DT->properlyDominates(A, B)) return true;
              if (DT->properlyDominates(B, A)) return false;
              return A->getName() < B->getName();
            });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO:isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      // This exit must be the one taken on the final iteration; fold the
      // condition so the loop always exits here.
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      // Exit iff we've reached this exit's iteration count; widen operands
      // to a common type first if needed.
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}
1888 
1889 //===----------------------------------------------------------------------===//
1890 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1891 //===----------------------------------------------------------------------===//
1892 
/// Top-level driver: run each induction-variable simplification subpass over
/// \p L in sequence (FP IV rewriting, simplify-and-extend, exit value
/// rewriting, congruent IV elimination, exit condition canonicalization, exit
/// optimization/predication, LFTR, dead code cleanup, invariant sinking).
/// The ordering is deliberate; see inline comments.  Returns true if any
/// subpass changed the IR.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only
  // Note: Since the result returned by ScalarEvolution may depend on the order
  // in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change following SCEV queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop.  Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter))  {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR, and any other change have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    // The new count may only be smaller (the loop exits no later than before).
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}
2092 
2093 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2094                                           LoopStandardAnalysisResults &AR,
2095                                           LPMUpdater &) {
2096   Function *F = L.getHeader()->getParent();
2097   const DataLayout &DL = F->getParent()->getDataLayout();
2098 
2099   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2100                      WidenIndVars && AllowIVWidening);
2101   if (!IVS.run(&L))
2102     return PreservedAnalyses::all();
2103 
2104   auto PA = getLoopPassPreservedAnalyses();
2105   PA.preserveSet<CFGAnalyses>();
2106   if (AR.MSSA)
2107     PA.preserve<MemorySSAAnalysis>();
2108   return PA;
2109 }
2110 
2111 namespace {
2112 
2113 struct IndVarSimplifyLegacyPass : public LoopPass {
2114   static char ID; // Pass identification, replacement for typeid
2115 
2116   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2117     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2118   }
2119 
2120   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2121     if (skipLoop(L))
2122       return false;
2123 
2124     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2125     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2126     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2127     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2128     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2129     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2130     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2131     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2132     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2133     MemorySSA *MSSA = nullptr;
2134     if (MSSAAnalysis)
2135       MSSA = &MSSAAnalysis->getMSSA();
2136 
2137     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
2138     return IVS.run(L);
2139   }
2140 
2141   void getAnalysisUsage(AnalysisUsage &AU) const override {
2142     AU.setPreservesCFG();
2143     AU.addPreserved<MemorySSAWrapperPass>();
2144     getLoopAnalysisUsage(AU);
2145   }
2146 };
2147 
2148 } // end anonymous namespace
2149 
// Unique address used as the legacy pass's identifier.
char IndVarSimplifyLegacyPass::ID = 0;

// Register the pass with the legacy PassRegistry under the "indvars" name.
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

// Factory used by legacy pipeline construction to instantiate the pass.
Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}
2161