//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
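//      For example (an illustrative sketch; this only applies when SCEV can
//      compute the exit value):
//        'for (i = 0, s = 0; i != n; ++i) s += 4;  use(s)'
//      can be rewritten to compute 'use(4*n)' outside the loop, leaving the
//      loop itself dead.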
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// See if we can convert an exit condition from signed to unsigned.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzeable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add or the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison cannot overflow in a way that
  // the equivalent integer operations would fail to detect.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;
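    // Worked example (illustrative): with InitValue = 0, IncValue = 3, and
    // ExitValue = 10 under an NE test, the integer IV would step 0, 3, 6, 9,
    // 12, ... and never equal 10; Leftover = 10 % 3 != 0 rejects exactly this
    // case.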

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution may not
  // have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
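///
/// For example (an illustrative sketch, not taken from a test case):
///   for (i = 0; ; ++i) { if (c) break; ... }   // c is loop-invariant
/// If the exit guarded by c is taken at all, it is taken with i == 0, so an
/// exit-block phi fed by the header phi for i can be rewritten to i's
/// preheader value.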
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute the cost of all instructions on the induction variable
  // more comprehensively when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
          Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode*, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extension until needed, and avoid running
    // other SCEV-based analyses prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so.  Otherwise, return null.  Note
/// that this is less general than SCEV's AddRec checking.
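///
/// For example (illustrative IR):
///   %iv = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i32 %iv, %step          ; %step is loop-invariant
/// getLoopPhiForCounter(%iv.next, L) returns %iv.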
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    LLVM_FALLTHROUGH;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value.  Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
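///
/// For example (illustrative), an exit test of the form
///   'icmp ne %iv.next, %limit'
/// where %limit is loop-invariant and %iv.next is the latch increment of a
/// simple counter phi is already canonical, so no LFTR is needed.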
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test.  This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.  Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison.  This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist.  Note that a false result conveys no information.
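///
/// For example (illustrative): if Root feeds the divisor of a udiv that
/// dominates OnPathTo, poison at Root would already be immediate UB on every
/// path to OnPathTo, so adding another use of Root there introduces no new UB.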
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users.  Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example, Arguments
  // may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has any uses other than the (soon to be rewritten)
/// loop exit test.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L.  A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1.  Note that L must have exactly one latch.
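///
/// For example (illustrative IR), the expected shape is:
///   %iv = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i64 %iv, 1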
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec with a step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter with unit stride) after the backedge of loop
/// L is taken ExitCount times.
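///
/// For example (a sketch): for a counter starting at IVInit whose backedge is
/// taken ExitCount times, the pre-increment limit is 'IVInit + ExitCount' and
/// the post-increment limit is 'IVInit + ExitCount + 1', both computed with
/// 2's complement wrapping arithmetic.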
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    // We could handle pointer IVs other than i8*, but we need to compensate for
    // gep index scaling.
    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
                             cast<PointerType>(IndVar->getType())
                                 ->getElementType())->isOne() &&
           "unit stride pointer IV must be i8*");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
      IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
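///
/// For example (illustrative): an exit test 'icmp slt i32 %iv, %n' in a loop
/// whose trip count SCEV can compute is rewritten to 'icmp ne i32 %iv.next,
/// %limit', where %limit is the value materialized by genLoopLimit above.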
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as in SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth; we just prefer a zext/sext outside the loop
    // to a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to clean up loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
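///
/// For example (illustrative): a loop-invariant GEP that is computed in the
/// preheader but only used in the single exit block can be moved into that
/// exit block, shortening its live range across the loop.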
1213 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1214   BasicBlock *ExitBlock = L->getExitBlock();
1215   if (!ExitBlock) return false;
1216 
1217   BasicBlock *Preheader = L->getLoopPreheader();
1218   if (!Preheader) return false;
1219 
1220   bool MadeAnyChanges = false;
1221   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1222   BasicBlock::iterator I(Preheader->getTerminator());
1223   while (I != Preheader->begin()) {
1224     --I;
1225     // New instructions were inserted at the end of the preheader.
1226     if (isa<PHINode>(I))
1227       break;
1228 
1229     // Don't move instructions which might have side effects, since the side
1230     // effects need to complete before instructions inside the loop.  Also don't
1231     // move instructions which might read memory, since the loop may modify
1232     // memory. Note that it's okay if the instruction might have undefined
1233     // behavior: LoopSimplify guarantees that the preheader dominates the exit
1234     // block.
1235     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1236       continue;
1237 
1238     // Skip debug info intrinsics.
1239     if (isa<DbgInfoIntrinsic>(I))
1240       continue;
1241 
1242     // Skip eh pad instructions.
1243     if (I->isEHPad())
1244       continue;
1245 
1246     // Don't sink alloca: we never want to sink static alloca's out of the
1247     // entry block, and correctly sinking dynamic alloca's requires
1248     // checks for stacksave/stackrestore intrinsics.
1249     // FIXME: Refactor this check somehow?
1250     if (isa<AllocaInst>(I))
1251       continue;
1252 
1253     // Determine if there is a use in or before the loop (direct or
1254     // otherwise).
1255     bool UsedInLoop = false;
1256     for (Use &U : I->uses()) {
1257       Instruction *User = cast<Instruction>(U.getUser());
1258       BasicBlock *UseBB = User->getParent();
1259       if (PHINode *P = dyn_cast<PHINode>(User)) {
1260         unsigned i =
1261           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1262         UseBB = P->getIncomingBlock(i);
1263       }
1264       if (UseBB == Preheader || L->contains(UseBB)) {
1265         UsedInLoop = true;
1266         break;
1267       }
1268     }
1269 
1270     // If there is, the def must remain in the preheader.
1271     if (UsedInLoop)
1272       continue;
1273 
1274     // Otherwise, sink it to the exit block.
1275     Instruction *ToMove = &*I;
1276     bool Done = false;
1277 
1278     if (I != Preheader->begin()) {
1279       // Skip debug info intrinsics.
1280       do {
1281         --I;
1282       } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1283 
1284       if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1285         Done = true;
1286     } else {
1287       Done = true;
1288     }
1289 
1290     MadeAnyChanges = true;
1291     ToMove->moveBefore(*ExitBlock, InsertPt);
1292     if (Done) break;
1293     InsertPt = ToMove->getIterator();
1294   }
1295 
1296   return MadeAnyChanges;
1297 }
1298 
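/// Install \p NewCond as the condition of the exit branch \p BI, queueing
/// the old condition for deletion if it has no remaining uses.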
1299 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1300                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1301   auto *OldCond = BI->getCondition();
1302   BI->setCondition(NewCond);
1303   if (OldCond->use_empty())
1304     DeadInsts.emplace_back(OldCond);
1305 }
1306 
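/// Fold the exit branch in \p ExitingBB to a constant condition: if
/// \p IsTaken, the exit edge becomes unconditionally taken; otherwise it is
/// never taken.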
1307 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1308                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1309   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1310   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1311   auto *OldCond = BI->getCondition();
1312   auto *NewCond =
1313       ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
1314   replaceExitCond(BI, NewCond, DeadInsts);
1315 }
1316 
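/// The loop's backedge is known to never be taken, so each header phi can
/// only observe its preheader incoming value; replace all of the phi's uses
/// with that value and queue the phi itself for deletion.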
1317 static void replaceLoopPHINodesWithPreheaderValues(
1318     Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1319   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1320   auto *LoopPreheader = L->getLoopPreheader();
1321   auto *LoopHeader = L->getHeader();
1322   for (auto &PN : LoopHeader->phis()) {
1323     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1324     PN.replaceAllUsesWith(PreheaderIncoming);
1325     DeadInsts.emplace_back(&PN);
1326   }
1327 }
1328 
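/// Replace the condition of the exit branch in \p ExitingBB with a freshly
/// expanded, loop-invariant check 'InvariantLHS InvariantPred InvariantRHS'.
/// The predicate is expected in "stay in loop" form and is inverted when the
/// exit is taken on the true edge.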
1329 static void replaceWithInvariantCond(
1330     const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
1331     const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
1332     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1333   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1334   Rewriter.setInsertPoint(BI);
1335   auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
1336   auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
1337   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1338   if (ExitIfTrue)
1339     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1340   IRBuilder<> Builder(BI);
1341   auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1342                                      BI->getCondition()->getName());
1343   replaceExitCond(BI, NewCond, DeadInsts);
1344 }
1345 
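/// Try to simplify the exit branch in \p ExitingBB even though its exit
/// count is unknown: either prove the (normalized) exit condition trivially
/// true and fold the exit, or replace the condition with an equivalent
/// loop-invariant one that holds during the first \p MaxIter iterations.
/// Returns true if a change was made.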
1346 static bool optimizeLoopExitWithUnknownExitCount(
1347     const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
1348     const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1349     ScalarEvolution *SE, SCEVExpander &Rewriter,
1350     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1351   ICmpInst::Predicate Pred;
1352   Value *LHS, *RHS;
1353   BasicBlock *TrueSucc, *FalseSucc;
1354   if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
1355                       m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
1356     return false;
1357 
1358   assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
1359          "Not a loop exit!");
1360 
1361   // 'LHS pred RHS' should now mean that we stay in loop.
1362   if (L->contains(FalseSucc))
1363     Pred = CmpInst::getInversePredicate(Pred);
1364 
1365   // If we are proving loop exit, invert the predicate.
1366   if (Inverted)
1367     Pred = CmpInst::getInversePredicate(Pred);
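  // E.g. (hypothetical illustration): given
  //   br (icmp slt %a, %b), %exit, %stay
  // the loop is entered on the false edge, so "stay in loop" is first
  // normalized to (icmp sge %a, %b); proving the *exit* instead (Inverted)
  // flips it back to (icmp slt %a, %b).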
1368 
1369   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1370   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1371   // Can we prove it to be trivially true?
1372   if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
1373     foldExit(L, ExitingBB, Inverted, DeadInsts);
1374     return true;
1375   }
1376   // Further logic works for non-inverted condition only.
1377   if (Inverted)
1378     return false;
1379 
1380   auto *ARTy = LHSS->getType();
1381   auto *MaxIterTy = MaxIter->getType();
1382   // If possible, adjust types.
1383   if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1384     MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1385   else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1386     const SCEV *MinusOne = SE->getMinusOne(ARTy);
1387     auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1388     if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1389       MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1390   }
1391 
1392   if (SkipLastIter) {
1393     const SCEV *One = SE->getOne(MaxIter->getType());
1394     MaxIter = SE->getMinusSCEV(MaxIter, One);
1395   }
1396 
1397   // Check if there is a loop-invariant predicate equivalent to our check.
1398   auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1399                                                                L, BI, MaxIter);
1400   if (!LIP)
1401     return false;
1402 
1403   // Can we prove it to be trivially true?
1404   if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1405     foldExit(L, ExitingBB, Inverted, DeadInsts);
1406   else
1407     replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
1408                              Rewriter, DeadInsts);
1409 
1410   return true;
1411 }
1412 
1413 bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1414   // Note: This duplicates a particular part of SimplifyIndVars' reasoning.
1415   // We need to duplicate it because, given icmp zext(small-iv), C, IVUsers
1416   // never reaches the icmp since the zext doesn't fold to an AddRec unless
1417   // it already has flags.  The alternative would be to extend the set of
1418   // "interesting" IV users to include the icmp, but doing that regresses
1419   // results in practice: querying SCEVs before the trip counts which rely
1420   // on them leads SCEV to cache sub-optimal answers.  That concern about
1421   // caching sub-optimal results is why we only query SCEVs of the loop
1422   // invariant RHS here.
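  // For illustration (a hypothetical sketch): given
  //   %lhs = zext i8 %x to i32
  //   %cmp = icmp slt i32 %lhs, %rhs
  // if SCEV shows the unsigned range of %rhs fits in [0, 256), both operands
  // are known non-negative, so the compare can safely use the unsigned
  // predicate: icmp ult i32 %lhs, %rhs.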
1423   SmallVector<BasicBlock*, 16> ExitingBlocks;
1424   L->getExitingBlocks(ExitingBlocks);
1425   bool Changed = false;
1426   for (auto *ExitingBB : ExitingBlocks) {
1427     auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1428     if (!BI)
1429       continue;
1430     assert(BI->isConditional() && "exit branch must be conditional");
1431 
1432     auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1433     if (!ICmp || !ICmp->hasOneUse())
1434       continue;
1435 
1436     auto *LHS = ICmp->getOperand(0);
1437     auto *RHS = ICmp->getOperand(1);
1438     // For the range reasoning, avoid computing SCEVs in the loop to avoid
1439     // poisoning the cache with sub-optimal results.  For the must-execute
1440     // case, this is a necessary precondition for correctness.
1441     if (!L->isLoopInvariant(RHS))
1442       continue;
1443 
1444     // Match (icmp signed-cond zext, RHS)
1445     Value *LHSOp = nullptr;
1446     if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1447       continue;
1448 
1449     const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1450     const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1451     const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1452     auto FullCR = ConstantRange::getFull(InnerBitWidth);
1453     FullCR = FullCR.zeroExtend(OuterBitWidth);
1454     if (FullCR.contains(SE->getUnsignedRange(SE->getSCEV(RHS)))) {
1455       // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1456       // replace the signed condition with the unsigned version.
1457       ICmp->setPredicate(ICmp->getUnsignedPredicate());
1458       Changed = true;
1459       // Note: No SCEV invalidation needed.  We've changed the predicate, but
1460       // have not changed exit counts, or the values produced by the compare.
1461       continue;
1462     }
1463 
1464     // If we have a loop which would be undefined if infinite, and it has at
1465     // most one possible dynamic exit, then we can conclude that exit must
1466     // be taken.  If that exit must be taken, and we know the LHS can only
1467     // take values in the positive domain, then we can conclude RHS must
1468     // also be in that same range, and replace a signed compare with an
1469     // unsigned one.
1470     // If the exit might not be taken in a well-defined program, none of this reasoning applies.
1471     if (ExitingBlocks.size() == 1 && SE->loopHasNoAbnormalExits(L) &&
1472         SE->loopIsFiniteByAssumption(L)) {
1473       // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1474       // replace the signed condition with the unsigned version.
1475       ICmp->setPredicate(ICmp->getUnsignedPredicate());
1476       Changed = true;
1477 
1478       // Given we've changed exit counts, notify SCEV.
1479       // Some nested loops may share same folded exit basic block,
1480       // thus we need to notify top most loop.
1481       SE->forgetTopmostLoop(L);
1482       continue;
1483     }
1484   }
1485   return Changed;
1486 }
1487 
1488 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1489   SmallVector<BasicBlock*, 16> ExitingBlocks;
1490   L->getExitingBlocks(ExitingBlocks);
1491 
1492   // Remove all exits which aren't both rewriteable and guaranteed to
1493   // execute on every iteration.
1494   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1495     // If our exiting block exits multiple loops, we can only rewrite the
1496     // innermost one.  Otherwise, we're changing how many times the innermost
1497     // loop runs before it exits.
1498     if (LI->getLoopFor(ExitingBB) != L)
1499       return true;
1500 
1501     // Can't rewrite non-branch yet.
1502     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1503     if (!BI)
1504       return true;
1505 
1506     // If already constant, nothing to do.
1507     if (isa<Constant>(BI->getCondition()))
1508       return true;
1509 
1510     // Likewise, the loop latch must be dominated by the exiting BB.
1511     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1512       return true;
1513 
1514     return false;
1515   });
1516 
1517   if (ExitingBlocks.empty())
1518     return false;
1519 
1520   // Get a symbolic upper bound on the loop backedge taken count.
1521   const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
1522   if (isa<SCEVCouldNotCompute>(MaxExitCount))
1523     return false;
1524 
1525   // Visit our exit blocks in order of dominance.  Since all exits
1526   // dominate the latch, there is a total dominance order
1527   // between them.
1528   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1529                // std::sort sorts in ascending order, so we want the inverse of
1530                // the normal dominance relation.
1531                if (A == B) return false;
1532                if (DT->properlyDominates(A, B))
1533                  return true;
1534                else {
1535                  assert(DT->properlyDominates(B, A) &&
1536                         "expected total dominance order!");
1537                  return false;
1538                }
1539   });
1540 #ifndef NDEBUG
1541   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1542     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1543   }
1544 #endif
1545 
1546   bool Changed = false;
1547   bool SkipLastIter = false;
1548   SmallSet<const SCEV*, 8> DominatingExitCounts;
1549   for (BasicBlock *ExitingBB : ExitingBlocks) {
1550     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1551     if (isa<SCEVCouldNotCompute>(ExitCount)) {
1552       // Okay, we do not know the exit count here. Can we at least prove that it
1553       // will remain the same within iteration space?
1554       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1555       auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
1556         return optimizeLoopExitWithUnknownExitCount(
1557             L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
1558             Rewriter, DeadInsts);
1559       };
1560 
1561       // TODO: We might have proved that we can skip the last iteration for
1562       // this check. In this case, we only want to check the condition on the
1563       // pre-last iteration (MaxExitCount - 1). However, there is a nasty
1564       // corner case:
1565       //
1566       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1567       //
1568       // If we could not prove that len != 0, then we also could not prove that
1569     // (len - 1) is not UINT_MAX. If we simply query (len - 1), then
1570       // OptimizeCond will likely not prove anything for it, even if it could
1571       // prove the same fact for len.
1572       //
1573       // As a temporary solution, we query both last and pre-last iterations in
1574     // the hope that we will be able to prove triviality for at least one of
1575       // them. We can stop querying MaxExitCount for this case once SCEV
1576       // understands that (MaxExitCount - 1) will not overflow here.
1577       if (OptimizeCond(false, false) || OptimizeCond(true, false))
1578         Changed = true;
1579       else if (SkipLastIter)
1580         if (OptimizeCond(false, true) || OptimizeCond(true, true))
1581           Changed = true;
1582       continue;
1583     }
1584 
1585     if (MaxExitCount == ExitCount)
1586       // If the loop has more than 1 iteration, all further checks will be
1587       // executed one fewer time.
1588       SkipLastIter = true;
1589 
1590     // If we know we'd exit on the first iteration, rewrite the exit to
1591     // reflect this.  This does not imply the loop must exit through this
1592     // exit; there may be an earlier one taken on the first iteration.
1593     // We know that the backedge can't be taken, so we replace all
1594     // the header PHIs with values coming from the preheader.
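    // E.g. (sketch): for a sole exit guarded by (icmp ult %iv, %n) with %n
    // known to be zero on entry, the exit count is zero; the branch is
    // folded to exit unconditionally and each header phi collapses to its
    // preheader incoming value.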
1595     if (ExitCount->isZero()) {
1596       foldExit(L, ExitingBB, true, DeadInsts);
1597       replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
1598       Changed = true;
1599       continue;
1600     }
1601 
1602     assert(ExitCount->getType()->isIntegerTy() &&
1603            MaxExitCount->getType()->isIntegerTy() &&
1604            "Exit counts must be integers");
1605 
1606     Type *WiderType =
1607       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
1608     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
1609     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
1610     assert(MaxExitCount->getType() == ExitCount->getType());
1611 
1612     // Can we prove that some other exit must be taken strictly before this
1613     // one?
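    // E.g. (sketch): if a dominating exit is known to be taken by iteration
    // 10 while this exit can only be taken at iteration 20, this exit is
    // never reached and its branch can be folded towards the in-loop
    // successor.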
1614     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
1615                                      MaxExitCount, ExitCount)) {
1616       foldExit(L, ExitingBB, false, DeadInsts);
1617       Changed = true;
1618       continue;
1619     }
1620 
1621     // As we run, keep track of which exit counts we've encountered.  If we
1622     // find a duplicate, we've found an exit which would have exited on the
1623     // exiting iteration, but (from the visit order) strictly follows another
1624     // which does the same and is thus dead.
1625     if (!DominatingExitCounts.insert(ExitCount).second) {
1626       foldExit(L, ExitingBB, false, DeadInsts);
1627       Changed = true;
1628       continue;
1629     }
1630 
1631     // TODO: There might be another opportunity to leverage SCEV's reasoning
1632     // here.  If we kept track of the min of the dominating exits seen so
1633     // far, we could discharge exits with EC >= MDEC. This is less powerful
1634     // than the existing transform (since later exits aren't considered), but
1635     // potentially more powerful for any case where SCEV can prove a >=u b,
1636     // but neither a == b nor a >u b.  Such a case is not currently known.
1637   }
1638   return Changed;
1639 }
1640 
1641 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1642   SmallVector<BasicBlock*, 16> ExitingBlocks;
1643   L->getExitingBlocks(ExitingBlocks);
1644 
1645   // Finally, see if we can rewrite our exit conditions into a loop invariant
1646   // form. If we have a read-only loop, and we can tell that we must exit down
1647   // a path which does not need any of the values computed within the loop, we
1648   // can rewrite the loop to exit on the first iteration.  Note that this
1649   // neither a) tells us the loop exits on the first iteration (unless
1650   // *all* exits are predicateable) nor b) tells us *which* exit might be
1651   // taken.  This transformation looks a lot like a restricted form of dead
1652   // loop elimination, but restricted to read-only loops and without
1653   // necessarily needing to kill the loop entirely.
1654   if (!LoopPredication)
1655     return false;
1656 
1657   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1658   // through *explicit* control flow.  We have to eliminate the possibility of
1659   // implicit exits (see below) before we know it's truly exact.
1660   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1661   if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
1662     return false;
1663 
1664   assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1665   assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1666 
1667   auto BadExit = [&](BasicBlock *ExitingBB) {
1668     // If our exiting block exits multiple loops, we can only rewrite the
1669     // innermost one.  Otherwise, we're changing how many times the innermost
1670     // loop runs before it exits.
1671     if (LI->getLoopFor(ExitingBB) != L)
1672       return true;
1673 
1674     // Can't rewrite non-branch yet.
1675     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1676     if (!BI)
1677       return true;
1678 
1679     // If already constant, nothing to do.
1680     if (isa<Constant>(BI->getCondition()))
1681       return true;
1682 
1683     // If the exit block has phis, we need to be able to compute the values
1684     // within the loop which contains them.  This assumes trivial LCSSA phis
1685     // have already been removed; TODO: generalize
1686     BasicBlock *ExitBlock =
1687     BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1688     if (!ExitBlock->phis().empty())
1689       return true;
1690 
1691     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1692     if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
1693       return true;
1694 
1695     assert(SE->isLoopInvariant(ExitCount, L) &&
1696            "Exit count must be loop invariant");
1697     assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1698     return false;
1699   };
1700 
1701   // If we have any exits which can't be predicated themselves, then we can't
1702   // predicate any exit which isn't guaranteed to execute before it.  Consider
1703   // two exits (a) and (b) which would both exit on the same iteration.  If we
1704   // can predicate (b), but not (a), and (a) precedes (b) along some path, then
1705   // we could convert a loop from exiting through (a) to one exiting through
1706   // (b).  Note that this problem exists only for exits with the same exit
1707   // count, and we could be more aggressive when exit counts are known unequal.
1708   llvm::sort(ExitingBlocks,
1709             [&](BasicBlock *A, BasicBlock *B) {
1710               // std::sort sorts in ascending order, so we want the inverse of
1711               // the normal dominance relation, plus a tie breaker for blocks
1712               // unordered by dominance.
1713               if (DT->properlyDominates(A, B)) return true;
1714               if (DT->properlyDominates(B, A)) return false;
1715               return A->getName() < B->getName();
1716             });
1717   // Check to see if our exit blocks are a total order (i.e. a linear chain of
1718   // exits before the backedge).  If they aren't, reasoning about reachability
1719   // is complicated and we choose not to do so for now.
1720   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1721     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1722       return false;
1723 
1724   // Given our sorted total order, we know that exit[j] must be evaluated
1725   // after all exit[i] such that j > i.
1726   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1727     if (BadExit(ExitingBlocks[i])) {
1728       ExitingBlocks.resize(i);
1729       break;
1730     }
1731 
1732   if (ExitingBlocks.empty())
1733     return false;
1734 
1735   // We rely on not being able to reach an exiting block on a later iteration
1736   // than its statically computed exit count.  The implementation of
1737   // getExitCount currently has this invariant, but assert it here so that
1738   // breakage is obvious if this ever changes.
1739   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1740         return DT->dominates(ExitingBB, L->getLoopLatch());
1741       }));
1742 
1743   // At this point, ExitingBlocks consists of only those blocks which are
1744   // predicatable.  Given that, we know we have at least one exit we can
1745   // predicate if the loop doesn't have side effects and doesn't have any
1746   // implicit exits (because then our exact BTC isn't actually exact).
1747   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
1748   // suggestions on how to improve this?  I can obviously bail out for outer
1749   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
1750   // is that enough for *all* side effects?
1751   for (BasicBlock *BB : L->blocks())
1752     for (auto &I : *BB)
1753       // TODO:isGuaranteedToTransfer
1754       if (I.mayHaveSideEffects())
1755         return false;
1756 
1757   bool Changed = false;
1758   // Finally, do the actual predication for all predicatable blocks.  A couple
1759   // of notes here:
1760   // 1) We don't bother to constant fold dominated exits with identical exit
1761   //    counts; that's simply a form of CSE/equality propagation and we leave
1762   //    it for dedicated passes.
1763   // 2) We insert the comparison at the branch.  Hoisting introduces additional
1764   //    legality constraints and we leave that to dedicated logic.  We want to
1765   //    predicate even if we can't insert a loop invariant expression as
1766   //    peeling or unrolling will likely reduce the cost of the otherwise loop
1767   //    varying check.
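  // Illustrative sketch (hypothetical IR): an exit testing a loop-varying
  // condition such as
  //   br (icmp ult %iv, %len), %stay, %exit
  // with exit count %ec is rewritten against the exact backedge-taken count
  // %btc, both expanded in the preheader:
  //   br (icmp ne %ec, %btc), %stay, %exit
  // i.e. the branch now exits exactly when this is the first exit whose
  // count has been reached.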
1768   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1769   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1770   Value *ExactBTCV = nullptr; // Lazily generated if needed.
1771   for (BasicBlock *ExitingBB : ExitingBlocks) {
1772     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1773 
1774     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1775     Value *NewCond;
1776     if (ExitCount == ExactBTC) {
1777       NewCond = L->contains(BI->getSuccessor(0)) ?
1778         B.getFalse() : B.getTrue();
1779     } else {
1780       Value *ECV = Rewriter.expandCodeFor(ExitCount);
1781       if (!ExactBTCV)
1782         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1783       Value *RHS = ExactBTCV;
1784       if (ECV->getType() != RHS->getType()) {
1785         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1786         ECV = B.CreateZExt(ECV, WiderTy);
1787         RHS = B.CreateZExt(RHS, WiderTy);
1788       }
1789       auto Pred = L->contains(BI->getSuccessor(0)) ?
1790         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1791       NewCond = B.CreateICmp(Pred, ECV, RHS);
1792     }
1793     Value *OldCond = BI->getCondition();
1794     BI->setCondition(NewCond);
1795     if (OldCond->use_empty())
1796       DeadInsts.emplace_back(OldCond);
1797     Changed = true;
1798   }
1799 
1800   return Changed;
1801 }
1802 
1803 //===----------------------------------------------------------------------===//
1804 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1805 //===----------------------------------------------------------------------===//
1806 
1807 bool IndVarSimplify::run(Loop *L) {
1808   // We need (and expect!) the incoming loop to be in LCSSA.
1809   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1810          "LCSSA required to run indvars!");
1811 
1812   // If LoopSimplify form is not available, stay out of trouble. Some notes:
1813   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
1814   //    canonicalization can be a pessimization without LSR to "clean up"
1815   //    afterwards.
1816   //  - We depend on having a preheader; in particular,
1817   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
1818   //    and we're in trouble if we can't find the induction variable even when
1819   //    we've manually inserted one.
1820   //  - LFTR relies on having a single backedge.
1821   if (!L->isLoopSimplifyForm())
1822     return false;
1823 
1824 #ifndef NDEBUG
1825   // Used below for a consistency check only
1826   // Note: Since the result returned by ScalarEvolution may depend on the order
1827   // in which previous results are added to its cache, the call to
1828   // getBackedgeTakenCount() may change following SCEV queries.
1829   const SCEV *BackedgeTakenCount;
1830   if (VerifyIndvars)
1831     BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1832 #endif
1833 
1834   bool Changed = false;
1835   // If there are any floating-point recurrences, attempt to
1836   // transform them to use integer recurrences.
1837   Changed |= rewriteNonIntegerIVs(L);
1838 
1839   // Create a rewriter object which we'll use to transform the code with.
1840   SCEVExpander Rewriter(*SE, DL, "indvars");
1841 #ifndef NDEBUG
1842   Rewriter.setDebugType(DEBUG_TYPE);
1843 #endif
1844 
1845   // Eliminate redundant IV users.
1846   //
1847   // Simplification works best when run before other consumers of SCEV. We
1848   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1849   // other expressions involving loop IVs have been evaluated. This helps SCEV
1850   // set no-wrap flags before normalizing sign/zero extension.
1851   Rewriter.disableCanonicalMode();
1852   Changed |= simplifyAndExtend(L, Rewriter, LI);
1853 
1854   // Check to see if we can compute the final value of any expressions
1855   // that are recurrent in the loop, and substitute the exit values from the
1856   // loop into any instructions outside of the loop that use the final values
1857   // of the current expressions.
1858   if (ReplaceExitValue != NeverRepl) {
1859     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1860                                              ReplaceExitValue, DeadInsts)) {
1861       NumReplaced += Rewrites;
1862       Changed = true;
1863     }
1864   }
1865 
1866   // Eliminate redundant IV cycles.
1867   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
1868 
1869   // Try to convert exit conditions to unsigned
1870   // Note: Handles invalidation internally if needed.
1871   Changed |= canonicalizeExitCondition(L);
1872 
1873   // Try to eliminate loop exits based on analyzeable exit counts
1874   if (optimizeLoopExits(L, Rewriter))  {
1875     Changed = true;
1876     // Given we've changed exit counts, notify SCEV
1877     // Some nested loops may share same folded exit basic block,
1878     // thus we need to notify top most loop.
1879     SE->forgetTopmostLoop(L);
1880   }
1881 
1882   // Try to form loop invariant tests for loop exits by changing how many
1883   // iterations of the loop run when that is unobservable.
1884   if (predicateLoopExits(L, Rewriter)) {
1885     Changed = true;
1886     // Given we've changed exit counts, notify SCEV
1887     SE->forgetLoop(L);
1888   }
1889 
1890   // If we have a trip count expression, rewrite the loop's exit condition
1891   // using it.
1892   if (!DisableLFTR) {
1893     BasicBlock *PreHeader = L->getLoopPreheader();
1894     BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
1895 
1896     SmallVector<BasicBlock*, 16> ExitingBlocks;
1897     L->getExitingBlocks(ExitingBlocks);
1898     for (BasicBlock *ExitingBB : ExitingBlocks) {
1899       // Can't rewrite non-branch yet.
1900       if (!isa<BranchInst>(ExitingBB->getTerminator()))
1901         continue;
1902 
1903       // If our exiting block exits multiple loops, we can only rewrite the
1904       // innermost one.  Otherwise, we're changing how many times the innermost
1905       // loop runs before it exits.
1906       if (LI->getLoopFor(ExitingBB) != L)
1907         continue;
1908 
1909       if (!needsLFTR(L, ExitingBB))
1910         continue;
1911 
1912       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1913       if (isa<SCEVCouldNotCompute>(ExitCount))
1914         continue;
1915 
1916       // This was handled above, but as we form SCEVs, we can sometimes refine
1917       // existing ones; this allows exit counts to be folded to zero which
1918       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
1919       // until stable to handle cases like this better.
1920       if (ExitCount->isZero())
1921         continue;
1922 
1923       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
1924       if (!IndVar)
1925         continue;
1926 
1927       // Avoid high cost expansions.  Note: This heuristic is questionable in
1928       // that our definition of "high cost" is not exactly principled.
1929       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
1930                                        TTI, PreHeaderBR))
1931         continue;
1932 
1933       // Check preconditions for proper SCEVExpander operation. SCEV does not
1934       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
1935       // any pass that uses the SCEVExpander must do it. This does not work
1936       // well for loop passes because SCEVExpander makes assumptions about
1937       // all loops, while LoopPassManager only forces the current loop to be
1938       // simplified.
1939       //
1940       // FIXME: SCEV expansion has no way to bail out, so the caller must
1941       // explicitly check any assumptions made by SCEV. Brittle.
1942       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
1943       if (!AR || AR->getLoop()->getLoopPreheader())
1944         Changed |= linearFunctionTestReplace(L, ExitingBB,
1945                                              ExitCount, IndVar,
1946                                              Rewriter);
1947     }
1948   }
1949   // Clear the rewriter cache, because values that are in the rewriter's cache
1950   // can be deleted in the loop below, causing the AssertingVH in the cache to
1951   // trigger.
1952   Rewriter.clear();
1953 
1954   // Now that we're done iterating through lists, clean up any instructions
1955   // which are now dead.
1956   while (!DeadInsts.empty()) {
1957     Value *V = DeadInsts.pop_back_val();
1958 
1959     if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
1960       Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
1961     else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
1962       Changed |=
1963           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
1964   }
1965 
1966   // The Rewriter may not be used from this point on.
1967 
1968   // Loop-invariant instructions in the preheader that aren't used in the
1969   // loop may be sunk below the loop to reduce register pressure.
1970   Changed |= sinkUnusedInvariants(L);
1971 
1972   // rewriteFirstIterationLoopExitValues does not rely on the computation of
1973   // trip count and therefore can further simplify exit values in addition to
1974   // rewriteLoopExitValues.
1975   Changed |= rewriteFirstIterationLoopExitValues(L);
1976 
1977   // Clean up dead instructions.
1978   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
1979 
1980   // Check a post-condition.
1981   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1982          "Indvars did not preserve LCSSA!");
1983 
1984   // Verify that LFTR and any other changes have not interfered with SCEV's
1985   // ability to compute trip count.  We may have *changed* the exit count, but
1986   // only by reducing it.
1987 #ifndef NDEBUG
1988   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
1989     SE->forgetLoop(L);
1990     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
1991     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
1992         SE->getTypeSizeInBits(NewBECount->getType()))
1993       NewBECount = SE->getTruncateOrNoop(NewBECount,
1994                                          BackedgeTakenCount->getType());
1995     else
1996       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
1997                                                  NewBECount->getType());
1998     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
1999                                  NewBECount) && "indvars must preserve SCEV");
2000   }
2001   if (VerifyMemorySSA && MSSAU)
2002     MSSAU->getMemorySSA()->verifyMemorySSA();
2003 #endif
2004 
2005   return Changed;
2006 }
2007 
2008 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2009                                           LoopStandardAnalysisResults &AR,
2010                                           LPMUpdater &) {
2011   Function *F = L.getHeader()->getParent();
2012   const DataLayout &DL = F->getParent()->getDataLayout();
2013 
2014   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2015                      WidenIndVars && AllowIVWidening);
2016   if (!IVS.run(&L))
2017     return PreservedAnalyses::all();
2018 
2019   auto PA = getLoopPassPreservedAnalyses();
2020   PA.preserveSet<CFGAnalyses>();
2021   if (AR.MSSA)
2022     PA.preserve<MemorySSAAnalysis>();
2023   return PA;
2024 }
2025 
2026 namespace {
2027 
2028 struct IndVarSimplifyLegacyPass : public LoopPass {
2029   static char ID; // Pass identification, replacement for typeid
2030 
2031   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2032     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2033   }
2034 
2035   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2036     if (skipLoop(L))
2037       return false;
2038 
2039     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2040     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2041     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2042     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2043     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2044     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2045     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2046     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2047     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2048     MemorySSA *MSSA = nullptr;
2049     if (MSSAAnalysis)
2050       MSSA = &MSSAAnalysis->getMSSA();
2051 
2052     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
2053     return IVS.run(L);
2054   }
2055 
2056   void getAnalysisUsage(AnalysisUsage &AU) const override {
2057     AU.setPreservesCFG();
2058     AU.addPreserved<MemorySSAWrapperPass>();
2059     getLoopAnalysisUsage(AU);
2060   }
2061 };
2062 
2063 } // end anonymous namespace
2064 
2065 char IndVarSimplifyLegacyPass::ID = 0;
2066 
2067 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2068                       "Induction Variable Simplification", false, false)
2069 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2070 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2071                     "Induction Variable Simplification", false, false)
2072 
2073 Pass *llvm::createIndVarSimplifyPass() {
2074   return new IndVarSimplifyLegacyPass();
2075 }
2076