xref: /llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision fca0218875f5117110d38b9cd7503bc2789693d3)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/MemorySSA.h"
42 #include "llvm/Analysis/MemorySSAUpdater.h"
43 #include "llvm/Analysis/ScalarEvolution.h"
44 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
45 #include "llvm/Analysis/TargetLibraryInfo.h"
46 #include "llvm/Analysis/TargetTransformInfo.h"
47 #include "llvm/Analysis/ValueTracking.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/ConstantRange.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Operator.h"
64 #include "llvm/IR/PassManager.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/Use.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/IR/ValueHandle.h"
71 #include "llvm/InitializePasses.h"
72 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/CommandLine.h"
75 #include "llvm/Support/Compiler.h"
76 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/ErrorHandling.h"
78 #include "llvm/Support/MathExtras.h"
79 #include "llvm/Support/raw_ostream.h"
80 #include "llvm/Transforms/Scalar.h"
81 #include "llvm/Transforms/Scalar/LoopPassManager.h"
82 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
83 #include "llvm/Transforms/Utils/Local.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
86 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
87 #include <cassert>
88 #include <cstdint>
89 #include <utility>
90 
91 using namespace llvm;
92 using namespace PatternMatch;
93 
94 #define DEBUG_TYPE "indvars"
95 
// Pass-wide statistics (visible via -stats).
96 STATISTIC(NumWidened     , "Number of indvars widened");
97 STATISTIC(NumReplaced    , "Number of exit values replaced");
98 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
99 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
100 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
101 
102 // Trip count verification can be enabled by default under NDEBUG if we
103 // implement a strong expression equivalence checker in SCEV. Until then, we
104 // use the verify-indvars flag, which may assert in some cases.
105 static cl::opt<bool> VerifyIndvars(
106     "verify-indvars", cl::Hidden,
107     cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
108              "effect in release builds. (Note: this adds additional SCEV "
109              "queries potentially changing the analysis result)"));
110 
// Policy for rewriting loop-exit values outside the loop; defaults to
// replacing only when the expansion is cheap.
111 static cl::opt<ReplaceExitVal> ReplaceExitValue(
112     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
113     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
114     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
115                clEnumValN(OnlyCheapRepl, "cheap",
116                           "only replace exit value when the cost is cheap"),
117                clEnumValN(NoHardUse, "noharduse",
118                           "only replace exit values when loop def likely dead"),
119                clEnumValN(AlwaysRepl, "always",
120                           "always replace exit value whenever possible")));
121 
122 static cl::opt<bool> UsePostIncrementRanges(
123   "indvars-post-increment-ranges", cl::Hidden,
124   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
125   cl::init(true));
126 
// Escape hatch: disables linear function test replacement entirely.
127 static cl::opt<bool>
128 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
129             cl::desc("Disable Linear Function Test Replace optimization"));
130 
131 static cl::opt<bool>
132 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
133                 cl::desc("Predicate conditions in read only loops"));
134 
// Gates the IV-widening machinery (see simplifyAndExtend below).
135 static cl::opt<bool>
136 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
137                 cl::desc("Allow widening of indvars to eliminate s/zext"));
138 
139 namespace {
140 
141 struct RewritePhi;
142 
143 class IndVarSimplify {
  // Analyses and context supplied by the driver; not owned here.
144   LoopInfo *LI;
145   ScalarEvolution *SE;
146   DominatorTree *DT;
147   const DataLayout &DL;
148   TargetLibraryInfo *TLI;
149   const TargetTransformInfo *TTI;
  // Non-null only when a MemorySSA analysis was supplied to the constructor.
150   std::unique_ptr<MemorySSAUpdater> MSSAU;
151 
  // Instructions queued for deletion; WeakTrackingVH tolerates RAUW/erasure.
152   SmallVector<WeakTrackingVH, 16> DeadInsts;
  // Whether IV widening is enabled for this invocation.
153   bool WidenIndVars;
154 
  // Replace the floating-point IV rooted at PH with an integer IV when its
  // start/step/exit values are exactly representable as integers.
155   bool handleFloatingPointIV(Loop *L, PHINode *PH);
  // Apply handleFloatingPointIV to every phi in L's header.
156   bool rewriteNonIntegerIVs(Loop *L);
157 
  // Simplify IV users and widen IVs to a fixed point (see definition below).
158   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
159   /// See if we can convert an exit condition from signed to unsigned.
160   /// (See inline comment about why this is duplicated from simplifyAndExtend)
161   bool canonicalizeExitCondition(Loop *L);
162   /// Try to eliminate loop exits based on analyzeable exit counts
163   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
164   /// Try to form loop invariant tests for loop exits by changing how many
165   /// iterations of the loop run when that is unobservable.
166   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
167 
  // Rewrite exit-block phis whose value is known because the exit can only
  // be taken on the first iteration (see definition below).
168   bool rewriteFirstIterationLoopExitValues(Loop *L);
169 
  // Canonicalize ExitingBB's exit test to compare an IV against its exit
  // value; definition not visible in this chunk.
170   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
171                                  const SCEV *ExitCount,
172                                  PHINode *IndVar, SCEVExpander &Rewriter);
173 
  // Definition not visible in this chunk.
174   bool sinkUnusedInvariants(Loop *L);
175 
176 public:
  // MSSA may be null; MemorySSA is kept updated only when it is provided.
177   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
178                  const DataLayout &DL, TargetLibraryInfo *TLI,
179                  TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
180       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
181         WidenIndVars(WidenIndVars) {
182     if (MSSA)
183       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
184   }
185 
  // Entry point for the transformation over loop L.
186   bool run(Loop *L);
187 };
188 
189 } // end anonymous namespace
190 
191 //===----------------------------------------------------------------------===//
192 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
193 //===----------------------------------------------------------------------===//
194 
195 /// Convert APF to an integer, if possible.
196 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
197   bool isExact = false;
198   // See if we can convert this to an int64_t
199   uint64_t UIntVal;
200   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
201                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
202       !isExact)
203     return false;
204   IntVal = UIntVal;
205   return true;
206 }
207 
208 /// If the loop has floating induction variable then insert corresponding
209 /// integer induction variable if possible.
210 /// For example,
211 /// for(double i = 0; i < 10000; ++i)
212 ///   bar(i)
213 /// is converted into
214 /// for(int i = 0; i < 10000; ++i)
215 ///   bar((double)i);
216 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  // A header phi has two incoming edges (preheader and backedge).
  // L->contains(...) yields 0 or 1, so the two indices are complements.
217   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
218   unsigned BackEdge     = IncomingEdge^1;
219 
220   // Check incoming value.
221   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
222 
223   int64_t InitValue;
224   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
225     return false;
226 
227   // Check IV increment. Reject this PN if increment operation is not
228   // an add or increment value can not be represented by an integer.
229   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
230   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
231 
232   // If this is not an add of the PHI with a constantfp, or if the constant fp
233   // is not an integer, bail out.
234   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
235   int64_t IncValue;
236   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
237       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
238     return false;
239 
240   // Check Incr uses. One user is PN and the other user is an exit condition
241   // used by the conditional terminator.
242   Value::user_iterator IncrUse = Incr->user_begin();
243   Instruction *U1 = cast<Instruction>(*IncrUse++);
244   if (IncrUse == Incr->user_end()) return false;
245   Instruction *U2 = cast<Instruction>(*IncrUse++);
246   if (IncrUse != Incr->user_end()) return false;
247 
248   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
249   // only used by a branch, we can't transform it.
250   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
251   if (!Compare)
252     Compare = dyn_cast<FCmpInst>(U2);
253   if (!Compare || !Compare->hasOneUse() ||
254       !isa<BranchInst>(Compare->user_back()))
255     return false;
256 
257   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
258 
259   // We need to verify that the branch actually controls the iteration count
260   // of the loop.  If not, the new IV can overflow and no one will notice.
261   // The branch block must be in the loop and one of the successors must be out
262   // of the loop.
263   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
264   if (!L->contains(TheBr->getParent()) ||
265       (L->contains(TheBr->getSuccessor(0)) &&
266        L->contains(TheBr->getSuccessor(1))))
267     return false;
268 
269   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
270   // transform it.
271   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
272   int64_t ExitValue;
273   if (ExitValueVal == nullptr ||
274       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
275     return false;
276 
277   // Find new predicate for integer comparison.
  // Ordered and unordered forms of each comparison map to the same signed
  // integer predicate.
278   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
279   switch (Compare->getPredicate()) {
280   default: return false;  // Unknown comparison.
281   case CmpInst::FCMP_OEQ:
282   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
283   case CmpInst::FCMP_ONE:
284   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
285   case CmpInst::FCMP_OGT:
286   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
287   case CmpInst::FCMP_OGE:
288   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
289   case CmpInst::FCMP_OLT:
290   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
291   case CmpInst::FCMP_OLE:
292   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
293   }
294 
295   // We convert the floating point induction variable to a signed i32 value if
296   // we can.  This is only safe if the comparison will not overflow in a way
297   // that won't be trapped by the integer equivalent operations.  Check for this
298   // now.
299   // TODO: We could use i64 if it is native and the range requires it.
300 
301   // The start/stride/exit values must all fit in signed i32.
302   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
303     return false;
304 
305   // If not actually striding (add x, 0.0), avoid touching the code.
306   if (IncValue == 0)
307     return false;
308 
309   // Positive and negative strides have different safety conditions.
310   if (IncValue > 0) {
311     // If we have a positive stride, we require the init to be less than the
312     // exit value.
313     if (InitValue >= ExitValue)
314       return false;
315 
316     uint32_t Range = uint32_t(ExitValue-InitValue);
317     // Check for infinite loop, either:
318     // while (i <= Exit) or until (i > Exit)
319     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
320       if (++Range == 0) return false;  // Range overflows.
321     }
322 
323     unsigned Leftover = Range % uint32_t(IncValue);
324 
325     // If this is an equality comparison, we require that the strided value
326     // exactly land on the exit value, otherwise the IV condition will wrap
327     // around and do things the fp IV wouldn't.
328     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
329         Leftover != 0)
330       return false;
331 
332     // If the stride would wrap around the i32 before exiting, we can't
333     // transform the IV.
334     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
335       return false;
336   } else {
337     // If we have a negative stride, we require the init to be greater than the
338     // exit value.
339     if (InitValue <= ExitValue)
340       return false;
341 
342     uint32_t Range = uint32_t(InitValue-ExitValue);
343     // Check for infinite loop, either:
344     // while (i >= Exit) or until (i < Exit)
345     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
346       if (++Range == 0) return false;  // Range overflows.
347     }
348 
349     unsigned Leftover = Range % uint32_t(-IncValue);
350 
351     // If this is an equality comparison, we require that the strided value
352     // exactly land on the exit value, otherwise the IV condition will wrap
353     // around and do things the fp IV wouldn't.
354     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
355         Leftover != 0)
356       return false;
357 
358     // If the stride would wrap around the i32 before exiting, we can't
359     // transform the IV.
360     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
361       return false;
362   }
363 
  // All safety checks passed: materialize the equivalent integer IV.
364   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
365 
366   // Insert new integer induction variable.
367   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
368   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
369                       PN->getIncomingBlock(IncomingEdge));
370 
371   Value *NewAdd =
372     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
373                               Incr->getName()+".int", Incr);
374   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
375 
376   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
377                                       ConstantInt::get(Int32Ty, ExitValue),
378                                       Compare->getName());
379 
380   // In the following deletions, PN may become dead and may be deleted.
381   // Use a WeakTrackingVH to observe whether this happens.
382   WeakTrackingVH WeakPH = PN;
383 
384   // Delete the old floating point exit comparison.  The branch starts using the
385   // new comparison.
386   NewCompare->takeName(Compare);
387   Compare->replaceAllUsesWith(NewCompare);
388   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
389 
390   // Delete the old floating point increment.
391   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
392   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
393 
394   // If the FP induction variable still has uses, this is because something else
395   // in the loop uses its value.  In order to canonicalize the induction
396   // variable, we chose to eliminate the IV and rewrite it in terms of an
397   // int->fp cast.
398   //
399   // We give preference to sitofp over uitofp because it is faster on most
400   // platforms.
401   if (WeakPH) {
402     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
403                                  &*PN->getParent()->getFirstInsertionPt());
404     PN->replaceAllUsesWith(Conv);
405     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
406   }
407   return true;
408 }
409 
410 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
411   // First step.  Check to see if there are any floating-point recurrences.
412   // If there are, change them into integer recurrences, permitting analysis by
413   // the SCEV routines.
414   BasicBlock *Header = L->getHeader();
415 
416   SmallVector<WeakTrackingVH, 8> PHIs;
417   for (PHINode &PN : Header->phis())
418     PHIs.push_back(&PN);
419 
420   bool Changed = false;
421   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
422     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
423       Changed |= handleFloatingPointIV(L, PN);
424 
425   // If the loop previously had floating-point IV, ScalarEvolution
426   // may not have been able to compute a trip count. Now that we've done some
427   // re-writing, the trip count may be computable.
428   if (Changed)
429     SE->forgetLoop(L);
430   return Changed;
431 }
432 
433 //===---------------------------------------------------------------------===//
434 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
435 // they will exit at the first iteration.
436 //===---------------------------------------------------------------------===//
437 
438 /// Check to see if this loop has loop invariant conditions which lead to loop
439 /// exits. If so, we know that if the exit path is taken, it is at the first
440 /// loop iteration. This lets us predict exit values of PHI nodes that live in
441 /// loop header.
442 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
443   // Verify the input to the pass is already in LCSSA form.
444   assert(L->isLCSSAForm(*DT));
445 
446   SmallVector<BasicBlock *, 8> ExitBlocks;
447   L->getUniqueExitBlocks(ExitBlocks);
448 
449   bool MadeAnyChanges = false;
450   for (auto *ExitBB : ExitBlocks) {
451     // If there are no more PHI nodes in this exit block, then no more
452     // values defined inside the loop are used on this path.
453     for (PHINode &PN : ExitBB->phis()) {
454       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
455            IncomingValIdx != E; ++IncomingValIdx) {
456         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
457 
458         // Can we prove that the exit must run on the first iteration if it
459         // runs at all?  (i.e. early exits are fine for our purposes, but
460         // traces which lead to this exit being taken on the 2nd iteration
461         // aren't.)  Note that this is about whether the exit branch is
462         // executed, not about whether it is taken.
463         if (!L->getLoopLatch() ||
464             !DT->dominates(IncomingBB, L->getLoopLatch()))
465           continue;
466 
467         // Get condition that leads to the exit path.
468         auto *TermInst = IncomingBB->getTerminator();
469 
470         Value *Cond = nullptr;
471         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
472           // Must be a conditional branch, otherwise the block
473           // should not be in the loop.
474           Cond = BI->getCondition();
475         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
476           Cond = SI->getCondition();
477         else
478           continue;
479 
        // A loop-variant condition could become true only on a later
        // iteration; only an invariant condition guarantees that this exit,
        // if taken at all, is taken on the first iteration.
480         if (!L->isLoopInvariant(Cond))
481           continue;
482 
483         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
484 
485         // Only deal with PHIs in the loop header.
486         if (!ExitVal || ExitVal->getParent() != L->getHeader())
487           continue;
488 
489         // If ExitVal is a PHI on the loop header, then we know its
490         // value along this exit because the exit can only be taken
491         // on the first iteration.
492         auto *LoopPreheader = L->getLoopPreheader();
493         assert(LoopPreheader && "Invalid loop");
494         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
495         if (PreheaderIdx != -1) {
496           assert(ExitVal->getParent() == L->getHeader() &&
497                  "ExitVal must be in loop header");
498           MadeAnyChanges = true;
499           PN.setIncomingValue(IncomingValIdx,
500                               ExitVal->getIncomingValue(PreheaderIdx));
          // Drop SCEV's cached information about the rewritten phi.
501           SE->forgetValue(&PN);
502         }
503       }
504     }
505   }
506   return MadeAnyChanges;
507 }
508 
509 //===----------------------------------------------------------------------===//
510 //  IV Widening - Extend the width of an IV to cover its widest uses.
511 //===----------------------------------------------------------------------===//
512 
513 /// Update information about the induction variable that is extended by this
514 /// sign or zero extend operation. This is used to determine the final width of
515 /// the IV before actually widening it.
516 static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
517                         ScalarEvolution *SE,
518                         const TargetTransformInfo *TTI) {
519   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
520   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
521     return;
522 
523   Type *Ty = Cast->getType();
524   uint64_t Width = SE->getTypeSizeInBits(Ty);
525   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
526     return;
527 
528   // Check that `Cast` actually extends the induction variable (we rely on this
529   // later).  This takes care of cases where `Cast` is extending a truncation of
530   // the narrow induction variable, and thus can end up being narrower than the
531   // "narrow" induction variable.
532   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
533   if (NarrowIVWidth >= Width)
534     return;
535 
536   // Cast is either an sext or zext up to this point.
537   // We should not widen an indvar if arithmetics on the wider indvar are more
538   // expensive than those on the narrower indvar. We check only the cost of ADD
539   // because at least an ADD is required to increment the induction variable. We
540   // could compute more comprehensively the cost of all instructions on the
541   // induction variable when necessary.
542   if (TTI &&
543       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
544           TTI->getArithmeticInstrCost(Instruction::Add,
545                                       Cast->getOperand(0)->getType())) {
546     return;
547   }
548 
549   if (!WI.WidestNativeType) {
550     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
551     WI.IsSigned = IsSigned;
552     return;
553   }
554 
555   // We extend the IV to satisfy the sign of its first user, arbitrarily.
556   if (WI.IsSigned != IsSigned)
557     return;
558 
559   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
560     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
561 }
562 
563 //===----------------------------------------------------------------------===//
564 //  Live IV Reduction - Minimize IVs live across the loop.
565 //===----------------------------------------------------------------------===//
566 
567 //===----------------------------------------------------------------------===//
568 //  Simplification of IV users based on SCEV evaluation.
569 //===----------------------------------------------------------------------===//
570 
571 namespace {
572 
573 class IndVarSimplifyVisitor : public IVVisitor {
574   ScalarEvolution *SE;
575   const TargetTransformInfo *TTI;
576   PHINode *IVPhi;
577 
578 public:
579   WideIVInfo WI;
580 
581   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
582                         const TargetTransformInfo *TTI,
583                         const DominatorTree *DTree)
584     : SE(SCEV), TTI(TTI), IVPhi(IV) {
585     DT = DTree;
586     WI.NarrowIV = IVPhi;
587   }
588 
589   // Implement the interface used by simplifyUsersOfIV.
590   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
591 };
592 
593 } // end anonymous namespace
594 
595 /// Iteratively perform simplification on a worklist of IV users. Each
596 /// successive simplification may push more users which may themselves be
597 /// candidates for simplification.
598 ///
599 /// Sign/Zero extend elimination is interleaved with IV simplification.
600 bool IndVarSimplify::simplifyAndExtend(Loop *L,
601                                        SCEVExpander &Rewriter,
602                                        LoopInfo *LI) {
603   SmallVector<WideIVInfo, 8> WideIVs;
604 
  // Detect whether the module declares and uses llvm.experimental.guard;
  // the flag is forwarded to createWideIV below.
605   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
606           Intrinsic::getName(Intrinsic::experimental_guard));
607   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
608 
  // Seed the worklist with every phi in the loop header.
609   SmallVector<PHINode*, 8> LoopPhis;
610   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
611     LoopPhis.push_back(cast<PHINode>(I));
612   }
613   // Each round of simplification iterates through the SimplifyIVUsers worklist
614   // for all current phis, then determines whether any IVs can be
615   // widened. Widening adds new phis to LoopPhis, inducing another round of
616   // simplification on the wide IVs.
617   bool Changed = false;
618   while (!LoopPhis.empty()) {
619     // Evaluate as many IV expressions as possible before widening any IVs. This
620     // forces SCEV to set no-wrap flags before evaluating sign/zero
621     // extension. The first time SCEV attempts to normalize sign/zero extension,
622     // the result becomes final. So for the most predictable results, we delay
623     // evaluation of sign/zero extend evaluation until needed, and avoid running
624     // other SCEV based analysis prior to simplifyAndExtend.
625     do {
626       PHINode *CurrIV = LoopPhis.pop_back_val();
627 
628       // Information about sign/zero extensions of CurrIV.
629       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
630 
631       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
632                                    &Visitor);
633 
634       if (Visitor.WI.WidestNativeType) {
635         WideIVs.push_back(Visitor.WI);
636       }
637     } while(!LoopPhis.empty());
638 
639     // Continue if we disallowed widening.
    // (LoopPhis is empty at this point, so this terminates the outer loop.)
640     if (!WidenIndVars)
641       continue;
642 
    // Widen each recorded candidate; a successful widening pushes the new
    // wide phi back onto LoopPhis for another simplification round.
643     for (; !WideIVs.empty(); WideIVs.pop_back()) {
644       unsigned ElimExt;
645       unsigned Widened;
646       if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
647                                           DT, DeadInsts, ElimExt, Widened,
648                                           HasGuards, UsePostIncrementRanges)) {
649         NumElimExt += ElimExt;
650         NumWidened += Widened;
651         Changed = true;
652         LoopPhis.push_back(WidePhi);
653       }
654     }
655   }
656   return Changed;
657 }
658 
659 //===----------------------------------------------------------------------===//
660 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
661 //===----------------------------------------------------------------------===//
662 
663 /// Given a Value which is hoped to be part of an add recurrence in the given
664 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
665 /// that this is less general than SCEV's AddRec checking.
666 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
667   Instruction *IncI = dyn_cast<Instruction>(IncV);
668   if (!IncI)
669     return nullptr;
670 
671   switch (IncI->getOpcode()) {
672   case Instruction::Add:
673   case Instruction::Sub:
674     break;
675   case Instruction::GetElementPtr:
676     // An IV counter must preserve its type.
677     if (IncI->getNumOperands() == 2)
678       break;
679     LLVM_FALLTHROUGH;
680   default:
681     return nullptr;
682   }
683 
684   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
685   if (Phi && Phi->getParent() == L->getHeader()) {
686     if (L->isLoopInvariant(IncI->getOperand(1)))
687       return Phi;
688     return nullptr;
689   }
690   if (IncI->getOpcode() == Instruction::GetElementPtr)
691     return nullptr;
692 
693   // Allow add/sub to be commuted.
694   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
695   if (Phi && Phi->getParent() == L->getHeader()) {
696     if (L->isLoopInvariant(IncI->getOperand(0)))
697       return Phi;
698   }
699   return nullptr;
700 }
701 
702 /// Whether the current loop exit test is based on this value.  Currently this
703 /// is limited to a direct use in the loop condition.
704 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
705   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
706   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
707   // TODO: Allow non-icmp loop test.
708   if (!ICmp)
709     return false;
710 
711   // TODO: Allow indirect use.
712   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
713 }
714 
715 /// linearFunctionTestReplace policy. Return true unless we can show that the
716 /// current exit test is already sufficiently canonical.
717 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
718   assert(L->getLoopLatch() && "Must be in simplified form");
719 
720   // Avoid converting a constant or loop invariant test back to a runtime
721   // test.  This is critical for when SCEV's cached ExitCount is less precise
722   // than the current IR (such as after we've proven a particular exit is
723   // actually dead and thus the BE count never reaches our ExitCount.)
724   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
725   if (L->isLoopInvariant(BI->getCondition()))
726     return false;
727 
728   // Do LFTR to simplify the exit condition to an ICMP.
729   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
730   if (!Cond)
731     return true;
732 
733   // Do LFTR to simplify the exit ICMP to EQ/NE
734   ICmpInst::Predicate Pred = Cond->getPredicate();
735   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
736     return true;
737 
738   // Look for a loop invariant RHS
739   Value *LHS = Cond->getOperand(0);
740   Value *RHS = Cond->getOperand(1);
741   if (!L->isLoopInvariant(RHS)) {
742     if (!L->isLoopInvariant(LHS))
743       return true;
744     std::swap(LHS, RHS);
745   }
746   // Look for a simple IV counter LHS
747   PHINode *Phi = dyn_cast<PHINode>(LHS);
748   if (!Phi)
749     Phi = getLoopPhiForCounter(LHS, L);
750 
751   if (!Phi)
752     return true;
753 
754   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
755   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
756   if (Idx < 0)
757     return true;
758 
759   // Do LFTR if the exit condition's IV is *not* a simple counter.
760   Value *IncV = Phi->getIncomingValue(Idx);
761   return Phi != getLoopPhiForCounter(IncV, L);
762 }
763 
/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.  Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison.  This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist.  Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users.  Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    // Otherwise I itself is poison; mark it and queue its users so poison
    // propagates transitively (each instruction is expanded only once).
    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // way to exiting bb.
  return false;
}
805 
806 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
807 /// down to checking that all operands are constant and listing instructions
808 /// that may hide undef.
809 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
810                                unsigned Depth) {
811   if (isa<Constant>(V))
812     return !isa<UndefValue>(V);
813 
814   if (Depth >= 6)
815     return false;
816 
817   // Conservatively handle non-constant non-instructions. For example, Arguments
818   // may be undef.
819   Instruction *I = dyn_cast<Instruction>(V);
820   if (!I)
821     return false;
822 
823   // Load and return values may be undef.
824   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
825     return false;
826 
827   // Optimistically handle other instructions.
828   for (Value *Op : I->operands()) {
829     if (!Visited.insert(Op).second)
830       continue;
831     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
832       return false;
833   }
834   return true;
835 }
836 
837 /// Return true if the given value is concrete. We must prove that undef can
838 /// never reach it.
839 ///
840 /// TODO: If we decide that this is a good approach to checking for undef, we
841 /// may factor it into a common location.
842 static bool hasConcreteDef(Value *V) {
843   SmallPtrSet<Value*, 8> Visited;
844   Visited.insert(V);
845   return hasConcreteDefImpl(V, Visited, 0);
846 }
847 
848 /// Return true if this IV has any uses other than the (soon to be rewritten)
849 /// loop exit test.
850 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
851   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
852   Value *IncV = Phi->getIncomingValue(LatchIdx);
853 
854   for (User *U : Phi->users())
855     if (U != Cond && U != IncV) return false;
856 
857   for (User *U : IncV->users())
858     if (U != Cond && U != Phi) return false;
859   return true;
860 }
861 
862 /// Return true if the given phi is a "counter" in L.  A counter is an
863 /// add recurance (of integer or pointer type) with an arbitrary start, and a
864 /// step of 1.  Note that L must have exactly one latch.
865 static bool isLoopCounter(PHINode* Phi, Loop *L,
866                           ScalarEvolution *SE) {
867   assert(Phi->getParent() == L->getHeader());
868   assert(L->getLoopLatch());
869 
870   if (!SE->isSCEVable(Phi->getType()))
871     return false;
872 
873   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
874   if (!AR || AR->getLoop() != L || !AR->isAffine())
875     return false;
876 
877   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
878   if (!Step || !Step->isOne())
879     return false;
880 
881   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
882   Value *IncV = Phi->getIncomingValue(LatchIdx);
883   return (getLoopPhiForCounter(IncV, L) == Phi &&
884           isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
885 }
886 
/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    // Only displace the current best candidate if it would stay live anyway;
    // a best candidate that is almost dead should always be replaced.
    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
974 
/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
///
/// \param IndVar     The loop counter phi (unit stride, per isLoopCounter).
/// \param ExitingBB  The exiting block; the limit is expanded before its
///                   terminating branch.
/// \param ExitCount  SCEV for the number of backedge-taken iterations.
/// \param UsePostInc If true, compute the limit for the post-incremented IV
///                   (one past the pre-increment value).
/// \returns the materialized loop-invariant limit value.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    // We could handle pointer IVs other than i8*, but we need to compensate for
    // gep index scaling.
    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
                             cast<PointerType>(IndVar->getType())
                                 ->getElementType())->isOne() &&
           "unit stride pointer IV must be i8*");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
      IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}
1062 
/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
///
/// Always returns true, signalling that the exit branch was rewritten.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  // The IV's increment: the value flowing into the phi from the latch.
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.  If the
  // "true" successor stays inside the loop the branch exits on false, so the
  // continue-condition is IV != limit; otherwise the branch exits on true and
  // we use IV == limit.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    // zext(trunc(IV)) == IV means the IV's value is preserved by the
    // round-trip, so widening the limit with zext keeps the compare exact.
    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      // Same reasoning with a sign extension.
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
1205 
1206 //===----------------------------------------------------------------------===//
1207 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1208 //===----------------------------------------------------------------------===//
1209 
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
///
/// Returns true if any instruction was moved.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  // Walk the preheader bottom-up, starting at the terminator.
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // A use through a phi is attributed to the predecessor block the value
      // flows in from, not the phi's own block.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    // Advance I to the previous non-debug instruction *before* moving ToMove,
    // since moving invalidates our position in the preheader.
    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    // Insert subsequent sunk instructions before this one, preserving the
    // original preheader order in the exit block.
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}
1298 
1299 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1300                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1301   auto *OldCond = BI->getCondition();
1302   BI->setCondition(NewCond);
1303   if (OldCond->use_empty())
1304     DeadInsts.emplace_back(OldCond);
1305 }
1306 
1307 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1308                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1309   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1310   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1311   auto *OldCond = BI->getCondition();
1312   auto *NewCond =
1313       ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
1314   replaceExitCond(BI, NewCond, DeadInsts);
1315 }
1316 
1317 static void replaceLoopPHINodesWithPreheaderValues(
1318     Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1319   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1320   auto *LoopPreheader = L->getLoopPreheader();
1321   auto *LoopHeader = L->getHeader();
1322   for (auto &PN : LoopHeader->phis()) {
1323     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1324     PN.replaceAllUsesWith(PreheaderIncoming);
1325     DeadInsts.emplace_back(&PN);
1326   }
1327 }
1328 
1329 static void replaceWithInvariantCond(
1330     const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
1331     const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
1332     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1333   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1334   Rewriter.setInsertPoint(BI);
1335   auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
1336   auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
1337   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1338   if (ExitIfTrue)
1339     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1340   IRBuilder<> Builder(BI);
1341   auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1342                                      BI->getCondition()->getName());
1343   replaceExitCond(BI, NewCond, DeadInsts);
1344 }
1345 
/// Try to simplify the exit test of \p ExitingBB, for which no exact exit
/// count is known, using the fact that the loop executes at most \p MaxIter
/// iterations.  The test is either folded to a constant (when the predicate
/// is provable outright) or replaced by an equivalent loop-invariant
/// condition valid for the first MaxIter iterations.
///
/// \param Inverted     If true, try to prove the exit is always *taken*
///                     (the exit is folded with IsTaken == Inverted).
/// \param SkipLastIter If true, reason about one fewer iteration (MaxIter-1).
/// \returns true if the exit branch was rewritten or folded.
static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
    const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
    ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  BasicBlock *TrueSucc, *FalseSucc;
  // Only handle a conditional branch on an icmp.
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return false;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving loop exit, invert the predicate.
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
    foldExit(L, ExitingBB, Inverted, DeadInsts);
    return true;
  }
  // Further logic works for non-inverted condition only.
  if (Inverted)
    return false;

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    // Only truncate MaxIter if it provably fits in the narrower type.
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    const SCEV *One = SE->getOne(MaxIter->getType());
    MaxIter = SE->getMinusSCEV(MaxIter, One);
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return false;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    foldExit(L, ExitingBB, Inverted, DeadInsts);
  else
    replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
                             Rewriter, DeadInsts);

  return true;
}
1412 
/// If the single exit's test has the form 'icmp signed-pred (zext X), RHS'
/// with a loop-invariant RHS whose unsigned range fits inside the
/// zero-extension of X's full narrow range, rewrite the predicate to its
/// unsigned equivalent.  Returns true if the comparison was rewritten.
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags.  The alternative to this would be to extending the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers.  The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.

  auto *ExitingBB = L->getExitingBlock();
  if (!ExitingBB)
    return false;
  auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
  if (!BI)
    return false;
  assert(BI->isConditional() && "exit branch must be conditional");

  auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  if (!ICmp)
    return false;

  auto *LHS = ICmp->getOperand(0);
  auto *RHS = ICmp->getOperand(1);
  // Avoid computing SCEVs in the loop to avoid poisoning cache with
  // sub-optimal results.
  if (!L->isLoopInvariant(RHS))
    return false;

  // Match (icmp signed-cond zext, RHS)
  Value *LHSOp = nullptr;
  if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
    return false;

  // The rewrite is sound when every value RHS can take is reachable by
  // zero-extending some narrow value, i.e. RHS's unsigned range lies within
  // zext(full range of the narrow type).
  const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
  const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
  const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
  auto FullCR = ConstantRange::getFull(InnerBitWidth);
  FullCR = FullCR.zeroExtend(OuterBitWidth);
  if (!FullCR.contains(SE->getUnsignedRange(SE->getSCEV(RHS))))
    return false;

  // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
  // replace the signed condition with the unsigned version.
  ICmp->setPredicate(ICmp->getUnsignedPredicate());
  return true;
}
1461 
1462 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1463   SmallVector<BasicBlock*, 16> ExitingBlocks;
1464   L->getExitingBlocks(ExitingBlocks);
1465 
1466   // Remove all exits which aren't both rewriteable and execute on every
1467   // iteration.
1468   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1469     // If our exitting block exits multiple loops, we can only rewrite the
1470     // innermost one.  Otherwise, we're changing how many times the innermost
1471     // loop runs before it exits.
1472     if (LI->getLoopFor(ExitingBB) != L)
1473       return true;
1474 
1475     // Can't rewrite non-branch yet.
1476     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1477     if (!BI)
1478       return true;
1479 
1480     // If already constant, nothing to do.
1481     if (isa<Constant>(BI->getCondition()))
1482       return true;
1483 
1484     // Likewise, the loop latch must be dominated by the exiting BB.
1485     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1486       return true;
1487 
1488     return false;
1489   });
1490 
1491   if (ExitingBlocks.empty())
1492     return false;
1493 
1494   // Get a symbolic upper bound on the loop backedge taken count.
1495   const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
1496   if (isa<SCEVCouldNotCompute>(MaxExitCount))
1497     return false;
1498 
1499   // Visit our exit blocks in order of dominance. We know from the fact that
1500   // all exits must dominate the latch, so there is a total dominance order
1501   // between them.
1502   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1503                // std::sort sorts in ascending order, so we want the inverse of
1504                // the normal dominance relation.
1505                if (A == B) return false;
1506                if (DT->properlyDominates(A, B))
1507                  return true;
1508                else {
1509                  assert(DT->properlyDominates(B, A) &&
1510                         "expected total dominance order!");
1511                  return false;
1512                }
1513   });
1514 #ifdef ASSERT
1515   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1516     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1517   }
1518 #endif
1519 
1520   bool Changed = false;
1521   bool SkipLastIter = false;
1522   SmallSet<const SCEV*, 8> DominatingExitCounts;
1523   for (BasicBlock *ExitingBB : ExitingBlocks) {
1524     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1525     if (isa<SCEVCouldNotCompute>(ExitCount)) {
1526       // Okay, we do not know the exit count here. Can we at least prove that it
1527       // will remain the same within iteration space?
1528       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1529       auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
1530         return optimizeLoopExitWithUnknownExitCount(
1531             L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
1532             Rewriter, DeadInsts);
1533       };
1534 
1535       // TODO: We might have proved that we can skip the last iteration for
1536       // this check. In this case, we only want to check the condition on the
1537       // pre-last iteration (MaxExitCount - 1). However, there is a nasty
1538       // corner case:
1539       //
1540       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1541       //
1542       // If we could not prove that len != 0, then we also could not prove that
1543       // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1544       // OptimizeCond will likely not prove anything for it, even if it could
1545       // prove the same fact for len.
1546       //
1547       // As a temporary solution, we query both last and pre-last iterations in
1548       // hope that we will be able to prove triviality for at least one of
1549       // them. We can stop querying MaxExitCount for this case once SCEV
1550       // understands that (MaxExitCount - 1) will not overflow here.
1551       if (OptimizeCond(false, false) || OptimizeCond(true, false))
1552         Changed = true;
1553       else if (SkipLastIter)
1554         if (OptimizeCond(false, true) || OptimizeCond(true, true))
1555           Changed = true;
1556       continue;
1557     }
1558 
1559     if (MaxExitCount == ExitCount)
1560       // If the loop has more than 1 iteration, all further checks will be
1561       // executed 1 iteration less.
1562       SkipLastIter = true;
1563 
1564     // If we know we'd exit on the first iteration, rewrite the exit to
1565     // reflect this.  This does not imply the loop must exit through this
1566     // exit; there may be an earlier one taken on the first iteration.
1567     // We know that the backedge can't be taken, so we replace all
1568     // the header PHIs with values coming from the preheader.
1569     if (ExitCount->isZero()) {
1570       foldExit(L, ExitingBB, true, DeadInsts);
1571       replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
1572       Changed = true;
1573       continue;
1574     }
1575 
1576     assert(ExitCount->getType()->isIntegerTy() &&
1577            MaxExitCount->getType()->isIntegerTy() &&
1578            "Exit counts must be integers");
1579 
1580     Type *WiderType =
1581       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
1582     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
1583     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
1584     assert(MaxExitCount->getType() == ExitCount->getType());
1585 
1586     // Can we prove that some other exit must be taken strictly before this
1587     // one?
1588     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
1589                                      MaxExitCount, ExitCount)) {
1590       foldExit(L, ExitingBB, false, DeadInsts);
1591       Changed = true;
1592       continue;
1593     }
1594 
1595     // As we run, keep track of which exit counts we've encountered.  If we
1596     // find a duplicate, we've found an exit which would have exited on the
1597     // exiting iteration, but (from the visit order) strictly follows another
1598     // which does the same and is thus dead.
1599     if (!DominatingExitCounts.insert(ExitCount).second) {
1600       foldExit(L, ExitingBB, false, DeadInsts);
1601       Changed = true;
1602       continue;
1603     }
1604 
1605     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1606     // here.  If we kept track of the min of dominanting exits so far, we could
1607     // discharge exits with EC >= MDEC. This is less powerful than the existing
1608     // transform (since later exits aren't considered), but potentially more
1609     // powerful for any case where SCEV can prove a >=u b, but neither a == b
1610     // or a >u b.  Such a case is not currently known.
1611   }
1612   return Changed;
1613 }
1614 
/// For a provably side-effect-free (read-only) loop, replace each
/// predicatable exit's (possibly loop-varying) condition with a loop
/// invariant one derived from its exit count, so the loop can exit without
/// running its remaining iterations.  Returns true if any exit was rewritten.
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration.  Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  // Returns true when the given exiting block's condition cannot be
  // rewritten: wrong loop level, non-branch terminator, already-constant
  // condition, phis in the exit block, or a non-computable/non-expandable
  // exit count.
  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
    BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).  Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known inequal.
  llvm::sort(ExitingBlocks,
            [&](BasicBlock *A, BasicBlock *B) {
              // std::sort sorts in ascending order, so we want the inverse of
              // the normal dominance relation, plus a tie breaker for blocks
              // unordered by dominance.
              if (DT->properlyDominates(A, B)) return true;
              if (DT->properlyDominates(B, A)) return false;
              return A->getName() < B->getName();
            });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO:isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  // Expand all exit counts in the preheader so the new conditions are loop
  // invariant.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      // This exit's count is the exact BTC; pick the constant that sends
      // control to the non-loop successor, i.e. take the exit edge.
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      // Otherwise, take this exit exactly when its exit count equals the
      // overall backedge-taken count (the EQ/NE choice below depends on
      // which successor leaves the loop).
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        // Unify the operand types before comparing; both are counts, so zero
        // extension preserves the value.
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    // The old condition may now be unused; queue it for cleanup.
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}
1776 
1777 //===----------------------------------------------------------------------===//
1778 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1779 //===----------------------------------------------------------------------===//
1780 
/// Driver: run the IV simplification subpasses over loop L in their intended
/// order.  Returns true if any IR change was made.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only
  // Note: Since the result returned by ScalarEvolution may depend on the order
  // in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change following SCEV queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  if (canonicalizeExitCondition(L))
    // We've changed the predicate, but have not changed exit counts, or the
    // values which can flow through any SCEV.  i.e, no invalidation needed.
    Changed = true;

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter))  {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();
    BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeaderBR))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR, and any other change have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    // Compare the counts at a common width before checking the invariant.
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    // The old count must not be provably smaller than the new one; that
    // would mean we lengthened the trip count, which indvars must never do.
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}
1982 
1983 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
1984                                           LoopStandardAnalysisResults &AR,
1985                                           LPMUpdater &) {
1986   Function *F = L.getHeader()->getParent();
1987   const DataLayout &DL = F->getParent()->getDataLayout();
1988 
1989   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
1990                      WidenIndVars && AllowIVWidening);
1991   if (!IVS.run(&L))
1992     return PreservedAnalyses::all();
1993 
1994   auto PA = getLoopPassPreservedAnalyses();
1995   PA.preserveSet<CFGAnalyses>();
1996   if (AR.MSSA)
1997     PA.preserve<MemorySSAAnalysis>();
1998   return PA;
1999 }
2000 
2001 namespace {
2002 
2003 struct IndVarSimplifyLegacyPass : public LoopPass {
2004   static char ID; // Pass identification, replacement for typeid
2005 
2006   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2007     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2008   }
2009 
2010   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2011     if (skipLoop(L))
2012       return false;
2013 
2014     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2015     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2016     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2017     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2018     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2019     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2020     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2021     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2022     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2023     MemorySSA *MSSA = nullptr;
2024     if (MSSAAnalysis)
2025       MSSA = &MSSAAnalysis->getMSSA();
2026 
2027     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
2028     return IVS.run(L);
2029   }
2030 
2031   void getAnalysisUsage(AnalysisUsage &AU) const override {
2032     AU.setPreservesCFG();
2033     AU.addPreserved<MemorySSAWrapperPass>();
2034     getLoopAnalysisUsage(AU);
2035   }
2036 };
2037 
2038 } // end anonymous namespace
2039 
char IndVarSimplifyLegacyPass::ID = 0;

// Register the legacy pass under the "indvars" command-line name.
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

// Factory used by clients that build legacy pass pipelines programmatically.
Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}
2051