//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
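//      For example (a sketch, not actual pass output): in
//        'for (i = 0; i != n; ++i) sum += 2;  use(sum);'
//      the use of 'sum' after the loop can be rewritten to 'use(2*n)',
//      at which point the loop computes nothing observable and becomes dead.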
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to eliminate loop exits based on analyzable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
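/// For example (illustrative): ConvertToSInt(APFloat(42.0), I) succeeds with
/// I == 42, while ConvertToSInt(APFloat(0.5), I) fails because the
/// round-toward-zero conversion is inexact.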
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add or the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
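///
/// For example (an IR sketch, names hypothetical):
///   header:
///     %p = phi i32 [ %init, %preheader ], [ %next, %latch ]
///     br i1 %inv.cond, label %exit, label %latch  ; %inv.cond loop-invariant
///   exit:
///     %lcssa = phi i32 [ %p, %header ]
/// Since this exit can only be taken on the first iteration, %lcssa can be
/// rewritten to take %init directly.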
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
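///
/// For example (a sketch): given a narrow IV %iv of type i32 and a user
///   %w = sext i32 %iv to i64
/// on a target where i64 is legal, this records WidestNativeType = i64 and
/// IsSigned = true in WI.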
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute the cost of all instructions on the induction variable more
  // comprehensively when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
          Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode*, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // sign/zero extend evaluation until needed, and avoid running other SCEV
    // based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so.  Otherwise, return null.  Note
/// that this is less general than SCEV's AddRec checking.
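///
/// For example (an IR sketch), given
///   %iv = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i32 %iv, %step        ; %step loop-invariant
/// getLoopPhiForCounter(%iv.next, L) would return %iv.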
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    LLVM_FALLTHROUGH;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value.  Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
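///
/// For example (a sketch): an exit test 'icmp ne i64 %iv.next, %n' where %n is
/// loop invariant and %iv.next increments a simple counter phi is already
/// canonical, so no LFTR is needed; an 'icmp slt' exit test is not.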
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test.  This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.  Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison.  This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist.  Note that a false result conveys no information.
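///
/// For example (an IR sketch, assuming the store dominates OnPathTo):
///   %p = getelementptr i8, i8* %base, i64 %root   ; propagates poison
///   store i8 0, i8* %p                            ; UB if %p is poison
/// Here a poison %root guarantees UB before OnPathTo is reached, so this
/// would return true.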
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users.  Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example, Arguments
  // may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has any uses other than the (soon to be rewritten)
/// loop exit test.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L.  A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1.  Note that L must have exactly one latch.
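///
/// For example (an IR sketch), this phi is a counter:
///   %i = phi i64 [ %start, %preheader ], [ %i.next, %latch ]
///   %i.next = add nuw nsw i64 %i, 1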
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec with a step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter with unit stride) after the backedge of loop
/// L is taken ExitCount times.
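///
/// For example (a sketch): for a counter starting at IVInit with unit stride,
/// the pre-increment limit is IVInit + ExitCount and the post-increment limit
/// is IVInit + ExitCount + 1, modulo the type conversions handled below.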
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Zero extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    // We could handle pointer IVs other than i8*, but we need to compensate for
    // gep index scaling.
    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
                             cast<PointerType>(IndVar->getType())
                                 ->getElementType())->isOne() &&
           "unit stride pointer IV must be i8*");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit + BECount + 1 = IVEnd (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
      IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
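///
/// For example (a sketch), an exit test of the form
///   'for (i = 0; i < n; ++i)'
/// becomes
///   'for (i = 0; i != n; ++i)'
/// comparing the (possibly post-incremented) IV against a precomputed limit.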
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth; we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
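///
/// For example (a sketch, names hypothetical): an instruction
///   %a = add i64 %x, %y
/// computed in the preheader but only used after the loop is moved from the
/// preheader into the exit block.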
1209 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1210   BasicBlock *ExitBlock = L->getExitBlock();
1211   if (!ExitBlock) return false;
1212 
1213   BasicBlock *Preheader = L->getLoopPreheader();
1214   if (!Preheader) return false;
1215 
1216   bool MadeAnyChanges = false;
1217   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1218   BasicBlock::iterator I(Preheader->getTerminator());
1219   while (I != Preheader->begin()) {
1220     --I;
1221     // New instructions were inserted at the end of the preheader.
1222     if (isa<PHINode>(I))
1223       break;
1224 
1225     // Don't move instructions which might have side effects, since the side
1226     // effects need to complete before instructions inside the loop.  Also don't
1227     // move instructions which might read memory, since the loop may modify
1228     // memory. Note that it's okay if the instruction might have undefined
1229     // behavior: LoopSimplify guarantees that the preheader dominates the exit
1230     // block.
1231     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1232       continue;
1233 
1234     // Skip debug info intrinsics.
1235     if (isa<DbgInfoIntrinsic>(I))
1236       continue;
1237 
1238     // Skip eh pad instructions.
1239     if (I->isEHPad())
1240       continue;
1241 
1242     // Don't sink alloca: we never want to sink static alloca's out of the
1243     // entry block, and correctly sinking dynamic alloca's requires
1244     // checks for stacksave/stackrestore intrinsics.
1245     // FIXME: Refactor this check somehow?
1246     if (isa<AllocaInst>(I))
1247       continue;
1248 
1249     // Determine if there is a use in or before the loop (direct or
1250     // otherwise).
1251     bool UsedInLoop = false;
1252     for (Use &U : I->uses()) {
1253       Instruction *User = cast<Instruction>(U.getUser());
1254       BasicBlock *UseBB = User->getParent();
1255       if (PHINode *P = dyn_cast<PHINode>(User)) {
1256         unsigned i =
1257           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1258         UseBB = P->getIncomingBlock(i);
1259       }
1260       if (UseBB == Preheader || L->contains(UseBB)) {
1261         UsedInLoop = true;
1262         break;
1263       }
1264     }
1265 
1266     // If there is, the def must remain in the preheader.
1267     if (UsedInLoop)
1268       continue;
1269 
1270     // Otherwise, sink it to the exit block.
1271     Instruction *ToMove = &*I;
1272     bool Done = false;
1273 
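         // Step the iterator off ToMove (skipping any debug or pseudo
         // instructions) before the move below; once ToMove leaves the
         // preheader, an iterator pointing at it would walk the exit block's
         // instruction list instead.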
1274     if (I != Preheader->begin()) {
1275       // Skip debug info intrinsics.
1276       do {
1277         --I;
1278       } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1279 
1280       if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1281         Done = true;
1282     } else {
1283       Done = true;
1284     }
1285 
1286     MadeAnyChanges = true;
1287     ToMove->moveBefore(*ExitBlock, InsertPt);
1288     if (Done) break;
1289     InsertPt = ToMove->getIterator();
1290   }
1291 
1292   return MadeAnyChanges;
1293 }
1294 
1295 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1296                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1297   auto *OldCond = BI->getCondition();
1298   BI->setCondition(NewCond);
1299   if (OldCond->use_empty())
1300     DeadInsts.emplace_back(OldCond);
1301 }
1302 
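     /// Fold the branch in \p ExitingBB to a constant, so that the exit is
     /// statically taken (\p IsTaken) or not taken, accounting for whether
     /// the exit lies on the true or the false successor of the branch.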
1303 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1304                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1305   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1306   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1307   auto *OldCond = BI->getCondition();
1308   auto *NewCond =
1309       ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
1310   replaceExitCond(BI, NewCond, DeadInsts);
1311 }
1312 
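     /// The caller has proven that the backedge cannot be taken, so on the
     /// one and only iteration every header phi produces its preheader
     /// incoming value; replace all uses with that value and queue the phis
     /// for deletion.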
1313 static void replaceLoopPHINodesWithPreheaderValues(
1314     Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1315   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1316   auto *LoopPreheader = L->getLoopPreheader();
1317   auto *LoopHeader = L->getHeader();
1318   for (auto &PN : LoopHeader->phis()) {
1319     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1320     PN.replaceAllUsesWith(PreheaderIncoming);
1321     DeadInsts.emplace_back(&PN);
1322   }
1323 }
1324 
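     /// Rewrite the exit test of \p ExitingBB as the loop-invariant
     /// comparison 'InvariantLHS InvariantPred InvariantRHS'. The predicate
     /// is given in "stay in loop" form and is inverted when the exit is
     /// taken on the true edge of the branch.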
1325 static void replaceWithInvariantCond(
1326     const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
1327     const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
1328     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1329   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1330   Rewriter.setInsertPoint(BI);
1331   auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
1332   auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
1333   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1334   if (ExitIfTrue)
1335     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1336   IRBuilder<> Builder(BI);
1337   auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1338                                      BI->getCondition()->getName());
1339   replaceExitCond(BI, NewCond, DeadInsts);
1340 }
1341 
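     /// Try to simplify an exit test whose exact exit count is unknown. The
     /// test is first normalized so that 'LHS Pred RHS' means "stay in the
     /// loop" (or "exit", if \p Inverted). If SCEV can prove the normalized
     /// predicate outright, the exit is folded to a constant; otherwise we
     /// ask SCEV for an equivalent loop-invariant condition that holds for
     /// the first \p MaxIter iterations and install that instead.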
1342 static bool optimizeLoopExitWithUnknownExitCount(
1343     const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
1344     const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1345     ScalarEvolution *SE, SCEVExpander &Rewriter,
1346     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1347   ICmpInst::Predicate Pred;
1348   Value *LHS, *RHS;
1349   using namespace PatternMatch;
1350   BasicBlock *TrueSucc, *FalseSucc;
1351   if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
1352                       m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
1353     return false;
1354 
1355   assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
1356          "Not a loop exit!");
1357 
1358   // 'LHS pred RHS' should now mean that we stay in the loop.
1359   if (L->contains(FalseSucc))
1360     Pred = CmpInst::getInversePredicate(Pred);
1361 
1362   // If we are proving loop exit, invert the predicate.
1363   if (Inverted)
1364     Pred = CmpInst::getInversePredicate(Pred);
1365 
1366   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1367   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1368   // Can we prove it to be trivially true?
1369   if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
1370     foldExit(L, ExitingBB, Inverted, DeadInsts);
1371     return true;
1372   }
1373   // The logic below works for the non-inverted condition only.
1374   if (Inverted)
1375     return false;
1376 
1377   auto *ARTy = LHSS->getType();
1378   auto *MaxIterTy = MaxIter->getType();
1379   // If possible, adjust MaxIter to the type of the IV expression (ARTy).
1380   if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1381     MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1382   else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1383     const SCEV *MinusOne = SE->getMinusOne(ARTy);
1384     auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1385     if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1386       MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1387   }
1388 
1389   if (SkipLastIter) {
1390     const SCEV *One = SE->getOne(MaxIter->getType());
1391     MaxIter = SE->getMinusSCEV(MaxIter, One);
1392   }
1393 
1394   // Check if there is a loop-invariant predicate equivalent to our check.
1395   auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1396                                                                L, BI, MaxIter);
1397   if (!LIP)
1398     return false;
1399 
1400   // Can we prove it to be trivially true?
1401   if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1402     foldExit(L, ExitingBB, Inverted, DeadInsts);
1403   else
1404     replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
1405                              Rewriter, DeadInsts);
1406 
1407   return true;
1408 }
1409 
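     /// Use per-exit exit counts and the symbolic maximum backedge-taken
     /// count to fold exit tests to constants. For instance (illustrative),
     /// a secondary exit 'if (i u>= n) break;' inside 'for (i = 0; i != n;
     /// ++i)' can never be taken, and is a candidate to fold to a constant
     /// false exit.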
1410 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1411   SmallVector<BasicBlock*, 16> ExitingBlocks;
1412   L->getExitingBlocks(ExitingBlocks);
1413 
1414   // Remove all exits which aren't both rewriteable and guaranteed to
1415   // execute on every iteration.
1416   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1417     // If our exiting block exits multiple loops, we can only rewrite the
1418     // innermost one.  Otherwise, we're changing how many times the innermost
1419     // loop runs before it exits.
1420     if (LI->getLoopFor(ExitingBB) != L)
1421       return true;
1422 
1423     // Can't rewrite non-branch yet.
1424     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1425     if (!BI)
1426       return true;
1427 
1428     // If already constant, nothing to do.
1429     if (isa<Constant>(BI->getCondition()))
1430       return true;
1431 
1432     // The exiting block must also dominate the loop latch, so that the
         // exit is evaluated on every iteration.
1433     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1434       return true;
1435 
1436     return false;
1437   });
1438 
1439   if (ExitingBlocks.empty())
1440     return false;
1441 
1442   // Get a symbolic upper bound on the loop backedge taken count.
1443   const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
1444   if (isa<SCEVCouldNotCompute>(MaxExitCount))
1445     return false;
1446 
1447   // Visit our exit blocks in order of dominance. Since all exits must
1448   // dominate the latch, there is a total dominance order between
1449   // them.
1450   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1451                // std::sort sorts in ascending order, so we want the inverse of
1452                // the normal dominance relation.
1453                if (A == B) return false;
1454                if (DT->properlyDominates(A, B))
1455                  return true;
1456                else {
1457                  assert(DT->properlyDominates(B, A) &&
1458                         "expected total dominance order!");
1459                  return false;
1460                }
1461   });
1462 #ifndef NDEBUG
1463   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1464     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1465   }
1466 #endif
1467 
1468   bool Changed = false;
1469   bool SkipLastIter = false;
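       // Exit counts of the exits visited so far; since we visit in
       // dominance order, a repeated count identifies a later exit that can
       // never be taken.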
1470   SmallSet<const SCEV*, 8> DominatingExitCounts;
1471   for (BasicBlock *ExitingBB : ExitingBlocks) {
1472     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1473     if (isa<SCEVCouldNotCompute>(ExitCount)) {
1474       // Okay, we do not know the exit count here. Can we at least prove that it
1475       // will remain the same throughout the iteration space?
1476       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1477       auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
1478         return optimizeLoopExitWithUnknownExitCount(
1479             L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
1480             Rewriter, DeadInsts);
1481       };
1482 
1483       // TODO: We might have proved that we can skip the last iteration for
1484       // this check. In this case, we only want to check the condition on the
1485       // pre-last iteration (MaxExitCount - 1). However, there is a nasty
1486       // corner case:
1487       //
1488       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1489       //
1490       // If we could not prove that len != 0, then we also could not prove that
1491   // (len - 1) is not UINT_MAX. If we simply query (len - 1), then
1492       // OptimizeCond will likely not prove anything for it, even if it could
1493       // prove the same fact for len.
1494       //
1495       // As a temporary solution, we query both last and pre-last iterations in
1496   // the hope that we will be able to prove triviality for at least one of
1497       // them. We can stop querying MaxExitCount for this case once SCEV
1498       // understands that (MaxExitCount - 1) will not overflow here.
1499       if (OptimizeCond(false, false) || OptimizeCond(true, false))
1500         Changed = true;
1501       else if (SkipLastIter)
1502         if (OptimizeCond(false, true) || OptimizeCond(true, true))
1503           Changed = true;
1504       continue;
1505     }
1506 
1507     if (MaxExitCount == ExitCount)
1508       // If the loop has more than 1 iteration, all further checks will be
1509       // executed one fewer time.
1510       SkipLastIter = true;
1511 
1512     // If we know we'd exit on the first iteration, rewrite the exit to
1513     // reflect this.  This does not imply the loop must exit through this
1514     // exit; there may be an earlier one taken on the first iteration.
1515     // We know that the backedge can't be taken, so we replace all
1516     // the header PHIs with values coming from the preheader.
1517     if (ExitCount->isZero()) {
1518       foldExit(L, ExitingBB, true, DeadInsts);
1519       replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
1520       Changed = true;
1521       continue;
1522     }
1523 
1524     assert(ExitCount->getType()->isIntegerTy() &&
1525            MaxExitCount->getType()->isIntegerTy() &&
1526            "Exit counts must be integers");
1527 
1528     Type *WiderType =
1529       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
1530     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
1531     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
1532     assert(MaxExitCount->getType() == ExitCount->getType());
1533 
1534     // Can we prove that some other exit must be taken strictly before this
1535     // one?
1536     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
1537                                      MaxExitCount, ExitCount)) {
1538       foldExit(L, ExitingBB, false, DeadInsts);
1539       Changed = true;
1540       continue;
1541     }
1542 
1543     // As we run, keep track of which exit counts we've encountered.  If we
1544     // find a duplicate, we've found an exit which would have been taken on
1545     // the same iteration as an exit which (by the visit order) strictly
1546     // precedes it, and is thus dead.
1547     if (!DominatingExitCounts.insert(ExitCount).second) {
1548       foldExit(L, ExitingBB, false, DeadInsts);
1549       Changed = true;
1550       continue;
1551     }
1552 
1553     // TODO: There might be another opportunity to leverage SCEV's reasoning
1554     // here.  If we kept track of the min of dominating exits so far, we could
1555     // discharge exits with EC >= MDEC. This is less powerful than the existing
1556     // transform (since later exits aren't considered), but potentially more
1557     // powerful for any case where SCEV can prove a >=u b, but neither a == b
1558     // nor a >u b.  Such a case is not currently known.
1559   }
1560   return Changed;
1561 }
1562 
1563 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1564   SmallVector<BasicBlock*, 16> ExitingBlocks;
1565   L->getExitingBlocks(ExitingBlocks);
1566 
1567   // Finally, see if we can rewrite our exit conditions into a loop invariant
1568   // form. If we have a read-only loop, and we can tell that we must exit down
1569   // a path which does not need any of the values computed within the loop, we
1570   // can rewrite the loop to exit on the first iteration.  Note that this
1571   // neither a) tells us the loop exits on the first iteration (unless *all*
1572   // exits are predicatable) nor b) tells us *which* exit might be taken.
1573   // This transformation looks a lot like a form of dead loop elimination, but
1574   // restricted to read-only loops and without necessarily needing to kill
1575   // the loop entirely.
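       // Concretely, each exit predicated below ends up comparing its own
       // exit count against the exact backedge-taken count (both expanded in
       // the preheader), which is loop invariant; an exit whose count equals
       // the exact BTC folds to a constant outright.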
1576   if (!LoopPredication)
1577     return false;
1578 
1579   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1580   // through *explicit* control flow.  We have to eliminate the possibility of
1581   // implicit exits (see below) before we know it's truly exact.
1582   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1583   if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
1584     return false;
1585 
1586   assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1587   assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1588 
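       // An exit is "bad" (unpredicatable) if it exits more than this loop,
       // does not end in a conditional branch, already has a constant
       // condition, leads to an exit block containing phis, or has an exit
       // count we cannot compute and safely expand.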
1589   auto BadExit = [&](BasicBlock *ExitingBB) {
1590     // If our exiting block exits multiple loops, we can only rewrite the
1591     // innermost one.  Otherwise, we're changing how many times the innermost
1592     // loop runs before it exits.
1593     if (LI->getLoopFor(ExitingBB) != L)
1594       return true;
1595 
1596     // Can't rewrite non-branch yet.
1597     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1598     if (!BI)
1599       return true;
1600 
1601     // If already constant, nothing to do.
1602     if (isa<Constant>(BI->getCondition()))
1603       return true;
1604 
1605     // If the exit block has phis, we need to be able to compute the values
1606     // within the loop which contains them.  This assumes trivial LCSSA phis
1607     // have already been removed; TODO: generalize.
1608     BasicBlock *ExitBlock =
1609         BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1610     if (!ExitBlock->phis().empty())
1611       return true;
1612 
1613     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1614     if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
1615       return true;
1616 
1617     assert(SE->isLoopInvariant(ExitCount, L) &&
1618            "Exit count must be loop invariant");
1619     assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1620     return false;
1621   };
1622 
1623   // If we have any exits which can't be predicated themselves, then we can't
1624   // predicate any exit which isn't guaranteed to execute before it.  Consider
1625   // two exits (a) and (b) which would both exit on the same iteration.  If we
1626   // can predicate (b), but not (a), and (a) precedes (b) along some path, then
1627   // we could convert a loop from exiting through (a) to one exiting through
1628   // (b).  Note that this problem exists only for exits with the same exit
1629   // count; we could be more aggressive when exit counts are known to differ.
1630   llvm::sort(ExitingBlocks,
1631             [&](BasicBlock *A, BasicBlock *B) {
1632               // std::sort sorts in ascending order, so we want the inverse of
1633               // the normal dominance relation, plus a tie breaker for blocks
1634               // unordered by dominance.
1635               if (DT->properlyDominates(A, B)) return true;
1636               if (DT->properlyDominates(B, A)) return false;
1637               return A->getName() < B->getName();
1638             });
1639   // Check to see if our exit blocks are a total order (i.e. a linear chain of
1640   // exits before the backedge).  If they aren't, reasoning about reachability
1641   // is complicated and we choose not to do so for now.
1642   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1643     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1644       return false;
1645 
1646   // Given our sorted total order, we know that exit[j] must be evaluated
1647   // after all exit[i] such that j > i.
1648   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1649     if (BadExit(ExitingBlocks[i])) {
1650       ExitingBlocks.resize(i);
1651       break;
1652     }
1653 
1654   if (ExitingBlocks.empty())
1655     return false;
1656 
1657   // We rely on not being able to reach an exiting block on a later iteration
1658   // than its statically computed exit count.  The implementation of
1659   // getExitCount currently has this invariant, but assert it here so that
1660   // breakage is obvious if this ever changes.
1661   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1662         return DT->dominates(ExitingBB, L->getLoopLatch());
1663       }));
1664 
1665   // At this point, ExitingBlocks consists of only those blocks which are
1666   // predicatable.  Given that, we know we have at least one exit we can
1667   // predicate if the loop doesn't have side effects and doesn't have any
1668   // implicit exits (because then our exact BTC isn't actually exact).
1669   // FIXME: As structured, this is O(I^2) for loop nests, and it is not
1670   // obvious how to improve it.  We could bail out for outer loops, but that
1671   // seems less than ideal.  MemorySSA can find memory writes; is that
1672   // enough for *all* side effects?
1673   for (BasicBlock *BB : L->blocks())
1674     for (auto &I : *BB)
1675       // TODO: use isGuaranteedToTransferExecutionToSuccessor().
1676       if (I.mayHaveSideEffects())
1677         return false;
1678 
1679   bool Changed = false;
1680   // Finally, do the actual predication for all predicatable blocks.  A couple
1681   // of notes here:
1682   // 1) We don't bother to constant fold dominated exits with identical exit
1683   //    counts; that's simply a form of CSE/equality propagation and we leave
1684   //    it for dedicated passes.
1685   // 2) We insert the comparison at the branch.  Hoisting introduces additional
1686   //    legality constraints and we leave that to dedicated logic.  We want to
1687   //    predicate even if we can't insert a loop invariant expression as
1688   //    peeling or unrolling will likely reduce the cost of the otherwise loop
1689   //    varying check.
1690   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1691   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1692   Value *ExactBTCV = nullptr; // Lazily generated if needed.
1693   for (BasicBlock *ExitingBB : ExitingBlocks) {
1694     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1695 
1696     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1697     Value *NewCond;
1698     if (ExitCount == ExactBTC) {
1699       NewCond = L->contains(BI->getSuccessor(0)) ?
1700         B.getFalse() : B.getTrue();
1701     } else {
1702       Value *ECV = Rewriter.expandCodeFor(ExitCount);
1703       if (!ExactBTCV)
1704         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1705       Value *RHS = ExactBTCV;
1706       if (ECV->getType() != RHS->getType()) {
1707         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1708         ECV = B.CreateZExt(ECV, WiderTy);
1709         RHS = B.CreateZExt(RHS, WiderTy);
1710       }
1711       auto Pred = L->contains(BI->getSuccessor(0)) ?
1712         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1713       NewCond = B.CreateICmp(Pred, ECV, RHS);
1714     }
1715     Value *OldCond = BI->getCondition();
1716     BI->setCondition(NewCond);
1717     if (OldCond->use_empty())
1718       DeadInsts.emplace_back(OldCond);
1719     Changed = true;
1720   }
1721 
1722   return Changed;
1723 }
1724 
1725 //===----------------------------------------------------------------------===//
1726 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1727 //===----------------------------------------------------------------------===//
1728 
1729 bool IndVarSimplify::run(Loop *L) {
1730   // We need (and expect!) the incoming loop to be in LCSSA.
1731   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1732          "LCSSA required to run indvars!");
1733 
1734   // If LoopSimplify form is not available, stay out of trouble. Some notes:
1735   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
1736   //    canonicalization can be a pessimization without LSR to "clean up"
1737   //    afterwards.
1738   //  - We depend on having a preheader; in particular,
1739   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
1740   //    and we're in trouble if we can't find the induction variable even when
1741   //    we've manually inserted one.
1742   //  - LFTR relies on having a single backedge.
1743   if (!L->isLoopSimplifyForm())
1744     return false;
1745 
1746 #ifndef NDEBUG
1747   // Used below for a consistency check only
1748   // Note: Since the result returned by ScalarEvolution may depend on the order
1749   // in which previous results are added to its cache, the call to
1750   // getBackedgeTakenCount() may change following SCEV queries.
1751   const SCEV *BackedgeTakenCount;
1752   if (VerifyIndvars)
1753     BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1754 #endif
1755 
1756   bool Changed = false;
1757   // If there are any floating-point recurrences, attempt to
1758   // transform them to use integer recurrences.
1759   Changed |= rewriteNonIntegerIVs(L);
1760 
1761   // Create a rewriter object which we'll use to transform the code with.
1762   SCEVExpander Rewriter(*SE, DL, "indvars");
1763 #ifndef NDEBUG
1764   Rewriter.setDebugType(DEBUG_TYPE);
1765 #endif
1766 
1767   // Eliminate redundant IV users.
1768   //
1769   // Simplification works best when run before other consumers of SCEV. We
1770   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1771   // other expressions involving loop IVs have been evaluated. This helps SCEV
1772   // set no-wrap flags before normalizing sign/zero extension.
1773   Rewriter.disableCanonicalMode();
1774   Changed |= simplifyAndExtend(L, Rewriter, LI);
1775 
1776   // Check to see if we can compute the final value of any expressions
1777   // that are recurrent in the loop, and substitute the exit values from the
1778   // loop into any instructions outside of the loop that use the final values
1779   // of the current expressions.
1780   if (ReplaceExitValue != NeverRepl) {
1781     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1782                                              ReplaceExitValue, DeadInsts)) {
1783       NumReplaced += Rewrites;
1784       Changed = true;
1785     }
1786   }
1787 
1788   // Eliminate redundant IV cycles.
1789   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
1790 
1791   // Try to eliminate loop exits based on analyzeable exit counts
1792   if (optimizeLoopExits(L, Rewriter))  {
1793     Changed = true;
1794     // Given we've changed exit counts, notify SCEV
1795     // Some nested loops may share the same folded exit basic block,
1796     // thus we need to notify the topmost loop.
1797     SE->forgetTopmostLoop(L);
1798   }
1799 
1800   // Try to form loop invariant tests for loop exits by changing how many
1801   // iterations of the loop run when that is unobservable.
1802   if (predicateLoopExits(L, Rewriter)) {
1803     Changed = true;
1804     // Given we've changed exit counts, notify SCEV
1805     SE->forgetLoop(L);
1806   }
1807 
1808   // If we have a trip count expression, rewrite the loop's exit condition
1809   // using it.
1810   if (!DisableLFTR) {
1811     BasicBlock *PreHeader = L->getLoopPreheader();
1812     BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
1813 
1814     SmallVector<BasicBlock*, 16> ExitingBlocks;
1815     L->getExitingBlocks(ExitingBlocks);
1816     for (BasicBlock *ExitingBB : ExitingBlocks) {
1817       // Can't rewrite non-branch yet.
1818       if (!isa<BranchInst>(ExitingBB->getTerminator()))
1819         continue;
1820 
1821       // If our exiting block exits multiple loops, we can only rewrite the
1822       // innermost one.  Otherwise, we're changing how many times the innermost
1823       // loop runs before it exits.
1824       if (LI->getLoopFor(ExitingBB) != L)
1825         continue;
1826 
1827       if (!needsLFTR(L, ExitingBB))
1828         continue;
1829 
1830       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1831       if (isa<SCEVCouldNotCompute>(ExitCount))
1832         continue;
1833 
1834       // This was handled above, but as we form SCEVs, we can sometimes refine
1835       // existing ones; this allows exit counts to be folded to zero which
1836       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
1837       // until stable to handle cases like this better.
1838       if (ExitCount->isZero())
1839         continue;
1840 
1841       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
1842       if (!IndVar)
1843         continue;
1844 
1845       // Avoid high cost expansions.  Note: This heuristic is questionable in
1846       // that our definition of "high cost" is not exactly principled.
1847       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
1848                                        TTI, PreHeaderBR))
1849         continue;
1850 
1851       // Check preconditions for proper SCEVExpander operation. SCEV does not
1852       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
1853       // any pass that uses the SCEVExpander must do it. This does not work
1854       // well for loop passes because SCEVExpander makes assumptions about
1855       // all loops, while LoopPassManager only forces the current loop to be
1856       // simplified.
1857       //
1858       // FIXME: SCEV expansion has no way to bail out, so the caller must
1859       // explicitly check any assumptions made by SCEV. Brittle.
1860       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
1861       if (!AR || AR->getLoop()->getLoopPreheader())
1862         Changed |= linearFunctionTestReplace(L, ExitingBB,
1863                                              ExitCount, IndVar,
1864                                              Rewriter);
1865     }
1866   }
1867   // Clear the rewriter cache, because values that are in the rewriter's cache
1868   // can be deleted in the loop below, causing the AssertingVH in the cache to
1869   // trigger.
1870   Rewriter.clear();
1871 
1872   // Now that we're done iterating through lists, clean up any instructions
1873   // which are now dead.
1874   while (!DeadInsts.empty()) {
1875     Value *V = DeadInsts.pop_back_val();
1876 
1877     if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
1878       Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
1879     else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
1880       Changed |=
1881           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
1882   }
1883 
1884   // The Rewriter may not be used from this point on.
1885 
1886   // Loop-invariant instructions in the preheader that aren't used in the
1887   // loop may be sunk below the loop to reduce register pressure.
1888   Changed |= sinkUnusedInvariants(L);
1889 
1890   // rewriteFirstIterationLoopExitValues does not rely on the computation of
1891   // trip count and therefore can further simplify exit values in addition to
1892   // rewriteLoopExitValues.
1893   Changed |= rewriteFirstIterationLoopExitValues(L);
1894 
1895   // Clean up dead instructions.
1896   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
1897 
1898   // Check a post-condition.
1899   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1900          "Indvars did not preserve LCSSA!");
1901 
1902   // Verify that LFTR and any other changes have not interfered with SCEV's
1903   // ability to compute trip count.  We may have *changed* the exit count, but
1904   // only by reducing it.
1905 #ifndef NDEBUG
1906   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
1907     SE->forgetLoop(L);
1908     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
1909     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
1910         SE->getTypeSizeInBits(NewBECount->getType()))
1911       NewBECount = SE->getTruncateOrNoop(NewBECount,
1912                                          BackedgeTakenCount->getType());
1913     else
1914       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
1915                                                  NewBECount->getType());
1916     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
1917                                  NewBECount) && "indvars must preserve SCEV");
1918   }
1919   if (VerifyMemorySSA && MSSAU)
1920     MSSAU->getMemorySSA()->verifyMemorySSA();
1921 #endif
1922 
1923   return Changed;
1924 }
1925 
1926 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
1927                                           LoopStandardAnalysisResults &AR,
1928                                           LPMUpdater &) {
1929   Function *F = L.getHeader()->getParent();
1930   const DataLayout &DL = F->getParent()->getDataLayout();
1931 
1932   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
1933                      WidenIndVars && AllowIVWidening);
1934   if (!IVS.run(&L))
1935     return PreservedAnalyses::all();
1936 
1937   auto PA = getLoopPassPreservedAnalyses();
1938   PA.preserveSet<CFGAnalyses>();
1939   if (AR.MSSA)
1940     PA.preserve<MemorySSAAnalysis>();
1941   return PA;
1942 }
1943 
1944 namespace {
1945 
1946 struct IndVarSimplifyLegacyPass : public LoopPass {
1947   static char ID; // Pass identification, replacement for typeid
1948 
1949   IndVarSimplifyLegacyPass() : LoopPass(ID) {
1950     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
1951   }
1952 
1953   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1954     if (skipLoop(L))
1955       return false;
1956 
1957     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1958     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1959     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1960     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
1961     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
1962     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
1963     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
1964     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1965     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
1966     MemorySSA *MSSA = nullptr;
1967     if (MSSAAnalysis)
1968       MSSA = &MSSAAnalysis->getMSSA();
1969 
1970     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
1971     return IVS.run(L);
1972   }
1973 
1974   void getAnalysisUsage(AnalysisUsage &AU) const override {
1975     AU.setPreservesCFG();
1976     AU.addPreserved<MemorySSAWrapperPass>();
1977     getLoopAnalysisUsage(AU);
1978   }
1979 };
1980 
1981 } // end anonymous namespace
1982 
1983 char IndVarSimplifyLegacyPass::ID = 0;
1984 
1985 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
1986                       "Induction Variable Simplification", false, false)
1987 INITIALIZE_PASS_DEPENDENCY(LoopPass)
1988 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
1989                     "Induction Variable Simplification", false, false)
1990 
1991 Pass *llvm::createIndVarSimplifyPass() {
1992   return new IndVarSimplifyLegacyPass();
1993 }
1994