//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
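//      For example (illustrative), a use of 'sum' after
//        'for (i = 0, sum = 0; i != n; ++i) sum += 4;'
//      can be rewritten to '4*n', leaving the loop dead.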
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
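///
/// As an illustrative (hypothetical) example, an expansion that rewrites
///   %a = getelementptr inbounds i8, i8* %base1, i64 %i
/// into an equivalent address computed from a different base %base2 would
/// produce the same bits, but the result would no longer be inbounds with
/// respect to %base1's underlying object, which this check rejects.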
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case.
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks.
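///
/// For example (illustrative): if Def is used by a PHI through incoming
/// blocks %bb1 and %bb2, the returned insertion point is the terminator of
/// the nearest common dominator of %bb1 and %bb2 rather than the PHI itself.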
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);
    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }
  assert(InsertPt && "Missing phi operand");

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}
/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented as an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a ConstantFP, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp.  If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find the new predicate for the integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in a signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;
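  // Illustrative example of the equality-test hazard checked below (the
  // values are hypothetical): with InitValue = 0, IncValue = 3,
  // ExitValue = 10 and an EQ/NE exit test, the integer IV would step
  // 0, 3, 6, 9, 12, ... and wrap around i32 without ever equalling 10,
  // behaving differently from the FP IV, so we must reject the transform.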
375 
376   // Positive and negative strides have different safety conditions.
377   if (IncValue > 0) {
378     // If we have a positive stride, we require the init to be less than the
379     // exit value.
380     if (InitValue >= ExitValue)
381       return false;
382 
383     uint32_t Range = uint32_t(ExitValue-InitValue);
384     // Check for infinite loop, either:
385     // while (i <= Exit) or until (i > Exit)
386     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
387       if (++Range == 0) return false;  // Range overflows.
388     }
389 
390     unsigned Leftover = Range % uint32_t(IncValue);
391 
392     // If this is an equality comparison, we require that the strided value
393     // exactly land on the exit value, otherwise the IV condition will wrap
394     // around and do things the fp IV wouldn't.
395     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
396         Leftover != 0)
397       return false;
398 
399     // If the stride would wrap around the i32 before exiting, we can't
400     // transform the IV.
401     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
402       return false;
403   } else {
404     // If we have a negative stride, we require the init to be greater than the
405     // exit value.
406     if (InitValue <= ExitValue)
407       return false;
408 
409     uint32_t Range = uint32_t(InitValue-ExitValue);
410     // Check for infinite loop, either:
411     // while (i >= Exit) or until (i < Exit)
412     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
413       if (++Range == 0) return false;  // Range overflows.
414     }
415 
416     unsigned Leftover = Range % uint32_t(-IncValue);
417 
418     // If this is an equality comparison, we require that the strided value
419     // exactly land on the exit value, otherwise the IV condition will wrap
420     // around and do things the fp IV wouldn't.
421     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
422         Leftover != 0)
423       return false;
424 
425     // If the stride would wrap around the i32 before exiting, we can't
426     // transform the IV.
427     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
428       return false;
429   }
430 
431   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
432 
433   // Insert new integer induction variable.
434   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
435   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
436                       PN->getIncomingBlock(IncomingEdge));
437 
438   Value *NewAdd =
439     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
440                               Incr->getName()+".int", Incr);
441   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
442 
443   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
444                                       ConstantInt::get(Int32Ty, ExitValue),
445                                       Compare->getName());
446 
447   // In the following deletions, PN may become dead and may be deleted.
448   // Use a WeakTrackingVH to observe whether this happens.
449   WeakTrackingVH WeakPH = PN;
450 
451   // Delete the old floating point exit comparison.  The branch starts using the
452   // new comparison.
453   NewCompare->takeName(Compare);
454   Compare->replaceAllUsesWith(NewCompare);
455   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
456 
457   // Delete the old floating point increment.
458   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
459   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
460 
461   // If the FP induction variable still has uses, this is because something else
462   // in the loop uses its value.  In order to canonicalize the induction
463   // variable, we chose to eliminate the IV and rewrite it in terms of an
464   // int->fp cast.
465   //
466   // We give preference to sitofp over uitofp because it is faster on most
467   // platforms.
468   if (WeakPH) {
469     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
470                                  &*PN->getParent()->getFirstInsertionPt());
471     PN->replaceAllUsesWith(Conv);
472     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
473   }
474   return true;
475 }

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if the expansion is high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
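///
/// For example (illustrative), given
///   for (i = 0; i != n; ++i) v = i * 2;
///   use(v);
/// the use of 'v' outside the loop can be rewritten to use 2*(n-1)
/// (assuming the loop body executes at least once), removing any dependence
/// on the loop's final IV value.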
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not defined by an instruction, it
        // cannot be defined inside the loop; skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (!SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which cannot be optimized
        // away.
        if (ExitValue->getSCEVType() >= scMulExpr) {
          bool HasHardInternalUses = false;
          for (auto *IB : Inst->users()) {
            Instruction *UseInstr = cast<Instruction>(IB);
            unsigned Opc = UseInstr->getOpcode();
            if (L->contains(UseInstr) && Opc == Instruction::Call) {
              HasHardInternalUses = true;
              break;
            }
          }
          if (HasHardInternalUses)
            continue;
        }

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite the exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, schedule it for deletion. Don't delete
    // it immediately, to avoid invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
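///
/// For example (illustrative): if the header's exiting branch tests a
/// loop-invariant condition, then whenever that exit is taken it is taken on
/// the first iteration, so an exit PHI whose incoming value is a header PHI
/// can be replaced with the header PHI's preheader incoming value.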
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  auto *LoopHeader = L->getHeader();
  assert(LoopHeader && "Invalid loop");

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // We currently only support loop exits from the loop header. If the
        // incoming block is not the loop header, we would need to recursively
        // check that all conditions starting from the loop header are loop
        // invariant. Additional support might be added in the future.
        if (IncomingBB != LoopHeader)
          continue;

        // Get the condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs.
        if (!ExitVal)
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == LoopHeader &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting the exit
/// values. If it is possible, ignore ReplaceExitValue and do the rewriting
/// aggressively.
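///
/// For example (illustrative): a side-effect-free loop whose only live-out
/// value is an exit PHI that the caller is about to replace with a
/// loop-invariant expansion becomes dead, and can later be deleted outright.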
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // The LoopDeletion pass can delete a loop even when ExitingBlocks.size() > 1,
  // but we avoid the multiple-ExitingBlocks case here for simplicity.
  // TODO: If we see a test case where a loop with multiple exiting blocks can
  // be deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in the transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by visitIVCast and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
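///
/// For example (illustrative): for an i32 IV whose users include
///   %a = sext i32 %iv to i64
/// on a target where i64 is legal, this records i64 as WidestNativeType with
/// IsSigned = true.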
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
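///
/// For example (illustrative): a narrow IV
///   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
/// whose users sign-extend it to i64 gets a companion 'phi i64'; the sext
/// users are redirected to the wide PHI, and any remaining i32 users are fed
/// by a trunc of it.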
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *, 16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

/// Perform a quick domtree based check for loop invariance assuming that V is
/// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
/// purpose.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
  Instruction *Inst = dyn_cast<Instruction>(V);
  if (!Inst)
    return true;

  return DT->properlyDominates(Inst->getParent(), L->getHeader());
}

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
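///
/// For example (illustrative): for a narrow use
///   %u = add nsw i32 %narrow.iv, %inv
/// with a sign-extended IV, nsw permits sext(%u) == sext(%narrow.iv) +
/// sext(%inv) == WideDef + sext(%inv), which SCEV folds to an AddRec on this
/// loop, so the add can be widened with a sign-extended operand.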
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
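///
/// For example (illustrative): a use the widener cannot handle, such as
///   %r = call i32 @f(i32 %narrow.iv)
/// is instead rewritten to consume '%t = trunc i64 %wide.iv to i32'.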
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(
      getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand).  The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  IRBuilder<> Builder(
      getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}
1349 
1350 /// If the narrow use is an instruction whose two operands are the defining
1351 /// instruction of DU and a load instruction, then we have the following:
1352 /// if the load is hoisted outside the loop, we never reach this function,
1353 /// because scalar evolution analysis works fine in widenIVUse with variables
1354 /// hoisted outside the loop, and efficient code is subsequently generated by
1355 /// not emitting truncate instructions. But when the load is not hoisted
1356 /// (whether due to a limitation in alias analysis or to a true legality
1357 /// constraint), scalar evolution cannot proceed with loop-variant values and
1358 /// inefficient code is generated. This function handles the non-hoisted load
1359 /// special case by making the optimization generate the same kind of code for
1360 /// hoisted and non-hoisted loads (widen the use and eliminate the sign extend
1361 /// instruction). This special case matters especially when the induction
1362 /// variables affect the addressing mode in code generation.
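///
/// A sketch of the pattern handled here (illustrative IR, hypothetical names):
///   %load = load i32, i32* %p            ; loop-variant, not hoisted
///   %add  = add nsw i32 %iv, %load
///   %ext  = sext i32 %add to i64         ; feeds an addressing computation
/// This function only checks legality; widenWithVariantLoadUseCodegen then
/// recomputes %add in the wide type and removes %ext.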
1363 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1364   Instruction *NarrowUse = DU.NarrowUse;
1365   Instruction *NarrowDef = DU.NarrowDef;
1366   Instruction *WideDef = DU.WideDef;
1367 
1368   // Handle the common case of add<nsw/nuw>
1369   const unsigned OpCode = NarrowUse->getOpcode();
1370   // Only Add/Sub/Mul instructions are supported.
1371   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1372       OpCode != Instruction::Mul)
1373     return false;
1374 
1375   // The operand that is not defined by NarrowDef of DU. Let's call it the
1376   // other operand.
1377   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1378   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1379          "bad DU");
1380 
1381   const SCEV *ExtendOperExpr = nullptr;
1382   const OverflowingBinaryOperator *OBO =
1383     cast<OverflowingBinaryOperator>(NarrowUse);
1384   ExtendKind ExtKind = getExtendKind(NarrowDef);
1385   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1386     ExtendOperExpr = SE->getSignExtendExpr(
1387       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1388   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1389     ExtendOperExpr = SE->getZeroExtendExpr(
1390       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1391   else
1392     return false;
1393 
1394   // We are interested in the other operand being a load instruction.
1395   // But we should look into relaxing this restriction later on.
1396   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1397   if (I && I->getOpcode() != Instruction::Load)
1398     return false;
1399 
1400   // Verify that the defining operand is an AddRec.
1401   const SCEV *Op1 = SE->getSCEV(WideDef);
1402   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1403   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1404     return false;
1405   // Verify that the other operand is an extend.
1406   if (ExtKind == SignExtended) {
1407     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1408       return false;
1409   } else {
1410     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1411       return false;
1412   }
1413 
1414   if (ExtKind == SignExtended) {
1415     for (Use &U : NarrowUse->uses()) {
1416       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1417       if (!User || User->getType() != WideType)
1418         return false;
1419     }
1420   } else { // ExtKind == ZeroExtended
1421     for (Use &U : NarrowUse->uses()) {
1422       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1423       if (!User || User->getType() != WideType)
1424         return false;
1425     }
1426   }
1427 
1428   return true;
1429 }
1430 
1431 /// Special Case for widening with variant Loads (see
1432 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
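///
/// Continuing the sketch above (illustrative IR), the generated code is
///   %load.ext = sext i32 %load to i64
///   %add.wide = add nsw i64 %iv.wide, %load.ext
/// and every 'sext i32 %add to i64' user is replaced by %add.wide.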
1433 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1434   Instruction *NarrowUse = DU.NarrowUse;
1435   Instruction *NarrowDef = DU.NarrowDef;
1436   Instruction *WideDef = DU.WideDef;
1437 
1438   ExtendKind ExtKind = getExtendKind(NarrowDef);
1439 
1440   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1441 
1442   // Generate the widened use instruction.
1443   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1444                    ? WideDef
1445                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1446                                       ExtKind, NarrowUse);
1447   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1448                    ? WideDef
1449                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1450                                       ExtKind, NarrowUse);
1451 
1452   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1453   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1454                                         NarrowBO->getName());
1455   IRBuilder<> Builder(NarrowUse);
1456   Builder.Insert(WideBO);
1457   WideBO->copyIRFlags(NarrowBO);
1458 
1459   if (ExtKind == SignExtended)
1460     ExtendKindMap[NarrowUse] = SignExtended;
1461   else
1462     ExtendKindMap[NarrowUse] = ZeroExtended;
1463 
1464   // Update the Use.
1465   if (ExtKind == SignExtended) {
1466     for (Use &U : NarrowUse->uses()) {
1467       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1468       if (User && User->getType() == WideType) {
1469         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1470                           << *WideBO << "\n");
1471         ++NumElimExt;
1472         User->replaceAllUsesWith(WideBO);
1473         DeadInsts.emplace_back(User);
1474       }
1475     }
1476   } else { // ExtKind == ZeroExtended
1477     for (Use &U : NarrowUse->uses()) {
1478       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1479       if (User && User->getType() == WideType) {
1480         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1481                           << *WideBO << "\n");
1482         ++NumElimExt;
1483         User->replaceAllUsesWith(WideBO);
1484         DeadInsts.emplace_back(User);
1485       }
1486     }
1487   }
1488 }
1489 
1490 /// Determine whether an individual user of the narrow IV can be widened. If so,
1491 /// return the wide clone of the user.
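///
/// The central win (illustrative IR): when the extension kinds agree, a use
///   %ext = sext i32 %iv to i64
/// is replaced outright by the wide IV, eliminating the extension; arithmetic
/// users that still form an affine recurrence are instead cloned in the wide
/// type.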
1492 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1493   assert(ExtendKindMap.count(DU.NarrowDef) &&
1494          "Should already know the kind of extension used to widen NarrowDef");
1495 
1496   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1497   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1498     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1499       // For LCSSA phis, sink the truncate outside the loop.
1500       // After SimplifyCFG most loop exit targets have a single predecessor.
1501       // Otherwise fall back to a truncate within the loop.
1502       if (UsePhi->getNumOperands() != 1)
1503         truncateIVUse(DU, DT, LI);
1504       else {
1505         // Widening the PHI requires us to insert a trunc.  The logical place
1506         // for this trunc is in the same BB as the PHI.  This is not possible if
1507         // the BB is terminated by a catchswitch.
1508         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1509           return nullptr;
1510 
1511         PHINode *WidePhi =
1512           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1513                           UsePhi);
1514         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1515         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1516         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1517         UsePhi->replaceAllUsesWith(Trunc);
1518         DeadInsts.emplace_back(UsePhi);
1519         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1520                           << *WidePhi << "\n");
1521       }
1522       return nullptr;
1523     }
1524   }
1525 
1526   // This narrow use can be widened by a sext if it's non-negative or its narrow
1527   // def was widened by a sext. Same for zext.
1528   auto canWidenBySExt = [&]() {
1529     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1530   };
1531   auto canWidenByZExt = [&]() {
1532     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1533   };
1534 
1535   // Our raison d'etre! Eliminate sign and zero extension.
1536   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1537       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1538     Value *NewDef = DU.WideDef;
1539     if (DU.NarrowUse->getType() != WideType) {
1540       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1541       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1542       if (CastWidth < IVWidth) {
1543         // The cast isn't as wide as the IV, so insert a Trunc.
1544         IRBuilder<> Builder(DU.NarrowUse);
1545         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1546       }
1547       else {
1548         // A wider extend was hidden behind a narrower one. This may induce
1549         // another round of IV widening in which the intermediate IV becomes
1550         // dead. It should be very rare.
1551         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1552                           << " not wide enough to subsume " << *DU.NarrowUse
1553                           << "\n");
1554         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1555         NewDef = DU.NarrowUse;
1556       }
1557     }
1558     if (NewDef != DU.NarrowUse) {
1559       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1560                         << " replaced by " << *DU.WideDef << "\n");
1561       ++NumElimExt;
1562       DU.NarrowUse->replaceAllUsesWith(NewDef);
1563       DeadInsts.emplace_back(DU.NarrowUse);
1564     }
1565     // Now that the extend is gone, we want to expose its uses for potential
1566     // further simplification. We don't need to directly inform SimplifyIVUsers
1567     // of the new users, because their parent IV will be processed later as a
1568     // new loop phi. If we preserved IVUsers analysis, we would also want to
1569     // push the uses of WideDef here.
1570 
1571     // No further widening is needed. The deceased [sz]ext had done it for us.
1572     return nullptr;
1573   }
1574 
1575   // Does this user itself evaluate to a recurrence after widening?
1576   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1577   if (!WideAddRec.first)
1578     WideAddRec = getWideRecurrence(DU);
1579 
1580   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1581   if (!WideAddRec.first) {
1582     // If use is a loop condition, try to promote the condition instead of
1583     // truncating the IV first.
1584     if (widenLoopCompare(DU))
1585       return nullptr;
1586 
1587     // At this point we are about to generate a truncate instruction that may
1588     // hurt performance because the scalar evolution expression computed earlier
1589     // in WideAddRec.first does not indicate a polynomial induction expression.
1590     // In that case, look at the operands of the use instruction to determine
1591     // if we can still widen the use instead of truncating its operand.
1592     if (widenWithVariantLoadUse(DU)) {
1593       widenWithVariantLoadUseCodegen(DU);
1594       return nullptr;
1595     }
1596 
1597     // This user does not evaluate to a recurrence after widening, so don't
1598     // follow it. Instead insert a Trunc to kill off the original use,
1599     // eventually isolating the original narrow IV so it can be removed.
1600     truncateIVUse(DU, DT, LI);
1601     return nullptr;
1602   }
1603   // Assume block terminators cannot evaluate to a recurrence. We can't
1604   // insert a Trunc after a terminator if there happens to be a critical edge.
1605   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1606          "SCEV is not expected to evaluate a block terminator");
1607 
1608   // Reuse the IV increment that SCEVExpander created as long as it dominates
1609   // NarrowUse.
1610   Instruction *WideUse = nullptr;
1611   if (WideAddRec.first == WideIncExpr &&
1612       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1613     WideUse = WideInc;
1614   else {
1615     WideUse = cloneIVUser(DU, WideAddRec.first);
1616     if (!WideUse)
1617       return nullptr;
1618   }
1619   // Evaluation of WideAddRec ensured that the narrow expression could be
1620   // extended outside the loop without overflow. This suggests that the wide use
1621   // evaluates to the same expression as the extended narrow use, but doesn't
1622   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1623   // where it fails, we simply throw away the newly created wide use.
1624   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1625     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1626                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1627                       << "\n");
1628     DeadInsts.emplace_back(WideUse);
1629     return nullptr;
1630   }
1631 
1632   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1633   // Returning WideUse pushes it on the worklist.
1634   return WideUse;
1635 }
1636 
1637 /// Add eligible users of NarrowDef to NarrowIVUsers.
1638 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1639   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1640   bool NonNegativeDef =
1641       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1642                            SE->getConstant(NarrowSCEV->getType(), 0));
1643   for (User *U : NarrowDef->users()) {
1644     Instruction *NarrowUser = cast<Instruction>(U);
1645 
1646     // Handle data flow merges and bizarre phi cycles.
1647     if (!Widened.insert(NarrowUser).second)
1648       continue;
1649 
1650     bool NonNegativeUse = false;
1651     if (!NonNegativeDef) {
1652       // We might have control-dependent range information for this context.
1653       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1654         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1655     }
1656 
1657     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1658                                NonNegativeDef || NonNegativeUse);
1659   }
1660 }
1661 
1662 /// Process a single induction variable. First use the SCEVExpander to create a
1663 /// wide induction variable that evaluates to the same recurrence as the
1664 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1665 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1666 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1667 ///
1668 /// It would be simpler to delete uses as they are processed, but we must avoid
1669 /// invalidating SCEV expressions.
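///
/// For instance (illustrative IR), a narrow IV
///   %iv      = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add nsw i32 %iv, 1
/// whose extension to i64 is known not to overflow gains a wide counterpart
///   %iv.wide      = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]
///   %iv.wide.next = add nsw i64 %iv.wide, 1
/// after which the worklist below rewrites the narrow IV's users in terms of
/// %iv.wide.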
1670 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1671   // Is this phi an induction variable?
1672   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1673   if (!AddRec)
1674     return nullptr;
1675 
1676   // Widen the induction variable expression.
1677   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1678                                ? SE->getSignExtendExpr(AddRec, WideType)
1679                                : SE->getZeroExtendExpr(AddRec, WideType);
1680 
1681   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1682          "Expect the new IV expression to preserve its type");
1683 
1684   // Can the IV be extended outside the loop without overflow?
1685   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1686   if (!AddRec || AddRec->getLoop() != L)
1687     return nullptr;
1688 
1689   // An AddRec must have loop-invariant operands. Since this AddRec is
1690   // materialized by a loop header phi, the expression cannot have any post-loop
1691   // operands, so they must dominate the loop header.
1692   assert(
1693       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1694       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1695       "Loop header phi recurrence inputs do not dominate the loop");
1696 
1697   // Iterate over IV uses (including transitive ones) looking for IV increments
1698   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1699   // the increment, calculate control-dependent range information based on
1700   // dominating conditions inside of the loop (e.g. a range check inside of the
1701   // loop). Calculated ranges are stored in the PostIncRangeInfos map.
1702   //
1703   // Control-dependent range information is later used to prove that a narrow
1704   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1705   // this on demand because when pushNarrowIVUsers needs this information some
1706   // of the dominating conditions might be already widened.
1707   if (UsePostIncrementRanges)
1708     calculatePostIncRanges(OrigPhi);
1709 
1710   // The rewriter provides a value for the desired IV expression. This may
1711   // either find an existing phi or materialize a new one. Either way, we
1712   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1713   // of the phi-SCC dominates the loop entry.
1714   Instruction *InsertPt = &L->getHeader()->front();
1715   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1716 
1717   // Remembering the WideIV increment generated by SCEVExpander allows
1718   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1719   // employ a general reuse mechanism because the call above is the only call to
1720   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1721   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1722     WideInc =
1723       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1724     WideIncExpr = SE->getSCEV(WideInc);
1725     // Propagate the debug location associated with the original loop increment
1726     // to the new (widened) increment.
1727     auto *OrigInc =
1728       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1729     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1730   }
1731 
1732   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1733   ++NumWidened;
1734 
1735   // Traverse the def-use chain using a worklist starting at the original IV.
1736   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1737 
1738   Widened.insert(OrigPhi);
1739   pushNarrowIVUsers(OrigPhi, WidePhi);
1740 
1741   while (!NarrowIVUsers.empty()) {
1742     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1743 
1744     // Process a def-use edge. This may replace the use, so don't hold a
1745     // use_iterator across it.
1746     Instruction *WideUse = widenIVUse(DU, Rewriter);
1747 
1748     // Follow all def-use edges from the previous narrow use.
1749     if (WideUse)
1750       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1751 
1752     // widenIVUse may have removed the def-use edge.
1753     if (DU.NarrowDef->use_empty())
1754       DeadInsts.emplace_back(DU.NarrowDef);
1755   }
1756 
1757   // Attach any debug information to the new PHI. Since OrigPhi and WidePhi
1758   // evaluate the same recurrence, we can just copy the debug info over.
1759   SmallVector<DbgValueInst *, 1> DbgValues;
1760   llvm::findDbgValues(DbgValues, OrigPhi);
1761   auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
1762                                      ValueAsMetadata::get(WidePhi));
1763   for (auto &DbgValue : DbgValues)
1764     DbgValue->setOperand(0, MDPhi);
1765   return WidePhi;
1766 }
1767 
1768 /// Calculates the control-dependent range for the given def at the given
1769 /// context by looking at dominating conditions inside of the loop.
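///
/// For example (hypothetical values): for %def = add nsw i32 %x, 1 used where
/// dominating conditions establish 0 s<= %x s< 16, the range recorded for
/// %def at that use is [1, 17), which in particular proves %def non-negative
/// there.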
1770 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1771                                     Instruction *NarrowUser) {
1772   using namespace llvm::PatternMatch;
1773 
1774   Value *NarrowDefLHS;
1775   const APInt *NarrowDefRHS;
1776   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1777                                  m_APInt(NarrowDefRHS))) ||
1778       !NarrowDefRHS->isNonNegative())
1779     return;
1780 
1781   auto UpdateRangeFromCondition = [&] (Value *Condition,
1782                                        bool TrueDest) {
1783     CmpInst::Predicate Pred;
1784     Value *CmpRHS;
1785     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1786                                  m_Value(CmpRHS))))
1787       return;
1788 
1789     CmpInst::Predicate P =
1790             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1791 
1792     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1793     auto CmpConstrainedLHSRange =
1794             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1795     auto NarrowDefRange =
1796             CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1797 
1798     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1799   };
1800 
1801   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1802     if (!HasGuards)
1803       return;
1804 
1805     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1806                                      Ctx->getParent()->rend())) {
1807       Value *C = nullptr;
1808       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1809         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1810     }
1811   };
1812 
1813   UpdateRangeFromGuards(NarrowUser);
1814 
1815   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1816   // If NarrowUserBB is statically unreachable, asking dominator queries may
1817   // yield surprising results (e.g. the block may not have a dom tree node).
1818   if (!DT->isReachableFromEntry(NarrowUserBB))
1819     return;
1820 
1821   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1822        L->contains(DTB->getBlock());
1823        DTB = DTB->getIDom()) {
1824     auto *BB = DTB->getBlock();
1825     auto *TI = BB->getTerminator();
1826     UpdateRangeFromGuards(TI);
1827 
1828     auto *BI = dyn_cast<BranchInst>(TI);
1829     if (!BI || !BI->isConditional())
1830       continue;
1831 
1832     auto *TrueSuccessor = BI->getSuccessor(0);
1833     auto *FalseSuccessor = BI->getSuccessor(1);
1834 
1835     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1836       return BBE.isSingleEdge() &&
1837              DT->dominates(BBE, NarrowUser->getParent());
1838     };
1839 
1840     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1841       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1842 
1843     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1844       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1845   }
1846 }
1847 
1848 /// Calculates the PostIncRangeInfos map for the given IV.
1849 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1850   SmallPtrSet<Instruction *, 16> Visited;
1851   SmallVector<Instruction *, 6> Worklist;
1852   Worklist.push_back(OrigPhi);
1853   Visited.insert(OrigPhi);
1854 
1855   while (!Worklist.empty()) {
1856     Instruction *NarrowDef = Worklist.pop_back_val();
1857 
1858     for (Use &U : NarrowDef->uses()) {
1859       auto *NarrowUser = cast<Instruction>(U.getUser());
1860 
1861       // Don't go looking outside the current loop.
1862       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1863       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1864         continue;
1865 
1866       if (!Visited.insert(NarrowUser).second)
1867         continue;
1868 
1869       Worklist.push_back(NarrowUser);
1870 
1871       calculatePostIncRange(NarrowDef, NarrowUser);
1872     }
1873   }
1874 }
1875 
1876 //===----------------------------------------------------------------------===//
1877 //  Live IV Reduction - Minimize IVs live across the loop.
1878 //===----------------------------------------------------------------------===//
1879 
1880 //===----------------------------------------------------------------------===//
1881 //  Simplification of IV users based on SCEV evaluation.
1882 //===----------------------------------------------------------------------===//
1883 
1884 namespace {
1885 
1886 class IndVarSimplifyVisitor : public IVVisitor {
1887   ScalarEvolution *SE;
1888   const TargetTransformInfo *TTI;
1889   PHINode *IVPhi;
1890 
1891 public:
1892   WideIVInfo WI;
1893 
1894   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1895                         const TargetTransformInfo *TTI,
1896                         const DominatorTree *DTree)
1897     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1898     DT = DTree;
1899     WI.NarrowIV = IVPhi;
1900   }
1901 
1902   // Implement the interface used by simplifyUsersOfIV.
1903   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1904 };
1905 
1906 } // end anonymous namespace
1907 
1908 /// Iteratively perform simplification on a worklist of IV users. Each
1909 /// successive simplification may push more users which may themselves be
1910 /// candidates for simplification.
1911 ///
1912 /// Sign/Zero extend elimination is interleaved with IV simplification.
1913 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1914                                        SCEVExpander &Rewriter,
1915                                        LoopInfo *LI) {
1916   SmallVector<WideIVInfo, 8> WideIVs;
1917 
1918   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1919           Intrinsic::getName(Intrinsic::experimental_guard));
1920   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1921 
1922   SmallVector<PHINode*, 8> LoopPhis;
1923   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1924     LoopPhis.push_back(cast<PHINode>(I));
1925   }
1926   // Each round of simplification iterates through the SimplifyIVUsers worklist
1927   // for all current phis, then determines whether any IVs can be
1928   // widened. Widening adds new phis to LoopPhis, inducing another round of
1929   // simplification on the wide IVs.
1930   bool Changed = false;
1931   while (!LoopPhis.empty()) {
1932     // Evaluate as many IV expressions as possible before widening any IVs. This
1933     // forces SCEV to set no-wrap flags before evaluating sign/zero
1934     // extension. The first time SCEV attempts to normalize sign/zero extension,
1935     // the result becomes final. So for the most predictable results, we delay
1936     // evaluation of sign/zero extension until needed, and avoid running
1937     // other SCEV based analysis prior to simplifyAndExtend.
1938     do {
1939       PHINode *CurrIV = LoopPhis.pop_back_val();
1940 
1941       // Information about sign/zero extensions of CurrIV.
1942       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1943 
1944       Changed |=
1945           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1946 
1947       if (Visitor.WI.WidestNativeType) {
1948         WideIVs.push_back(Visitor.WI);
1949       }
1950     } while (!LoopPhis.empty());
1951 
1952     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1953       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1954       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1955         Changed = true;
1956         LoopPhis.push_back(WidePhi);
1957       }
1958     }
1959   }
1960   return Changed;
1961 }
1962 
1963 //===----------------------------------------------------------------------===//
1964 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1965 //===----------------------------------------------------------------------===//
1966 
1967 /// Return true if this loop's backedge taken count expression can be safely and
1968 /// cheaply expanded into an instruction sequence that can be used by
1969 /// linearFunctionTestReplace.
1970 ///
1971 /// TODO: This fails for pointer-type loop counters with greater than one byte
1972 /// strides, consequently preventing LFTR from running. For the purpose of LFTR
1973 /// we could skip this check in the case that the LFTR loop counter (chosen by
1974 /// FindLoopCounter) is also pointer type. Instead, we could directly convert
1975 /// the loop test to an inequality test by checking the target data's alignment
1976 /// of element types (given that the initial pointer value originates from or is
1977 /// used by an ABI-constrained operation, as opposed to inttoptr/ptrtoint).
1978 /// However, we don't yet have a strong motivation for converting loop tests
1979 /// into inequality tests.
1980 static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
1981                                         SCEVExpander &Rewriter) {
1982   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1983   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
1984       BackedgeTakenCount->isZero())
1985     return false;
1986 
1987   if (!L->getExitingBlock())
1988     return false;
1989 
1990   // Can't rewrite non-branch yet.
1991   if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
1992     return false;
1993 
1994   if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
1995     return false;
1996 
1997   return true;
1998 }
1999 
2000 /// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
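///
/// For example (illustrative IR), for
///   %iv.next = add nuw nsw i32 %iv, %stride
/// this returns %iv when %iv is a header phi of L and %stride is loop
/// invariant, and null otherwise.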
2001 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
2002   Instruction *IncI = dyn_cast<Instruction>(IncV);
2003   if (!IncI)
2004     return nullptr;
2005 
2006   switch (IncI->getOpcode()) {
2007   case Instruction::Add:
2008   case Instruction::Sub:
2009     break;
2010   case Instruction::GetElementPtr:
2011     // An IV counter must preserve its type.
2012     if (IncI->getNumOperands() == 2)
2013       break;
2014     LLVM_FALLTHROUGH;
2015   default:
2016     return nullptr;
2017   }
2018 
2019   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2020   if (Phi && Phi->getParent() == L->getHeader()) {
2021     if (isLoopInvariant(IncI->getOperand(1), L, DT))
2022       return Phi;
2023     return nullptr;
2024   }
2025   if (IncI->getOpcode() == Instruction::GetElementPtr)
2026     return nullptr;
2027 
2028   // Allow add/sub to be commuted.
2029   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2030   if (Phi && Phi->getParent() == L->getHeader()) {
2031     if (isLoopInvariant(IncI->getOperand(0), L, DT))
2032       return Phi;
2033   }
2034   return nullptr;
2035 }
2036 
2037 /// Return the compare guarding the loop latch, or NULL for unrecognized tests.
2038 static ICmpInst *getLoopTest(Loop *L) {
2039   assert(L->getExitingBlock() && "expected loop exit");
2040 
2041   BasicBlock *LatchBlock = L->getLoopLatch();
2042   // Don't bother with LFTR if the loop is not properly simplified.
2043   if (!LatchBlock)
2044     return nullptr;
2045 
2046   BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
2047   assert(BI && "expected exit branch");
2048 
2049   return dyn_cast<ICmpInst>(BI->getCondition());
2050 }
2051 
2052 /// linearFunctionTestReplace policy. Return true unless we can show that the
2053 /// current exit test is already sufficiently canonical.
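///
/// For example (illustrative IR), an exit test of the form
///   %exitcond = icmp ne i32 %iv.next, %limit
/// with a loop-invariant %limit and a simple counter %iv is already canonical,
/// so this returns false; an 'icmp slt' exit test would return true.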
2054 static bool needsLFTR(Loop *L, DominatorTree *DT) {
2055   // Do LFTR to simplify the exit condition to an ICMP.
2056   ICmpInst *Cond = getLoopTest(L);
2057   if (!Cond)
2058     return true;
2059 
2060   // Do LFTR to simplify the exit ICMP to EQ/NE
2061   ICmpInst::Predicate Pred = Cond->getPredicate();
2062   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2063     return true;
2064 
2065   // Look for a loop invariant RHS
2066   Value *LHS = Cond->getOperand(0);
2067   Value *RHS = Cond->getOperand(1);
2068   if (!isLoopInvariant(RHS, L, DT)) {
2069     if (!isLoopInvariant(LHS, L, DT))
2070       return true;
2071     std::swap(LHS, RHS);
2072   }
2073   // Look for a simple IV counter LHS
2074   PHINode *Phi = dyn_cast<PHINode>(LHS);
2075   if (!Phi)
2076     Phi = getLoopPhiForCounter(LHS, L, DT);
2077 
2078   if (!Phi)
2079     return true;
2080 
2081   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2082   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2083   if (Idx < 0)
2084     return true;
2085 
2086   // Do LFTR if the exit condition's IV is *not* a simple counter.
2087   Value *IncV = Phi->getIncomingValue(Idx);
2088   return Phi != getLoopPhiForCounter(IncV, L, DT);
2089 }
2090 
2091 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2092 /// down to checking that all operands are constant and listing instructions
2093 /// that may hide undef.
2094 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2095                                unsigned Depth) {
2096   if (isa<Constant>(V))
2097     return !isa<UndefValue>(V);
2098 
2099   if (Depth >= 6)
2100     return false;
2101 
2102   // Conservatively handle non-constant non-instructions. For example, Arguments
2103   // may be undef.
2104   Instruction *I = dyn_cast<Instruction>(V);
2105   if (!I)
2106     return false;
2107 
2108   // Load and return values may be undef.
2109   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2110     return false;
2111 
2112   // Optimistically handle other instructions.
2113   for (Value *Op : I->operands()) {
2114     if (!Visited.insert(Op).second)
2115       continue;
2116     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2117       return false;
2118   }
2119   return true;
2120 }
2121 
2122 /// Return true if the given value is concrete. We must prove that undef can
2123 /// never reach it.
2124 ///
2125 /// TODO: If we decide that this is a good approach to checking for undef, we
2126 /// may factor it into a common location.
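///
/// For example, a value computed as 'or i32 undef, 1' is not considered
/// concrete, and neither is anything that may read memory; a chain built
/// purely from non-undef constants is.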
2127 static bool hasConcreteDef(Value *V) {
2128   SmallPtrSet<Value*, 8> Visited;
2129   Visited.insert(V);
2130   return hasConcreteDefImpl(V, Visited, 0);
2131 }
2132 
2133 /// Return true if this IV has no uses other than the (soon to be rewritten)
2134 /// loop exit test.
2135 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2136   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2137   Value *IncV = Phi->getIncomingValue(LatchIdx);
2138 
2139   for (User *U : Phi->users())
2140     if (U != Cond && U != IncV) return false;
2141 
2142   for (User *U : IncV->users())
2143     if (U != Cond && U != Phi) return false;
2144   return true;
2145 }
2146 
2147 /// Find an affine IV in canonical form.
2148 ///
2149 /// BECount may be an i8* pointer type. The pointer difference is already a
2150 /// valid count without scaling the address stride, so it remains a pointer
2151 /// expression as far as SCEV is concerned.
2152 ///
2153 /// Currently only valid for LFTR. See the comments on hasConcreteDef below.
2154 ///
2155 /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
2156 ///
2157 /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
2158 /// This is difficult in general for SCEV because of potential overflow. But we
2159 /// could at least handle constant BECounts.
2160 static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
2161                                 ScalarEvolution *SE, DominatorTree *DT) {
2162   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2163 
2164   Value *Cond =
2165     cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
2166 
2167   // Loop over all of the PHI nodes, looking for a simple counter.
2168   PHINode *BestPhi = nullptr;
2169   const SCEV *BestInit = nullptr;
2170   BasicBlock *LatchBlock = L->getLoopLatch();
2171   assert(LatchBlock && "needsLFTR should guarantee a loop latch");
2172   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2173 
2174   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2175     PHINode *Phi = cast<PHINode>(I);
2176     if (!SE->isSCEVable(Phi->getType()))
2177       continue;
2178 
2179     // Avoid comparing an integer IV against a pointer Limit.
2180     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2181       continue;
2182 
2183     const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2184     if (!AR || AR->getLoop() != L || !AR->isAffine())
2185       continue;
2186 
2187     // AR may be a pointer type, while BECount is an integer type.
2188     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2189     // AR may not be a narrower type, or we may never exit.
2190     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2191     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2192       continue;
2193 
2194     const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2195     if (!Step || !Step->isOne())
2196       continue;
2197 
2198     int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2199     Value *IncV = Phi->getIncomingValue(LatchIdx);
2200     if (getLoopPhiForCounter(IncV, L, DT) != Phi)
2201       continue;
2202 
2203     // Avoid reusing a potentially undef value to compute other values that may
2204     // have originally had a concrete definition.
2205     if (!hasConcreteDef(Phi)) {
2206       // We explicitly allow unknown phis as long as they are already used by
2207       // the loop test. In this case we assume that performing LFTR could not
2208       // increase the number of undef users.
2209       if (ICmpInst *Cond = getLoopTest(L)) {
2210         if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
2211             Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
2212           continue;
2213         }
2214       }
2215     }
2216     const SCEV *Init = AR->getStart();
2217 
2218     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2219       // Don't force a live loop counter if another IV can be used.
2220       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2221         continue;
2222 
2223       // Prefer to count from zero. This is a more "canonical" counter form. It
2224       // also prefers integer to pointer IVs.
2225       if (BestInit->isZero() != Init->isZero()) {
2226         if (BestInit->isZero())
2227           continue;
2228       }
2229       // If two IVs both count from zero or both count from nonzero then the
2230       // narrower is likely a dead phi that has been widened. Use the wider phi
2231       // to allow the other to be eliminated.
2232       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2233         continue;
2234     }
2235     BestPhi = Phi;
2236     BestInit = Init;
2237   }
2238   return BestPhi;
2239 }
2240 
2241 /// Help linearFunctionTestReplace by generating a value that holds the RHS of
2242 /// the new loop test.
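///
/// A sketch of the two cases below, for a unit-stride IV starting at %start:
/// the integer path expands IVLimit = %start + IVCount, while the pointer-IV
/// path instead emits a GEP of the preheader base value by the count,
/// truncated or zero extended to the pointer's offset type.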
2243 static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
2244                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2245   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2246   assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
2247   const SCEV *IVInit = AR->getStart();
2248 
2249   // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
2250   // finds a valid pointer IV. Zero extend BECount in order to materialize a
2251   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2252   // the existing GEPs whenever possible.
2253   if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
2254     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2255     // signed value. IVCount on the other hand represents the loop trip count,
2256     // which is an unsigned value. FindLoopCounter only allows induction
2257     // variables that have a positive unit stride of one. This means we don't
2258     // have to handle the case of negative offsets (yet) and just need to zero
2259     // extend IVCount.
2260     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2261     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
2262 
2263     // Expand the code for the iteration count.
2264     assert(SE->isLoopInvariant(IVOffset, L) &&
2265            "Computed iteration count is not loop invariant!");
2266     BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2267     Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
2268 
2269     Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
2270     assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
2271     // We could handle pointer IVs other than i8*, but we need to compensate for
2272     // gep index scaling. See canExpandBackedgeTakenCount comments.
2273     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2274                              cast<PointerType>(GEPBase->getType())
2275                                  ->getElementType())->isOne() &&
2276            "unit stride pointer IV must be i8*");
2277 
2278     IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
2279     return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
2280   } else {
2281     // In any other case, convert both IVInit and IVCount to integers before
2282     // comparing. This may result in SCEV expansion of pointers, but in practice
2283     // SCEV will fold the pointer arithmetic away as follows:
2284     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit + (BECount + 1) = IVEnd.
2285     //
2286     // Valid cases: (1) both integers, which is the most common; (2) both may be
2287     // pointers for simple memset-style loops.
2288     //
2289     // IVInit integer and IVCount pointer would only occur if a canonical IV
2290     // were generated on top of case #2, which is not expected.
2291 
2292     const SCEV *IVLimit = nullptr;
2293     // For unit stride, IVCount = Start + BECount with 2's complement overflow.
2294     // For non-zero Start, compute IVCount here.
2295     if (AR->getStart()->isZero())
2296       IVLimit = IVCount;
2297     else {
2298       assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2299       const SCEV *IVInit = AR->getStart();
2300 
2301       // For integer IVs, truncate the IV before computing IVInit + BECount.
2302       if (SE->getTypeSizeInBits(IVInit->getType())
2303           > SE->getTypeSizeInBits(IVCount->getType()))
2304         IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
2305 
2306       IVLimit = SE->getAddExpr(IVInit, IVCount);
2307     }
2308     // Expand the code for the iteration count.
2309     BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2310     IRBuilder<> Builder(BI);
2311     assert(SE->isLoopInvariant(IVLimit, L) &&
2312            "Computed iteration count is not loop invariant!");
2313     // Ensure that we generate the same type as IndVar, or a smaller integer
2314     // type. In the presence of null pointer values, we have an integer type
2315     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2316     Type *LimitTy = IVCount->getType()->isPointerTy() ?
2317       IndVar->getType() : IVCount->getType();
2318     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2319   }
2320 }
2321 
2322 /// This method rewrites the exit condition of the loop to be a canonical !=
2323 /// comparison against the incremented loop induction variable.  This pass is
2324 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2325 /// determine a loop-invariant trip count of the loop, which is actually a much
2326 /// broader range than just linear tests.
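///
/// For example (illustrative IR), a loop exiting on
///   %cmp = icmp slt i32 %iv, %n
/// with a computable trip count is rewritten to exit on
///   %exitcond = icmp ne i32 %iv.next, %limit
/// where %limit is materialized by genLoopLimit above.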
2327 bool IndVarSimplify::
2328 linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
2329                           PHINode *IndVar, SCEVExpander &Rewriter) {
2330   assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
2331 
2332   // Initialize CmpIndVar and IVCount to their preincremented values.
2333   Value *CmpIndVar = IndVar;
2334   const SCEV *IVCount = BackedgeTakenCount;
2335 
2336   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2337 
2338   // If the exiting block is the same as the backedge block, we prefer to
2339   // compare against the post-incremented value, otherwise we must compare
2340   // against the preincremented value.
2341   if (L->getExitingBlock() == L->getLoopLatch()) {
2342     // Add one to the "backedge-taken" count to get the trip count.
2343     // This addition may overflow, which is valid as long as the comparison is
2344     // truncated to BackedgeTakenCount->getType().
2345     IVCount = SE->getAddExpr(BackedgeTakenCount,
2346                              SE->getOne(BackedgeTakenCount->getType()));
2347     // The BackedgeTaken expression contains the number of times that the
2348     // backedge branches to the loop header.  This is one less than the
2349     // number of times the loop executes, so use the incremented indvar.
2350     CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
2351   }
2352 
2353   Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
2354   assert(ExitCnt->getType()->isPointerTy() ==
2355              IndVar->getType()->isPointerTy() &&
2356          "genLoopLimit missed a cast");
2357 
2358   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2359   BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2360   ICmpInst::Predicate P;
2361   if (L->contains(BI->getSuccessor(0)))
2362     P = ICmpInst::ICMP_NE;
2363   else
2364     P = ICmpInst::ICMP_EQ;
2365 
2366   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2367                     << "      LHS:" << *CmpIndVar << '\n'
2368                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2369                     << "\n"
2370                     << "      RHS:\t" << *ExitCnt << "\n"
2371                     << "  IVCount:\t" << *IVCount << "\n");
2372 
2373   IRBuilder<> Builder(BI);
2374 
2375   // The new loop exit condition should reuse the debug location of the
2376   // original loop exit condition.
2377   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2378     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2379 
2380   // LFTR can ignore IV overflow and truncate to the width of
2381   // BECount. This avoids materializing the add(zext(add)) expression.
2382   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2383   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2384   if (CmpIndVarSize > ExitCntSize) {
2385     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2386     const SCEV *ARStart = AR->getStart();
2387     const SCEV *ARStep = AR->getStepRecurrence(*SE);
2388     // For constant IVCount, avoid truncation.
2389     if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
2390       const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
2391       APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
2392       // Note that the post-inc value of BackedgeTakenCount may have overflowed
2393       // above such that IVCount is now zero.
2394       if (IVCount != BackedgeTakenCount && Count == 0) {
2395         Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
2396         ++Count;
2397       }
2398       else
2399         Count = Count.zext(CmpIndVarSize);
2400       APInt NewLimit;
2401       if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
2402         NewLimit = Start - Count;
2403       else
2404         NewLimit = Start + Count;
2405       ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
2406 
2407       LLVM_DEBUG(dbgs() << "  Widen RHS:\t" << *ExitCnt << "\n");
2408     } else {
2409       // We try to extend the trip count first; if that fails, we truncate the IV.
2410       // Zext(trunc(IV)) == IV implies equivalence of the following two:
2411       // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
2412       // one of the two holds, extend the trip count; otherwise truncate the IV.
2413       bool Extended = false;
2414       const SCEV *IV = SE->getSCEV(CmpIndVar);
2415       const SCEV *ZExtTrunc =
2416            SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2417                                                      ExitCnt->getType()),
2418                                  CmpIndVar->getType());
2419 
2420       if (ZExtTrunc == IV) {
2421         Extended = true;
2422         ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2423                                      "wide.trip.count");
2424       } else {
2425         const SCEV *SExtTrunc =
2426           SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2427                                                     ExitCnt->getType()),
2428                                 CmpIndVar->getType());
2429         if (SExtTrunc == IV) {
2430           Extended = true;
2431           ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2432                                        "wide.trip.count");
2433         }
2434       }
2435 
2436       if (!Extended)
2437         CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2438                                         "lftr.wideiv");
2439     }
2440   }
2441   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2442   Value *OrigCond = BI->getCondition();
2443   // It's tempting to use replaceAllUsesWith here to fully replace the old
2444   // comparison, but that's not immediately safe, since users of the old
2445   // comparison may not be dominated by the new comparison. Instead, just
2446   // update the branch to use the new comparison; in the common case this
2447   // will make the old comparison dead.
2448   BI->setCondition(Cond);
2449   DeadInsts.push_back(OrigCond);
2450 
2451   ++NumLFTR;
2452   return true;
2453 }
2454 
2455 //===----------------------------------------------------------------------===//
2456 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2457 //===----------------------------------------------------------------------===//
2458 
2459 /// If there's a single exit block, sink any loop-invariant values that
2460 /// were defined in the preheader but not used inside the loop into the
2461 /// exit block to reduce register pressure in the loop.
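///
/// For example (illustrative IR), a preheader computation
///   %inv = mul i32 %a, %b
/// with no users inside the loop and a lone user in the exit block is moved
/// into that exit block, shortening its live range across the loop.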
2462 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2463   BasicBlock *ExitBlock = L->getExitBlock();
2464   if (!ExitBlock) return false;
2465 
2466   BasicBlock *Preheader = L->getLoopPreheader();
2467   if (!Preheader) return false;
2468 
2469   bool MadeAnyChanges = false;
2470   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2471   BasicBlock::iterator I(Preheader->getTerminator());
2472   while (I != Preheader->begin()) {
2473     --I;
2474     // New instructions were inserted at the end of the preheader.
2475     if (isa<PHINode>(I))
2476       break;
2477 
2478     // Don't move instructions which might have side effects, since the side
2479     // effects need to complete before instructions inside the loop.  Also don't
2480     // move instructions which might read memory, since the loop may modify
2481     // memory. Note that it's okay if the instruction might have undefined
2482     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2483     // block.
2484     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2485       continue;
2486 
2487     // Skip debug info intrinsics.
2488     if (isa<DbgInfoIntrinsic>(I))
2489       continue;
2490 
2491     // Skip eh pad instructions.
2492     if (I->isEHPad())
2493       continue;
2494 
2495     // Don't sink alloca: we never want to sink static alloca's out of the
2496     // entry block, and correctly sinking dynamic alloca's requires
2497     // checks for stacksave/stackrestore intrinsics.
2498     // FIXME: Refactor this check somehow?
2499     if (isa<AllocaInst>(I))
2500       continue;
2501 
2502     // Determine if there is a use in or before the loop (direct or
2503     // otherwise).
2504     bool UsedInLoop = false;
2505     for (Use &U : I->uses()) {
2506       Instruction *User = cast<Instruction>(U.getUser());
2507       BasicBlock *UseBB = User->getParent();
2508       if (PHINode *P = dyn_cast<PHINode>(User)) {
2509         unsigned i =
2510           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2511         UseBB = P->getIncomingBlock(i);
2512       }
2513       if (UseBB == Preheader || L->contains(UseBB)) {
2514         UsedInLoop = true;
2515         break;
2516       }
2517     }
2518 
2519     // If there is, the def must remain in the preheader.
2520     if (UsedInLoop)
2521       continue;
2522 
2523     // Otherwise, sink it to the exit block.
2524     Instruction *ToMove = &*I;
2525     bool Done = false;
2526 
2527     if (I != Preheader->begin()) {
2528       // Skip debug info intrinsics.
2529       do {
2530         --I;
2531       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2532 
2533       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2534         Done = true;
2535     } else {
2536       Done = true;
2537     }
2538 
2539     MadeAnyChanges = true;
2540     ToMove->moveBefore(*ExitBlock, InsertPt);
2541     if (Done) break;
2542     InsertPt = ToMove->getIterator();
2543   }
2544 
2545   return MadeAnyChanges;
2546 }
2547 
2548 //===----------------------------------------------------------------------===//
2549 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2550 //===----------------------------------------------------------------------===//
2551 
2552 bool IndVarSimplify::run(Loop *L) {
2553   // We need (and expect!) the incoming loop to be in LCSSA.
2554   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2555          "LCSSA required to run indvars!");
2556   bool Changed = false;
2557 
2558   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2559   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2560   //    canonicalization can be a pessimization without LSR to "clean up"
2561   //    afterwards.
2562   //  - We depend on having a preheader; in particular,
2563   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2564   //    and we're in trouble if we can't find the induction variable even when
2565   //    we've manually inserted one.
2566   //  - LFTR relies on having a single backedge.
2567   if (!L->isLoopSimplifyForm())
2568     return false;
2569 
2570   // If there are any floating-point recurrences, attempt to
2571   // transform them to use integer recurrences.
2572   Changed |= rewriteNonIntegerIVs(L);
2573 
2574   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2575 
2576   // Create a rewriter object which we'll use to transform the code with.
2577   SCEVExpander Rewriter(*SE, DL, "indvars");
2578 #ifndef NDEBUG
2579   Rewriter.setDebugType(DEBUG_TYPE);
2580 #endif
2581 
2582   // Eliminate redundant IV users.
2583   //
2584   // Simplification works best when run before other consumers of SCEV. We
2585   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2586   // other expressions involving loop IVs have been evaluated. This helps SCEV
2587   // set no-wrap flags before normalizing sign/zero extension.
2588   Rewriter.disableCanonicalMode();
2589   Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if this loop has a computable loop-invariant execution count.
  // If so, we can compute the final value of any expressions that are
  // recurrent in the loop, and substitute the exit values from the loop into
  // any instructions outside of the loop that use those final values.
  //
  if (ReplaceExitValue != NeverRepl &&
      !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    Changed |= rewriteLoopExitValues(L, Rewriter);
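
  // An illustrative sketch (hypothetical source): with a computable
  // backedge-taken count, the post-loop use in
  //
  //   int i = 0;
  //   do { ++i; } while (i < 100);
  //   use(i);
  //
  // can become use(100), removing the dependence on the loop's exit value.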

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
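
  // An illustrative sketch (hypothetical source): in
  //
  //   for (i = 0, j = 0; i < n; ++i, ++j) use(i, j);
  //
  // both IVs have the same SCEV, {0,+,1}, so one phi is congruent to the
  // other and can be eliminated in its favor.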

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.  We can currently only handle loops with a single exit.
  if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) &&
      needsLFTR(L, DT)) {
    PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
    if (IndVar) {
      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
      // pass that uses the SCEVExpander must enforce them itself. This does
      // not work well for loop passes because SCEVExpander makes assumptions
      // about all loops, while LoopPassManager only forces the current loop
      // to be simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
                                             Rewriter);
    }
  }
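
  // An illustrative sketch of LFTR (hypothetical source; 'TC' is a made-up
  // name for the expanded trip count): an exit test on a derived value such
  // as 'i * 4 < n' can be replaced by an equality test of a simple counter
  // against the trip count, 'i != TC', once SCEV can compute that count --
  // a form that later analyses handle more easily.
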
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);
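
  // An illustrative sketch (hypothetical source): in
  //
  //   t = a + b;           // loop-invariant, no use inside the loop
  //   for (...) { ... }
  //   use(t);
  //
  // 't' can be computed in the exit block instead, so it does not occupy a
  // register for the duration of the loop.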

  // rewriteFirstIterationLoopExitValues does not rely on computing the trip
  // count and can therefore simplify exit values beyond what
  // rewriteLoopExitValues, above, can handle.
  Changed |= rewriteFirstIterationLoopExitValues(L);
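
  // An illustrative sketch (hypothetical source): if an exit is controlled by
  // a loop-invariant condition,
  //
  //   for (i = 0; ...; ++i)
  //     if (c) break;      // 'c' never changes inside the loop
  //
  // then whenever that exit is taken at all, it is taken on the first
  // iteration, so the value of 'i' leaving the loop along that path is 0.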

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute the trip count.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    // Compare the counts at a common width: truncate whichever one is wider.
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}
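
// A minimal usage sketch (standard LLVM APIs, not defined in this file): run
// indvars on every loop in a function through the new pass manager,
//
//   FunctionPassManager FPM;
//   FPM.addPass(createFunctionToLoopPassAdaptor(IndVarSimplifyPass()));
//
// or, from the command line: opt -passes=indvars in.ll -S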

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}
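
// A minimal usage sketch for the legacy entry point (standard legacy
// pass-manager APIs, not defined in this file):
//
//   legacy::PassManager PM;
//   PM.add(createIndVarSimplifyPass());
//   PM.run(M); // M is a llvm::Module
//
// or, from the command line: opt -indvars in.ll -S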