//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
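//      For example, given
//        'for (i = 0; i != 10; ++i) t = i*2;'
//      followed by a use of 't', the use is rewritten to the exit value 18
//      computed outside the loop, after which the loop itself may become dead.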
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(false),
                cl::desc("Predicate conditions in read only loops"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.
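    //
    // For example, a pointer recurrence such as
    //   %p = phi i8* [ %base, %preheader ], [ %p.next, %loop ]
    // may be expanded as a gep off the loop-invariant base,
    //   %p.new = getelementptr i8, i8* %base, i64 %iv
    // in which case both values share the base %base and the rewrite is valid.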

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
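/// The conversion must be exact; e.g. 3.0 converts to 3, while 3.5 or values
/// that do not fit in a signed 64-bit integer are rejected.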
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
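    // For example, 'for (i = 0; i != 10; i += 3)' visits 0, 3, 6, 9, 12, ...
    // and never lands exactly on 10, so an i32 IV would eventually wrap rather
    // than mimic the fp IV.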
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if expanding the exit value is high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
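/// For example, in 'for (i = 0; i != 10; ++i) t = i * i;' with 't' used after
/// the loop, the exit value of 't' is 81; it can be materialized outside the
/// loop and substituted for the LCSSA phi, potentially making the loop dead.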
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.  We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences).  Is there existing code we can reuse?
          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
          if (isa<SCEVCouldNotCompute>(ExitCount))
            continue;
          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
            if (AddRec->getLoop() == L)
              ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
          if (isa<SCEVCouldNotCompute>(ExitValue) ||
              !SE->isLoopInvariant(ExitValue, L) ||
              !isSafeToExpand(ExitValue, *SE))
            continue;
        }

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which can not be optimized
        // away.  Avoid doing so unless we know we have a value which computes
        // the ExitValue already.  TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
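/// For example, given 'if (c) break;' inside the loop with loop-invariant 'c',
/// that exit can only be taken on the first iteration; a header phi's value
/// along such an exit edge is therefore its incoming value from the preheader.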
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass, a loop can be deleted even when
  // ExitingBlocks.size() > 1; we avoid the multiple-ExitingBlocks case here
  // for simplicity.
  // TODO: If we see a testcase where a loop with multiple exiting blocks can
  // be deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We only check the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute the cost of all instructions on the induction variable
  // more comprehensively when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
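/// For example, on a target where i64 is legal, an i32 IV whose value is
/// consumed as 'sext i32 %iv to i64' can be replaced by a single i64 IV; the
/// sext disappears, and any remaining narrow users are fed by a trunc of the
/// wide IV.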
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.
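  //
  // For instance, when widening 'NarrowDef + NonIVNarrowDef' to 64 bits, we
  // test whether 'WideDef + sext(NonIVNarrowDef)' or
  // 'WideDef + zext(NonIVNarrowDef)' equals WideAR, and use whichever
  // extension (if either) makes the equality hold.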

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand).  The extend operation is hoisted into the
/// loop preheader as far as possible.
1358 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1359   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1360   if (!Cmp)
1361     return false;
1362 
1363   // We can legally widen the comparison in the following two cases:
1364   //
1365   //  - The signedness of the IV extension and comparison match
1366   //
1367   //  - The narrow IV is always positive (and thus its sign extension is equal
1368   //    to its zero extension).  For instance, let's say we're zero extending
1369   //    %narrow for the following use
1370   //
1371   //      icmp slt i32 %narrow, %val   ... (A)
1372   //
1373   //    and %narrow is always positive.  Then
1374   //
  //      (A) == icmp slt i64 sext(%narrow), sext(%val)
  //          == icmp slt i64 zext(%narrow), sext(%val)
1377   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1378   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1379     return false;
1380 
1381   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1382   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1383   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1384   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1385 
1386   // Widen the compare instruction.
1387   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1388   if (!InsertPt)
1389     return false;
1390   IRBuilder<> Builder(InsertPt);
1391   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1392 
1393   // Widen the other operand of the compare, if necessary.
1394   if (CastWidth < IVWidth) {
1395     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1396     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1397   }
1398   return true;
1399 }
1400 
/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, then we have the following:
/// if the load is hoisted outside the loop, we do not reach this function,
/// because scalar evolution analysis works fine in widenIVUse with variables
/// hoisted outside the loop, and efficient code is subsequently generated by
/// not emitting truncate instructions. But when the load is not hoisted
/// (whether due to a limitation in alias analysis or a true legality
/// constraint), scalar evolution cannot proceed with loop-variant values and
/// inefficient code is generated. This function handles the non-hoisted load
/// special case by making the optimization generate the same type of code
/// for hoisted and non-hoisted loads (widen the use and eliminate the
/// sign/zero extend instruction). This special case matters especially when
/// the induction variables affect the addressing mode in code generation.
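///
/// A sketch of the pattern this handles (illustrative names only):
///   %ld  = load i32, i32* %p        ; loop-variant, not hoistable
///   %add = add nsw i32 %iv, %ld
///   %ext = sext i32 %add to i64     ; the extend we hope to eliminate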
1414 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1415   Instruction *NarrowUse = DU.NarrowUse;
1416   Instruction *NarrowDef = DU.NarrowDef;
1417   Instruction *WideDef = DU.WideDef;
1418 
1419   // Handle the common case of add<nsw/nuw>
1420   const unsigned OpCode = NarrowUse->getOpcode();
1421   // Only Add/Sub/Mul instructions are supported.
1422   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1423       OpCode != Instruction::Mul)
1424     return false;
1425 
1426   // The operand that is not defined by NarrowDef of DU. Let's call it the
1427   // other operand.
1428   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1429   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1430          "bad DU");
1431 
1432   const SCEV *ExtendOperExpr = nullptr;
1433   const OverflowingBinaryOperator *OBO =
1434     cast<OverflowingBinaryOperator>(NarrowUse);
1435   ExtendKind ExtKind = getExtendKind(NarrowDef);
1436   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1437     ExtendOperExpr = SE->getSignExtendExpr(
1438       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1439   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1440     ExtendOperExpr = SE->getZeroExtendExpr(
1441       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1442   else
1443     return false;
1444 
1445   // We are interested in the other operand being a load instruction.
1446   // But, we should look into relaxing this restriction later on.
1447   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1448   if (I && I->getOpcode() != Instruction::Load)
1449     return false;
1450 
  // Verify that the defining operand is an AddRec.
1452   const SCEV *Op1 = SE->getSCEV(WideDef);
1453   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1454   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1455     return false;
  // Verify that the other operand is an extend.
1457   if (ExtKind == SignExtended) {
1458     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1459       return false;
1460   } else {
1461     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1462       return false;
1463   }
1464 
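  // Require that every user of NarrowUse is an extend of the matching kind
  // to the wide type; those are exactly the instructions that the codegen
  // step (widenWithVariantLoadUseCodegen) will later eliminate.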
1465   if (ExtKind == SignExtended) {
1466     for (Use &U : NarrowUse->uses()) {
1467       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1468       if (!User || User->getType() != WideType)
1469         return false;
1470     }
1471   } else { // ExtKind == ZeroExtended
1472     for (Use &U : NarrowUse->uses()) {
1473       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1474       if (!User || User->getType() != WideType)
1475         return false;
1476     }
1477   }
1478 
1479   return true;
1480 }
1481 
/// Special case for widening with variant loads (see
/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1484 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1485   Instruction *NarrowUse = DU.NarrowUse;
1486   Instruction *NarrowDef = DU.NarrowDef;
1487   Instruction *WideDef = DU.WideDef;
1488 
1489   ExtendKind ExtKind = getExtendKind(NarrowDef);
1490 
1491   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1492 
  // Generate the widened use instruction.
1494   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1495                    ? WideDef
1496                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1497                                       ExtKind, NarrowUse);
1498   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1499                    ? WideDef
1500                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1501                                       ExtKind, NarrowUse);
1502 
1503   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1504   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1505                                         NarrowBO->getName());
1506   IRBuilder<> Builder(NarrowUse);
1507   Builder.Insert(WideBO);
1508   WideBO->copyIRFlags(NarrowBO);
1509 
  ExtendKindMap[NarrowUse] = ExtKind;
1514 
1515   // Update the Use.
1516   if (ExtKind == SignExtended) {
1517     for (Use &U : NarrowUse->uses()) {
1518       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1519       if (User && User->getType() == WideType) {
1520         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1521                           << *WideBO << "\n");
1522         ++NumElimExt;
1523         User->replaceAllUsesWith(WideBO);
1524         DeadInsts.emplace_back(User);
1525       }
1526     }
1527   } else { // ExtKind == ZeroExtended
1528     for (Use &U : NarrowUse->uses()) {
1529       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1530       if (User && User->getType() == WideType) {
1531         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1532                           << *WideBO << "\n");
1533         ++NumElimExt;
1534         User->replaceAllUsesWith(WideBO);
1535         DeadInsts.emplace_back(User);
1536       }
1537     }
1538   }
1539 }
1540 
1541 /// Determine whether an individual user of the narrow IV can be widened. If so,
1542 /// return the wide clone of the user.
1543 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1544   assert(ExtendKindMap.count(DU.NarrowDef) &&
1545          "Should already know the kind of extension used to widen NarrowDef");
1546 
1547   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1548   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1549     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1550       // For LCSSA phis, sink the truncate outside the loop.
1551       // After SimplifyCFG most loop exit targets have a single predecessor.
1552       // Otherwise fall back to a truncate within the loop.
1553       if (UsePhi->getNumOperands() != 1)
1554         truncateIVUse(DU, DT, LI);
1555       else {
1556         // Widening the PHI requires us to insert a trunc.  The logical place
1557         // for this trunc is in the same BB as the PHI.  This is not possible if
1558         // the BB is terminated by a catchswitch.
1559         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1560           return nullptr;
1561 
1562         PHINode *WidePhi =
1563           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1564                           UsePhi);
1565         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1566         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1567         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1568         UsePhi->replaceAllUsesWith(Trunc);
1569         DeadInsts.emplace_back(UsePhi);
1570         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1571                           << *WidePhi << "\n");
1572       }
1573       return nullptr;
1574     }
1575   }
1576 
  // This narrow use can be widened by a sext if it's non-negative or its
  // narrow def was widened by a sext. Same for zext.
1579   auto canWidenBySExt = [&]() {
1580     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1581   };
1582   auto canWidenByZExt = [&]() {
1583     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1584   };
1585 
1586   // Our raison d'etre! Eliminate sign and zero extension.
1587   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1588       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1589     Value *NewDef = DU.WideDef;
1590     if (DU.NarrowUse->getType() != WideType) {
1591       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1592       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1593       if (CastWidth < IVWidth) {
1594         // The cast isn't as wide as the IV, so insert a Trunc.
1595         IRBuilder<> Builder(DU.NarrowUse);
1596         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      } else {
1599         // A wider extend was hidden behind a narrower one. This may induce
1600         // another round of IV widening in which the intermediate IV becomes
1601         // dead. It should be very rare.
1602         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1603                           << " not wide enough to subsume " << *DU.NarrowUse
1604                           << "\n");
1605         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1606         NewDef = DU.NarrowUse;
1607       }
1608     }
1609     if (NewDef != DU.NarrowUse) {
1610       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1611                         << " replaced by " << *DU.WideDef << "\n");
1612       ++NumElimExt;
1613       DU.NarrowUse->replaceAllUsesWith(NewDef);
1614       DeadInsts.emplace_back(DU.NarrowUse);
1615     }
    // Now that the extend is gone, we want to expose its uses for potential
1617     // further simplification. We don't need to directly inform SimplifyIVUsers
1618     // of the new users, because their parent IV will be processed later as a
1619     // new loop phi. If we preserved IVUsers analysis, we would also want to
1620     // push the uses of WideDef here.
1621 
1622     // No further widening is needed. The deceased [sz]ext had done it for us.
1623     return nullptr;
1624   }
1625 
1626   // Does this user itself evaluate to a recurrence after widening?
1627   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1628   if (!WideAddRec.first)
1629     WideAddRec = getWideRecurrence(DU);
1630 
1631   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1632   if (!WideAddRec.first) {
1633     // If use is a loop condition, try to promote the condition instead of
1634     // truncating the IV first.
1635     if (widenLoopCompare(DU))
1636       return nullptr;
1637 
    // At this point we are about to generate a truncate instruction that may
    // hurt performance, because the scalar evolution expression computed
    // earlier in WideAddRec.first is not an AddRec for this loop (i.e. not a
    // polynomial induction expression). In that case, look at the operands
    // of the use instruction to determine if we can still widen the use
    // instead of truncating its operand.
1643     if (widenWithVariantLoadUse(DU)) {
1644       widenWithVariantLoadUseCodegen(DU);
1645       return nullptr;
1646     }
1647 
1648     // This user does not evaluate to a recurrence after widening, so don't
1649     // follow it. Instead insert a Trunc to kill off the original use,
1650     // eventually isolating the original narrow IV so it can be removed.
1651     truncateIVUse(DU, DT, LI);
1652     return nullptr;
1653   }
  // Assume block terminators cannot evaluate to a recurrence. We can't
  // insert a Trunc after a terminator if there happens to be a critical edge.
1656   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1657          "SCEV is not expected to evaluate a block terminator");
1658 
1659   // Reuse the IV increment that SCEVExpander created as long as it dominates
1660   // NarrowUse.
1661   Instruction *WideUse = nullptr;
1662   if (WideAddRec.first == WideIncExpr &&
1663       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1664     WideUse = WideInc;
1665   else {
1666     WideUse = cloneIVUser(DU, WideAddRec.first);
1667     if (!WideUse)
1668       return nullptr;
1669   }
1670   // Evaluation of WideAddRec ensured that the narrow expression could be
1671   // extended outside the loop without overflow. This suggests that the wide use
1672   // evaluates to the same expression as the extended narrow use, but doesn't
1673   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1674   // where it fails, we simply throw away the newly created wide use.
1675   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1676     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1677                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1678                       << "\n");
1679     DeadInsts.emplace_back(WideUse);
1680     return nullptr;
1681   }
1682 
  // If we reached this point then we are going to replace
  // DU.NarrowUse with WideUse. Reattach the debug values now.
1685   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1686 
1687   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1688   // Returning WideUse pushes it on the worklist.
1689   return WideUse;
1690 }
1691 
1692 /// Add eligible users of NarrowDef to NarrowIVUsers.
1693 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1694   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1695   bool NonNegativeDef =
1696       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1697                            SE->getConstant(NarrowSCEV->getType(), 0));
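  // For instance, SCEV can typically prove a canonical IV such as
  // {0,+,1}<nsw> non-negative, which lets every use of it be treated as
  // NeverNegative below.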
1698   for (User *U : NarrowDef->users()) {
1699     Instruction *NarrowUser = cast<Instruction>(U);
1700 
1701     // Handle data flow merges and bizarre phi cycles.
1702     if (!Widened.insert(NarrowUser).second)
1703       continue;
1704 
1705     bool NonNegativeUse = false;
1706     if (!NonNegativeDef) {
      // We might have control-dependent range information for this context.
1708       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1709         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1710     }
1711 
1712     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1713                                NonNegativeDef || NonNegativeUse);
1714   }
1715 }
1716 
1717 /// Process a single induction variable. First use the SCEVExpander to create a
1718 /// wide induction variable that evaluates to the same recurrence as the
1719 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1720 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1721 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1722 ///
1723 /// It would be simpler to delete uses as they are processed, but we must avoid
1724 /// invalidating SCEV expressions.
1725 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1726   // Is this phi an induction variable?
1727   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1728   if (!AddRec)
1729     return nullptr;
1730 
1731   // Widen the induction variable expression.
1732   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1733                                ? SE->getSignExtendExpr(AddRec, WideType)
1734                                : SE->getZeroExtendExpr(AddRec, WideType);
1735 
1736   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1737          "Expect the new IV expression to preserve its type");
1738 
1739   // Can the IV be extended outside the loop without overflow?
1740   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1741   if (!AddRec || AddRec->getLoop() != L)
1742     return nullptr;
1743 
1744   // An AddRec must have loop-invariant operands. Since this AddRec is
1745   // materialized by a loop header phi, the expression cannot have any post-loop
1746   // operands, so they must dominate the loop header.
1747   assert(
1748       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1749       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1750       "Loop header phi recurrence inputs do not dominate the loop");
1751 
1752   // Iterate over IV uses (including transitive ones) looking for IV increments
1753   // of the form 'add nsw %iv, <const>'. For each increment and each use of
  // the increment, calculate control-dependent range information based on
1755   // dominating conditions inside of the loop (e.g. a range check inside of the
1756   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1757   //
1758   // Control-dependent range information is later used to prove that a narrow
1759   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1760   // this on demand because when pushNarrowIVUsers needs this information some
1761   // of the dominating conditions might be already widened.
1762   if (UsePostIncrementRanges)
1763     calculatePostIncRanges(OrigPhi);
1764 
1765   // The rewriter provides a value for the desired IV expression. This may
1766   // either find an existing phi or materialize a new one. Either way, we
1767   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1768   // of the phi-SCC dominates the loop entry.
1769   Instruction *InsertPt = &L->getHeader()->front();
1770   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1771 
1772   // Remembering the WideIV increment generated by SCEVExpander allows
1773   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1774   // employ a general reuse mechanism because the call above is the only call to
1775   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1776   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1777     WideInc =
1778       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1779     WideIncExpr = SE->getSCEV(WideInc);
1780     // Propagate the debug location associated with the original loop increment
1781     // to the new (widened) increment.
1782     auto *OrigInc =
1783       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1784     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1785   }
1786 
1787   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1788   ++NumWidened;
1789 
1790   // Traverse the def-use chain using a worklist starting at the original IV.
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1792 
1793   Widened.insert(OrigPhi);
1794   pushNarrowIVUsers(OrigPhi, WidePhi);
1795 
1796   while (!NarrowIVUsers.empty()) {
1797     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1798 
1799     // Process a def-use edge. This may replace the use, so don't hold a
1800     // use_iterator across it.
1801     Instruction *WideUse = widenIVUse(DU, Rewriter);
1802 
1803     // Follow all def-use edges from the previous narrow use.
1804     if (WideUse)
1805       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1806 
1807     // widenIVUse may have removed the def-use edge.
1808     if (DU.NarrowDef->use_empty())
1809       DeadInsts.emplace_back(DU.NarrowDef);
1810   }
1811 
1812   // Attach any debug information to the new PHI.
1813   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1814 
1815   return WidePhi;
1816 }
1817 
/// Calculates the control-dependent range for the given def at the given
/// context by looking at dominating conditions inside of the loop.
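/// For example (an illustrative sketch): if '%i.next = add nsw i32 %i, 1' is
/// only reached under a dominating check 'icmp sgt i32 %i, -1', then %i.next
/// is known positive at that context, which later lets pushNarrowIVUsers
/// treat it as non-negative.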
1820 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1821                                     Instruction *NarrowUser) {
1822   using namespace llvm::PatternMatch;
1823 
1824   Value *NarrowDefLHS;
1825   const APInt *NarrowDefRHS;
1826   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1827                                  m_APInt(NarrowDefRHS))) ||
1828       !NarrowDefRHS->isNonNegative())
1829     return;
1830 
1831   auto UpdateRangeFromCondition = [&] (Value *Condition,
1832                                        bool TrueDest) {
1833     CmpInst::Predicate Pred;
1834     Value *CmpRHS;
1835     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1836                                  m_Value(CmpRHS))))
1837       return;
1838 
1839     CmpInst::Predicate P =
1840             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1841 
1842     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1843     auto CmpConstrainedLHSRange =
1844             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1845     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1846         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1847 
1848     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1849   };
1850 
1851   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1852     if (!HasGuards)
1853       return;
1854 
1855     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1856                                      Ctx->getParent()->rend())) {
1857       Value *C = nullptr;
1858       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1859         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1860     }
1861   };
1862 
1863   UpdateRangeFromGuards(NarrowUser);
1864 
1865   BasicBlock *NarrowUserBB = NarrowUser->getParent();
  // If NarrowUserBB is statically unreachable, asking dominator queries may
  // yield surprising results (e.g. the block may not have a dom tree node).
1868   if (!DT->isReachableFromEntry(NarrowUserBB))
1869     return;
1870 
1871   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1872        L->contains(DTB->getBlock());
1873        DTB = DTB->getIDom()) {
1874     auto *BB = DTB->getBlock();
1875     auto *TI = BB->getTerminator();
1876     UpdateRangeFromGuards(TI);
1877 
1878     auto *BI = dyn_cast<BranchInst>(TI);
1879     if (!BI || !BI->isConditional())
1880       continue;
1881 
1882     auto *TrueSuccessor = BI->getSuccessor(0);
1883     auto *FalseSuccessor = BI->getSuccessor(1);
1884 
1885     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1886       return BBE.isSingleEdge() &&
1887              DT->dominates(BBE, NarrowUser->getParent());
1888     };
1889 
1890     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1891       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1892 
1893     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1894       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1895   }
1896 }
1897 
/// Calculates the PostIncRangeInfos map for the given IV.
1899 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1900   SmallPtrSet<Instruction *, 16> Visited;
1901   SmallVector<Instruction *, 6> Worklist;
1902   Worklist.push_back(OrigPhi);
1903   Visited.insert(OrigPhi);
1904 
1905   while (!Worklist.empty()) {
1906     Instruction *NarrowDef = Worklist.pop_back_val();
1907 
1908     for (Use &U : NarrowDef->uses()) {
1909       auto *NarrowUser = cast<Instruction>(U.getUser());
1910 
1911       // Don't go looking outside the current loop.
1912       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1913       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1914         continue;
1915 
1916       if (!Visited.insert(NarrowUser).second)
1917         continue;
1918 
1919       Worklist.push_back(NarrowUser);
1920 
1921       calculatePostIncRange(NarrowDef, NarrowUser);
1922     }
1923   }
1924 }
1925 
1926 //===----------------------------------------------------------------------===//
1927 //  Live IV Reduction - Minimize IVs live across the loop.
1928 //===----------------------------------------------------------------------===//
1929 
1930 //===----------------------------------------------------------------------===//
1931 //  Simplification of IV users based on SCEV evaluation.
1932 //===----------------------------------------------------------------------===//
1933 
1934 namespace {
1935 
1936 class IndVarSimplifyVisitor : public IVVisitor {
1937   ScalarEvolution *SE;
1938   const TargetTransformInfo *TTI;
1939   PHINode *IVPhi;
1940 
1941 public:
1942   WideIVInfo WI;
1943 
1944   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1945                         const TargetTransformInfo *TTI,
1946                         const DominatorTree *DTree)
1947     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1948     DT = DTree;
1949     WI.NarrowIV = IVPhi;
1950   }
1951 
1952   // Implement the interface used by simplifyUsersOfIV.
1953   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1954 };
1955 
1956 } // end anonymous namespace
1957 
1958 /// Iteratively perform simplification on a worklist of IV users. Each
1959 /// successive simplification may push more users which may themselves be
1960 /// candidates for simplification.
1961 ///
1962 /// Sign/Zero extend elimination is interleaved with IV simplification.
1963 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1964                                        SCEVExpander &Rewriter,
1965                                        LoopInfo *LI) {
1966   SmallVector<WideIVInfo, 8> WideIVs;
1967 
1968   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1969           Intrinsic::getName(Intrinsic::experimental_guard));
1970   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1971 
1972   SmallVector<PHINode*, 8> LoopPhis;
1973   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1974     LoopPhis.push_back(cast<PHINode>(I));
1975   }
1976   // Each round of simplification iterates through the SimplifyIVUsers worklist
1977   // for all current phis, then determines whether any IVs can be
1978   // widened. Widening adds new phis to LoopPhis, inducing another round of
1979   // simplification on the wide IVs.
1980   bool Changed = false;
1981   while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs.
    // This forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero
    // extension, the result becomes final. So for the most predictable
    // results, we delay evaluation of sign/zero extensions until needed,
    // and avoid running other SCEV-based analyses prior to simplifyAndExtend.
1988     do {
1989       PHINode *CurrIV = LoopPhis.pop_back_val();
1990 
1991       // Information about sign/zero extensions of CurrIV.
1992       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1993 
1994       Changed |=
1995           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1996 
1997       if (Visitor.WI.WidestNativeType) {
1998         WideIVs.push_back(Visitor.WI);
1999       }
    } while (!LoopPhis.empty());
2001 
2002     for (; !WideIVs.empty(); WideIVs.pop_back()) {
2003       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
2004       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
2005         Changed = true;
2006         LoopPhis.push_back(WidePhi);
2007       }
2008     }
2009   }
2010   return Changed;
2011 }
2012 
2013 //===----------------------------------------------------------------------===//
2014 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
2015 //===----------------------------------------------------------------------===//
2016 
/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so.  Otherwise, return null.  Note
/// that this is less general than SCEV's AddRec checking.
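/// For example (illustrative): given '%iv.next = add i32 %iv, %inv', where
/// %iv is a header phi and %inv is loop invariant, this returns %iv.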
2020 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
2021   Instruction *IncI = dyn_cast<Instruction>(IncV);
2022   if (!IncI)
2023     return nullptr;
2024 
2025   switch (IncI->getOpcode()) {
2026   case Instruction::Add:
2027   case Instruction::Sub:
2028     break;
2029   case Instruction::GetElementPtr:
2030     // An IV counter must preserve its type.
2031     if (IncI->getNumOperands() == 2)
2032       break;
2033     LLVM_FALLTHROUGH;
2034   default:
2035     return nullptr;
2036   }
2037 
2038   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2039   if (Phi && Phi->getParent() == L->getHeader()) {
2040     if (L->isLoopInvariant(IncI->getOperand(1)))
2041       return Phi;
2042     return nullptr;
2043   }
2044   if (IncI->getOpcode() == Instruction::GetElementPtr)
2045     return nullptr;
2046 
2047   // Allow add/sub to be commuted.
2048   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2049   if (Phi && Phi->getParent() == L->getHeader()) {
2050     if (L->isLoopInvariant(IncI->getOperand(0)))
2051       return Phi;
2052   }
2053   return nullptr;
2054 }
2055 
2056 /// Whether the current loop exit test is based on this value.  Currently this
2057 /// is limited to a direct use in the loop condition.
2058 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2059   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2060   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2061   // TODO: Allow non-icmp loop test.
2062   if (!ICmp)
2063     return false;
2064 
2065   // TODO: Allow indirect use.
2066   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2067 }
2068 
2069 /// linearFunctionTestReplace policy. Return true unless we can show that the
2070 /// current exit test is already sufficiently canonical.
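/// For example (illustrative): a test 'icmp ne i32 %iv.next, %limit', where
/// %iv.next increments a simple counter phi and %limit is loop invariant, is
/// already canonical, so LFTR is not needed.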
2071 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2072   assert(L->getLoopLatch() && "Must be in simplified form");
2073 
2074   // Avoid converting a constant or loop invariant test back to a runtime
2075   // test.  This is critical for when SCEV's cached ExitCount is less precise
2076   // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount).
2078   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2079   if (L->isLoopInvariant(BI->getCondition()))
2080     return false;
2081 
2082   // Do LFTR to simplify the exit condition to an ICMP.
2083   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2084   if (!Cond)
2085     return true;
2086 
2087   // Do LFTR to simplify the exit ICMP to EQ/NE
2088   ICmpInst::Predicate Pred = Cond->getPredicate();
2089   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2090     return true;
2091 
2092   // Look for a loop invariant RHS
2093   Value *LHS = Cond->getOperand(0);
2094   Value *RHS = Cond->getOperand(1);
2095   if (!L->isLoopInvariant(RHS)) {
2096     if (!L->isLoopInvariant(LHS))
2097       return true;
2098     std::swap(LHS, RHS);
2099   }
2100   // Look for a simple IV counter LHS
2101   PHINode *Phi = dyn_cast<PHINode>(LHS);
2102   if (!Phi)
2103     Phi = getLoopPhiForCounter(LHS, L);
2104 
2105   if (!Phi)
2106     return true;
2107 
2108   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2109   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2110   if (Idx < 0)
2111     return true;
2112 
2113   // Do LFTR if the exit condition's IV is *not* a simple counter.
2114   Value *IncV = Phi->getIncomingValue(Idx);
2115   return Phi != getLoopPhiForCounter(IncV, L);
2116 }
2117 
/// Return true if undefined behavior would provably be executed on the path
/// to OnPathTo if Root produced a poison result.  Note that this doesn't say
2120 /// anything about whether OnPathTo is actually executed or whether Root is
2121 /// actually poison.  This can be used to assess whether a new use of Root can
2122 /// be added at a location which is control equivalent with OnPathTo (such as
2123 /// immediately before it) without introducing UB which didn't previously
2124 /// exist.  Note that a false result conveys no information.
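/// For example (an illustrative sketch): if Root feeds the address operand of
/// a store that dominates OnPathTo, a poison Root would already trigger UB,
/// so adding a control-equivalent use of Root introduces nothing new.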
2125 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2126                                           Instruction *OnPathTo,
2127                                           DominatorTree *DT) {
  // The basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.
2132 
2133   // The set of all recursive users we've visited (which are assumed to all be
2134   // poison because of said visit)
2135   SmallSet<const Value *, 16> KnownPoison;
2136   SmallVector<const Instruction*, 16> Worklist;
2137   Worklist.push_back(Root);
2138   while (!Worklist.empty()) {
2139     const Instruction *I = Worklist.pop_back_val();
2140 
    // If we know this must trigger UB on a path leading to our target.
2142     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2143       return true;
2144 
2145     // If we can't analyze propagation through this instruction, just skip it
2146     // and transitive users.  Safe as false is a conservative result.
2147     if (!propagatesFullPoison(I) && I != Root)
2148       continue;
2149 
2150     if (KnownPoison.insert(I).second)
2151       for (const User *User : I->users())
2152         Worklist.push_back(cast<Instruction>(User));
2153   }
2154 
  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
2157   return false;
2158 }
2159 
2160 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2161 /// down to checking that all operands are constant and listing instructions
2162 /// that may hide undef.
2163 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2164                                unsigned Depth) {
2165   if (isa<Constant>(V))
2166     return !isa<UndefValue>(V);
2167 
2168   if (Depth >= 6)
2169     return false;
2170 
2171   // Conservatively handle non-constant non-instructions. For example, Arguments
2172   // may be undef.
2173   Instruction *I = dyn_cast<Instruction>(V);
2174   if (!I)
2175     return false;
2176 
2177   // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2179     return false;
2180 
2181   // Optimistically handle other instructions.
2182   for (Value *Op : I->operands()) {
2183     if (!Visited.insert(Op).second)
2184       continue;
2185     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2186       return false;
2187   }
2188   return true;
2189 }
2190 
2191 /// Return true if the given value is concrete. We must prove that undef can
2192 /// never reach it.
2193 ///
2194 /// TODO: If we decide that this is a good approach to checking for undef, we
2195 /// may factor it into a common location.
2196 static bool hasConcreteDef(Value *V) {
2197   SmallPtrSet<Value*, 8> Visited;
2198   Visited.insert(V);
2199   return hasConcreteDefImpl(V, Visited, 0);
2200 }
2201 
/// Return true if this IV has no uses other than the (soon to be rewritten)
/// loop exit test; i.e., the IV is almost dead.
2204 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2205   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2206   Value *IncV = Phi->getIncomingValue(LatchIdx);
2207 
2208   for (User *U : Phi->users())
2209     if (U != Cond && U != IncV) return false;
2210 
2211   for (User *U : IncV->users())
2212     if (U != Cond && U != Phi) return false;
2213   return true;
2214 }
2215 
2216 /// Return true if the given phi is a "counter" in L.  A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
2218 /// step of 1.  Note that L must have exactly one latch.
2219 static bool isLoopCounter(PHINode* Phi, Loop *L,
2220                           ScalarEvolution *SE) {
2221   assert(Phi->getParent() == L->getHeader());
2222   assert(L->getLoopLatch());
2223 
2224   if (!SE->isSCEVable(Phi->getType()))
2225     return false;
2226 
2227   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2228   if (!AR || AR->getLoop() != L || !AR->isAffine())
2229     return false;
2230 
2231   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2232   if (!Step || !Step->isOne())
2233     return false;
2234 
2235   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2236   Value *IncV = Phi->getIncomingValue(LatchIdx);
2237   return (getLoopPhiForCounter(IncV, L) == Phi);
2238 }
2239 
/// Search the loop header for a loop counter (an add rec with a step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
2243 ///
2244 /// BECount may be an i8* pointer type. The pointer difference is already
2245 /// valid count without scaling the address stride, so it remains a pointer
2246 /// expression as far as SCEV is concerned.
2247 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2248                                 const SCEV *BECount,
2249                                 ScalarEvolution *SE, DominatorTree *DT) {
2250   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2251 
2252   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2253 
2254   // Loop over all of the PHI nodes, looking for a simple counter.
2255   PHINode *BestPhi = nullptr;
2256   const SCEV *BestInit = nullptr;
2257   BasicBlock *LatchBlock = L->getLoopLatch();
2258   assert(LatchBlock && "Must be in simplified form");
2259   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2260 
2261   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2262     PHINode *Phi = cast<PHINode>(I);
2263     if (!isLoopCounter(Phi, L, SE))
2264       continue;
2265 
2266     // Avoid comparing an integer IV against a pointer Limit.
2267     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2268       continue;
2269 
2270     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2271 
2272     // AR may be a pointer type, while BECount is an integer type.
2273     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2274     // AR may not be a narrower type, or we may never exit.
2275     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2276     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2277       continue;
2278 
2279     // Avoid reusing a potentially undef value to compute other values that may
2280     // have originally had a concrete definition.
2281     if (!hasConcreteDef(Phi)) {
2282       // We explicitly allow unknown phis as long as they are already used by
2283       // the loop exit test.  This is legal since performing LFTR could not
2284       // increase the number of undef users.
2285       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
2286       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
2287           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
2288         continue;
2289     }
2290 
2291     // Avoid introducing undefined behavior due to poison which didn't exist in
2292     // the original program.  (Annoyingly, the rules for poison and undef
2293     // propagation are distinct, so this does NOT cover the undef case above.)
2294     // We have to ensure that we don't introduce UB by introducing a use on an
2295     // iteration where said IV produces poison.  Our strategy here differs for
2296     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
2297     // see code in linearFunctionTestReplace.  For pointers, we restrict
2298     // transforms as there is no good way to reinfer inbounds once lost.
2299     if (!Phi->getType()->isIntegerTy() &&
2300         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2301       continue;
2302 
2303     const SCEV *Init = AR->getStart();
2304 
2305     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2306       // Don't force a live loop counter if another IV can be used.
2307       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2308         continue;
2309 
      // Prefer a counter that starts from zero: it is a more "canonical"
      // counter form, and this also prefers integer IVs over pointer IVs.
2312       if (BestInit->isZero() != Init->isZero()) {
2313         if (BestInit->isZero())
2314           continue;
2315       }
2316       // If two IVs both count from zero or both count from nonzero then the
2317       // narrower is likely a dead phi that has been widened. Use the wider phi
2318       // to allow the other to be eliminated.
2319       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2320         continue;
2321     }
2322     BestPhi = Phi;
2323     BestInit = Init;
2324   }
2325   return BestPhi;
2326 }
2327 
2328 /// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/ unit stride) after the backedge of loop L
2330 /// is taken ExitCount times.
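/// For example (illustrative): for a unit-stride counter with start S and
/// ExitCount n, the pre-increment limit is S + n and the post-increment limit
/// is S + n + 1.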
2331 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2332                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
2333                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2334   assert(isLoopCounter(IndVar, L, SE));
2335   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2336   const SCEV *IVInit = AR->getStart();
2337 
2338   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2339   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2340   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2341   // the existing GEPs whenever possible.
2342   if (IndVar->getType()->isPointerTy() &&
2343       !ExitCount->getType()->isPointerTy()) {
2344     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2345     // signed value. ExitCount on the other hand represents the loop trip count,
2346     // which is an unsigned value. FindLoopCounter only allows induction
2347     // variables that have a positive unit stride of one. This means we don't
    // variables that have a positive unit stride. This means we don't
2349     // extend ExitCount.
2350     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2351     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2352     if (UsePostInc)
2353       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2354 
2355     // Expand the code for the iteration count.
2356     assert(SE->isLoopInvariant(IVOffset, L) &&
2357            "Computed iteration count is not loop invariant!");
2358 
2359     // We could handle pointer IVs other than i8*, but we need to compensate for
2360     // gep index scaling.
2361     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2362                              cast<PointerType>(IndVar->getType())
2363                                  ->getElementType())->isOne() &&
2364            "unit stride pointer IV must be i8*");
2365 
2366     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2367     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2368     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2369   } else {
2370     // In any other case, convert both IVInit and ExitCount to integers before
2371     // comparing. This may result in SCEV expansion of pointers, but in practice
2372     // SCEV will fold the pointer arithmetic away as such:
2373     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2374     //
    // Valid cases: (1) both integers, which is most common; (2) both may be
    // pointers for simple memset-style loops.
2377     //
2378     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2379     // were generated on top of case #2, which is not expected.
2380 
2381     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2382     // For unit stride, IVCount = Start + ExitCount with 2's complement
2383     // overflow.
2384 
2385     // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
2387     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2388     // of the IV in the loop over a (potentially) expensive expansion of the
2389     // widened exit count add(zext(add)) expression.
2390     if (SE->getTypeSizeInBits(IVInit->getType())
2391         > SE->getTypeSizeInBits(ExitCount->getType())) {
2392       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2393         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2394       else
2395         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2396     }
2397 
2398     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2399 
2400     if (UsePostInc)
2401       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2402 
2403     // Expand the code for the iteration count.
2404     assert(SE->isLoopInvariant(IVLimit, L) &&
2405            "Computed iteration count is not loop invariant!");
2406     // Ensure that we generate the same type as IndVar, or a smaller integer
2407     // type. In the presence of null pointer values, we have an integer type
2408     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2409     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2410       IndVar->getType() : ExitCount->getType();
2411     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2412     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2413   }
2414 }
2415 
2416 /// This method rewrites the exit condition of the loop to be a canonical !=
2417 /// comparison against the incremented loop induction variable.  This pass is
2418 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2419 /// determine a loop-invariant trip count of the loop, which is actually a much
2420 /// broader range than just linear tests.
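/// For example (an illustrative sketch): an exit test 'icmp slt i32 %iv, %n'
/// with a computable trip count can be rewritten as
/// 'icmp ne i32 %iv.next, %limit', with %limit produced by genLoopLimit.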
2421 bool IndVarSimplify::
2422 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2423                           const SCEV *ExitCount,
2424                           PHINode *IndVar, SCEVExpander &Rewriter) {
2425   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2426   assert(isLoopCounter(IndVar, L, SE));
2427   Instruction * const IncVar =
2428     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2429 
2430   // Initialize CmpIndVar to the preincremented IV.
2431   Value *CmpIndVar = IndVar;
2432   bool UsePostInc = false;
2433 
2434   // If the exiting block is the same as the backedge block, we prefer to
2435   // compare against the post-incremented value, otherwise we must compare
2436   // against the preincremented value.
2437   if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose not to strip inbounds, which requires us not
    // to add a potentially UB-introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
2442     bool SafeToPostInc =
2443         IndVar->getType()->isIntegerTy() ||
2444         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2445         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2446     if (SafeToPostInc) {
2447       UsePostInc = true;
2448       CmpIndVar = IncVar;
2449     }
2450   }
2451 
2452   // It may be necessary to drop nowrap flags on the incrementing instruction
2453   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2454   // case the increment might have previously been poison on the last iteration
2455   // only) or if LFTR switches to a different IV that was previously dynamically
2456   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2457   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2458   // check), because the pre-inc addrec flags may be adopted from the original
2459   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2460   // TODO: This handling is inaccurate for one case: If we switch to a
2461   // dynamically dead IV that wraps on the first loop iteration only, which is
2462   // not covered by the post-inc addrec. (If the new IV was not dynamically
2463   // dead, it could not be poison on the first iteration in the first place.)
2464   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2465     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2466     if (BO->hasNoUnsignedWrap())
2467       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2468     if (BO->hasNoSignedWrap())
2469       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2470   }
2471 
2472   Value *ExitCnt = genLoopLimit(
2473       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2474   assert(ExitCnt->getType()->isPointerTy() ==
2475              IndVar->getType()->isPointerTy() &&
2476          "genLoopLimit missed a cast");
2477 
2478   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2479   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2480   ICmpInst::Predicate P;
2481   if (L->contains(BI->getSuccessor(0)))
2482     P = ICmpInst::ICMP_NE;
2483   else
2484     P = ICmpInst::ICMP_EQ;
2485 
2486   IRBuilder<> Builder(BI);
2487 
2488   // The new loop exit condition should reuse the debug location of the
2489   // original loop exit condition.
2490   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2491     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2492 
2493   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2494   // avoid the expensive expansion of the limit expression in the wider type,
2495   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth) that we can't self-wrap in
2497   // the narrower type.
2498   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2499   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2500   if (CmpIndVarSize > ExitCntSize) {
2501     assert(!CmpIndVar->getType()->isPointerTy() &&
2502            !ExitCnt->getType()->isPointerTy());
2503 
    // Before resorting to actually inserting the truncate, use the same
    // reasoning as in SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth; we just prefer a zext/sext outside the loop
    // to a truncate within it.
2509     bool Extended = false;
2510     const SCEV *IV = SE->getSCEV(CmpIndVar);
2511     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2512                                                   ExitCnt->getType());
2513     const SCEV *ZExtTrunc =
2514       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2515 
2516     if (ZExtTrunc == IV) {
2517       Extended = true;
2518       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2519                                    "wide.trip.count");
2520     } else {
2521       const SCEV *SExtTrunc =
2522         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2523       if (SExtTrunc == IV) {
2524         Extended = true;
2525         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2526                                      "wide.trip.count");
2527       }
2528     }
2529 
2530     if (Extended) {
2531       bool Discard;
2532       L->makeLoopInvariant(ExitCnt, Discard);
2533     } else
2534       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2535                                       "lftr.wideiv");
2536   }
2537   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2538                     << "      LHS:" << *CmpIndVar << '\n'
2539                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2540                     << "\n"
2541                     << "      RHS:\t" << *ExitCnt << "\n"
2542                     << "ExitCount:\t" << *ExitCount << "\n"
2543                     << "  was: " << *BI->getCondition() << "\n");
2544 
2545   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2546   Value *OrigCond = BI->getCondition();
2547   // It's tempting to use replaceAllUsesWith here to fully replace the old
2548   // comparison, but that's not immediately safe, since users of the old
2549   // comparison may not be dominated by the new comparison. Instead, just
2550   // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
2552   BI->setCondition(Cond);
2553   DeadInsts.push_back(OrigCond);
2554 
2555   ++NumLFTR;
2556   return true;
2557 }
2558 
2559 //===----------------------------------------------------------------------===//
2560 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2561 //===----------------------------------------------------------------------===//
2562 
2563 /// If there's a single exit block, sink any loop-invariant values that
2564 /// were defined in the preheader but not used inside the loop into the
2565 /// exit block to reduce register pressure in the loop.
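/// Illustrative example (not from the original source):
///   preheader: %inv = add i64 %a, %b   ; loop invariant, unused in the loop
///   loop:      ... no use of %inv ...
///   exit:      %use = mul i64 %inv, 2
/// Here %inv can be moved from the preheader down into the exit block.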
2566 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2567   BasicBlock *ExitBlock = L->getExitBlock();
2568   if (!ExitBlock) return false;
2569 
2570   BasicBlock *Preheader = L->getLoopPreheader();
2571   if (!Preheader) return false;
2572 
2573   bool MadeAnyChanges = false;
2574   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2575   BasicBlock::iterator I(Preheader->getTerminator());
2576   while (I != Preheader->begin()) {
2577     --I;
2578     // New instructions were inserted at the end of the preheader.
2579     if (isa<PHINode>(I))
2580       break;
2581 
2582     // Don't move instructions which might have side effects, since the side
2583     // effects need to complete before instructions inside the loop.  Also don't
2584     // move instructions which might read memory, since the loop may modify
2585     // memory. Note that it's okay if the instruction might have undefined
2586     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2587     // block.
2588     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2589       continue;
2590 
2591     // Skip debug info intrinsics.
2592     if (isa<DbgInfoIntrinsic>(I))
2593       continue;
2594 
2595     // Skip eh pad instructions.
2596     if (I->isEHPad())
2597       continue;
2598 
2599     // Don't sink allocas: we never want to sink static allocas out of the
2600     // entry block, and correctly sinking dynamic allocas requires
2601     // checks for stacksave/stackrestore intrinsics.
2602     // FIXME: Refactor this check somehow?
2603     if (isa<AllocaInst>(I))
2604       continue;
2605 
2606     // Determine if there is a use in or before the loop (direct or
2607     // otherwise).
2608     bool UsedInLoop = false;
2609     for (Use &U : I->uses()) {
2610       Instruction *User = cast<Instruction>(U.getUser());
2611       BasicBlock *UseBB = User->getParent();
2612       if (PHINode *P = dyn_cast<PHINode>(User)) {
2613         unsigned i =
2614           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2615         UseBB = P->getIncomingBlock(i);
2616       }
2617       if (UseBB == Preheader || L->contains(UseBB)) {
2618         UsedInLoop = true;
2619         break;
2620       }
2621     }
2622 
2623     // If there is, the def must remain in the preheader.
2624     if (UsedInLoop)
2625       continue;
2626 
2627     // Otherwise, sink it to the exit block.
2628     Instruction *ToMove = &*I;
2629     bool Done = false;
2630 
2631     if (I != Preheader->begin()) {
2632       // Skip debug info intrinsics.
2633       do {
2634         --I;
2635       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2636 
2637       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2638         Done = true;
2639     } else {
2640       Done = true;
2641     }
2642 
2643     MadeAnyChanges = true;
2644     ToMove->moveBefore(*ExitBlock, InsertPt);
2645     if (Done) break;
2646     InsertPt = ToMove->getIterator();
2647   }
2648 
2649   return MadeAnyChanges;
2650 }
2651 
2652 /// Return a symbolic upper bound for the backedge taken count of the loop.
2653 /// This is more general than getConstantMaxBackedgeTakenCount as it returns
2654 /// an arbitrary expression as opposed to only constants.
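/// Illustrative example (not from the original source): for a loop with two
/// exiting blocks, one with an exact exit count of %n and another with only
/// a constant maximum exit count of 100, this returns umin(%n, 100).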
2655 /// TODO: Move into the ScalarEvolution class.
2656 static const SCEV *getMaxBackedgeTakenCount(ScalarEvolution &SE,
2657                                             DominatorTree &DT, Loop *L) {
2658   SmallVector<BasicBlock*, 16> ExitingBlocks;
2659   L->getExitingBlocks(ExitingBlocks);
2660 
2661   // Form an expression for the maximum exit count possible for this loop. We
2662   // merge the max and exact information to approximate a version of
2663   // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2664   SmallVector<const SCEV*, 4> ExitCounts;
2665   for (BasicBlock *ExitingBB : ExitingBlocks) {
2666     const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
2667     if (isa<SCEVCouldNotCompute>(ExitCount))
2668       ExitCount = SE.getExitCount(L, ExitingBB,
2669                                   ScalarEvolution::ConstantMaximum);
2670     if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2671       assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
2672              "We should only have known counts for exiting blocks that "
2673              "dominate latch!");
2674       ExitCounts.push_back(ExitCount);
2675     }
2676   }
2677   if (ExitCounts.empty())
2678     return SE.getCouldNotCompute();
2679   return SE.getUMinFromMismatchedTypes(ExitCounts);
2680 }
2681 
2682 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2683   SmallVector<BasicBlock*, 16> ExitingBlocks;
2684   L->getExitingBlocks(ExitingBlocks);
2685 
2686   // Remove all exits which aren't both rewritable and analyzable.
2687   auto NewEnd = llvm::remove_if(ExitingBlocks,
2688                                 [&](BasicBlock *ExitingBB) {
2689     // If our exiting block exits multiple loops, we can only rewrite the
2690     // innermost one.  Otherwise, we're changing how many times the innermost
2691     // loop runs before it exits.
2692     if (LI->getLoopFor(ExitingBB) != L)
2693       return true;
2694 
2695     // Can't rewrite non-branch yet.
2696     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2697     if (!BI)
2698       return true;
2699 
2700     // If already constant, nothing to do.
2701     if (isa<Constant>(BI->getCondition()))
2702       return true;
2703 
2704     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2705     if (isa<SCEVCouldNotCompute>(ExitCount))
2706       return true;
2707     return false;
2708   });
2709   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2710 
2711   if (ExitingBlocks.empty())
2712     return false;
2713 
2714   // Get a symbolic upper bound on the loop backedge taken count.
2715   const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2716   if (isa<SCEVCouldNotCompute>(MaxExitCount))
2717     return false;
2718 
2719   // Visit our exit blocks in order of dominance.  We know from the fact that
2720   // all remaining exits are analyzable that there must be a total dominance
2721   // order between them, as each must dominate the latch.  The visit order only
2722   // matters for the provably equal case.
2723   llvm::sort(ExitingBlocks,
2724              [&](BasicBlock *A, BasicBlock *B) {
2725                // std::sort sorts in ascending order, so we want the inverse of
2726                // the normal dominance relation.
2727                if (DT->properlyDominates(A, B)) return true;
2728                if (DT->properlyDominates(B, A)) return false;
2729                llvm_unreachable("expected total dominance order!");
2730              });
2731 #ifndef NDEBUG
2732   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2733     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2734   }
2735 #endif
2736 
2737   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2738     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2739     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2740     auto *OldCond = BI->getCondition();
2741     auto *NewCond = ConstantInt::get(OldCond->getType(),
2742                                      IsTaken ? ExitIfTrue : !ExitIfTrue);
2743     BI->setCondition(NewCond);
2744     if (OldCond->use_empty())
2745       DeadInsts.push_back(OldCond);
2746   };
2747 
2748   bool Changed = false;
2749   SmallSet<const SCEV*, 8> DominatingExitCounts;
2750   for (BasicBlock *ExitingBB : ExitingBlocks) {
2751     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2752     assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2753 
2754     // If we know we'd exit on the first iteration, rewrite the exit to
2755     // reflect this.  This does not imply the loop must exit through this
2756     // exit; there may be an earlier one taken on the first iteration.
2757     // TODO: Given we know the backedge can't be taken, we should go ahead
2758     // and break it.  Or at least, kill all the header phis and simplify.
2759     if (ExitCount->isZero()) {
2760       FoldExit(ExitingBB, true);
2761       Changed = true;
2762       continue;
2763     }
2764 
2765     // If we end up with a pointer exit count, bail.  Note that we can end up
2766     // with a pointer exit count for one exiting block, and not for another in
2767     // the same loop.
2768     if (!ExitCount->getType()->isIntegerTy() ||
2769         !MaxExitCount->getType()->isIntegerTy())
2770       continue;
2771 
2772     Type *WiderType =
2773       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2774     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2775     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2776     assert(MaxExitCount->getType() == ExitCount->getType());
2777 
2778     // Can we prove that some other exit must be taken strictly before this
2779     // one?
2780     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2781                                      MaxExitCount, ExitCount)) {
2782       FoldExit(ExitingBB, false);
2783       Changed = true;
2784       continue;
2785     }
2786 
2787     // As we run, keep track of which exit counts we've encountered.  If we
2788     // find a duplicate, we've found an exit which would have exited on the
2789     // exiting iteration, but (from the visit order) strictly follows another
2790     // which does the same and is thus dead.
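    // Illustrative example (not from the original source): if exits A and B
    // both have exit count %n and A dominates B, then A is always taken on
    // iteration %n before B can be reached, so B's exit is dead.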
2791     if (!DominatingExitCounts.insert(ExitCount).second) {
2792       FoldExit(ExitingBB, false);
2793       Changed = true;
2794       continue;
2795     }
2796 
2797     // TODO: There might be another opportunity to leverage SCEV's reasoning
2798     // here.  If we kept track of the min of dominating exits so far, we could
2799     // discharge exits with EC >= MDEC. This is less powerful than the existing
2800     // transform (since later exits aren't considered), but potentially more
2801     // powerful for any case where SCEV can prove a >=u b, but neither a == b
2802     // nor a >u b.  Such a case is not currently known.
2803   }
2804   return Changed;
2805 }
2806 
2807 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2808   SmallVector<BasicBlock*, 16> ExitingBlocks;
2809   L->getExitingBlocks(ExitingBlocks);
2810 
2811   bool Changed = false;
2812 
2813   // Finally, see if we can rewrite our exit conditions into a loop invariant
2814   // form.  If we have a read-only loop, and we can tell that we must exit down
2815   // a path which does not need any of the values computed within the loop, we
2816   // can rewrite the loop to exit on the first iteration.  Note that this
2817   // doesn't a) tell us the loop exits on the first iteration (unless
2818   // *all* exits are predicatable) or b) tell us *which* exit might be taken.
2819   // This transformation looks a lot like dead loop elimination, but is
2820   // restricted to read-only loops and does not necessarily need to kill the
2821   // loop entirely.
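  // Illustrative sketch (not from the original source): in a read-only loop
  // such as
  //   for (i = 0; i < len; i++)
  //     if (a[i] == key) break;
  // each exit test can be rewritten in terms of that exit's count and the
  // exact backedge-taken count, both of which are loop invariant.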
2822   if (!LoopPredication)
2823     return Changed;
2824 
2825   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2826     return Changed;
2827 
2828   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2829   // through *explicit* control flow.  We have to eliminate the possibility of
2830   // implicit exits (see below) before we know it's truly exact.
2831   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2832   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2833       !SE->isLoopInvariant(ExactBTC, L) ||
2834       !isSafeToExpand(ExactBTC, *SE))
2835     return Changed;
2836 
2837   auto BadExit = [&](BasicBlock *ExitingBB) {
2838     // If our exiting block exits multiple loops, we can only rewrite the
2839     // innermost one.  Otherwise, we're changing how many times the innermost
2840     // loop runs before it exits.
2841     if (LI->getLoopFor(ExitingBB) != L)
2842       return true;
2843 
2844     // Can't rewrite non-branch yet.
2845     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2846     if (!BI)
2847       return true;
2848 
2849     // If already constant, nothing to do.
2850     if (isa<Constant>(BI->getCondition()))
2851       return true;
2852 
2853     // If the exit block has phis, we need to be able to compute the values
2854     // within the loop which contains them.  This assumes trivial LCSSA phis
2855     // have already been removed; TODO: generalize.
2856     BasicBlock *ExitBlock =
2857         BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2858     if (!ExitBlock->phis().empty())
2859       return true;
2860 
2861     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2862     assert(!isa<SCEVCouldNotCompute>(ExitCount) && "implied by having exact trip count");
2863     if (!SE->isLoopInvariant(ExitCount, L) ||
2864         !isSafeToExpand(ExitCount, *SE))
2865       return true;
2866 
2867     return false;
2868   };
2869 
2870   // If we have any exits which can't be predicated themselves, then we can't
2871   // predicate any exit which isn't guaranteed to execute before it.  Consider
2872   // two exits (a) and (b) which would both exit on the same iteration.  If we
2873   // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2874   // we could convert a loop from exiting through (a) to one exiting through
2875   // (b).  Note that this problem exists only for exits with the same exit
2876   // count, and we could be more aggressive when exit counts are known inequal.
2877   llvm::sort(ExitingBlocks,
2878             [&](BasicBlock *A, BasicBlock *B) {
2879               // std::sort sorts in ascending order, so we want the inverse of
2880               // the normal dominance relation, plus a tie breaker for blocks
2881               // unordered by dominance.
2882               if (DT->properlyDominates(A, B)) return true;
2883               if (DT->properlyDominates(B, A)) return false;
2884               return A->getName() < B->getName();
2885             });
2886   // Check to see if our exit blocks are a total order (i.e. a linear chain of
2887   // exits before the backedge).  If they aren't, reasoning about reachability
2888   // is complicated, and we choose not to do so for now.
2889   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2890     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2891       return Changed;
2892 
2893   // Given our sorted total order, we know that exit[j] must be evaluated
2894   // after all exit[i] such that j > i.
2895   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2896     if (BadExit(ExitingBlocks[i])) {
2897       ExitingBlocks.resize(i);
2898       break;
2899     }
2900 
2901   if (ExitingBlocks.empty())
2902     return Changed;
2903 
2904   // We rely on not being able to reach an exiting block on a later iteration
2905   // than its statically computed exit count.  The implementation of
2906   // getExitCount currently has this invariant, but we assert it here so that
2907   // breakage is obvious if this ever changes.
2908   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2909         return DT->dominates(ExitingBB, L->getLoopLatch());
2910       }));
2911 
2912   // At this point, ExitingBlocks consists of only those blocks which are
2913   // predicatable.  Given that, we know we have at least one exit we can
2914   // predicate if the loop doesn't have side effects and doesn't have any
2915   // implicit exits (because then our exact BTC isn't actually exact).
2916   // FIXME: As structured, this is O(I^2) for loop nests.  Suggestions on how
2917   // to improve this are welcome.  An obvious option is to bail out for outer
2918   // loops, but that seems less than ideal.  MemorySSA can find memory writes;
2919   // is that enough for *all* side effects?
2920   for (BasicBlock *BB : L->blocks())
2921     for (auto &I : *BB)
2922       // TODO: Use isGuaranteedToTransferExecutionToSuccessor here.
2923       if (I.mayHaveSideEffects() || I.mayThrow())
2924         return Changed;
2925 
2926   // Finally, do the actual predication for all predicatable blocks.  A couple
2927   // of notes here:
2928   // 1) We don't bother to constant fold dominated exits with identical exit
2929   //    counts; that's simply a form of CSE/equality propagation and we leave
2930   //    it for dedicated passes.
2931   // 2) We insert the comparison at the branch.  Hoisting introduces additional
2932   //    legality constraints and we leave that to dedicated logic.  We want to
2933   //    predicate even if we can't insert a loop invariant expression as
2934   //    peeling or unrolling will likely reduce the cost of the otherwise loop
2935   //    varying check.
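  // Illustrative example (not from the original source): an exit with count
  // %ec, in a loop whose exact backedge-taken count is %btc, ends up
  // branching on
  //   %cond = icmp eq iN %ec, %btc
  // (or icmp ne, depending on which successor stays in the loop), so the
  // branch no longer references any loop-varying value.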
2936   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2937   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2938   Value *ExactBTCV = nullptr; // Lazily generated if needed.
2939   for (BasicBlock *ExitingBB : ExitingBlocks) {
2940     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2941 
2942     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2943     Value *NewCond;
2944     if (ExitCount == ExactBTC) {
2945       NewCond = L->contains(BI->getSuccessor(0)) ?
2946         B.getFalse() : B.getTrue();
2947     } else {
2948       Value *ECV = Rewriter.expandCodeFor(ExitCount);
2949       if (!ExactBTCV)
2950         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2951       Value *RHS = ExactBTCV;
2952       if (ECV->getType() != RHS->getType()) {
2953         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2954         ECV = B.CreateZExt(ECV, WiderTy);
2955         RHS = B.CreateZExt(RHS, WiderTy);
2956       }
2957       auto Pred = L->contains(BI->getSuccessor(0)) ?
2958         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2959       NewCond = B.CreateICmp(Pred, ECV, RHS);
2960     }
2961     Value *OldCond = BI->getCondition();
2962     BI->setCondition(NewCond);
2963     if (OldCond->use_empty())
2964       DeadInsts.push_back(OldCond);
2965     Changed = true;
2966   }
2967 
2968   return Changed;
2969 }
2970 
2971 //===----------------------------------------------------------------------===//
2972 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2973 //===----------------------------------------------------------------------===//
2974 
2975 bool IndVarSimplify::run(Loop *L) {
2976   // We need (and expect!) the incoming loop to be in LCSSA.
2977   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2978          "LCSSA required to run indvars!");
2979   bool Changed = false;
2980 
2981   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2982   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2983   //    canonicalization can be a pessimization without LSR to "clean up"
2984   //    afterwards.
2985   //  - We depend on having a preheader; in particular,
2986   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2987   //    and we're in trouble if we can't find the induction variable even when
2988   //    we've manually inserted one.
2989   //  - LFTR relies on having a single backedge.
2990   if (!L->isLoopSimplifyForm())
2991     return false;
2992 
2993   // If there are any floating-point recurrences, attempt to
2994   // transform them to use integer recurrences.
2995   Changed |= rewriteNonIntegerIVs(L);
2996 
2997 #ifndef NDEBUG
2998   // Used below for a consistency check only
2999   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
3000 #endif
3001 
3002   // Create a rewriter object which we'll use to transform the code with.
3003   SCEVExpander Rewriter(*SE, DL, "indvars");
3004 #ifndef NDEBUG
3005   Rewriter.setDebugType(DEBUG_TYPE);
3006 #endif
3007 
3008   // Eliminate redundant IV users.
3009   //
3010   // Simplification works best when run before other consumers of SCEV. We
3011   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
3012   // other expressions involving loop IVs have been evaluated. This helps SCEV
3013   // set no-wrap flags before normalizing sign/zero extension.
3014   Rewriter.disableCanonicalMode();
3015   Changed |= simplifyAndExtend(L, Rewriter, LI);
3016 
3017   // Check to see if we can compute the final value of any expressions
3018   // that are recurrent in the loop, and substitute the exit values from the
3019   // loop into any instructions outside of the loop that use the final values
3020   // of the current expressions.
3021   if (ReplaceExitValue != NeverRepl)
3022     Changed |= rewriteLoopExitValues(L, Rewriter);
3023 
3024   // Eliminate redundant IV cycles.
3025   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
3026 
3027   // Try to eliminate loop exits based on analyzable exit counts.
3028   Changed |= optimizeLoopExits(L, Rewriter);
3029 
3030   // Try to form loop invariant tests for loop exits by changing how many
3031   // iterations of the loop run when that is unobservable.
3032   Changed |= predicateLoopExits(L, Rewriter);
3033 
3034   // If we have a trip count expression, rewrite the loop's exit condition
3035   // using it.
3036   if (!DisableLFTR) {
3037     SmallVector<BasicBlock*, 16> ExitingBlocks;
3038     L->getExitingBlocks(ExitingBlocks);
3039     for (BasicBlock *ExitingBB : ExitingBlocks) {
3040       // Can't rewrite non-branch yet.
3041       if (!isa<BranchInst>(ExitingBB->getTerminator()))
3042         continue;
3043 
3044       // If our exiting block exits multiple loops, we can only rewrite the
3045       // innermost one.  Otherwise, we're changing how many times the innermost
3046       // loop runs before it exits.
3047       if (LI->getLoopFor(ExitingBB) != L)
3048         continue;
3049 
3050       if (!needsLFTR(L, ExitingBB))
3051         continue;
3052 
3053       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
3054       if (isa<SCEVCouldNotCompute>(ExitCount))
3055         continue;
3056 
3057       // This was handled above, but as we form SCEVs, we can sometimes refine
3058       // existing ones; this allows exit counts to be folded to zero which
3059       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
3060       // until stable to handle cases like this better.
3061       if (ExitCount->isZero())
3062         continue;
3063 
3064       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
3065       if (!IndVar)
3066         continue;
3067 
3068       // Avoid high cost expansions.  Note: This heuristic is questionable in
3069       // that our definition of "high cost" is not exactly principled.
3070       if (Rewriter.isHighCostExpansion(ExitCount, L))
3071         continue;
3072 
3073       // Check preconditions for proper SCEVExpander operation. SCEV does not
3074       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
3075       // any pass that uses the SCEVExpander must do it. This does not work
3076       // well for loop passes because SCEVExpander makes assumptions about
3077       // all loops, while LoopPassManager only forces the current loop to be
3078       // simplified.
3079       //
3080       // FIXME: SCEV expansion has no way to bail out, so the caller must
3081       // explicitly check any assumptions made by SCEV. Brittle.
3082       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
3083       if (!AR || AR->getLoop()->getLoopPreheader())
3084         Changed |= linearFunctionTestReplace(L, ExitingBB,
3085                                              ExitCount, IndVar,
3086                                              Rewriter);
3087     }
3088   }
3089   // Clear the rewriter cache, because values that are in the rewriter's cache
3090   // can be deleted in the loop below, causing the AssertingVH in the cache to
3091   // trigger.
3092   Rewriter.clear();
3093 
3094   // Now that we're done iterating through lists, clean up any instructions
3095   // which are now dead.
3096   while (!DeadInsts.empty())
3097     if (Instruction *Inst =
3098             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
3099       Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
3100 
3101   // The Rewriter may not be used from this point on.
3102 
3103   // Loop-invariant instructions in the preheader that aren't used in the
3104   // loop may be sunk below the loop to reduce register pressure.
3105   Changed |= sinkUnusedInvariants(L);
3106 
3107   // rewriteFirstIterationLoopExitValues does not rely on the computation of
3108   // trip count and therefore can further simplify exit values in addition to
3109   // rewriteLoopExitValues.
3110   Changed |= rewriteFirstIterationLoopExitValues(L);
3111 
3112   // Clean up dead instructions.
3113   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
3114 
3115   // Check a post-condition.
3116   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
3117          "Indvars did not preserve LCSSA!");
3118 
3119   // Verify that LFTR and any other changes have not interfered with SCEV's
3120   // ability to compute trip count.
3121 #ifndef NDEBUG
3122   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
3123     SE->forgetLoop(L);
3124     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
3125     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
3126         SE->getTypeSizeInBits(NewBECount->getType()))
3127       NewBECount = SE->getTruncateOrNoop(NewBECount,
3128                                          BackedgeTakenCount->getType());
3129     else
3130       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
3131                                                  NewBECount->getType());
3132     assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
3133   }
3134 #endif
3135 
3136   return Changed;
3137 }
3138 
3139 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
3140                                           LoopStandardAnalysisResults &AR,
3141                                           LPMUpdater &) {
3142   Function *F = L.getHeader()->getParent();
3143   const DataLayout &DL = F->getParent()->getDataLayout();
3144 
3145   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
3146   if (!IVS.run(&L))
3147     return PreservedAnalyses::all();
3148 
3149   auto PA = getLoopPassPreservedAnalyses();
3150   PA.preserveSet<CFGAnalyses>();
3151   return PA;
3152 }
3153 
3154 namespace {
3155 
3156 struct IndVarSimplifyLegacyPass : public LoopPass {
3157   static char ID; // Pass identification, replacement for typeid
3158 
3159   IndVarSimplifyLegacyPass() : LoopPass(ID) {
3160     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
3161   }
3162 
3163   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
3164     if (skipLoop(L))
3165       return false;
3166 
3167     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3168     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
3169     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3170     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
3171     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
3172     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
3173     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
3174     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
3175 
3176     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
3177     return IVS.run(L);
3178   }
3179 
3180   void getAnalysisUsage(AnalysisUsage &AU) const override {
3181     AU.setPreservesCFG();
3182     getLoopAnalysisUsage(AU);
3183   }
3184 };
3185 
3186 } // end anonymous namespace
3187 
3188 char IndVarSimplifyLegacyPass::ID = 0;
3189 
3190 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
3191                       "Induction Variable Simplification", false, false)
3192 INITIALIZE_PASS_DEPENDENCY(LoopPass)
3193 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
3194                     "Induction Variable Simplification", false, false)
3195 
3196 Pass *llvm::createIndVarSimplifyPass() {
3197   return new IndVarSimplifyLegacyPass();
3198 }
3199