xref: /llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision 34ba96a3d49e14abb2a36114a4ed6f97e1b9a9ee)
//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
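//      For example, given
//        for (i = 0; i != n; ++i) {}
//        use(i);
//      the 'use(i)' is rewritten to 'use(n)', and the now-pointless loop can
//      be deleted.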
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
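/// For illustration: if User is a PHI receiving Def from reachable incoming
/// blocks B1 and B2, the insert point is the terminator of the nearest common
/// dominator of B1 and B2, then hoisted up the dominator tree until it sits
/// at the loop level of Def.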
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
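/// For example, 42.0 converts exactly to 42, while 3.5 (inexact) or a value
/// outside the signed 64-bit range does not convert.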
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, insert a corresponding
/// integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);
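    // For illustration: with 'for (double i = 0; i <= 100; i += 3)' we have
    // InitValue = 0, ExitValue = 100 and NewPred = SLE, so Range is bumped to
    // 101 and Leftover = 101 % 3 = 2. The predicate is not EQ/NE and
    // ExitValue + IncValue = 103 does not wrap, so the checks below pass.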

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // Whether expanding the exit value is high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
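/// For illustration: in
///   for (i = 0; i != 10; ++i) sum += 2;
///   use(sum);
/// SCEV evaluates the recurrence {0,+,2} at trip count 10, so 'use(sum)' can
/// be rewritten to 'use(20)' without running the loop.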
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an instruction, or is not
        // defined in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.  We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences).  Is there existing code we can reuse?
          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
          if (isa<SCEVCouldNotCompute>(ExitCount))
            continue;
          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
            if (AddRec->getLoop() == L)
              ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
          if (isa<SCEVCouldNotCompute>(ExitValue) ||
              !SE->isLoopInvariant(ExitValue, L) ||
              !isSafeToExpand(ExitValue, *SE))
            continue;
        }

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which cannot be optimized
        // away. Avoid doing so unless we know we have a value which computes
        // the ExitValue already. TODO: This should be merged into the SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, queue it for deletion. Don't delete it
    // immediately, to avoid invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop-invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
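/// For illustration: in
///   for (i = start; ; ++i) { if (inv) break; ... }
/// where 'inv' is loop-invariant, the exit is taken (if at all) on the first
/// iteration, so a header PHI like 'i' leaves the loop with its preheader
/// value 'start'.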
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // The LoopDeletion pass can delete a loop even when ExitingBlocks.size() > 1,
  // but we skip the multiple-ExitingBlocks case here for simplicity.
  // TODO: If we see a testcase where multiple ExitingBlocks can be deleted
  // after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
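/// For illustration: if an i32 IV has a user 'sext i32 %iv to i64' and i64 is
/// a legal integer type for the target, this records WidestNativeType = i64
/// and IsSigned = true in WI.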
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute the cost of all instructions on the induction variable
  // more comprehensively when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
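/// For illustration (IR sketch): widening an i32 IV to i64 replaces
///   %iv  = phi i32 [ 0, %preheader ], [ %inc, %latch ]
///   %ext = sext i32 %iv to i64
/// with
///   %iv.wide = phi i64 [ 0, %preheader ], [ %inc.wide, %latch ]
/// so users of %ext are redirected to %iv.wide and the sext goes away.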
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not, we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
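/// For illustration: given 'add nsw i32 %iv, %c' where %iv was sign-extended,
/// this asks SCEV whether adding 'sext(%c)' to the wide IV's expression still
/// folds to an AddRec on this loop; if so, the add can be performed directly
/// on the wide IV.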
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported so far.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Swap the operands back into their original order for non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification
/// after widening its type? In other words, can the extend be safely hoisted
/// out of the loop, with SCEV reducing the value to a recurrence on the same
/// loop? If so, return the extended recurrence and the kind of extension used.
/// Otherwise return {nullptr, Unknown}.
1308 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1309   if (!SE->isSCEVable(DU.NarrowUse->getType()))
1310     return {nullptr, Unknown};
1311 
1312   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1313   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1314       SE->getTypeSizeInBits(WideType)) {
1315     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1316     // index. So don't follow this use.
1317     return {nullptr, Unknown};
1318   }
1319 
1320   const SCEV *WideExpr;
1321   ExtendKind ExtKind;
1322   if (DU.NeverNegative) {
1323     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1324     if (isa<SCEVAddRecExpr>(WideExpr))
1325       ExtKind = SignExtended;
1326     else {
1327       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1328       ExtKind = ZeroExtended;
1329     }
1330   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1331     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1332     ExtKind = SignExtended;
1333   } else {
1334     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1335     ExtKind = ZeroExtended;
1336   }
1337   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1338   if (!AddRec || AddRec->getLoop() != L)
1339     return {nullptr, Unknown};
1340   return {AddRec, ExtKind};
1341 }
1342 
1343 /// This IV user cannot be widened. Replace this use of the original narrow IV
1344 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
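///
/// A sketch of the rewrite (illustrative names):
///
///   %use = mul i32 %narrow.def, %x
/// becomes
///   %trunc = trunc i64 %wide.def to i32
///   %use   = mul i32 %trunc, %x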
1345 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1346   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1347   if (!InsertPt)
1348     return;
1349   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1350                     << *DU.NarrowUse << "\n");
1351   IRBuilder<> Builder(InsertPt);
1352   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1353   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1354 }
1355 
1356 /// If the narrow use is a compare instruction, then widen the compare
1357 /// (and possibly the other operand).  The extend operation is hoisted into the
1358 /// loop preheader as far as possible.
1359 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1360   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1361   if (!Cmp)
1362     return false;
1363 
1364   // We can legally widen the comparison in the following two cases:
1365   //
1366   //  - The signedness of the IV extension and comparison match
1367   //
1368   //  - The narrow IV is never negative (and thus its sign extension is equal
1369   //    to its zero extension).  For instance, let's say we're zero extending
1370   //    %narrow for the following use
1371   //
1372   //      icmp slt i32 %narrow, %val   ... (A)
1373   //
1374   //    and %narrow is never negative.  Then
1375   //
1376   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1377   //          == icmp slt i32 zext(%narrow), sext(%val)
1378   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1379   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1380     return false;
1381 
1382   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1383   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1384   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1385   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1386 
1387   // Widen the compare instruction.
1388   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1389   if (!InsertPt)
1390     return false;
1391   IRBuilder<> Builder(InsertPt);
1392   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1393 
1394   // Widen the other operand of the compare, if necessary.
1395   if (CastWidth < IVWidth) {
1396     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1397     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1398   }
1399   return true;
1400 }
1401 
1402 /// If the narrow use is an instruction whose two operands are the defining
1403 /// instruction of DU and a load instruction, then we have the following:
1404 /// if the load is hoisted outside the loop, then we do not reach this function
1405 /// because scalar evolution analysis works fine in widenIVUse with variables
1406 /// hoisted outside the loop, and efficient code is subsequently generated
1407 /// without emitting truncate instructions. But when the load is not hoisted
1408 /// (whether due to a limitation in alias analysis or due to a true legality
1409 /// constraint), scalar evolution cannot proceed with loop-variant values and
1410 /// inefficient code is generated. This function handles the non-hoisted-load
1411 /// special case by making the optimization generate the same kind of code for
1412 /// hoisted and non-hoisted loads (widen the use and eliminate the sign extend
1413 /// instruction). This special case matters especially when the induction
1414 /// variables affect the addressing mode in code generation.
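///
/// A sketch of the pattern handled here (illustrative names, not from a
/// specific test):
///
///   %load = load i32, i32* %p        ; loop variant, not hoisted
///   %add  = add nsw i32 %iv, %load
///   %ext  = sext i32 %add to i64     ; every user of %add is such a sext
///
/// which widenWithVariantLoadUseCodegen later rewrites to
///
///   %load.ext = sext i32 %load to i64
///   %add.wide = add nsw i64 %iv.wide, %load.ext  ; %ext users now use this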
1415 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1416   Instruction *NarrowUse = DU.NarrowUse;
1417   Instruction *NarrowDef = DU.NarrowDef;
1418   Instruction *WideDef = DU.WideDef;
1419 
1420   // Handle the common case of add<nsw/nuw>
1421   const unsigned OpCode = NarrowUse->getOpcode();
1422   // Only Add/Sub/Mul instructions are supported.
1423   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1424       OpCode != Instruction::Mul)
1425     return false;
1426 
1427   // The operand that is not defined by NarrowDef of DU. Let's call it the
1428   // other operand.
1429   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1430   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1431          "bad DU");
1432 
1433   const SCEV *ExtendOperExpr = nullptr;
1434   const OverflowingBinaryOperator *OBO =
1435     cast<OverflowingBinaryOperator>(NarrowUse);
1436   ExtendKind ExtKind = getExtendKind(NarrowDef);
1437   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1438     ExtendOperExpr = SE->getSignExtendExpr(
1439       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1440   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1441     ExtendOperExpr = SE->getZeroExtendExpr(
1442       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1443   else
1444     return false;
1445 
1446   // We are interested in the other operand being a load instruction.
1447   // We should look into relaxing this restriction later on.
1448   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1449   if (I && I->getOpcode() != Instruction::Load)
1450     return false;
1451 
1452   // Verify that the defining operand is an AddRec.
1453   const SCEV *Op1 = SE->getSCEV(WideDef);
1454   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1455   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1456     return false;
1457   // Verify that the other operand is an extend.
1458   if (ExtKind == SignExtended) {
1459     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1460       return false;
1461   } else {
1462     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1463       return false;
1464   }
1465 
1466   if (ExtKind == SignExtended) {
1467     for (Use &U : NarrowUse->uses()) {
1468       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1469       if (!User || User->getType() != WideType)
1470         return false;
1471     }
1472   } else { // ExtKind == ZeroExtended
1473     for (Use &U : NarrowUse->uses()) {
1474       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1475       if (!User || User->getType() != WideType)
1476         return false;
1477     }
1478   }
1479 
1480   return true;
1481 }
1482 
1483 /// Special case for widening with variant loads (see
1484 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1485 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1486   Instruction *NarrowUse = DU.NarrowUse;
1487   Instruction *NarrowDef = DU.NarrowDef;
1488   Instruction *WideDef = DU.WideDef;
1489 
1490   ExtendKind ExtKind = getExtendKind(NarrowDef);
1491 
1492   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1493 
1494   // Generate the widened use instruction.
1495   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1496                    ? WideDef
1497                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1498                                       ExtKind, NarrowUse);
1499   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1500                    ? WideDef
1501                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1502                                       ExtKind, NarrowUse);
1503 
1504   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1505   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1506                                         NarrowBO->getName());
1507   IRBuilder<> Builder(NarrowUse);
1508   Builder.Insert(WideBO);
1509   WideBO->copyIRFlags(NarrowBO);
1510 
1511   assert(ExtKind != Unknown && "Unknown ExtKind not handled");
1512 
1513   ExtendKindMap[NarrowUse] = ExtKind;
1514 
1515   for (Use &U : NarrowUse->uses()) {
1516     Instruction *User = nullptr;
1517     if (ExtKind == SignExtended)
1518       User = dyn_cast<SExtInst>(U.getUser());
1519     else
1520       User = dyn_cast<ZExtInst>(U.getUser());
1521     if (User && User->getType() == WideType) {
1522       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1523                         << *WideBO << "\n");
1524       ++NumElimExt;
1525       User->replaceAllUsesWith(WideBO);
1526       DeadInsts.emplace_back(User);
1527     }
1528   }
1529 }
1530 
1531 /// Determine whether an individual user of the narrow IV can be widened. If so,
1532 /// return the wide clone of the user.
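///
/// For instance (a sketch), when the narrow def was widened by sext, the use
///
///   %ext = sext i32 %narrow.def to i64
///
/// is replaced outright by the wide def and becomes dead.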
1533 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1534   assert(ExtendKindMap.count(DU.NarrowDef) &&
1535          "Should already know the kind of extension used to widen NarrowDef");
1536 
1537   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1538   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1539     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1540       // For LCSSA phis, sink the truncate outside the loop.
1541       // After SimplifyCFG most loop exit targets have a single predecessor.
1542       // Otherwise fall back to a truncate within the loop.
1543       if (UsePhi->getNumOperands() != 1)
1544         truncateIVUse(DU, DT, LI);
1545       else {
1546         // Widening the PHI requires us to insert a trunc.  The logical place
1547         // for this trunc is in the same BB as the PHI.  This is not possible if
1548         // the BB is terminated by a catchswitch.
1549         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1550           return nullptr;
1551 
1552         PHINode *WidePhi =
1553           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1554                           UsePhi);
1555         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1556         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1557         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1558         UsePhi->replaceAllUsesWith(Trunc);
1559         DeadInsts.emplace_back(UsePhi);
1560         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1561                           << *WidePhi << "\n");
1562       }
1563       return nullptr;
1564     }
1565   }
1566 
1567   // This narrow use can be widened by a sext if it's non-negative or its narrow
1568   // def was widened by a sext. Same for zext.
1569   auto canWidenBySExt = [&]() {
1570     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1571   };
1572   auto canWidenByZExt = [&]() {
1573     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1574   };
1575 
1576   // Our raison d'etre! Eliminate sign and zero extension.
1577   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1578       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1579     Value *NewDef = DU.WideDef;
1580     if (DU.NarrowUse->getType() != WideType) {
1581       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1582       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1583       if (CastWidth < IVWidth) {
1584         // The cast isn't as wide as the IV, so insert a Trunc.
1585         IRBuilder<> Builder(DU.NarrowUse);
1586         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1587       }
1588       else {
1589         // A wider extend was hidden behind a narrower one. This may induce
1590         // another round of IV widening in which the intermediate IV becomes
1591         // dead. It should be very rare.
1592         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1593                           << " not wide enough to subsume " << *DU.NarrowUse
1594                           << "\n");
1595         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1596         NewDef = DU.NarrowUse;
1597       }
1598     }
1599     if (NewDef != DU.NarrowUse) {
1600       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1601                         << " replaced by " << *DU.WideDef << "\n");
1602       ++NumElimExt;
1603       DU.NarrowUse->replaceAllUsesWith(NewDef);
1604       DeadInsts.emplace_back(DU.NarrowUse);
1605     }
1606     // Now that the extend is gone, we want to expose its uses for potential
1607     // further simplification. We don't need to directly inform SimplifyIVUsers
1608     // of the new users, because their parent IV will be processed later as a
1609     // new loop phi. If we preserved IVUsers analysis, we would also want to
1610     // push the uses of WideDef here.
1611 
1612     // No further widening is needed. The deceased [sz]ext had done it for us.
1613     return nullptr;
1614   }
1615 
1616   // Does this user itself evaluate to a recurrence after widening?
1617   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1618   if (!WideAddRec.first)
1619     WideAddRec = getWideRecurrence(DU);
1620 
1621   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1622   if (!WideAddRec.first) {
1623     // If use is a loop condition, try to promote the condition instead of
1624     // truncating the IV first.
1625     if (widenLoopCompare(DU))
1626       return nullptr;
1627 
1628     // We are here about to generate a truncate instruction that may hurt
1629     // performance because the scalar evolution expression computed earlier
1630     // in WideAddRec.first does not indicate a polynomial induction expression.
1631     // In that case, look at the operands of the use instruction to determine
1632     // if we can still widen the use instead of truncating its operand.
1633     if (widenWithVariantLoadUse(DU)) {
1634       widenWithVariantLoadUseCodegen(DU);
1635       return nullptr;
1636     }
1637 
1638     // This user does not evaluate to a recurrence after widening, so don't
1639     // follow it. Instead insert a Trunc to kill off the original use,
1640     // eventually isolating the original narrow IV so it can be removed.
1641     truncateIVUse(DU, DT, LI);
1642     return nullptr;
1643   }
1644   // Assume block terminators cannot evaluate to a recurrence. We can't insert
1645   // a Trunc after a terminator if there happens to be a critical edge.
1646   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1647          "SCEV is not expected to evaluate a block terminator");
1648 
1649   // Reuse the IV increment that SCEVExpander created as long as it dominates
1650   // NarrowUse.
1651   Instruction *WideUse = nullptr;
1652   if (WideAddRec.first == WideIncExpr &&
1653       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1654     WideUse = WideInc;
1655   else {
1656     WideUse = cloneIVUser(DU, WideAddRec.first);
1657     if (!WideUse)
1658       return nullptr;
1659   }
1660   // Evaluation of WideAddRec ensured that the narrow expression could be
1661   // extended outside the loop without overflow. This suggests that the wide use
1662   // evaluates to the same expression as the extended narrow use, but doesn't
1663   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1664   // where it fails, we simply throw away the newly created wide use.
1665   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1666     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1667                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1668                       << "\n");
1669     DeadInsts.emplace_back(WideUse);
1670     return nullptr;
1671   }
1672 
1673   // If we've reached this point, we are going to replace
1674   // DU.NarrowUse with WideUse, so reattach any debug values now.
1675   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1676 
1677   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1678   // Returning WideUse pushes it on the worklist.
1679   return WideUse;
1680 }
1681 
1682 /// Add eligible users of NarrowDef to NarrowIVUsers.
1683 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1684   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1685   bool NonNegativeDef =
1686       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1687                            SE->getConstant(NarrowSCEV->getType(), 0));
1688   for (User *U : NarrowDef->users()) {
1689     Instruction *NarrowUser = cast<Instruction>(U);
1690 
1691     // Handle data flow merges and bizarre phi cycles.
1692     if (!Widened.insert(NarrowUser).second)
1693       continue;
1694 
1695     bool NonNegativeUse = false;
1696     if (!NonNegativeDef) {
1697       // We might have control-dependent range information for this context.
1698       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1699         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1700     }
1701 
1702     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1703                                NonNegativeDef || NonNegativeUse);
1704   }
1705 }
1706 
1707 /// Process a single induction variable. First use the SCEVExpander to create a
1708 /// wide induction variable that evaluates to the same recurrence as the
1709 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1710 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1711 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1712 ///
1713 /// It would be simpler to delete uses as they are processed, but we must avoid
1714 /// invalidating SCEV expressions.
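///
/// A sketch of the overall effect (illustrative IR):
///
///   %iv     = phi i32 [ 0, %ph ], [ %iv.next, %latch ]
///   %iv.ext = sext i32 %iv to i64
/// ==>
///   %iv.wide = phi i64 [ 0, %ph ], [ %iv.wide.next, %latch ]
///
/// with the narrow phi left isolated for removal.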
1715 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1716   // Is this phi an induction variable?
1717   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1718   if (!AddRec)
1719     return nullptr;
1720 
1721   // Widen the induction variable expression.
1722   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1723                                ? SE->getSignExtendExpr(AddRec, WideType)
1724                                : SE->getZeroExtendExpr(AddRec, WideType);
1725 
1726   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1727          "Expect the new IV expression to preserve its type");
1728 
1729   // Can the IV be extended outside the loop without overflow?
1730   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1731   if (!AddRec || AddRec->getLoop() != L)
1732     return nullptr;
1733 
1734   // An AddRec must have loop-invariant operands. Since this AddRec is
1735   // materialized by a loop header phi, the expression cannot have any post-loop
1736   // operands, so they must dominate the loop header.
1737   assert(
1738       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1739       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1740       "Loop header phi recurrence inputs do not dominate the loop");
1741 
1742   // Iterate over IV uses (including transitive ones) looking for IV increments
1743   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1744   // the increment calculate control-dependent range information based on
1745   // dominating conditions inside of the loop (e.g. a range check inside of the
1746   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1747   //
1748   // Control-dependent range information is later used to prove that a narrow
1749   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1750   // this on demand because when pushNarrowIVUsers needs this information some
1751   // of the dominating conditions might be already widened.
1752   if (UsePostIncrementRanges)
1753     calculatePostIncRanges(OrigPhi);
1754 
1755   // The rewriter provides a value for the desired IV expression. This may
1756   // either find an existing phi or materialize a new one. Either way, we
1757   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1758   // of the phi-SCC dominates the loop entry.
1759   Instruction *InsertPt = &L->getHeader()->front();
1760   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1761 
1762   // Remembering the WideIV increment generated by SCEVExpander allows
1763   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1764   // employ a general reuse mechanism because the call above is the only call to
1765   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1766   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1767     WideInc =
1768       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1769     WideIncExpr = SE->getSCEV(WideInc);
1770     // Propagate the debug location associated with the original loop increment
1771     // to the new (widened) increment.
1772     auto *OrigInc =
1773       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1774     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1775   }
1776 
1777   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1778   ++NumWidened;
1779 
1780   // Traverse the def-use chain using a worklist starting at the original IV.
1781   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1782 
1783   Widened.insert(OrigPhi);
1784   pushNarrowIVUsers(OrigPhi, WidePhi);
1785 
1786   while (!NarrowIVUsers.empty()) {
1787     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1788 
1789     // Process a def-use edge. This may replace the use, so don't hold a
1790     // use_iterator across it.
1791     Instruction *WideUse = widenIVUse(DU, Rewriter);
1792 
1793     // Follow all def-use edges from the previous narrow use.
1794     if (WideUse)
1795       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1796 
1797     // widenIVUse may have removed the def-use edge.
1798     if (DU.NarrowDef->use_empty())
1799       DeadInsts.emplace_back(DU.NarrowDef);
1800   }
1801 
1802   // Attach any debug information to the new PHI.
1803   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1804 
1805   return WidePhi;
1806 }
1807 
1808 /// Calculates the control-dependent range for the given def at the given
1809 /// context by looking at dominating conditions inside the loop.
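///
/// For example (a sketch): for the post-increment def %inc and a dominating
/// range check
///
///   %inc = add nsw i32 %iv, 1
///   %cmp = icmp slt i32 %iv, %len
///   br i1 %cmp, label %in.bounds, label %out
///
/// contexts inside %in.bounds constrain %iv to the region allowed by the
/// compare; shifting that region by the nsw +1 bounds %inc, which (together
/// with other dominating conditions) can prove %inc non-negative.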
1810 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1811                                     Instruction *NarrowUser) {
1812   using namespace llvm::PatternMatch;
1813 
1814   Value *NarrowDefLHS;
1815   const APInt *NarrowDefRHS;
1816   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1817                                  m_APInt(NarrowDefRHS))) ||
1818       !NarrowDefRHS->isNonNegative())
1819     return;
1820 
1821   auto UpdateRangeFromCondition = [&] (Value *Condition,
1822                                        bool TrueDest) {
1823     CmpInst::Predicate Pred;
1824     Value *CmpRHS;
1825     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1826                                  m_Value(CmpRHS))))
1827       return;
1828 
1829     CmpInst::Predicate P =
1830             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1831 
1832     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1833     auto CmpConstrainedLHSRange =
1834             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1835     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1836         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1837 
1838     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1839   };
1840 
1841   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1842     if (!HasGuards)
1843       return;
1844 
1845     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1846                                      Ctx->getParent()->rend())) {
1847       Value *C = nullptr;
1848       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1849         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1850     }
1851   };
1852 
1853   UpdateRangeFromGuards(NarrowUser);
1854 
1855   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1856   // If NarrowUserBB is statically unreachable, asking dominator queries may
1857   // yield surprising results (e.g. the block may not have a dom tree node).
1858   if (!DT->isReachableFromEntry(NarrowUserBB))
1859     return;
1860 
1861   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1862        L->contains(DTB->getBlock());
1863        DTB = DTB->getIDom()) {
1864     auto *BB = DTB->getBlock();
1865     auto *TI = BB->getTerminator();
1866     UpdateRangeFromGuards(TI);
1867 
1868     auto *BI = dyn_cast<BranchInst>(TI);
1869     if (!BI || !BI->isConditional())
1870       continue;
1871 
1872     auto *TrueSuccessor = BI->getSuccessor(0);
1873     auto *FalseSuccessor = BI->getSuccessor(1);
1874 
1875     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1876       return BBE.isSingleEdge() &&
1877              DT->dominates(BBE, NarrowUser->getParent());
1878     };
1879 
1880     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1881       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1882 
1883     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1884       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1885   }
1886 }
1887 
1888 /// Calculates the PostIncRangeInfos map for the given IV.
1889 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1890   SmallPtrSet<Instruction *, 16> Visited;
1891   SmallVector<Instruction *, 6> Worklist;
1892   Worklist.push_back(OrigPhi);
1893   Visited.insert(OrigPhi);
1894 
1895   while (!Worklist.empty()) {
1896     Instruction *NarrowDef = Worklist.pop_back_val();
1897 
1898     for (Use &U : NarrowDef->uses()) {
1899       auto *NarrowUser = cast<Instruction>(U.getUser());
1900 
1901       // Don't go looking outside the current loop.
1902       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1903       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1904         continue;
1905 
1906       if (!Visited.insert(NarrowUser).second)
1907         continue;
1908 
1909       Worklist.push_back(NarrowUser);
1910 
1911       calculatePostIncRange(NarrowDef, NarrowUser);
1912     }
1913   }
1914 }
1915 
1916 //===----------------------------------------------------------------------===//
1917 //  Live IV Reduction - Minimize IVs live across the loop.
1918 //===----------------------------------------------------------------------===//
1919 
1920 //===----------------------------------------------------------------------===//
1921 //  Simplification of IV users based on SCEV evaluation.
1922 //===----------------------------------------------------------------------===//
1923 
1924 namespace {
1925 
1926 class IndVarSimplifyVisitor : public IVVisitor {
1927   ScalarEvolution *SE;
1928   const TargetTransformInfo *TTI;
1929   PHINode *IVPhi;
1930 
1931 public:
1932   WideIVInfo WI;
1933 
1934   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1935                         const TargetTransformInfo *TTI,
1936                         const DominatorTree *DTree)
1937     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1938     DT = DTree;
1939     WI.NarrowIV = IVPhi;
1940   }
1941 
1942   // Implement the interface used by simplifyUsersOfIV.
1943   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1944 };
1945 
1946 } // end anonymous namespace
1947 
1948 /// Iteratively perform simplification on a worklist of IV users. Each
1949 /// successive simplification may push more users which may themselves be
1950 /// candidates for simplification.
1951 ///
1952 /// Sign/Zero extend elimination is interleaved with IV simplification.
1953 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1954                                        SCEVExpander &Rewriter,
1955                                        LoopInfo *LI) {
1956   SmallVector<WideIVInfo, 8> WideIVs;
1957 
1958   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1959           Intrinsic::getName(Intrinsic::experimental_guard));
1960   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1961 
1962   SmallVector<PHINode*, 8> LoopPhis;
1963   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1964     LoopPhis.push_back(cast<PHINode>(I));
1965   }
1966   // Each round of simplification iterates through the SimplifyIVUsers worklist
1967   // for all current phis, then determines whether any IVs can be
1968   // widened. Widening adds new phis to LoopPhis, inducing another round of
1969   // simplification on the wide IVs.
1970   bool Changed = false;
1971   while (!LoopPhis.empty()) {
1972     // Evaluate as many IV expressions as possible before widening any IVs. This
1973     // forces SCEV to set no-wrap flags before evaluating sign/zero
1974     // extension. The first time SCEV attempts to normalize sign/zero extension,
1975     // the result becomes final. So for the most predictable results, we delay
1976     // evaluation of sign/zero extend evaluation until needed, and avoid running
1977     // other SCEV based analysis prior to simplifyAndExtend.
1978     do {
1979       PHINode *CurrIV = LoopPhis.pop_back_val();
1980 
1981       // Information about sign/zero extensions of CurrIV.
1982       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1983 
1984       Changed |=
1985           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1986 
1987       if (Visitor.WI.WidestNativeType) {
1988         WideIVs.push_back(Visitor.WI);
1989       }
1990     } while (!LoopPhis.empty());
1991 
1992     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1993       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1994       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1995         Changed = true;
1996         LoopPhis.push_back(WidePhi);
1997       }
1998     }
1999   }
2000   return Changed;
2001 }
2002 
2003 //===----------------------------------------------------------------------===//
2004 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
2005 //===----------------------------------------------------------------------===//
2006 
2007 /// Given a Value which is hoped to be part of an add recurrence in the given
2008 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
2009 /// that this is less general than SCEV's AddRec checking.
2010 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
2011   Instruction *IncI = dyn_cast<Instruction>(IncV);
2012   if (!IncI)
2013     return nullptr;
2014 
2015   switch (IncI->getOpcode()) {
2016   case Instruction::Add:
2017   case Instruction::Sub:
2018     break;
2019   case Instruction::GetElementPtr:
2020     // An IV counter must preserve its type.
2021     if (IncI->getNumOperands() == 2)
2022       break;
2023     LLVM_FALLTHROUGH;
2024   default:
2025     return nullptr;
2026   }
2027 
2028   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2029   if (Phi && Phi->getParent() == L->getHeader()) {
2030     if (L->isLoopInvariant(IncI->getOperand(1)))
2031       return Phi;
2032     return nullptr;
2033   }
2034   if (IncI->getOpcode() == Instruction::GetElementPtr)
2035     return nullptr;
2036 
2037   // Allow add/sub to be commuted.
2038   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2039   if (Phi && Phi->getParent() == L->getHeader()) {
2040     if (L->isLoopInvariant(IncI->getOperand(0)))
2041       return Phi;
2042   }
2043   return nullptr;
2044 }
2045 
2046 /// Whether the current loop exit test is based on this value.  Currently this
2047 /// is limited to a direct use in the loop condition.
2048 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2049   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2050   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2051   // TODO: Allow non-icmp loop test.
2052   if (!ICmp)
2053     return false;
2054 
2055   // TODO: Allow indirect use.
2056   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2057 }
2058 
2059 /// linearFunctionTestReplace policy. Return true unless we can show that the
2060 /// current exit test is already sufficiently canonical.
2061 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2062   assert(L->getLoopLatch() && "Must be in simplified form");
2063 
2064   // Avoid converting a constant or loop invariant test back to a runtime
2065   // test.  This is critical for when SCEV's cached ExitCount is less precise
2066   // than the current IR (such as after we've proven a particular exit is
2067   // actually dead and thus the BE count never reaches our ExitCount.)
2068   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2069   if (L->isLoopInvariant(BI->getCondition()))
2070     return false;
2071 
2072   // Do LFTR to simplify the exit condition to an ICMP.
2073   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2074   if (!Cond)
2075     return true;
2076 
2077   // Do LFTR to simplify the exit ICMP to EQ/NE
2078   ICmpInst::Predicate Pred = Cond->getPredicate();
2079   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2080     return true;
2081 
2082   // Look for a loop invariant RHS
2083   Value *LHS = Cond->getOperand(0);
2084   Value *RHS = Cond->getOperand(1);
2085   if (!L->isLoopInvariant(RHS)) {
2086     if (!L->isLoopInvariant(LHS))
2087       return true;
2088     std::swap(LHS, RHS);
2089   }
2090   // Look for a simple IV counter LHS
2091   PHINode *Phi = dyn_cast<PHINode>(LHS);
2092   if (!Phi)
2093     Phi = getLoopPhiForCounter(LHS, L);
2094 
2095   if (!Phi)
2096     return true;
2097 
2098   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2099   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2100   if (Idx < 0)
2101     return true;
2102 
2103   // Do LFTR if the exit condition's IV is *not* a simple counter.
2104   Value *IncV = Phi->getIncomingValue(Idx);
2105   return Phi != getLoopPhiForCounter(IncV, L);
2106 }
2107 
2108 /// Return true if undefined behavior would provably be executed on the path to
2109 /// OnPathTo if Root produced a poison result.  Note that this doesn't say
2110 /// anything about whether OnPathTo is actually executed or whether Root is
2111 /// actually poison.  This can be used to assess whether a new use of Root can
2112 /// be added at a location which is control equivalent with OnPathTo (such as
2113 /// immediately before it) without introducing UB which didn't previously
2114 /// exist.  Note that a false result conveys no information.
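///
/// For example (a sketch): if Root is %x, the path to OnPathTo executes
///
///   %q = udiv i64 %n, %x
///
/// and %q dominates OnPathTo, then a poison %x reaches a udiv divisor, which
/// is immediate UB, so this returns true.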
2115 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2116                                           Instruction *OnPathTo,
2117                                           DominatorTree *DT) {
2118   // Basic approach is to assume Root is poison, propagate poison forward
2119   // through all users we can easily track, and then check whether any of those
2120   // users are provably UB and must execute before our exiting block might
2121   // exit.
2122 
2123   // The set of all recursive users we've visited (which are assumed to all be
2124   // poison because of said visit)
2125   SmallSet<const Value *, 16> KnownPoison;
2126   SmallVector<const Instruction*, 16> Worklist;
2127   Worklist.push_back(Root);
2128   while (!Worklist.empty()) {
2129     const Instruction *I = Worklist.pop_back_val();
2130 
2131     // If we know this must trigger UB on a path leading our target.
2132     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2133       return true;
2134 
2135     // If we can't analyze propagation through this instruction, just skip it
2136     // and transitive users.  Safe as false is a conservative result.
2137     if (!propagatesFullPoison(I) && I != Root)
2138       continue;
2139 
2140     if (KnownPoison.insert(I).second)
2141       for (const User *User : I->users())
2142         Worklist.push_back(cast<Instruction>(User));
2143   }
2144 
2145   // Might be non-UB, or might have a path we couldn't prove must execute on
2146   // the way to the exiting bb.
2147   return false;
2148 }
2149 
2150 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2151 /// down to checking that all operands are constant and listing instructions
2152 /// that may hide undef.
2153 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2154                                unsigned Depth) {
2155   if (isa<Constant>(V))
2156     return !isa<UndefValue>(V);
2157 
2158   if (Depth >= 6)
2159     return false;
2160 
2161   // Conservatively handle non-constant non-instructions. For example, Arguments
2162   // may be undef.
2163   Instruction *I = dyn_cast<Instruction>(V);
2164   if (!I)
2165     return false;
2166 
2167   // Load and return values may be undef.
2168   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2169     return false;
2170 
2171   // Optimistically handle other instructions.
2172   for (Value *Op : I->operands()) {
2173     if (!Visited.insert(Op).second)
2174       continue;
2175     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2176       return false;
2177   }
2178   return true;
2179 }
2180 
2181 /// Return true if the given value is concrete. We must prove that undef can
2182 /// never reach it.
2183 ///
2184 /// TODO: If we decide that this is a good approach to checking for undef, we
2185 /// may factor it into a common location.
2186 static bool hasConcreteDef(Value *V) {
2187   SmallPtrSet<Value*, 8> Visited;
2188   Visited.insert(V);
2189   return hasConcreteDefImpl(V, Visited, 0);
2190 }
2191 
2192 /// Return true if this IV has any uses other than the (soon to be rewritten)
2193 /// loop exit test.
2194 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2195   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2196   Value *IncV = Phi->getIncomingValue(LatchIdx);
2197 
2198   for (User *U : Phi->users())
2199     if (U != Cond && U != IncV) return false;
2200 
2201   for (User *U : IncV->users())
2202     if (U != Cond && U != Phi) return false;
2203   return true;
2204 }
2205 
2206 /// Return true if the given phi is a "counter" in L.  A counter is an
2207 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2208 /// step of 1.  Note that L must have exactly one latch.
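///
/// A counter, sketched in IR:
///
///   %civ      = phi i32 [ %start, %preheader ], [ %civ.next, %latch ]
///   %civ.next = add i32 %civ, 1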
2209 static bool isLoopCounter(PHINode* Phi, Loop *L,
2210                           ScalarEvolution *SE) {
2211   assert(Phi->getParent() == L->getHeader());
2212   assert(L->getLoopLatch());
2213 
2214   if (!SE->isSCEVable(Phi->getType()))
2215     return false;
2216 
2217   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2218   if (!AR || AR->getLoop() != L || !AR->isAffine())
2219     return false;
2220 
2221   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2222   if (!Step || !Step->isOne())
2223     return false;
2224 
2225   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2226   Value *IncV = Phi->getIncomingValue(LatchIdx);
2227   return (getLoopPhiForCounter(IncV, L) == Phi);
2228 }
2229 
2230 /// Search the loop header for a loop counter (an add rec w/step of one)
2231 /// suitable for use by LFTR.  If multiple counters are available, select the
2232 /// "best" one based on profitability heuristics.
2233 ///
2234 /// BECount may be an i8* pointer type. The pointer difference is already
2235 /// a valid count without scaling the address stride, so it remains a pointer
2236 /// expression as far as SCEV is concerned.
2237 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2238                                 const SCEV *BECount,
2239                                 ScalarEvolution *SE, DominatorTree *DT) {
2240   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2241 
2242   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2243 
2244   // Loop over all of the PHI nodes, looking for a simple counter.
2245   PHINode *BestPhi = nullptr;
2246   const SCEV *BestInit = nullptr;
2247   BasicBlock *LatchBlock = L->getLoopLatch();
2248   assert(LatchBlock && "Must be in simplified form");
2249   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2250 
2251   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2252     PHINode *Phi = cast<PHINode>(I);
2253     if (!isLoopCounter(Phi, L, SE))
2254       continue;
2255 
2256     // Avoid comparing an integer IV against a pointer Limit.
2257     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2258       continue;
2259 
2260     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2261 
2262     // AR may be a pointer type, while BECount is an integer type.
2263     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2264     // AR may not be a narrower type, or we may never exit.
2265     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2266     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2267       continue;
2268 
2269     // Avoid reusing a potentially undef value to compute other values that may
2270     // have originally had a concrete definition.
2271     if (!hasConcreteDef(Phi)) {
2272       // We explicitly allow unknown phis as long as they are already used by
2273       // the loop exit test.  This is legal since performing LFTR could not
2274       // increase the number of undef users.
2275       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
2276       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
2277           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
2278         continue;
2279     }
2280 
2281     // Avoid introducing undefined behavior due to poison which didn't exist in
2282     // the original program.  (Annoyingly, the rules for poison and undef
2283     // propagation are distinct, so this does NOT cover the undef case above.)
2284     // We have to ensure that we don't introduce UB by introducing a use on an
2285     // iteration where said IV produces poison.  Our strategy here differs for
2286     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
2287     // see code in linearFunctionTestReplace.  For pointers, we restrict
2288     // transforms as there is no good way to reinfer inbounds once lost.
2289     if (!Phi->getType()->isIntegerTy() &&
2290         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2291       continue;
2292 
2293     const SCEV *Init = AR->getStart();
2294 
2295     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2296       // Don't force a live loop counter if another IV can be used.
2297       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2298         continue;
2299 
2300       // Prefer to count-from-zero. This is a more "canonical" counter form. It
2301       // also prefers integer to pointer IVs.
2302       if (BestInit->isZero() != Init->isZero()) {
2303         if (BestInit->isZero())
2304           continue;
2305       }
2306       // If two IVs both count from zero or both count from nonzero then the
2307       // narrower is likely a dead phi that has been widened. Use the wider phi
2308       // to allow the other to be eliminated.
2309       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2310         continue;
2311     }
2312     BestPhi = Phi;
2313     BestInit = Init;
2314   }
2315   return BestPhi;
2316 }
2317 
2318 /// Insert an IR expression which computes the value held by the IV IndVar
2319 /// (which must be a loop counter w/unit stride) after the backedge of loop L
2320 /// is taken ExitCount times.
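///
/// A sketch for the common integer case with unit stride: for a counter with
/// start S and exit count EC, the pre-increment limit is S + EC and the
/// post-increment limit is S + EC + 1; 2's complement wrap is acceptable
/// because the rewritten exit test is an eq/ne comparison.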
2321 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2322                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
2323                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2324   assert(isLoopCounter(IndVar, L, SE));
2325   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2326   const SCEV *IVInit = AR->getStart();
2327 
2328   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2329   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2330   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2331   // the existing GEPs whenever possible.
2332   if (IndVar->getType()->isPointerTy() &&
2333       !ExitCount->getType()->isPointerTy()) {
2334     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2335     // signed value. ExitCount on the other hand represents the loop trip count,
2336     // which is an unsigned value. FindLoopCounter only allows induction
2337     // variables that have a positive unit stride of one. This means we don't
2338     // have to handle the case of negative offsets (yet) and just need to zero
2339     // extend ExitCount.
2340     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2341     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2342     if (UsePostInc)
2343       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2344 
2345     // Expand the code for the iteration count.
2346     assert(SE->isLoopInvariant(IVOffset, L) &&
2347            "Computed iteration count is not loop invariant!");
2348 
2349     // We could handle pointer IVs other than i8*, but we need to compensate for
2350     // gep index scaling.
2351     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2352                              cast<PointerType>(IndVar->getType())
2353                                  ->getElementType())->isOne() &&
2354            "unit stride pointer IV must be i8*");
2355 
2356     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2357     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2358     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2359   } else {
2360     // In any other case, convert both IVInit and ExitCount to integers before
2361     // comparing. This may result in SCEV expansion of pointers, but in practice
2362     // SCEV will fold the pointer arithmetic away as such:
2363     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2364     //
2365     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2366     // for simple memset-style loops.
2367     //
2368     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2369     // were generated on top of case #2, which is not expected.
2370 
2371     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2372     // For unit stride, IVCount = Start + ExitCount with 2's complement
2373     // overflow.
2374 
2375     // For integer IVs, truncate the IV before computing IVInit + BECount,
2376     // unless we know a priori that the limit must be a constant when evaluated
2377     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2378     // of the IV in the loop over a (potentially) expensive expansion of the
2379     // widened exit count add(zext(add)) expression.
2380     if (SE->getTypeSizeInBits(IVInit->getType())
2381         > SE->getTypeSizeInBits(ExitCount->getType())) {
2382       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2383         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2384       else
2385         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2386     }
2387 
2388     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2389 
2390     if (UsePostInc)
2391       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2392 
2393     // Expand the code for the iteration count.
2394     assert(SE->isLoopInvariant(IVLimit, L) &&
2395            "Computed iteration count is not loop invariant!");
2396     // Ensure that we generate the same type as IndVar, or a smaller integer
2397     // type. In the presence of null pointer values, we have an integer type
2398     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2399     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2400       IndVar->getType() : ExitCount->getType();
2401     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2402     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2403   }
2404 }
2405 
2406 /// This method rewrites the exit condition of the loop to be a canonical !=
2407 /// comparison against the incremented loop induction variable.  This pass is
2408 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2409 /// determine a loop-invariant trip count of the loop, which is actually a much
2410 /// broader range than just linear tests.
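///
/// A sketch of the rewrite (illustrative IR):
///
///   %cmp = icmp slt i32 %iv, %n
///   br i1 %cmp, label %loop, label %exit
/// ==>
///   %exitcond = icmp ne i32 %iv.next, %limit
///   br i1 %exitcond, label %loop, label %exit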
2411 bool IndVarSimplify::
2412 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2413                           const SCEV *ExitCount,
2414                           PHINode *IndVar, SCEVExpander &Rewriter) {
2415   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2416   assert(isLoopCounter(IndVar, L, SE));
2417   Instruction * const IncVar =
2418     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2419 
2420   // Initialize CmpIndVar to the preincremented IV.
2421   Value *CmpIndVar = IndVar;
2422   bool UsePostInc = false;
2423 
2424   // If the exiting block is the same as the backedge block, we prefer to
2425   // compare against the post-incremented value, otherwise we must compare
2426   // against the preincremented value.
2427   if (ExitingBB == L->getLoopLatch()) {
2428     // For pointer IVs, we chose not to strip inbounds, which requires us not
2429     // to add a potentially UB-introducing use.  We need to either a) show
2430     // the loop test we're modifying is already in post-inc form, or b) show
2431     // that adding a use must not introduce UB.
2432     bool SafeToPostInc =
2433         IndVar->getType()->isIntegerTy() ||
2434         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2435         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2436     if (SafeToPostInc) {
2437       UsePostInc = true;
2438       CmpIndVar = IncVar;
2439     }
2440   }
2441 
2442   // It may be necessary to drop nowrap flags on the incrementing instruction
2443   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2444   // case the increment might have previously been poison on the last iteration
2445   // only) or if LFTR switches to a different IV that was previously dynamically
2446   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2447   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2448   // check), because the pre-inc addrec flags may be adopted from the original
2449   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2450   // TODO: This handling is inaccurate for one case: If we switch to a
2451   // dynamically dead IV that wraps on the first loop iteration only, which is
2452   // not covered by the post-inc addrec. (If the new IV was not dynamically
2453   // dead, it could not be poison on the first iteration in the first place.)
2454   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2455     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2456     if (BO->hasNoUnsignedWrap())
2457       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2458     if (BO->hasNoSignedWrap())
2459       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2460   }
2461 
2462   Value *ExitCnt = genLoopLimit(
2463       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2464   assert(ExitCnt->getType()->isPointerTy() ==
2465              IndVar->getType()->isPointerTy() &&
2466          "genLoopLimit missed a cast");
2467 
2468   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2469   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2470   ICmpInst::Predicate P;
2471   if (L->contains(BI->getSuccessor(0)))
2472     P = ICmpInst::ICMP_NE;
2473   else
2474     P = ICmpInst::ICMP_EQ;
2475 
2476   IRBuilder<> Builder(BI);
2477 
2478   // The new loop exit condition should reuse the debug location of the
2479   // original loop exit condition.
2480   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2481     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2482 
2483   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2484   // avoid the expensive expansion of the limit expression in the wider type,
2485   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2486   // since we know (from the exit count bitwidth) that we can't self-wrap in
2487   // the narrower type.
2488   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2489   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2490   if (CmpIndVarSize > ExitCntSize) {
2491     assert(!CmpIndVar->getType()->isPointerTy() &&
2492            !ExitCnt->getType()->isPointerTy());
2493 
2494     // Before resorting to actually inserting the truncate, use the same
2495     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2496     // the other side of the comparison instead.  We still evaluate the limit
2497     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2498     // a truncate within it.
2499     bool Extended = false;
2500     const SCEV *IV = SE->getSCEV(CmpIndVar);
2501     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2502                                                   ExitCnt->getType());
2503     const SCEV *ZExtTrunc =
2504       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2505 
2506     if (ZExtTrunc == IV) {
2507       Extended = true;
2508       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2509                                    "wide.trip.count");
2510     } else {
2511       const SCEV *SExtTrunc =
2512         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2513       if (SExtTrunc == IV) {
2514         Extended = true;
2515         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2516                                      "wide.trip.count");
2517       }
2518     }
2519 
2520     if (Extended) {
2521       bool Discard;
2522       L->makeLoopInvariant(ExitCnt, Discard);
2523     } else
2524       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2525                                       "lftr.wideiv");
2526   }
2527   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2528                     << "      LHS:" << *CmpIndVar << '\n'
2529                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2530                     << "\n"
2531                     << "      RHS:\t" << *ExitCnt << "\n"
2532                     << "ExitCount:\t" << *ExitCount << "\n"
2533                     << "  was: " << *BI->getCondition() << "\n");
2534 
2535   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2536   Value *OrigCond = BI->getCondition();
2537   // It's tempting to use replaceAllUsesWith here to fully replace the old
2538   // comparison, but that's not immediately safe, since users of the old
2539   // comparison may not be dominated by the new comparison. Instead, just
2540   // update the branch to use the new comparison; in the common case this
2541   // will make old comparison dead.
2542   BI->setCondition(Cond);
2543   DeadInsts.push_back(OrigCond);
2544 
2545   ++NumLFTR;
2546   return true;
2547 }
2548 
2549 //===----------------------------------------------------------------------===//
2550 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2551 //===----------------------------------------------------------------------===//
2552 
2553 /// If there's a single exit block, sink any loop-invariant values that
2554 /// were defined in the preheader but not used inside the loop into the
2555 /// exit block to reduce register pressure in the loop.
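///
/// For example (illustrative): if the preheader computes
///   %inv = mul i64 %a, %b
/// and %inv is only used after the loop, sinking the mul into the exit
/// block avoids keeping %inv live in a register across the loop body.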
2556 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2557   BasicBlock *ExitBlock = L->getExitBlock();
2558   if (!ExitBlock) return false;
2559 
2560   BasicBlock *Preheader = L->getLoopPreheader();
2561   if (!Preheader) return false;
2562 
2563   bool MadeAnyChanges = false;
2564   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2565   BasicBlock::iterator I(Preheader->getTerminator());
2566   while (I != Preheader->begin()) {
2567     --I;
2568     // New instructions were inserted at the end of the preheader.
2569     if (isa<PHINode>(I))
2570       break;
2571 
2572     // Don't move instructions which might have side effects, since the side
2573     // effects need to complete before the in-loop instructions run.  Also don't
2574     // move instructions which might read memory, since the loop may modify
2575     // memory. Note that it's okay if the instruction might have undefined
2576     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2577     // block.
2578     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2579       continue;
2580 
2581     // Skip debug info intrinsics.
2582     if (isa<DbgInfoIntrinsic>(I))
2583       continue;
2584 
2585     // Skip eh pad instructions.
2586     if (I->isEHPad())
2587       continue;
2588 
2589     // Don't sink allocas: we never want to sink static allocas out of the
2590     // entry block, and correctly sinking dynamic allocas requires
2591     // checks for stacksave/stackrestore intrinsics.
2592     // FIXME: Refactor this check somehow?
2593     if (isa<AllocaInst>(I))
2594       continue;
2595 
2596     // Determine if there is a use in or before the loop (direct or
2597     // otherwise).
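    // Note that a use by a PHI is treated as occurring in the corresponding
    // incoming block rather than in the block containing the PHI itself.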
2598     bool UsedInLoop = false;
2599     for (Use &U : I->uses()) {
2600       Instruction *User = cast<Instruction>(U.getUser());
2601       BasicBlock *UseBB = User->getParent();
2602       if (PHINode *P = dyn_cast<PHINode>(User)) {
2603         unsigned i =
2604           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2605         UseBB = P->getIncomingBlock(i);
2606       }
2607       if (UseBB == Preheader || L->contains(UseBB)) {
2608         UsedInLoop = true;
2609         break;
2610       }
2611     }
2612 
2613     // If there is, the def must remain in the preheader.
2614     if (UsedInLoop)
2615       continue;
2616 
2617     // Otherwise, sink it to the exit block.
2618     Instruction *ToMove = &*I;
2619     bool Done = false;
2620 
2621     if (I != Preheader->begin()) {
2622       // Skip debug info intrinsics.
2623       do {
2624         --I;
2625       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2626 
2627       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2628         Done = true;
2629     } else {
2630       Done = true;
2631     }
2632 
2633     MadeAnyChanges = true;
2634     ToMove->moveBefore(*ExitBlock, InsertPt);
2635     if (Done) break;
2636     InsertPt = ToMove->getIterator();
2637   }
2638 
2639   return MadeAnyChanges;
2640 }
2641 
2642 /// Return a symbolic upper bound for the backedge taken count of the loop.
2643 /// This is more general than getConstantMaxBackedgeTakenCount as it returns
2644 /// an arbitrary expression as opposed to only constants.
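/// For example (illustrative), a loop with two analyzable exits whose counts
/// are %n and 100 yields the symbolic bound umin(%n, 100).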
2645 /// TODO: Move into the ScalarEvolution class.
2646 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
2647                                             DominatorTree &DT, Loop *L) {
2648   SmallVector<BasicBlock*, 16> ExitingBlocks;
2649   L->getExitingBlocks(ExitingBlocks);
2650 
2651   // Form an expression for the maximum exit count possible for this loop. We
2652   // merge the max and exact information to approximate a version of
2653   // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2654   SmallVector<const SCEV*, 4> ExitCounts;
2655   for (BasicBlock *ExitingBB : ExitingBlocks) {
2656     const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
2657     if (isa<SCEVCouldNotCompute>(ExitCount))
2658       ExitCount = SE.getExitCount(L, ExitingBB,
2659                                   ScalarEvolution::ConstantMaximum);
2660     if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2661       assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
2662              "We should only have known counts for exiting blocks that "
2663              "dominate latch!");
2664       ExitCounts.push_back(ExitCount);
2665     }
2666   }
2667   if (ExitCounts.empty())
2668     return SE.getCouldNotCompute();
2669   return SE.getUMinFromMismatchedTypes(ExitCounts);
2670 }
2671 
2672 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2673   SmallVector<BasicBlock*, 16> ExitingBlocks;
2674   L->getExitingBlocks(ExitingBlocks);
2675 
2676   // Remove all exits which aren't both rewritable and analyzable.
2677   auto NewEnd = llvm::remove_if(ExitingBlocks,
2678                                 [&](BasicBlock *ExitingBB) {
2679     // If our exiting block exits multiple loops, we can only rewrite the
2680     // innermost one.  Otherwise, we're changing how many times the innermost
2681     // loop runs before it exits.
2682     if (LI->getLoopFor(ExitingBB) != L)
2683       return true;
2684 
2685     // Can't rewrite non-branch yet.
2686     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2687     if (!BI)
2688       return true;
2689 
2690     // If already constant, nothing to do.
2691     if (isa<Constant>(BI->getCondition()))
2692       return true;
2693 
2694     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2695     if (isa<SCEVCouldNotCompute>(ExitCount))
2696       return true;
2697     return false;
2698   });
2699   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2700 
2701   if (ExitingBlocks.empty())
2702     return false;
2703 
2704   // Get a symbolic upper bound on the loop backedge taken count.
2705   const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2706   if (isa<SCEVCouldNotCompute>(MaxExitCount))
2707     return false;
2708 
2709   // Visit our exit blocks in order of dominance.  We know from the fact
2710   // that all remaining exits are analyzable that there must be a total
2711   // dominance order between them as each must dominate the latch.  The
2712   // visit order only matters for the provably equal case.
2713   llvm::sort(ExitingBlocks,
2714              [&](BasicBlock *A, BasicBlock *B) {
2715                // std::sort sorts in ascending order, so we want the inverse of
2716                // the normal dominance relation.
2717                if (DT->properlyDominates(A, B)) return true;
2718                if (DT->properlyDominates(B, A)) return false;
2719                llvm_unreachable("expected total dominance order!");
2720              });
2721 #ifndef NDEBUG
2722   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2723     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2724   }
2725 #endif
2726 
2727   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2728     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2729     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2730     auto *OldCond = BI->getCondition();
2731     auto *NewCond = ConstantInt::get(OldCond->getType(),
2732                                      IsTaken ? ExitIfTrue : !ExitIfTrue);
2733     BI->setCondition(NewCond);
2734     if (OldCond->use_empty())
2735       DeadInsts.push_back(OldCond);
2736   };
2737 
2738   bool Changed = false;
2739   SmallSet<const SCEV*, 8> DominatingExitCounts;
2740   for (BasicBlock *ExitingBB : ExitingBlocks) {
2741     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2742     assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2743 
2744     // If we know we'd exit on the first iteration, rewrite the exit to
2745     // reflect this.  This does not imply the loop must exit through this
2746     // exit; there may be an earlier one taken on the first iteration.
2747     // TODO: Given we know the backedge can't be taken, we should go ahead
2748     // and break it.  Or at least, kill all the header phis and simplify.
2749     if (ExitCount->isZero()) {
2750       FoldExit(ExitingBB, true);
2751       Changed = true;
2752       continue;
2753     }
2754 
2755     // If we end up with a pointer exit count, bail.  Note that we can end up
2756     // with a pointer exit count for one exiting block, and not for another in
2757     // the same loop.
2758     if (!ExitCount->getType()->isIntegerTy() ||
2759         !MaxExitCount->getType()->isIntegerTy())
2760       continue;
2761 
2762     Type *WiderType =
2763       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2764     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2765     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2766     assert(MaxExitCount->getType() == ExitCount->getType());
2767 
2768     // Can we prove that some other exit must be taken strictly before this
2769     // one?
2770     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2771                                      MaxExitCount, ExitCount)) {
2772       FoldExit(ExitingBB, false);
2773       Changed = true;
2774       continue;
2775     }
2776 
2777     // As we run, keep track of which exit counts we've encountered.  If we
2778     // find a duplicate, we've found an exit which would exit on the same
2779     // iteration as an earlier exit that (from the visit order) dominates
2780     // it, and which is thus dead.
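    // For instance (illustrative), if two exits both test 'i == %n', only
    // the dominating one can ever be taken; the later exit is dead and its
    // branch is folded to stay in the loop.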
2781     if (!DominatingExitCounts.insert(ExitCount).second) {
2782       FoldExit(ExitingBB, false);
2783       Changed = true;
2784       continue;
2785     }
2786 
2787     // TODO: There might be another opportunity to leverage SCEV's reasoning
2788     // here.  If we kept track of the min of dominating exits so far, we could
2789     // discharge exits with EC >= MDEC. This is less powerful than the existing
2790     // transform (since later exits aren't considered), but potentially more
2791     // powerful for any case where SCEV can prove a >=u b, but neither a == b
2792     // nor a >u b.  Such a case is not currently known.
2793   }
2794   return Changed;
2795 }
2796 
2797 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2798   SmallVector<BasicBlock*, 16> ExitingBlocks;
2799   L->getExitingBlocks(ExitingBlocks);
2800 
2801   bool Changed = false;
2802 
2803   // Finally, see if we can rewrite our exit conditions into a loop invariant
2804   // form.  If we have a read-only loop, and we can tell that we must exit down
2805   // a path which does not need any of the values computed within the loop, we
2806   // can rewrite the loop to exit on the first iteration.  Note that this
2807   // neither a) tells us the loop exits on the first iteration (unless
2808   // *all* exits are predicatable) nor b) tells us *which* exit might be
2809   // taken.  This transformation looks a lot like dead loop elimination,
2810   // but is restricted to read-only loops and does not necessarily need to
2811   // kill the loop entirely.
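  //
  // For example (illustrative), given a read-only loop:
  //   for (i = 0; i != n; i++)
  //     if (i == m) break;
  // the early exit can be rewritten to branch on the loop invariant test of
  // whether its exit count equals the exact backedge taken count, instead
  // of re-evaluating 'i == m' on every iteration.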
2812   if (!LoopPredication)
2813     return Changed;
2814 
2815   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2816     return Changed;
2817 
2818   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2819   // through *explicit* control flow.  We have to eliminate the possibility of
2820   // implicit exits (see below) before we know it's truly exact.
2821   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2822   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2823       !SE->isLoopInvariant(ExactBTC, L) ||
2824       !isSafeToExpand(ExactBTC, *SE))
2825     return Changed;
2826 
2827   // If we end up with a pointer exit count, bail.  It may be unsized.
2828   if (!ExactBTC->getType()->isIntegerTy())
2829     return Changed;
2830 
2831   auto BadExit = [&](BasicBlock *ExitingBB) {
2832     // If our exiting block exits multiple loops, we can only rewrite the
2833     // innermost one.  Otherwise, we're changing how many times the innermost
2834     // loop runs before it exits.
2835     if (LI->getLoopFor(ExitingBB) != L)
2836       return true;
2837 
2838     // Can't rewrite non-branch yet.
2839     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2840     if (!BI)
2841       return true;
2842 
2843     // If already constant, nothing to do.
2844     if (isa<Constant>(BI->getCondition()))
2845       return true;
2846 
2847     // If the exit block has phis, we need to be able to compute the values
2848     // within the loop which contains them.  This assumes trivially LCSSA
2849     // phis have already been removed; TODO: generalize.
2850     BasicBlock *ExitBlock =
2851         BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2852     if (!ExitBlock->phis().empty())
2853       return true;
2854 
2855     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2856     assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2857     if (!SE->isLoopInvariant(ExitCount, L) ||
2858         !isSafeToExpand(ExitCount, *SE))
2859       return true;
2860 
2861     // If we end up with a pointer exit count, bail.  It may be unsized.
2862     if (!ExitCount->getType()->isIntegerTy())
2863       return true;
2864 
2865     return false;
2866   };
2867 
2868   // If we have any exits which can't be predicated themselves, then we can't
2869   // predicate any exit which isn't guaranteed to execute before it.  Consider
2870   // two exits (a) and (b) which would both exit on the same iteration.  If we
2871   // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2872   // we could convert a loop from exiting through (a) to one exiting through
2873   // (b).  Note that this problem exists only for exits with the same exit
2874   // count, and we could be more aggressive when exit counts are known unequal.
2875   llvm::sort(ExitingBlocks,
2876             [&](BasicBlock *A, BasicBlock *B) {
2877               // std::sort sorts in ascending order, so we want the inverse of
2878               // the normal dominance relation, plus a tie breaker for blocks
2879               // unordered by dominance.
2880               if (DT->properlyDominates(A, B)) return true;
2881               if (DT->properlyDominates(B, A)) return false;
2882               return A->getName() < B->getName();
2883             });
2884   // Check to see if our exit blocks are a total order (i.e. a linear chain of
2885   // exits before the backedge).  If they aren't, reasoning about reachability
2886   // is complicated and we choose not to do so for now.
2887   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2888     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2889       return Changed;
2890 
2891   // Given our sorted total order, we know that exit[j] must be evaluated
2892   // after all exit[i] such that j > i.
2893   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2894     if (BadExit(ExitingBlocks[i])) {
2895       ExitingBlocks.resize(i);
2896       break;
2897     }
2898 
2899   if (ExitingBlocks.empty())
2900     return Changed;
2901 
2902   // We rely on not being able to reach an exiting block on a later iteration
2903   // than its statically computed exit count.  The implementation of
2904   // getExitCount currently has this invariant, but assert it here so that
2905   // breakage is obvious if this ever changes.
2906   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2907         return DT->dominates(ExitingBB, L->getLoopLatch());
2908       }));
2909 
2910   // At this point, ExitingBlocks consists of only those blocks which are
2911   // predicatable.  Given that, we know we have at least one exit we can
2912   // predicate if the loop doesn't have side effects and doesn't have any
2913   // implicit exits (because then our exact BTC isn't actually exact).
2914   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
2915   // suggestions on how to improve this?  I can obviously bail out for outer
2916   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
2917   // is that enough for *all* side effects?
2918   for (BasicBlock *BB : L->blocks())
2919     for (auto &I : *BB)
2920       // TODO: isGuaranteedToTransfer
2921       if (I.mayHaveSideEffects() || I.mayThrow())
2922         return Changed;
2923 
2924   // Finally, do the actual predication for all predicatable blocks.  A couple
2925   // of notes here:
2926   // 1) We don't bother to constant fold dominated exits with identical exit
2927   //    counts; that's simply a form of CSE/equality propagation and we leave
2928   //    it for dedicated passes.
2929   // 2) We insert the comparison at the branch.  Hoisting introduces additional
2930   //    legality constraints and we leave that to dedicated logic.  We want to
2931   //    predicate even if we can't insert a loop invariant expression as
2932   //    peeling or unrolling will likely reduce the cost of the otherwise loop
2933   //    varying check.
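  //
  // Concretely (illustrative), each predicated exit's branch condition
  // becomes:
  //   %ecv = <expansion of this exit's count>
  //   %cond = icmp eq/ne %ecv, %exact.btc
  // with eq vs ne chosen by which successor leaves the loop.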
2934   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2935   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2936   Value *ExactBTCV = nullptr; // Lazily generated if needed.
2937   for (BasicBlock *ExitingBB : ExitingBlocks) {
2938     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2939 
2940     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2941     Value *NewCond;
2942     if (ExitCount == ExactBTC) {
2943       NewCond = L->contains(BI->getSuccessor(0)) ?
2944         B.getFalse() : B.getTrue();
2945     } else {
2946       Value *ECV = Rewriter.expandCodeFor(ExitCount);
2947       if (!ExactBTCV)
2948         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2949       Value *RHS = ExactBTCV;
2950       if (ECV->getType() != RHS->getType()) {
2951         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2952         ECV = B.CreateZExt(ECV, WiderTy);
2953         RHS = B.CreateZExt(RHS, WiderTy);
2954       }
2955       auto Pred = L->contains(BI->getSuccessor(0)) ?
2956         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2957       NewCond = B.CreateICmp(Pred, ECV, RHS);
2958     }
2959     Value *OldCond = BI->getCondition();
2960     BI->setCondition(NewCond);
2961     if (OldCond->use_empty())
2962       DeadInsts.push_back(OldCond);
2963     Changed = true;
2964   }
2965 
2966   return Changed;
2967 }
2968 
2969 //===----------------------------------------------------------------------===//
2970 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2971 //===----------------------------------------------------------------------===//
2972 
2973 bool IndVarSimplify::run(Loop *L) {
2974   // We need (and expect!) the incoming loop to be in LCSSA.
2975   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2976          "LCSSA required to run indvars!");
2977   bool Changed = false;
2978 
2979   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2980   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2981   //    canonicalization can be a pessimization without LSR to "clean up"
2982   //    afterwards.
2983   //  - We depend on having a preheader; in particular,
2984   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2985   //    and we're in trouble if we can't find the induction variable even when
2986   //    we've manually inserted one.
2987   //  - LFTR relies on having a single backedge.
2988   if (!L->isLoopSimplifyForm())
2989     return false;
2990 
2991 #ifndef NDEBUG
2992   // Used below for a consistency check only
2993   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2994 #endif
2995 
2996   // If there are any floating-point recurrences, attempt to
2997   // transform them to use integer recurrences.
2998   Changed |= rewriteNonIntegerIVs(L);
2999 
3000   // Create a rewriter object which we'll use to transform the code with.
3001   SCEVExpander Rewriter(*SE, DL, "indvars");
3002 #ifndef NDEBUG
3003   Rewriter.setDebugType(DEBUG_TYPE);
3004 #endif
3005 
3006   // Eliminate redundant IV users.
3007   //
3008   // Simplification works best when run before other consumers of SCEV. We
3009   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
3010   // other expressions involving loop IVs have been evaluated. This helps SCEV
3011   // set no-wrap flags before normalizing sign/zero extension.
3012   Rewriter.disableCanonicalMode();
3013   Changed |= simplifyAndExtend(L, Rewriter, LI);
3014 
3015   // Check to see if we can compute the final value of any expressions
3016   // that are recurrent in the loop, and substitute the exit values from the
3017   // loop into any instructions outside of the loop that use the final values
3018   // of the current expressions.
3019   if (ReplaceExitValue != NeverRepl)
3020     Changed |= rewriteLoopExitValues(L, Rewriter);
3021 
3022   // Eliminate redundant IV cycles.
3023   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
3024 
3025   // Try to eliminate loop exits based on analyzable exit counts.
3026   if (optimizeLoopExits(L, Rewriter))  {
3027     Changed = true;
3028     // Given we've changed exit counts, notify SCEV
3029     SE->forgetLoop(L);
3030   }
3031 
3032   // Try to form loop invariant tests for loop exits by changing how many
3033   // iterations of the loop run when that is unobservable.
3034   if (predicateLoopExits(L, Rewriter)) {
3035     Changed = true;
3036     // Given we've changed exit counts, notify SCEV
3037     SE->forgetLoop(L);
3038   }
3039 
3040   // If we have a trip count expression, rewrite the loop's exit condition
3041   // using it.
3042   if (!DisableLFTR) {
3043     SmallVector<BasicBlock*, 16> ExitingBlocks;
3044     L->getExitingBlocks(ExitingBlocks);
3045     for (BasicBlock *ExitingBB : ExitingBlocks) {
3046       // Can't rewrite non-branch yet.
3047       if (!isa<BranchInst>(ExitingBB->getTerminator()))
3048         continue;
3049 
3050       // If our exiting block exits multiple loops, we can only rewrite the
3051       // innermost one.  Otherwise, we're changing how many times the innermost
3052       // loop runs before it exits.
3053       if (LI->getLoopFor(ExitingBB) != L)
3054         continue;
3055 
3056       if (!needsLFTR(L, ExitingBB))
3057         continue;
3058 
3059       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
3060       if (isa<SCEVCouldNotCompute>(ExitCount))
3061         continue;
3062 
3063       // This was handled above, but as we form SCEVs, we can sometimes refine
3064       // existing ones; this allows exit counts to be folded to zero which
3065       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
3066       // until stable to handle cases like this better.
3067       if (ExitCount->isZero())
3068         continue;
3069 
3070       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
3071       if (!IndVar)
3072         continue;
3073 
3074       // Avoid high cost expansions.  Note: This heuristic is questionable in
3075       // that our definition of "high cost" is not exactly principled.
3076       if (Rewriter.isHighCostExpansion(ExitCount, L))
3077         continue;
3078 
3079       // Check preconditions for proper SCEVExpander operation. SCEV does not
3080       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
3081       // any pass that uses the SCEVExpander must do it. This does not work
3082       // well for loop passes because SCEVExpander makes assumptions about
3083       // all loops, while LoopPassManager only forces the current loop to be
3084       // simplified.
3085       //
3086       // FIXME: SCEV expansion has no way to bail out, so the caller must
3087       // explicitly check any assumptions made by SCEV. Brittle.
3088       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
3089       if (!AR || AR->getLoop()->getLoopPreheader())
3090         Changed |= linearFunctionTestReplace(L, ExitingBB,
3091                                              ExitCount, IndVar,
3092                                              Rewriter);
3093     }
3094   }
3095   // Clear the rewriter cache, because values that are in the rewriter's cache
3096   // can be deleted in the loop below, causing the AssertingVH in the cache to
3097   // trigger.
3098   Rewriter.clear();
3099 
3100   // Now that we're done iterating through lists, clean up any instructions
3101   // which are now dead.
3102   while (!DeadInsts.empty())
3103     if (Instruction *Inst =
3104             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
3105       Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
3106 
3107   // The Rewriter may not be used from this point on.
3108 
3109   // Loop-invariant instructions in the preheader that aren't used in the
3110   // loop may be sunk below the loop to reduce register pressure.
3111   Changed |= sinkUnusedInvariants(L);
3112 
3113   // rewriteFirstIterationLoopExitValues does not rely on the computation of
3114   // trip count and therefore can further simplify exit values in addition to
3115   // rewriteLoopExitValues.
3116   Changed |= rewriteFirstIterationLoopExitValues(L);
3117 
3118   // Clean up dead instructions.
3119   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
3120 
3121   // Check a post-condition.
3122   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
3123          "Indvars did not preserve LCSSA!");
3124 
3125   // Verify that LFTR and any other changes have not interfered with SCEV's
3126   // ability to compute trip count.  We may have *changed* the exit count, but
3127   // only by reducing it.
3128 #ifndef NDEBUG
3129   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
3130     SE->forgetLoop(L);
3131     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
3132     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
3133         SE->getTypeSizeInBits(NewBECount->getType()))
3134       NewBECount = SE->getTruncateOrNoop(NewBECount,
3135                                          BackedgeTakenCount->getType());
3136     else
3137       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
3138                                                  NewBECount->getType());
3139     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
3140                                  NewBECount) && "indvars must preserve SCEV");
3141   }
3142 #endif
3143 
3144   return Changed;
3145 }
3146 
3147 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
3148                                           LoopStandardAnalysisResults &AR,
3149                                           LPMUpdater &) {
3150   Function *F = L.getHeader()->getParent();
3151   const DataLayout &DL = F->getParent()->getDataLayout();
3152 
3153   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
3154   if (!IVS.run(&L))
3155     return PreservedAnalyses::all();
3156 
3157   auto PA = getLoopPassPreservedAnalyses();
3158   PA.preserveSet<CFGAnalyses>();
3159   return PA;
3160 }
3161 
3162 namespace {
3163 
3164 struct IndVarSimplifyLegacyPass : public LoopPass {
3165   static char ID; // Pass identification, replacement for typeid
3166 
3167   IndVarSimplifyLegacyPass() : LoopPass(ID) {
3168     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
3169   }
3170 
3171   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
3172     if (skipLoop(L))
3173       return false;
3174 
3175     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3176     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
3177     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3178     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
3179     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
3180     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
3181     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
3182     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
3183 
3184     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
3185     return IVS.run(L);
3186   }
3187 
3188   void getAnalysisUsage(AnalysisUsage &AU) const override {
3189     AU.setPreservesCFG();
3190     getLoopAnalysisUsage(AU);
3191   }
3192 };
3193 
3194 } // end anonymous namespace
3195 
3196 char IndVarSimplifyLegacyPass::ID = 0;
3197 
3198 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
3199                       "Induction Variable Simplification", false, false)
3200 INITIALIZE_PASS_DEPENDENCY(LoopPass)
3201 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
3202                     "Induction Variable Simplification", false, false)
3203 
3204 Pass *llvm::createIndVarSimplifyPass() {
3205   return new IndVarSimplifyLegacyPass();
3206 }
3207