//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
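// For illustration (a sketch, not taken from a test), change (2) turns
//   for (i = 0; i != n; ++i) sum += 4;
//   use(sum);
// into a form where 'use' reads an expansion of the exit value (n * 4 here)
// computed after the loop, leaving the loop dead if 'sum' had no other use.
//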
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *BackedgeTakenCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
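/// For example (a sketch): if SCEV merged two pointer bases, expansion could
/// rewrite a GEP on one base into arithmetic rooted at the other; comparing
/// getPointerBase() of both values on each candidate rewrite catches this
/// before the replacement is kept.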
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, insert a corresponding
/// integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
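/// At the IR level, a minimal sketch (names are illustrative):
///   %iv = phi double [ 0.0, %ph ], [ %iv.next, %loop ]
///   %iv.next = fadd double %iv, 1.0
///   %cmp = fcmp olt double %iv.next, 1.0e4
/// becomes
///   %iv.int = phi i32 [ 0, %ph ], [ %iv.int.next, %loop ]
///   %iv.int.next = add i32 %iv.int, 1
///   %cmp = icmp slt i32 %iv.int.next, 10000
/// with a sitofp inserted for any remaining FP uses of the old IV.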
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check Incr uses. Reject this PN if the increment operation is not
  // an add or the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a ConstantFP, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // Whether the expansion is high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//
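//
// For example (a sketch): with a computable trip count, an LCSSA phi such as
//   exit:
//     %s = phi i32 [ %sum.next, %loop ]
// can have its incoming value replaced by an expansion of SCEV's exit value
// for %sum.next, removing the out-of-loop dependence on the in-loop
// computation.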

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (!SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which cannot be optimized
        // away.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
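/// For instance (an illustrative sketch):
///   header:
///     %a = phi i32 [ 0, %preheader ], [ %a.next, %latch ]
///     br i1 %inv.cond, label %exit, label %latch
///   exit:
///     %r = phi i32 [ %a, %header ]
/// Since %inv.cond is loop-invariant and the header dominates the latch, this
/// exit can only be taken on the first iteration, so %r can be rewritten to
/// use the preheader value 0 directly.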
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass, a loop can be deleted even when
  // ExitingBlocks.size() > 1. We avoid the multiple-ExitingBlocks case here
  // for simplicity.
  // TODO: If we see a testcase where a loop with multiple ExitingBlocks can
  // be deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by visitIVCast and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute the cost of all instructions on the induction variable more
  // comprehensively when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
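/// For instance (a sketch): a 32-bit IV whose uses are sign-extended to i64
/// gets a new i64 phi; users of 'sext i32 %iv to i64' are redirected to the
/// wide phi, and any uses that must stay 32-bit read 'trunc i64 %iv.wide'.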
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
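/// For instance (an illustrative sketch): given a sign-extended narrow IV and
///   %use = add nsw i32 %iv.narrow, %other
/// the nsw flag lets the sign extension distribute over the add, so extending
/// %other and combining it with the wide IV may fold to a SCEVAddRecExpr on
/// this loop, in which case %use itself can be computed in the wide type.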
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
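/// For example (a sketch, names illustrative): a remaining narrow use such as
///   store i32 %narrow.def, i32* %p
/// becomes
///   %t = trunc i64 %wide.def to i32
///   store i32 %t, i32* %p
/// after which %narrow.def itself can be removed as dead.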
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand).  The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}
1368 
1369 /// If the narrow use is an instruction whose two operands are the defining
1370 /// instruction of DU and a load instruction, then we have the following:
1371 /// if the load is hoisted outside the loop, we never reach this function,
1372 /// because scalar evolution analysis works fine in widenIVUse with variables
1373 /// hoisted outside the loop, and efficient code is subsequently generated
1374 /// without emitting truncate instructions. But when the load is not hoisted
1375 /// (whether due to a limitation in alias analysis or a true legality
1376 /// constraint), scalar evolution cannot proceed with loop-variant values and
1377 /// inefficient code is generated. This function handles the non-hoisted load
1378 /// special case by making the optimization generate the same type of code for
1379 /// hoisted and non-hoisted loads (widen the use and eliminate the sign extend
1380 /// instruction). This special case matters especially when the induction
1381 /// variables affect the addressing mode in code generation.
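///
/// For instance (illustrative IR; the value names are hypothetical):
///   %val = load i32, i32* %ptr        ; not hoisted out of the loop
///   %add = add nsw i32 %iv, %val
///   %sext = sext i32 %add to i64
/// is widened so that the add is computed directly in i64 and the sext is
/// eliminated, matching the code produced when the load can be hoisted.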
1382 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1383   Instruction *NarrowUse = DU.NarrowUse;
1384   Instruction *NarrowDef = DU.NarrowDef;
1385   Instruction *WideDef = DU.WideDef;
1386 
1387   // Handle the common case of add<nsw/nuw>
1388   const unsigned OpCode = NarrowUse->getOpcode();
1389   // Only Add/Sub/Mul instructions are supported.
1390   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1391       OpCode != Instruction::Mul)
1392     return false;
1393 
1394   // The operand that is not defined by NarrowDef of DU. Let's call it the
1395   // other operand.
1396   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1397   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1398          "bad DU");
1399 
1400   const SCEV *ExtendOperExpr = nullptr;
1401   const OverflowingBinaryOperator *OBO =
1402     cast<OverflowingBinaryOperator>(NarrowUse);
1403   ExtendKind ExtKind = getExtendKind(NarrowDef);
1404   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1405     ExtendOperExpr = SE->getSignExtendExpr(
1406       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1407   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1408     ExtendOperExpr = SE->getZeroExtendExpr(
1409       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1410   else
1411     return false;
1412 
1413   // We are interested in the other operand being a load instruction.
1414   // But, we should look into relaxing this restriction later on.
1415   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1416   if (I && I->getOpcode() != Instruction::Load)
1417     return false;
1418 
1419   // Verify that the defining operand is an AddRec.
1420   const SCEV *Op1 = SE->getSCEV(WideDef);
1421   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1422   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1423     return false;
1424   // Verify that the other operand is an extend.
1425   if (ExtKind == SignExtended) {
1426     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1427       return false;
1428   } else {
1429     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1430       return false;
1431   }
1432 
1433   if (ExtKind == SignExtended) {
1434     for (Use &U : NarrowUse->uses()) {
1435       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1436       if (!User || User->getType() != WideType)
1437         return false;
1438     }
1439   } else { // ExtKind == ZeroExtended
1440     for (Use &U : NarrowUse->uses()) {
1441       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1442       if (!User || User->getType() != WideType)
1443         return false;
1444     }
1445   }
1446 
1447   return true;
1448 }
1449 
1450 /// Special case for widening with variant loads (see
1451 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1452 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1453   Instruction *NarrowUse = DU.NarrowUse;
1454   Instruction *NarrowDef = DU.NarrowDef;
1455   Instruction *WideDef = DU.WideDef;
1456 
1457   ExtendKind ExtKind = getExtendKind(NarrowDef);
1458 
1459   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1460 
1461   // Generate the widened use instruction.
1462   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1463                    ? WideDef
1464                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1465                                       ExtKind, NarrowUse);
1466   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1467                    ? WideDef
1468                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1469                                       ExtKind, NarrowUse);
1470 
1471   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1472   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1473                                         NarrowBO->getName());
1474   IRBuilder<> Builder(NarrowUse);
1475   Builder.Insert(WideBO);
1476   WideBO->copyIRFlags(NarrowBO);
1477 
1478   if (ExtKind == SignExtended)
1479     ExtendKindMap[NarrowUse] = SignExtended;
1480   else
1481     ExtendKindMap[NarrowUse] = ZeroExtended;
1482 
1483   // Update the Use.
1484   if (ExtKind == SignExtended) {
1485     for (Use &U : NarrowUse->uses()) {
1486       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1487       if (User && User->getType() == WideType) {
1488         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1489                           << *WideBO << "\n");
1490         ++NumElimExt;
1491         User->replaceAllUsesWith(WideBO);
1492         DeadInsts.emplace_back(User);
1493       }
1494     }
1495   } else { // ExtKind == ZeroExtended
1496     for (Use &U : NarrowUse->uses()) {
1497       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1498       if (User && User->getType() == WideType) {
1499         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1500                           << *WideBO << "\n");
1501         ++NumElimExt;
1502         User->replaceAllUsesWith(WideBO);
1503         DeadInsts.emplace_back(User);
1504       }
1505     }
1506   }
1507 }
1508 
1509 /// Determine whether an individual user of the narrow IV can be widened. If so,
1510 /// return the wide clone of the user.
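///
/// The most profitable case handled here (illustrative IR) is eliminating an
/// extend outright:
///   %sext = sext i32 %narrow.iv to i64
/// is replaced by the wide IV itself, so the extend disappears.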
1511 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1512   assert(ExtendKindMap.count(DU.NarrowDef) &&
1513          "Should already know the kind of extension used to widen NarrowDef");
1514 
1515   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1516   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1517     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1518       // For LCSSA phis, sink the truncate outside the loop.
1519       // After SimplifyCFG most loop exit targets have a single predecessor.
1520       // Otherwise fall back to a truncate within the loop.
1521       if (UsePhi->getNumOperands() != 1)
1522         truncateIVUse(DU, DT, LI);
1523       else {
1524         // Widening the PHI requires us to insert a trunc.  The logical place
1525         // for this trunc is in the same BB as the PHI.  This is not possible if
1526         // the BB is terminated by a catchswitch.
1527         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1528           return nullptr;
1529 
1530         PHINode *WidePhi =
1531           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1532                           UsePhi);
1533         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1534         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1535         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1536         UsePhi->replaceAllUsesWith(Trunc);
1537         DeadInsts.emplace_back(UsePhi);
1538         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1539                           << *WidePhi << "\n");
1540       }
1541       return nullptr;
1542     }
1543   }
1544 
1545   // This narrow use can be widened by a sext if it's non-negative or its narrow
1546   // def was widened by a sext. Same for zext.
1547   auto canWidenBySExt = [&]() {
1548     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1549   };
1550   auto canWidenByZExt = [&]() {
1551     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1552   };
1553 
1554   // Our raison d'etre! Eliminate sign and zero extension.
1555   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1556       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1557     Value *NewDef = DU.WideDef;
1558     if (DU.NarrowUse->getType() != WideType) {
1559       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1560       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1561       if (CastWidth < IVWidth) {
1562         // The cast isn't as wide as the IV, so insert a Trunc.
1563         IRBuilder<> Builder(DU.NarrowUse);
1564         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1565       } else {
1567         // A wider extend was hidden behind a narrower one. This may induce
1568         // another round of IV widening in which the intermediate IV becomes
1569         // dead. It should be very rare.
1570         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1571                           << " not wide enough to subsume " << *DU.NarrowUse
1572                           << "\n");
1573         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1574         NewDef = DU.NarrowUse;
1575       }
1576     }
1577     if (NewDef != DU.NarrowUse) {
1578       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1579                         << " replaced by " << *DU.WideDef << "\n");
1580       ++NumElimExt;
1581       DU.NarrowUse->replaceAllUsesWith(NewDef);
1582       DeadInsts.emplace_back(DU.NarrowUse);
1583     }
1584     // Now that the extend is gone, we want to expose its uses for potential
1585     // further simplification. We don't need to directly inform SimplifyIVUsers
1586     // of the new users, because their parent IV will be processed later as a
1587     // new loop phi. If we preserved IVUsers analysis, we would also want to
1588     // push the uses of WideDef here.
1589 
1590     // No further widening is needed. The deceased [sz]ext has done it for us.
1591     return nullptr;
1592   }
1593 
1594   // Does this user itself evaluate to a recurrence after widening?
1595   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1596   if (!WideAddRec.first)
1597     WideAddRec = getWideRecurrence(DU);
1598 
1599   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1600   if (!WideAddRec.first) {
1601     // If use is a loop condition, try to promote the condition instead of
1602     // truncating the IV first.
1603     if (widenLoopCompare(DU))
1604       return nullptr;
1605 
1606     // We are about to generate a truncate instruction that may hurt
1607     // performance because the scalar evolution expression computed earlier
1608     // in WideAddRec.first does not indicate a polynomial induction expression.
1609     // In that case, look at the operands of the use instruction to determine
1610     // if we can still widen the use instead of truncating its operand.
1611     if (widenWithVariantLoadUse(DU)) {
1612       widenWithVariantLoadUseCodegen(DU);
1613       return nullptr;
1614     }
1615 
1616     // This user does not evaluate to a recurrence after widening, so don't
1617     // follow it. Instead insert a Trunc to kill off the original use,
1618     // eventually isolating the original narrow IV so it can be removed.
1619     truncateIVUse(DU, DT, LI);
1620     return nullptr;
1621   }
1622   // Assume block terminators cannot evaluate to a recurrence. We can't
1623   // insert a Trunc after a terminator if there happens to be a critical edge.
1624   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1625          "SCEV is not expected to evaluate a block terminator");
1626 
1627   // Reuse the IV increment that SCEVExpander created as long as it dominates
1628   // NarrowUse.
1629   Instruction *WideUse = nullptr;
1630   if (WideAddRec.first == WideIncExpr &&
1631       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1632     WideUse = WideInc;
1633   else {
1634     WideUse = cloneIVUser(DU, WideAddRec.first);
1635     if (!WideUse)
1636       return nullptr;
1637   }
1638   // Evaluation of WideAddRec ensured that the narrow expression could be
1639   // extended outside the loop without overflow. This suggests that the wide use
1640   // evaluates to the same expression as the extended narrow use, but doesn't
1641   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1642   // where it fails, we simply throw away the newly created wide use.
1643   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1644     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1645                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1646                       << "\n");
1647     DeadInsts.emplace_back(WideUse);
1648     return nullptr;
1649   }
1650 
1651   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1652   // Returning WideUse pushes it on the worklist.
1653   return WideUse;
1654 }
1655 
1656 /// Add eligible users of NarrowDef to NarrowIVUsers.
1657 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1658   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1659   bool NonNegativeDef =
1660       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1661                            SE->getConstant(NarrowSCEV->getType(), 0));
1662   for (User *U : NarrowDef->users()) {
1663     Instruction *NarrowUser = cast<Instruction>(U);
1664 
1665     // Handle data flow merges and bizarre phi cycles.
1666     if (!Widened.insert(NarrowUser).second)
1667       continue;
1668 
1669     bool NonNegativeUse = false;
1670     if (!NonNegativeDef) {
1671       // We might have control-dependent range information for this context.
1672       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1673         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1674     }
1675 
1676     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1677                                NonNegativeDef || NonNegativeUse);
1678   }
1679 }
1680 
1681 /// Process a single induction variable. First use the SCEVExpander to create a
1682 /// wide induction variable that evaluates to the same recurrence as the
1683 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1684 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1685 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1686 ///
1687 /// It would be simpler to delete uses as they are processed, but we must avoid
1688 /// invalidating SCEV expressions.
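///
/// Conceptually (illustrative IR; names are hypothetical), a narrow IV
///   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
/// gains a companion
///   %iv.wide = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]
/// and the narrow IV's users are rewritten against %iv.wide where legal.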
1689 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1690   // Is this phi an induction variable?
1691   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1692   if (!AddRec)
1693     return nullptr;
1694 
1695   // Widen the induction variable expression.
1696   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1697                                ? SE->getSignExtendExpr(AddRec, WideType)
1698                                : SE->getZeroExtendExpr(AddRec, WideType);
1699 
1700   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1701          "Expect the new IV expression to preserve its type");
1702 
1703   // Can the IV be extended outside the loop without overflow?
1704   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1705   if (!AddRec || AddRec->getLoop() != L)
1706     return nullptr;
1707 
1708   // An AddRec must have loop-invariant operands. Since this AddRec is
1709   // materialized by a loop header phi, the expression cannot have any post-loop
1710   // operands, so they must dominate the loop header.
1711   assert(
1712       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1713       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1714       "Loop header phi recurrence inputs do not dominate the loop");
1715 
1716   // Iterate over IV uses (including transitive ones) looking for IV increments
1717   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1718   // the increment, calculate control-dependent range information based on
1719   // dominating conditions inside of the loop (e.g. a range check inside of the
1720   // loop). Calculated ranges are stored in the PostIncRangeInfos map.
1721   //
1722   // Control-dependent range information is later used to prove that a narrow
1723   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1724   // this on demand because when pushNarrowIVUsers needs this information some
1725   // of the dominating conditions might be already widened.
1726   if (UsePostIncrementRanges)
1727     calculatePostIncRanges(OrigPhi);
1728 
1729   // The rewriter provides a value for the desired IV expression. This may
1730   // either find an existing phi or materialize a new one. Either way, we
1731   // expect a well-formed cyclic phi-with-increments, i.e. any operand not part
1732   // of the phi-SCC dominates the loop entry.
1733   Instruction *InsertPt = &L->getHeader()->front();
1734   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1735 
1736   // Remembering the WideIV increment generated by SCEVExpander allows
1737   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1738   // employ a general reuse mechanism because the call above is the only call to
1739   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1740   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1741     WideInc =
1742       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1743     WideIncExpr = SE->getSCEV(WideInc);
1744     // Propagate the debug location associated with the original loop increment
1745     // to the new (widened) increment.
1746     auto *OrigInc =
1747       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1748     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1749   }
1750 
1751   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1752   ++NumWidened;
1753 
1754   // Traverse the def-use chain using a worklist starting at the original IV.
1755   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1756 
1757   Widened.insert(OrigPhi);
1758   pushNarrowIVUsers(OrigPhi, WidePhi);
1759 
1760   while (!NarrowIVUsers.empty()) {
1761     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1762 
1763     // Process a def-use edge. This may replace the use, so don't hold a
1764     // use_iterator across it.
1765     Instruction *WideUse = widenIVUse(DU, Rewriter);
1766 
1767     // Follow all def-use edges from the previous narrow use.
1768     if (WideUse)
1769       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1770 
1771     // widenIVUse may have removed the def-use edge.
1772     if (DU.NarrowDef->use_empty())
1773       DeadInsts.emplace_back(DU.NarrowDef);
1774   }
1775 
1776   // Attach any debug information to the new PHI. Since OrigPhi and WidePhi
1777   // evaluate the same recurrence, we can just copy the debug info over.
1778   SmallVector<DbgValueInst *, 1> DbgValues;
1779   llvm::findDbgValues(DbgValues, OrigPhi);
1780   auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
1781                                      ValueAsMetadata::get(WidePhi));
1782   for (auto &DbgValue : DbgValues)
1783     DbgValue->setOperand(0, MDPhi);
1784   return WidePhi;
1785 }
1786 
1787 /// Calculates the control-dependent range for the given def at the given
1788 /// context by looking at dominating conditions inside of the loop.
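///
/// For example (illustrative): if NarrowDef is 'add nsw i32 %x, 7' and a
/// dominating branch guarantees '%x s< 100' on the path to NarrowUser, then
/// the signed range of NarrowDef at NarrowUser is known to stay below 107.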
1789 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1790                                     Instruction *NarrowUser) {
1791   using namespace llvm::PatternMatch;
1792 
1793   Value *NarrowDefLHS;
1794   const APInt *NarrowDefRHS;
1795   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1796                                  m_APInt(NarrowDefRHS))) ||
1797       !NarrowDefRHS->isNonNegative())
1798     return;
1799 
1800   auto UpdateRangeFromCondition = [&] (Value *Condition,
1801                                        bool TrueDest) {
1802     CmpInst::Predicate Pred;
1803     Value *CmpRHS;
1804     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1805                                  m_Value(CmpRHS))))
1806       return;
1807 
1808     CmpInst::Predicate P =
1809             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1810 
1811     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1812     auto CmpConstrainedLHSRange =
1813             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1814     auto NarrowDefRange =
1815             CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1816 
1817     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1818   };
1819 
1820   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1821     if (!HasGuards)
1822       return;
1823 
1824     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1825                                      Ctx->getParent()->rend())) {
1826       Value *C = nullptr;
1827       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1828         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1829     }
1830   };
1831 
1832   UpdateRangeFromGuards(NarrowUser);
1833 
1834   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1835   // If NarrowUserBB is statically unreachable, asking dominator queries may
1836   // yield surprising results (e.g. the block may not have a dom tree node).
1837   if (!DT->isReachableFromEntry(NarrowUserBB))
1838     return;
1839 
1840   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1841        L->contains(DTB->getBlock());
1842        DTB = DTB->getIDom()) {
1843     auto *BB = DTB->getBlock();
1844     auto *TI = BB->getTerminator();
1845     UpdateRangeFromGuards(TI);
1846 
1847     auto *BI = dyn_cast<BranchInst>(TI);
1848     if (!BI || !BI->isConditional())
1849       continue;
1850 
1851     auto *TrueSuccessor = BI->getSuccessor(0);
1852     auto *FalseSuccessor = BI->getSuccessor(1);
1853 
1854     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1855       return BBE.isSingleEdge() &&
1856              DT->dominates(BBE, NarrowUser->getParent());
1857     };
1858 
1859     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1860       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1861 
1862     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1863       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1864   }
1865 }
1866 
1867 /// Calculates the PostIncRangeInfos map for the given IV.
1868 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1869   SmallPtrSet<Instruction *, 16> Visited;
1870   SmallVector<Instruction *, 6> Worklist;
1871   Worklist.push_back(OrigPhi);
1872   Visited.insert(OrigPhi);
1873 
1874   while (!Worklist.empty()) {
1875     Instruction *NarrowDef = Worklist.pop_back_val();
1876 
1877     for (Use &U : NarrowDef->uses()) {
1878       auto *NarrowUser = cast<Instruction>(U.getUser());
1879 
1880       // Don't go looking outside the current loop.
1881       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1882       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1883         continue;
1884 
1885       if (!Visited.insert(NarrowUser).second)
1886         continue;
1887 
1888       Worklist.push_back(NarrowUser);
1889 
1890       calculatePostIncRange(NarrowDef, NarrowUser);
1891     }
1892   }
1893 }
1894 
1895 //===----------------------------------------------------------------------===//
1896 //  Live IV Reduction - Minimize IVs live across the loop.
1897 //===----------------------------------------------------------------------===//
1898 
1899 //===----------------------------------------------------------------------===//
1900 //  Simplification of IV users based on SCEV evaluation.
1901 //===----------------------------------------------------------------------===//
1902 
1903 namespace {
1904 
1905 class IndVarSimplifyVisitor : public IVVisitor {
1906   ScalarEvolution *SE;
1907   const TargetTransformInfo *TTI;
1908   PHINode *IVPhi;
1909 
1910 public:
1911   WideIVInfo WI;
1912 
1913   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1914                         const TargetTransformInfo *TTI,
1915                         const DominatorTree *DTree)
1916     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1917     DT = DTree;
1918     WI.NarrowIV = IVPhi;
1919   }
1920 
1921   // Implement the interface used by simplifyUsersOfIV.
1922   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1923 };
1924 
1925 } // end anonymous namespace
1926 
1927 /// Iteratively perform simplification on a worklist of IV users. Each
1928 /// successive simplification may push more users which may themselves be
1929 /// candidates for simplification.
1930 ///
1931 /// Sign/Zero extend elimination is interleaved with IV simplification.
1932 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1933                                        SCEVExpander &Rewriter,
1934                                        LoopInfo *LI) {
1935   SmallVector<WideIVInfo, 8> WideIVs;
1936 
1937   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1938           Intrinsic::getName(Intrinsic::experimental_guard));
1939   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1940 
1941   SmallVector<PHINode*, 8> LoopPhis;
1942   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1943     LoopPhis.push_back(cast<PHINode>(I));
1944   }
1945   // Each round of simplification iterates through the SimplifyIVUsers worklist
1946   // for all current phis, then determines whether any IVs can be
1947   // widened. Widening adds new phis to LoopPhis, inducing another round of
1948   // simplification on the wide IVs.
1949   bool Changed = false;
1950   while (!LoopPhis.empty()) {
1951     // Evaluate as many IV expressions as possible before widening any IVs. This
1952     // forces SCEV to set no-wrap flags before evaluating sign/zero
1953     // extension. The first time SCEV attempts to normalize sign/zero extension,
1954     // the result becomes final. So for the most predictable results, we delay
1955     // evaluation of sign/zero extends until needed, and avoid running
1956     // other SCEV based analysis prior to simplifyAndExtend.
1957     do {
1958       PHINode *CurrIV = LoopPhis.pop_back_val();
1959 
1960       // Information about sign/zero extensions of CurrIV.
1961       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1962 
1963       Changed |=
1964           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1965 
1966       if (Visitor.WI.WidestNativeType) {
1967         WideIVs.push_back(Visitor.WI);
1968       }
1969     } while (!LoopPhis.empty());
1970 
1971     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1972       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1973       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1974         Changed = true;
1975         LoopPhis.push_back(WidePhi);
1976       }
1977     }
1978   }
1979   return Changed;
1980 }
1981 
1982 //===----------------------------------------------------------------------===//
1983 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1984 //===----------------------------------------------------------------------===//
1985 
1986 /// Given a Value which is hoped to be part of an add recurrence in the given
1987 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1988 /// that this is less general than SCEV's AddRec checking.
1989 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1990   Instruction *IncI = dyn_cast<Instruction>(IncV);
1991   if (!IncI)
1992     return nullptr;
1993 
1994   switch (IncI->getOpcode()) {
1995   case Instruction::Add:
1996   case Instruction::Sub:
1997     break;
1998   case Instruction::GetElementPtr:
1999     // An IV counter must preserve its type.
2000     if (IncI->getNumOperands() == 2)
2001       break;
2002     LLVM_FALLTHROUGH;
2003   default:
2004     return nullptr;
2005   }
2006 
2007   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2008   if (Phi && Phi->getParent() == L->getHeader()) {
2009     if (L->isLoopInvariant(IncI->getOperand(1)))
2010       return Phi;
2011     return nullptr;
2012   }
2013   if (IncI->getOpcode() == Instruction::GetElementPtr)
2014     return nullptr;
2015 
2016   // Allow add/sub to be commuted.
2017   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2018   if (Phi && Phi->getParent() == L->getHeader()) {
2019     if (L->isLoopInvariant(IncI->getOperand(0)))
2020       return Phi;
2021   }
2022   return nullptr;
2023 }
2024 
2025 /// Given a loop with one backedge and one exit, return the ICmpInst
2026 /// controlling the sole loop exit.  There is no guarantee that the exiting
2027 /// block is also the latch.
2028 static ICmpInst *getLoopTest(Loop *L, BasicBlock *ExitingBB) {
2029 
2030   BasicBlock *LatchBlock = L->getLoopLatch();
2031   // Don't bother with LFTR if the loop is not properly simplified.
2032   if (!LatchBlock)
2033     return nullptr;
2034 
2035   BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2036   assert(BI && "expected exit branch");
2037 
2038   return dyn_cast<ICmpInst>(BI->getCondition());
2039 }
2040 
2041 /// linearFunctionTestReplace policy. Return true unless we can show that the
2042 /// current exit test is already sufficiently canonical.
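///
/// Roughly, the canonical form we are after (illustrative IR) is
///   %exitcond = icmp ne i32 %iv.next, %limit   ; %limit loop-invariant
/// where %iv.next is the latch increment of a simple counter phi; any other
/// exit test shape answers true here.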
2043 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2044   // Do LFTR to simplify the exit condition to an ICMP.
2045   ICmpInst *Cond = getLoopTest(L, ExitingBB);
2046   if (!Cond)
2047     return true;
2048 
2049   // Do LFTR to simplify the exit ICMP to EQ/NE
2050   ICmpInst::Predicate Pred = Cond->getPredicate();
2051   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2052     return true;
2053 
2054   // Look for a loop invariant RHS
2055   Value *LHS = Cond->getOperand(0);
2056   Value *RHS = Cond->getOperand(1);
2057   if (!L->isLoopInvariant(RHS)) {
2058     if (!L->isLoopInvariant(LHS))
2059       return true;
2060     std::swap(LHS, RHS);
2061   }
2062   // Look for a simple IV counter LHS
2063   PHINode *Phi = dyn_cast<PHINode>(LHS);
2064   if (!Phi)
2065     Phi = getLoopPhiForCounter(LHS, L);
2066 
2067   if (!Phi)
2068     return true;
2069 
2070   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2071   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2072   if (Idx < 0)
2073     return true;
2074 
2075   // Do LFTR if the exit condition's IV is *not* a simple counter.
2076   Value *IncV = Phi->getIncomingValue(Idx);
2077   return Phi != getLoopPhiForCounter(IncV, L);
2078 }
2079 
2080 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2081 /// down to checking that all operands are constant and listing instructions
2082 /// that may hide undef.
2083 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2084                                unsigned Depth) {
2085   if (isa<Constant>(V))
2086     return !isa<UndefValue>(V);
2087 
2088   if (Depth >= 6)
2089     return false;
2090 
2091   // Conservatively handle non-constant non-instructions. For example, Arguments
2092   // may be undef.
2093   Instruction *I = dyn_cast<Instruction>(V);
2094   if (!I)
2095     return false;
2096 
2097   // Load and return values may be undef.
2098   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2099     return false;
2100 
2101   // Optimistically handle other instructions.
2102   for (Value *Op : I->operands()) {
2103     if (!Visited.insert(Op).second)
2104       continue;
2105     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2106       return false;
2107   }
2108   return true;
2109 }
2110 
2111 /// Return true if the given value is concrete. We must prove that undef can
2112 /// never reach it.
2113 ///
2114 /// TODO: If we decide that this is a good approach to checking for undef, we
2115 /// may factor it into a common location.
2116 static bool hasConcreteDef(Value *V) {
2117   SmallPtrSet<Value*, 8> Visited;
2118   Visited.insert(V);
2119   return hasConcreteDefImpl(V, Visited, 0);
2120 }
2121 
2122 /// Return true if this IV has any uses other than the (soon to be rewritten)
2123 /// loop exit test.
2124 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2125   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2126   Value *IncV = Phi->getIncomingValue(LatchIdx);
2127 
2128   for (User *U : Phi->users())
2129     if (U != Cond && U != IncV) return false;
2130 
2131   for (User *U : IncV->users())
2132     if (U != Cond && U != Phi) return false;
2133   return true;
2134 }
2135 
2136 /// Return true if the given phi is a "counter" in L.  A counter is an
2137 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2138 /// step of 1.  Note that L must have exactly one latch.
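///
/// For instance (illustrative IR), the phi below qualifies:
///   %i = phi i64 [ %start, %preheader ], [ %i.next, %latch ]
///   %i.next = add i64 %i, 1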
2139 static bool isLoopCounter(PHINode* Phi, Loop *L,
2140                           ScalarEvolution *SE) {
2141   assert(Phi->getParent() == L->getHeader());
2142   assert(L->getLoopLatch());
2143 
2144   if (!SE->isSCEVable(Phi->getType()))
2145     return false;
2146 
2147   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2148   if (!AR || AR->getLoop() != L || !AR->isAffine())
2149     return false;
2150 
2151   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2152   if (!Step || !Step->isOne())
2153     return false;
2154 
2155   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2156   Value *IncV = Phi->getIncomingValue(LatchIdx);
2157   return (getLoopPhiForCounter(IncV, L) == Phi);
2158 }
2159 
2160 /// Search the loop header for a loop counter (an add rec w/step of one)
2161 /// suitable for use by LFTR.  If multiple counters are available, select the
2162 /// "best" one based on profitability heuristics.
2163 ///
2164 /// BECount may be an i8* pointer type. The pointer difference is already a
2165 /// valid count without scaling the address stride, so it remains a pointer
2166 /// expression as far as SCEV is concerned.
2167 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2168                                 const SCEV *BECount, ScalarEvolution *SE) {
2169   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2170 
2171   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2172 
2173   // Loop over all of the PHI nodes, looking for a simple counter.
2174   PHINode *BestPhi = nullptr;
2175   const SCEV *BestInit = nullptr;
2176   BasicBlock *LatchBlock = L->getLoopLatch();
2177   assert(LatchBlock && "needsLFTR should guarantee a loop latch");
2178   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2179 
2180   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2181     PHINode *Phi = cast<PHINode>(I);
2182     if (!isLoopCounter(Phi, L, SE))
2183       continue;
2184 
2185     // Avoid comparing an integer IV against a pointer Limit.
2186     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2187       continue;
2188 
2189     const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2190 
2191     // AR may be a pointer type, while BECount is an integer type.
2192     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2193     // AR may not be a narrower type, or we may never exit.
2194     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2195     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2196       continue;
2197 
2198     // Avoid reusing a potentially undef value to compute other values that may
2199     // have originally had a concrete definition.
2200     if (!hasConcreteDef(Phi)) {
2201       // We explicitly allow unknown phis as long as they are already used by
2202       // the loop test. In this case we assume that performing LFTR could not
2203       // increase the number of undef users.
2204       // TODO: Generalize this to allow *any* loop exit which is known to
2205       // execute on each iteration
2206       if (L->getExitingBlock())
2207         if (ICmpInst *Cond = getLoopTest(L, ExitingBB))
2208           if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) &&
2209               Phi != getLoopPhiForCounter(Cond->getOperand(1), L))
2210             continue;
2211     }
2212 
2213     const SCEV *Init = AR->getStart();
2214 
2215     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2216       // Don't force a live loop counter if another IV can be used.
2217       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2218         continue;
2219 
2220       // Prefer to count-from-zero. This is a more "canonical" counter form. It
2221       // also prefers integer to pointer IVs.
2222       if (BestInit->isZero() != Init->isZero()) {
2223         if (BestInit->isZero())
2224           continue;
2225       }
2226       // If two IVs both count from zero or both count from nonzero then the
2227       // narrower is likely a dead phi that has been widened. Use the wider phi
2228       // to allow the other to be eliminated.
2229       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2230         continue;
2231     }
2232     BestPhi = Phi;
2233     BestInit = Init;
2234   }
2235   return BestPhi;
2236 }
2237 
2238 /// Insert an IR expression which computes the value held by the IV IndVar
2239 /// (which must be a loop counter w/unit stride) after the backedge of loop L
2240 /// is taken IVCount times.
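///
/// E.g. (illustrative) for an integer counter starting at %init with
/// IVCount == %n, this expands code computing '%init + %n'; for the i8*
/// pointer-IV case it instead emits an equivalent GEP off the start value.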
2241 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2242                            const SCEV *IVCount, Loop *L,
2243                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2244   assert(isLoopCounter(IndVar, L, SE));
2245   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2246   const SCEV *IVInit = AR->getStart();
2247 
2248   // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
2249   // finds a valid pointer IV. Zero extend BECount in order to materialize a
2250   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2251   // the existing GEPs whenever possible.
2252   if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
2253     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2254     // signed value. IVCount on the other hand represents the loop trip count,
2255     // which is an unsigned value. FindLoopCounter only allows induction
2256     // variables that have a positive unit stride of one. This means we don't
2257     // have to handle the case of negative offsets (yet) and just need to zero
2258     // extend IVCount.
2259     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2260     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
2261 
2262     // Expand the code for the iteration count.
2263     assert(SE->isLoopInvariant(IVOffset, L) &&
2264            "Computed iteration count is not loop invariant!");
2265     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2266     Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
2267 
2268     Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
2269     assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
2270     // We could handle pointer IVs other than i8*, but we need to compensate for
2271     // gep index scaling.
2272     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2273                              cast<PointerType>(GEPBase->getType())
2274                                  ->getElementType())->isOne() &&
2275            "unit stride pointer IV must be i8*");
2276 
2277     IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
2278     return Builder.CreateGEP(GEPBase->getType()->getPointerElementType(),
2279                              GEPBase, GEPOffset, "lftr.limit");
2280   } else {
2281     // In any other case, convert both IVInit and IVCount to integers before
2282     // comparing. This may result in SCEV expansion of pointers, but in practice
2283     // SCEV will fold the pointer arithmetic away as such:
2284     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2285     //
2286     // Valid cases: (1) both are integers, which is the most common; (2) both
2287     // may be pointers for simple memset-style loops.
2288     //
2289     // IVInit integer and IVCount pointer would only occur if a canonical IV
2290     // were generated on top of case #2, which is not expected.
2291 
2292     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2293     const SCEV *IVLimit = nullptr;
2294     // For unit stride, IVCount = Start + BECount with 2's complement overflow.
2295     // For non-zero Start, compute IVCount here.
2296     if (AR->getStart()->isZero())
2297       IVLimit = IVCount;
2298     else {
2299       const SCEV *IVInit = AR->getStart();
2300 
2301       // For integer IVs, truncate the IV before computing IVInit + BECount.
2302       if (SE->getTypeSizeInBits(IVInit->getType())
2303           > SE->getTypeSizeInBits(IVCount->getType()))
2304         IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
2305 
2306       IVLimit = SE->getAddExpr(IVInit, IVCount);
2307     }
2308     // Expand the code for the iteration count.
2309     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2310     IRBuilder<> Builder(BI);
2311     assert(SE->isLoopInvariant(IVLimit, L) &&
2312            "Computed iteration count is not loop invariant!");
2313     // Ensure that we generate the same type as IndVar, or a smaller integer
2314     // type. In the presence of null pointer values, we have an integer type
2315     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2316     Type *LimitTy = IVCount->getType()->isPointerTy() ?
2317       IndVar->getType() : IVCount->getType();
2318     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2319   }
2320 }
2321 
2322 /// This method rewrites the exit condition of the loop to be a canonical !=
2323 /// comparison against the incremented loop induction variable.  This pass is
2324 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2325 /// determine a loop-invariant trip count of the loop, which covers a much
2326 /// broader class of loops than just those with linear exit tests.
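///
/// For example (illustrative IR), an exit test such as
///   %cmp = icmp slt i32 %i, %n
/// is rewritten into the canonical form
///   %exitcond = icmp ne i32 %i.next, %exit.limit
/// where %exit.limit is the loop-invariant value produced by genLoopLimit.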
2327 bool IndVarSimplify::
2328 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2329                           const SCEV *BackedgeTakenCount,
2330                           PHINode *IndVar, SCEVExpander &Rewriter) {
2331   // Initialize CmpIndVar and IVCount to their preincremented values.
2332   Value *CmpIndVar = IndVar;
2333   const SCEV *IVCount = BackedgeTakenCount;
2334 
2335   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2336 
2337   // If the exiting block is the same as the backedge block, we prefer to
2338   // compare against the post-incremented value, otherwise we must compare
2339   // against the preincremented value.
2340   if (ExitingBB == L->getLoopLatch()) {
2341     // Add one to the "backedge-taken" count to get the trip count.
2342     // This addition may overflow, which is valid as long as the comparison is
2343     // truncated to BackedgeTakenCount->getType().
2344     IVCount = SE->getAddExpr(BackedgeTakenCount,
2345                              SE->getOne(BackedgeTakenCount->getType()));
2346     // The BackedgeTaken expression contains the number of times that the
2347     // backedge branches to the loop header.  This is one less than the
2348     // number of times the loop executes, so use the incremented indvar.
2349     CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBB);
2350   }
2351 
2352   // It may be necessary to drop nowrap flags on the incrementing instruction
2353   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2354   // case the increment might have previously been poison on the last iteration
2355   // only) or if LFTR switches to a different IV that was previously dynamically
2356   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2357   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2358   // check), because the pre-inc addrec flags may be adopted from the original
2359   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2360   // TODO: This handling is inaccurate for one case: If we switch to a
2361   // dynamically dead IV that wraps on the first loop iteration only, which is
2362   // not covered by the post-inc addrec. (If the new IV was not dynamically
2363   // dead, it could not be poison on the first iteration in the first place.)
2364   Value *IncVar = IndVar->getIncomingValueForBlock(L->getLoopLatch());
2365   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2366     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2367     if (BO->hasNoUnsignedWrap())
2368       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2369     if (BO->hasNoSignedWrap())
2370       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2371   }
2372 
2373   Value *ExitCnt = genLoopLimit(IndVar, ExitingBB, IVCount, L, Rewriter, SE);
2374   assert(ExitCnt->getType()->isPointerTy() ==
2375              IndVar->getType()->isPointerTy() &&
2376          "genLoopLimit missed a cast");
2377 
2378   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2379   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2380   ICmpInst::Predicate P;
2381   if (L->contains(BI->getSuccessor(0)))
2382     P = ICmpInst::ICMP_NE;
2383   else
2384     P = ICmpInst::ICMP_EQ;
2385 
2386   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2387                     << "      LHS:" << *CmpIndVar << '\n'
2388                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2389                     << "\n"
2390                     << "      RHS:\t" << *ExitCnt << "\n"
2391                     << "  IVCount:\t" << *IVCount << "\n");
2392 
2393   IRBuilder<> Builder(BI);
2394 
2395   // The new loop exit condition should reuse the debug location of the
2396   // original loop exit condition.
2397   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2398     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2399 
2400   // LFTR can ignore IV overflow and truncate to the width of
2401   // BECount. This avoids materializing the add(zext(add)) expression.
2402   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2403   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2404   if (CmpIndVarSize > ExitCntSize) {
2405     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2406     const SCEV *ARStart = AR->getStart();
2407     const SCEV *ARStep = AR->getStepRecurrence(*SE);
2408     // For constant IVCount, avoid truncation.
2409     if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
2410       const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
2411       APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
2412       // Note that the post-inc value of BackedgeTakenCount may have overflowed
2413       // above such that IVCount is now zero.
2414       if (IVCount != BackedgeTakenCount && Count == 0) {
2415         Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
2416         ++Count;
2417       } else
2419         Count = Count.zext(CmpIndVarSize);
2420       APInt NewLimit;
2421       if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
2422         NewLimit = Start - Count;
2423       else
2424         NewLimit = Start + Count;
2425       ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
2426 
2427       LLVM_DEBUG(dbgs() << "  Widen RHS:\t" << *ExitCnt << "\n");
2428     } else {
2429       // We try to extend trip count first. If that doesn't work we truncate IV.
2430       // Zext(trunc(IV)) == IV implies equivalence of the following two:
2431       // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
2432       // one of the two holds, extend the trip count, otherwise we truncate IV.
2433       bool Extended = false;
2434       const SCEV *IV = SE->getSCEV(CmpIndVar);
2435       const SCEV *ZExtTrunc =
2436            SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2437                                                      ExitCnt->getType()),
2438                                  CmpIndVar->getType());
2439 
2440       if (ZExtTrunc == IV) {
2441         Extended = true;
2442         ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2443                                      "wide.trip.count");
2444       } else {
2445         const SCEV *SExtTrunc =
2446           SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2447                                                     ExitCnt->getType()),
2448                                 CmpIndVar->getType());
2449         if (SExtTrunc == IV) {
2450           Extended = true;
2451           ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2452                                        "wide.trip.count");
2453         }
2454       }
2455 
2456       if (!Extended)
2457         CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2458                                         "lftr.wideiv");
2459     }
2460   }
2461   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2462   Value *OrigCond = BI->getCondition();
2463   // It's tempting to use replaceAllUsesWith here to fully replace the old
2464   // comparison, but that's not immediately safe, since users of the old
2465   // comparison may not be dominated by the new comparison. Instead, just
2466   // update the branch to use the new comparison; in the common case this
2467   // will make the old comparison dead.
2468   BI->setCondition(Cond);
2469   DeadInsts.push_back(OrigCond);
2470 
2471   ++NumLFTR;
2472   return true;
2473 }
2474 
2475 //===----------------------------------------------------------------------===//
2476 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2477 //===----------------------------------------------------------------------===//
2478 
2479 /// If there's a single exit block, sink any loop-invariant values that
2480 /// were defined in the preheader but not used inside the loop into the
2481 /// exit block to reduce register pressure in the loop.
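///
/// E.g. (illustrative) a preheader-defined '%inv = mul i32 %a, %b' whose only
/// users sit in the exit block is moved into the exit block, so %inv is no
/// longer live across the loop body.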
2482 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2483   BasicBlock *ExitBlock = L->getExitBlock();
2484   if (!ExitBlock) return false;
2485 
2486   BasicBlock *Preheader = L->getLoopPreheader();
2487   if (!Preheader) return false;
2488 
2489   bool MadeAnyChanges = false;
2490   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2491   BasicBlock::iterator I(Preheader->getTerminator());
2492   while (I != Preheader->begin()) {
2493     --I;
2494     // New instructions were inserted at the end of the preheader.
2495     if (isa<PHINode>(I))
2496       break;
2497 
2498     // Don't move instructions which might have side effects, since the side
2499     // effects need to complete before instructions inside the loop.  Also don't
2500     // move instructions which might read memory, since the loop may modify
2501     // memory. Note that it's okay if the instruction might have undefined
2502     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2503     // block.
2504     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2505       continue;
2506 
2507     // Skip debug info intrinsics.
2508     if (isa<DbgInfoIntrinsic>(I))
2509       continue;
2510 
2511     // Skip eh pad instructions.
2512     if (I->isEHPad())
2513       continue;
2514 
2515     // Don't sink alloca: we never want to sink static alloca's out of the
2516     // entry block, and correctly sinking dynamic alloca's requires
2517     // checks for stacksave/stackrestore intrinsics.
2518     // FIXME: Refactor this check somehow?
2519     if (isa<AllocaInst>(I))
2520       continue;
2521 
2522     // Determine if there is a use in or before the loop (direct or
2523     // otherwise).
2524     bool UsedInLoop = false;
2525     for (Use &U : I->uses()) {
2526       Instruction *User = cast<Instruction>(U.getUser());
2527       BasicBlock *UseBB = User->getParent();
2528       if (PHINode *P = dyn_cast<PHINode>(User)) {
2529         unsigned i =
2530           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2531         UseBB = P->getIncomingBlock(i);
2532       }
2533       if (UseBB == Preheader || L->contains(UseBB)) {
2534         UsedInLoop = true;
2535         break;
2536       }
2537     }
2538 
2539     // If there is, the def must remain in the preheader.
2540     if (UsedInLoop)
2541       continue;
2542 
2543     // Otherwise, sink it to the exit block.
2544     Instruction *ToMove = &*I;
2545     bool Done = false;
2546 
2547     if (I != Preheader->begin()) {
2548       // Skip debug info intrinsics.
2549       do {
2550         --I;
2551       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2552 
2553       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2554         Done = true;
2555     } else {
2556       Done = true;
2557     }
2558 
2559     MadeAnyChanges = true;
2560     ToMove->moveBefore(*ExitBlock, InsertPt);
2561     if (Done) break;
2562     InsertPt = ToMove->getIterator();
2563   }
2564 
2565   return MadeAnyChanges;
2566 }
2567 
2568 //===----------------------------------------------------------------------===//
2569 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2570 //===----------------------------------------------------------------------===//
2571 
2572 bool IndVarSimplify::run(Loop *L) {
2573   // We need (and expect!) the incoming loop to be in LCSSA.
2574   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2575          "LCSSA required to run indvars!");
2576   bool Changed = false;
2577 
  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
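  // For example, a loop like 'for (double d = 0.0; d != 16.0; d += 1.0)'
  // can be rewritten to advance an integer induction variable instead.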
  Changed |= rewriteNonIntegerIVs(L);

  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
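  // For example, once an IV is known not to wrap, an expression such as
  // 'sext(i)' can be folded into a single wider induction variable rather
  // than being recomputed on every iteration. (Illustrative sketch of the
  // payoff; the mechanics live in simplifyAndExtend and WidenIV.)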
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if this loop has a computable loop-invariant execution count.
  // If so, this means that we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of these expressions.
  //
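  // For example, after 'for (i = 0; i != 10; ++i)', a use of 'i' outside
  // the loop can simply be replaced with the constant 10.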
  if (ReplaceExitValue != NeverRepl &&
      !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    Changed |= rewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
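  // For example, an exit test such as 'i < n' can be replaced with an
  // equality test against the value the induction variable holds once the
  // trip count is reached.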
  if (!DisableLFTR) {
    // For the moment, we only do LFTR for single-exit loops. The code is
    // structured as it is in the expectation of generalization to multi-exit
    // loops in the near future. See D62625 for context.
    SmallVector<BasicBlock*, 16> ExitingBlocks;
    if (auto *ExitingBB = L->getExitingBlock())
      ExitingBlocks.push_back(ExitingBB);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch terminators yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *BETakenCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(BETakenCount))
        continue;

      // A zero backedge-taken count means this exit is taken on the first
      // iteration; it would be better to fold the exit branch to a constant
      // (TODO: do so!) than to merely skip it here.
      if (BETakenCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, BETakenCount, SE);
      if (!IndVar)
        continue;

      // Avoid high-cost expansions. Note: this heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(BETakenCount, L))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead,
      // any pass that uses the SCEVExpander must do so itself. This does not
      // work well for loop passes because SCEVExpander makes assumptions
      // about all loops, while LoopPassManager only forces the current loop
      // to be simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BETakenCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             BETakenCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead. The entries in DeadInsts are weak value handles, so
  // any that were already deleted come back null and are skipped by the
  // dyn_cast_or_null below.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
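  // For example, a value computed in the preheader but used only after the
  // loop would otherwise stay live, occupying a register, across the whole
  // loop body.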
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}

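// New pass manager entry point; this is what a pipeline such as
// 'opt -passes=indvars' ends up invoking.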
PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

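// Legacy pass manager wrapper, constructed by createIndVarSimplifyPass()
// below and scheduled under the legacy pipeline (e.g. 'opt -indvars').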
namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}