//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
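//      For example (an illustrative sketch, not from the original header): in
//        's = 0; for (i = 0; i != n; ++i) s += 4;'
//      a use of 's' after the loop can be rewritten to use '4*n', at which
//      point the loop no longer computes anything that is used and becomes
//      dead.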
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));
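
// For example (illustrative, assuming the legacy pass pipeline): running
// 'opt -indvars -replexitval=never' applies this pass with exit value
// replacement disabled.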

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(false),
                cl::desc("Predicate conditions in read only loops"));
namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
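/// For example (an illustrative sketch, not from the original comment): if
/// FromVal is 'getelementptr %A, %i' and the expansion produced
/// 'getelementptr %B, %j', the rewrite is accepted only when getPointerBase()
/// computes the same base SCEV for both pointer operands.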
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}
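
// For example (illustrative): ConvertToSInt(APFloat(3.0), IV) succeeds with
// IV == 3, whereas ConvertToSInt(APFloat(3.5), IV) fails because the
// round-toward-zero conversion is inexact.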

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not an
  // add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if expanding the exit value is considered high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L,
                                           const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}
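
// For example (illustrative): if an in-loop user of the IV expression is a
// 'store', mayHaveSideEffects() is true for it, so the expression has a hard
// use and rewriting its exit value would not make the in-loop computation go
// away.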

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an instruction (and so cannot
        // be defined inside the loop), skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.  We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences).  Is there existing code we can reuse?
          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
          if (isa<SCEVCouldNotCompute>(ExitCount))
            continue;
          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
            if (AddRec->getLoop() == L)
              ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
          if (isa<SCEVCouldNotCompute>(ExitValue) ||
              !SE->isLoopInvariant(ExitValue, L) ||
              !isSafeToExpand(ExitValue, *SE))
            continue;
        }

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which cannot be optimized
        // away.  Avoid doing so unless we know we have a value which computes
        // the ExitValue already.  TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass, a loop can be deleted even when
  // ExitingBlocks.size() > 1. For simplicity, we do not handle the multiple
  // ExitingBlocks case here.
  // TODO: If we see a testcase where a loop with multiple ExitingBlocks can
  // be deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute the cost of all instructions on the induction variable
  // more comprehensively when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
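
// For example (an illustrative scenario): on a target where i64 is a legal
// integer, an i32 IV whose value feeds 'sext i32 ... to i64' records
// WidestNativeType = i64 with IsSigned = true, unless the TTI cost check
// above vetoes widening.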

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}
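
// For example (an illustrative case): given 'add nsw i32 %iv, %n' with
// loop-invariant %n, nsw guarantees sext(%iv + %n) == sext(%iv) + sext(%n),
// so the sum of the widened operands is still an AddRec of this loop and the
// extend can be absorbed into the wide IV.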

/// Is this instruction potentially interesting for further simplification
/// after widening its type? In other words, can the extend be safely hoisted
/// out of the loop, with SCEV reducing the value to a recurrence on the same
/// loop? If so, return the extended recurrence and the kind of extension used.
/// Otherwise return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}
1338 
1339 /// This IV user cannot be widened. Replace this use of the original narrow IV
1340 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
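     ///
     /// E.g. (illustrative): a use "%u = udiv i32 %narrow, %x" becomes
     ///   %t = trunc i64 %wide to i32
     ///   %u = udiv i32 %t, %x
     /// leaving the narrow IV with one fewer user.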
1341 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1342   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1343   if (!InsertPt)
1344     return;
1345   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1346                     << *DU.NarrowUse << "\n");
1347   IRBuilder<> Builder(InsertPt);
1348   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1349   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1350 }
1351 
1352 /// If the narrow use is a compare instruction, then widen the compare
1353 /// (and possibly the other operand).  The extend operation is hoisted into the
1354 /// loop preheader as far as possible.
1355 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1356   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1357   if (!Cmp)
1358     return false;
1359 
1360   // We can legally widen the comparison in the following two cases:
1361   //
1362   //  - The signedness of the IV extension and comparison match
1363   //
1364   //  - The narrow IV is always positive (and thus its sign extension is equal
1365   //    to its zero extension).  For instance, let's say we're zero extending
1366   //    %narrow for the following use
1367   //
1368   //      icmp slt i32 %narrow, %val   ... (A)
1369   //
1370   //    and %narrow is always positive.  Then
1371   //
1372   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1373   //          == icmp slt i32 zext(%narrow), sext(%val)
1374   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1375   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1376     return false;
1377 
1378   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1379   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1380   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1381   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1382 
1383   // Widen the compare instruction.
1384   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1385   if (!InsertPt)
1386     return false;
1387   IRBuilder<> Builder(InsertPt);
1388   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1389 
1390   // Widen the other operand of the compare, if necessary.
1391   if (CastWidth < IVWidth) {
1392     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1393     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1394   }
1395   return true;
1396 }
1397 
1398 /// If the narrow use is an instruction whose two operands are the defining
1399 /// instruction of DU and a load instruction, then we have the following:
1400 /// if the load is hoisted outside the loop, we never reach this function,
1401 /// because scalar evolution works fine in widenIVUse with values hoisted
1402 /// outside the loop, and efficient code is subsequently generated without
1403 /// emitting truncate instructions. But when the load is not hoisted (whether
1404 /// due to a limitation in alias analysis or a true legality constraint),
1405 /// scalar evolution cannot proceed with loop-variant values and inefficient
1406 /// code is generated. This function handles the non-hoisted load as a
1407 /// special case, making the optimization generate the same kind of code for
1408 /// hoisted and non-hoisted loads (widen the use and eliminate the sign/zero
1409 /// extend instruction). This matters especially when the induction variables
1410 /// affect the addressing mode in code generation.
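     ///
     /// E.g. (illustrative IR, hypothetical names):
     ///   %val = load i32, i32* %p             ; not hoistable
     ///   %add = add nsw i32 %iv, %val
     ///   %ext = sext i32 %add to i64
     /// can instead be computed over the wide IV, eliminating %ext:
     ///   %val.ext  = sext i32 %val to i64
     ///   %add.wide = add nsw i64 %iv.wide, %val.ext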
1411 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1412   Instruction *NarrowUse = DU.NarrowUse;
1413   Instruction *NarrowDef = DU.NarrowDef;
1414   Instruction *WideDef = DU.WideDef;
1415 
1416   // Handle the common case of add<nsw/nuw>
1417   const unsigned OpCode = NarrowUse->getOpcode();
1418   // Only Add/Sub/Mul instructions are supported.
1419   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1420       OpCode != Instruction::Mul)
1421     return false;
1422 
1423   // The operand that is not defined by NarrowDef of DU. Let's call it the
1424   // other operand.
1425   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1426   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1427          "bad DU");
1428 
1429   const SCEV *ExtendOperExpr = nullptr;
1430   const OverflowingBinaryOperator *OBO =
1431     cast<OverflowingBinaryOperator>(NarrowUse);
1432   ExtendKind ExtKind = getExtendKind(NarrowDef);
1433   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1434     ExtendOperExpr = SE->getSignExtendExpr(
1435       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1436   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1437     ExtendOperExpr = SE->getZeroExtendExpr(
1438       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1439   else
1440     return false;
1441 
1442   // We are interested in the other operand being a load instruction.
1443   // But we should look into relaxing this restriction later on.
1444   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1445   if (I && I->getOpcode() != Instruction::Load)
1446     return false;
1447 
1448   // Verify that the defining operand is an AddRec.
1449   const SCEV *Op1 = SE->getSCEV(WideDef);
1450   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1451   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1452     return false;
1453   // Verify that the other operand is an extend.
1454   if (ExtKind == SignExtended) {
1455     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1456       return false;
1457   } else {
1458     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1459       return false;
1460   }
1461 
1462   if (ExtKind == SignExtended) {
1463     for (Use &U : NarrowUse->uses()) {
1464       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1465       if (!User || User->getType() != WideType)
1466         return false;
1467     }
1468   } else { // ExtKind == ZeroExtended
1469     for (Use &U : NarrowUse->uses()) {
1470       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1471       if (!User || User->getType() != WideType)
1472         return false;
1473     }
1474   }
1475 
1476   return true;
1477 }
1478 
1479 /// Special Case for widening with variant Loads (see
1480 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1481 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1482   Instruction *NarrowUse = DU.NarrowUse;
1483   Instruction *NarrowDef = DU.NarrowDef;
1484   Instruction *WideDef = DU.WideDef;
1485 
1486   ExtendKind ExtKind = getExtendKind(NarrowDef);
1487 
1488   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1489 
1490   // Generating a widening use instruction.
1491   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1492                    ? WideDef
1493                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1494                                       ExtKind, NarrowUse);
1495   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1496                    ? WideDef
1497                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1498                                       ExtKind, NarrowUse);
1499 
1500   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1501   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1502                                         NarrowBO->getName());
1503   IRBuilder<> Builder(NarrowUse);
1504   Builder.Insert(WideBO);
1505   WideBO->copyIRFlags(NarrowBO);
1506 
1507   if (ExtKind == SignExtended)
1508     ExtendKindMap[NarrowUse] = SignExtended;
1509   else
1510     ExtendKindMap[NarrowUse] = ZeroExtended;
1511 
1512   // Update the Use.
1513   if (ExtKind == SignExtended) {
1514     for (Use &U : NarrowUse->uses()) {
1515       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1516       if (User && User->getType() == WideType) {
1517         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1518                           << *WideBO << "\n");
1519         ++NumElimExt;
1520         User->replaceAllUsesWith(WideBO);
1521         DeadInsts.emplace_back(User);
1522       }
1523     }
1524   } else { // ExtKind == ZeroExtended
1525     for (Use &U : NarrowUse->uses()) {
1526       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1527       if (User && User->getType() == WideType) {
1528         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1529                           << *WideBO << "\n");
1530         ++NumElimExt;
1531         User->replaceAllUsesWith(WideBO);
1532         DeadInsts.emplace_back(User);
1533       }
1534     }
1535   }
1536 }
1537 
1538 /// Determine whether an individual user of the narrow IV can be widened. If so,
1539 /// return the wide clone of the user.
1540 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1541   assert(ExtendKindMap.count(DU.NarrowDef) &&
1542          "Should already know the kind of extension used to widen NarrowDef");
1543 
1544   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1545   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1546     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1547       // For LCSSA phis, sink the truncate outside the loop.
1548       // After SimplifyCFG most loop exit targets have a single predecessor.
1549       // Otherwise fall back to a truncate within the loop.
1550       if (UsePhi->getNumOperands() != 1)
1551         truncateIVUse(DU, DT, LI);
1552       else {
1553         // Widening the PHI requires us to insert a trunc.  The logical place
1554         // for this trunc is in the same BB as the PHI.  This is not possible if
1555         // the BB is terminated by a catchswitch.
1556         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1557           return nullptr;
1558 
1559         PHINode *WidePhi =
1560           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1561                           UsePhi);
1562         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1563         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1564         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1565         UsePhi->replaceAllUsesWith(Trunc);
1566         DeadInsts.emplace_back(UsePhi);
1567         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1568                           << *WidePhi << "\n");
1569       }
1570       return nullptr;
1571     }
1572   }
1573 
1574   // This narrow use can be widened by a sext if it's non-negative or its narrow
1575   // def was widened by a sext. Same for zext.
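       //
       // E.g. (illustrative): if %narrow is known non-negative, then
       // "sext i32 %narrow to i64" and "zext i32 %narrow to i64" yield the
       // same value, so either kind of extension can be absorbed.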
1576   auto canWidenBySExt = [&]() {
1577     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1578   };
1579   auto canWidenByZExt = [&]() {
1580     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1581   };
1582 
1583   // Our raison d'etre! Eliminate sign and zero extension.
1584   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1585       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1586     Value *NewDef = DU.WideDef;
1587     if (DU.NarrowUse->getType() != WideType) {
1588       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1589       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1590       if (CastWidth < IVWidth) {
1591         // The cast isn't as wide as the IV, so insert a Trunc.
1592         IRBuilder<> Builder(DU.NarrowUse);
1593         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1594       }
1595       else {
1596         // A wider extend was hidden behind a narrower one. This may induce
1597         // another round of IV widening in which the intermediate IV becomes
1598         // dead. It should be very rare.
1599         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1600                           << " not wide enough to subsume " << *DU.NarrowUse
1601                           << "\n");
1602         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1603         NewDef = DU.NarrowUse;
1604       }
1605     }
1606     if (NewDef != DU.NarrowUse) {
1607       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1608                         << " replaced by " << *DU.WideDef << "\n");
1609       ++NumElimExt;
1610       DU.NarrowUse->replaceAllUsesWith(NewDef);
1611       DeadInsts.emplace_back(DU.NarrowUse);
1612     }
1613   // Now that the extend is gone, we want to expose its uses for potential
1614     // further simplification. We don't need to directly inform SimplifyIVUsers
1615     // of the new users, because their parent IV will be processed later as a
1616     // new loop phi. If we preserved IVUsers analysis, we would also want to
1617     // push the uses of WideDef here.
1618 
1619     // No further widening is needed. The deceased [sz]ext had done it for us.
1620     return nullptr;
1621   }
1622 
1623   // Does this user itself evaluate to a recurrence after widening?
1624   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1625   if (!WideAddRec.first)
1626     WideAddRec = getWideRecurrence(DU);
1627 
1628   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1629   if (!WideAddRec.first) {
1630     // If use is a loop condition, try to promote the condition instead of
1631     // truncating the IV first.
1632     if (widenLoopCompare(DU))
1633       return nullptr;
1634 
1635     // At this point, we are about to generate a truncate instruction that
1636     // may hurt performance because the scalar evolution expression computed
1637     // earlier in WideAddRec.first is not a polynomial induction expression.
1638     // In that case, look at the operands of the use instruction to determine
1639     // if we can still widen the use instead of truncating its operand.
1640     if (widenWithVariantLoadUse(DU)) {
1641       widenWithVariantLoadUseCodegen(DU);
1642       return nullptr;
1643     }
1644 
1645     // This user does not evaluate to a recurrence after widening, so don't
1646     // follow it. Instead insert a Trunc to kill off the original use,
1647     // eventually isolating the original narrow IV so it can be removed.
1648     truncateIVUse(DU, DT, LI);
1649     return nullptr;
1650   }
1651   // Assume block terminators cannot evaluate to a recurrence. We can't
1652   // insert a Trunc after a terminator if there happens to be a critical edge.
1653   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1654          "SCEV is not expected to evaluate a block terminator");
1655 
1656   // Reuse the IV increment that SCEVExpander created as long as it dominates
1657   // NarrowUse.
1658   Instruction *WideUse = nullptr;
1659   if (WideAddRec.first == WideIncExpr &&
1660       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1661     WideUse = WideInc;
1662   else {
1663     WideUse = cloneIVUser(DU, WideAddRec.first);
1664     if (!WideUse)
1665       return nullptr;
1666   }
1667   // Evaluation of WideAddRec ensured that the narrow expression could be
1668   // extended outside the loop without overflow. This suggests that the wide use
1669   // evaluates to the same expression as the extended narrow use, but doesn't
1670   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1671   // where it fails, we simply throw away the newly created wide use.
1672   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1673     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1674                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1675                       << "\n");
1676     DeadInsts.emplace_back(WideUse);
1677     return nullptr;
1678   }
1679 
1680   // If we reached this point, then we are going to replace
1681   // DU.NarrowUse with WideUse. Reattach any DbgValues then.
1682   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1683 
1684   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1685   // Returning WideUse pushes it on the worklist.
1686   return WideUse;
1687 }
1688 
1689 /// Add eligible users of NarrowDef to NarrowIVUsers.
1690 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1691   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1692   bool NonNegativeDef =
1693       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1694                            SE->getConstant(NarrowSCEV->getType(), 0));
1695   for (User *U : NarrowDef->users()) {
1696     Instruction *NarrowUser = cast<Instruction>(U);
1697 
1698     // Handle data flow merges and bizarre phi cycles.
1699     if (!Widened.insert(NarrowUser).second)
1700       continue;
1701 
1702     bool NonNegativeUse = false;
1703     if (!NonNegativeDef) {
1704       // We might have control-dependent range information for this context.
1705       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1706         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1707     }
1708 
1709     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1710                                NonNegativeDef || NonNegativeUse);
1711   }
1712 }
1713 
1714 /// Process a single induction variable. First use the SCEVExpander to create a
1715 /// wide induction variable that evaluates to the same recurrence as the
1716 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1717 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1718 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1719 ///
1720 /// It would be simpler to delete uses as they are processed, but we must avoid
1721 /// invalidating SCEV expressions.
1722 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1723   // Is this phi an induction variable?
1724   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1725   if (!AddRec)
1726     return nullptr;
1727 
1728   // Widen the induction variable expression.
1729   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1730                                ? SE->getSignExtendExpr(AddRec, WideType)
1731                                : SE->getZeroExtendExpr(AddRec, WideType);
1732 
1733   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1734          "Expect the new IV expression to preserve its type");
1735 
1736   // Can the IV be extended outside the loop without overflow?
1737   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1738   if (!AddRec || AddRec->getLoop() != L)
1739     return nullptr;
1740 
1741   // An AddRec must have loop-invariant operands. Since this AddRec is
1742   // materialized by a loop header phi, the expression cannot have any post-loop
1743   // operands, so they must dominate the loop header.
1744   assert(
1745       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1746       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1747       "Loop header phi recurrence inputs do not dominate the loop");
1748 
1749   // Iterate over IV uses (including transitive ones) looking for IV increments
1750   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1751   // the increment, calculate control-dependent range information based on
1752   // dominating conditions inside the loop (e.g. a range check inside the
1753   // loop). Calculated ranges are stored in the PostIncRangeInfos map.
1754   //
1755   // Control-dependent range information is later used to prove that a narrow
1756   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1757   // this on demand because when pushNarrowIVUsers needs this information some
1758   // of the dominating conditions might already be widened.
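       //
       // E.g. (illustrative): given an increment "%iv.next = add nsw i32 %iv, 1"
       // and a dominating check "icmp slt i32 %iv.next, %len", the range of
       // %iv.next in the guarded block is bounded by the signed range of %len,
       // which may be enough to prove non-negativity.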
1759   if (UsePostIncrementRanges)
1760     calculatePostIncRanges(OrigPhi);
1761 
1762   // The rewriter provides a value for the desired IV expression. This may
1763   // either find an existing phi or materialize a new one. Either way, we
1764   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1765   // of the phi-SCC dominates the loop entry.
1766   Instruction *InsertPt = &L->getHeader()->front();
1767   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1768 
1769   // Remembering the WideIV increment generated by SCEVExpander allows
1770   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1771   // employ a general reuse mechanism because the call above is the only call to
1772   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1773   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1774     WideInc =
1775       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1776     WideIncExpr = SE->getSCEV(WideInc);
1777     // Propagate the debug location associated with the original loop increment
1778     // to the new (widened) increment.
1779     auto *OrigInc =
1780       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1781     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1782   }
1783 
1784   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1785   ++NumWidened;
1786 
1787   // Traverse the def-use chain using a worklist starting at the original IV.
1788   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1789 
1790   Widened.insert(OrigPhi);
1791   pushNarrowIVUsers(OrigPhi, WidePhi);
1792 
1793   while (!NarrowIVUsers.empty()) {
1794     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1795 
1796     // Process a def-use edge. This may replace the use, so don't hold a
1797     // use_iterator across it.
1798     Instruction *WideUse = widenIVUse(DU, Rewriter);
1799 
1800     // Follow all def-use edges from the previous narrow use.
1801     if (WideUse)
1802       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1803 
1804     // widenIVUse may have removed the def-use edge.
1805     if (DU.NarrowDef->use_empty())
1806       DeadInsts.emplace_back(DU.NarrowDef);
1807   }
1808 
1809   // Attach any debug information to the new PHI.
1810   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1811 
1812   return WidePhi;
1813 }
1814 
1815 /// Calculates the control-dependent range for the given def at the given
1816 /// context by looking at dominating conditions inside the loop.
1817 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1818                                     Instruction *NarrowUser) {
1819   using namespace llvm::PatternMatch;
1820 
1821   Value *NarrowDefLHS;
1822   const APInt *NarrowDefRHS;
1823   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1824                                  m_APInt(NarrowDefRHS))) ||
1825       !NarrowDefRHS->isNonNegative())
1826     return;
1827 
1828   auto UpdateRangeFromCondition = [&] (Value *Condition,
1829                                        bool TrueDest) {
1830     CmpInst::Predicate Pred;
1831     Value *CmpRHS;
1832     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1833                                  m_Value(CmpRHS))))
1834       return;
1835 
1836     CmpInst::Predicate P =
1837             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1838 
1839     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1840     auto CmpConstrainedLHSRange =
1841             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1842     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1843         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1844 
1845     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1846   };
1847 
1848   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1849     if (!HasGuards)
1850       return;
1851 
1852     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1853                                      Ctx->getParent()->rend())) {
1854       Value *C = nullptr;
1855       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1856         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1857     }
1858   };
1859 
1860   UpdateRangeFromGuards(NarrowUser);
1861 
1862   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1863   // If NarrowUserBB is statically unreachable, asking dominator queries may
1864   // yield surprising results (e.g. the block may not have a dom tree node).
1865   if (!DT->isReachableFromEntry(NarrowUserBB))
1866     return;
1867 
1868   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1869        L->contains(DTB->getBlock());
1870        DTB = DTB->getIDom()) {
1871     auto *BB = DTB->getBlock();
1872     auto *TI = BB->getTerminator();
1873     UpdateRangeFromGuards(TI);
1874 
1875     auto *BI = dyn_cast<BranchInst>(TI);
1876     if (!BI || !BI->isConditional())
1877       continue;
1878 
1879     auto *TrueSuccessor = BI->getSuccessor(0);
1880     auto *FalseSuccessor = BI->getSuccessor(1);
1881 
1882     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1883       return BBE.isSingleEdge() &&
1884              DT->dominates(BBE, NarrowUser->getParent());
1885     };
1886 
1887     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1888       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1889 
1890     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1891       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1892   }
1893 }
1894 
1895 /// Calculates the PostIncRangeInfos map for the given IV.
1896 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1897   SmallPtrSet<Instruction *, 16> Visited;
1898   SmallVector<Instruction *, 6> Worklist;
1899   Worklist.push_back(OrigPhi);
1900   Visited.insert(OrigPhi);
1901 
1902   while (!Worklist.empty()) {
1903     Instruction *NarrowDef = Worklist.pop_back_val();
1904 
1905     for (Use &U : NarrowDef->uses()) {
1906       auto *NarrowUser = cast<Instruction>(U.getUser());
1907 
1908       // Don't go looking outside the current loop.
1909       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1910       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1911         continue;
1912 
1913       if (!Visited.insert(NarrowUser).second)
1914         continue;
1915 
1916       Worklist.push_back(NarrowUser);
1917 
1918       calculatePostIncRange(NarrowDef, NarrowUser);
1919     }
1920   }
1921 }
1922 
1923 //===----------------------------------------------------------------------===//
1924 //  Live IV Reduction - Minimize IVs live across the loop.
1925 //===----------------------------------------------------------------------===//
1926 
1927 //===----------------------------------------------------------------------===//
1928 //  Simplification of IV users based on SCEV evaluation.
1929 //===----------------------------------------------------------------------===//
1930 
1931 namespace {
1932 
1933 class IndVarSimplifyVisitor : public IVVisitor {
1934   ScalarEvolution *SE;
1935   const TargetTransformInfo *TTI;
1936   PHINode *IVPhi;
1937 
1938 public:
1939   WideIVInfo WI;
1940 
1941   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1942                         const TargetTransformInfo *TTI,
1943                         const DominatorTree *DTree)
1944     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1945     DT = DTree;
1946     WI.NarrowIV = IVPhi;
1947   }
1948 
1949   // Implement the interface used by simplifyUsersOfIV.
1950   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1951 };
1952 
1953 } // end anonymous namespace
1954 
1955 /// Iteratively perform simplification on a worklist of IV users. Each
1956 /// successive simplification may push more users which may themselves be
1957 /// candidates for simplification.
1958 ///
1959 /// Sign/Zero extend elimination is interleaved with IV simplification.
1960 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1961                                        SCEVExpander &Rewriter,
1962                                        LoopInfo *LI) {
1963   SmallVector<WideIVInfo, 8> WideIVs;
1964 
1965   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1966           Intrinsic::getName(Intrinsic::experimental_guard));
1967   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1968 
1969   SmallVector<PHINode*, 8> LoopPhis;
1970   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1971     LoopPhis.push_back(cast<PHINode>(I));
1972   }
1973   // Each round of simplification iterates through the SimplifyIVUsers worklist
1974   // for all current phis, then determines whether any IVs can be
1975   // widened. Widening adds new phis to LoopPhis, inducing another round of
1976   // simplification on the wide IVs.
1977   bool Changed = false;
1978   while (!LoopPhis.empty()) {
1979     // Evaluate as many IV expressions as possible before widening any IVs. This
1980     // forces SCEV to set no-wrap flags before evaluating sign/zero
1981     // extension. The first time SCEV attempts to normalize sign/zero extension,
1982     // the result becomes final. So for the most predictable results, we delay
1983     // evaluation of sign/zero extend evaluation until needed, and avoid running
1984     // other SCEV based analysis prior to simplifyAndExtend.
1985     do {
1986       PHINode *CurrIV = LoopPhis.pop_back_val();
1987 
1988       // Information about sign/zero extensions of CurrIV.
1989       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1990 
1991       Changed |=
1992           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1993 
1994       if (Visitor.WI.WidestNativeType) {
1995         WideIVs.push_back(Visitor.WI);
1996       }
1997     } while (!LoopPhis.empty());
1998 
1999     for (; !WideIVs.empty(); WideIVs.pop_back()) {
2000       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
2001       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
2002         Changed = true;
2003         LoopPhis.push_back(WidePhi);
2004       }
2005     }
2006   }
2007   return Changed;
2008 }
2009 
2010 //===----------------------------------------------------------------------===//
2011 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
2012 //===----------------------------------------------------------------------===//
2013 
2014 /// Given a Value which is hoped to be part of an add recurrence in the given
2015 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
2016 /// that this is less general than SCEV's AddRec checking.
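     ///
     /// E.g. (illustrative): for "%inc = add i32 %phi, %step" where %phi lives
     /// in the loop header and %step is loop-invariant, this returns %phi.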
2017 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
2018   Instruction *IncI = dyn_cast<Instruction>(IncV);
2019   if (!IncI)
2020     return nullptr;
2021 
2022   switch (IncI->getOpcode()) {
2023   case Instruction::Add:
2024   case Instruction::Sub:
2025     break;
2026   case Instruction::GetElementPtr:
2027     // An IV counter must preserve its type.
2028     if (IncI->getNumOperands() == 2)
2029       break;
2030     LLVM_FALLTHROUGH;
2031   default:
2032     return nullptr;
2033   }
2034 
2035   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2036   if (Phi && Phi->getParent() == L->getHeader()) {
2037     if (L->isLoopInvariant(IncI->getOperand(1)))
2038       return Phi;
2039     return nullptr;
2040   }
2041   if (IncI->getOpcode() == Instruction::GetElementPtr)
2042     return nullptr;
2043 
2044   // Allow add/sub to be commuted.
2045   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2046   if (Phi && Phi->getParent() == L->getHeader()) {
2047     if (L->isLoopInvariant(IncI->getOperand(0)))
2048       return Phi;
2049   }
2050   return nullptr;
2051 }
2052 
2053 /// Whether the current loop exit test is based on this value.  Currently this
2054 /// is limited to a direct use in the loop condition.
2055 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2056   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2057   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2058   // TODO: Allow non-icmp loop test.
2059   if (!ICmp)
2060     return false;
2061 
2062   // TODO: Allow indirect use.
2063   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2064 }
2065 
2066 /// linearFunctionTestReplace policy. Return true unless we can show that the
2067 /// current exit test is already sufficiently canonical.
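     ///
     /// E.g. (illustrative): an exit test "icmp ne i32 %iv.next, %n" against a
     /// unit-stride counter is already canonical, whereas "icmp slt i32 %iv, %n"
     /// is not and would be rewritten.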
2068 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2069   assert(L->getLoopLatch() && "Must be in simplified form");
2070 
2071   // Avoid converting a constant or loop invariant test back to a runtime
2072   // test.  This is critical for when SCEV's cached ExitCount is less precise
2073   // than the current IR (such as after we've proven a particular exit is
2074   // actually dead and thus the BE count never reaches our ExitCount.)
2075   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2076   if (L->isLoopInvariant(BI->getCondition()))
2077     return false;
2078 
2079   // Do LFTR to simplify the exit condition to an ICMP.
2080   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2081   if (!Cond)
2082     return true;
2083 
2084   // Do LFTR to simplify the exit ICMP to EQ/NE
2085   ICmpInst::Predicate Pred = Cond->getPredicate();
2086   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2087     return true;
2088 
2089   // Look for a loop invariant RHS
2090   Value *LHS = Cond->getOperand(0);
2091   Value *RHS = Cond->getOperand(1);
2092   if (!L->isLoopInvariant(RHS)) {
2093     if (!L->isLoopInvariant(LHS))
2094       return true;
2095     std::swap(LHS, RHS);
2096   }
2097   // Look for a simple IV counter LHS
2098   PHINode *Phi = dyn_cast<PHINode>(LHS);
2099   if (!Phi)
2100     Phi = getLoopPhiForCounter(LHS, L);
2101 
2102   if (!Phi)
2103     return true;
2104 
2105   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2106   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2107   if (Idx < 0)
2108     return true;
2109 
2110   // Do LFTR if the exit condition's IV is *not* a simple counter.
2111   Value *IncV = Phi->getIncomingValue(Idx);
2112   return Phi != getLoopPhiForCounter(IncV, L);
2113 }
2114 
2115 /// Return true if undefined behavior would provably be executed on the path
2116 /// to OnPathTo if Root produced a poison result.  Note that this doesn't say
2117 /// anything about whether OnPathTo is actually executed or whether Root is
2118 /// actually poison.  This can be used to assess whether a new use of Root can
2119 /// be added at a location which is control equivalent with OnPathTo (such as
2120 /// immediately before it) without introducing UB which didn't previously
2121 /// exist.  Note that a false result conveys no information.
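     ///
     /// E.g. (illustrative): if Root feeds the pointer operand of a store that
     /// dominates OnPathTo, a poison Root would already trigger UB at that
     /// store.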
2122 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2123                                           Instruction *OnPathTo,
2124                                           DominatorTree *DT) {
2125   // Basic approach is to assume Root is poison, propagate poison forward
2126   // through all users we can easily track, and then check whether any of those
2127   // users are provably UB and must execute before our exiting block might
2128   // exit.
2129 
2130   // The set of all recursive users we've visited (which are assumed to all be
2131   // poison because of said visit)
2132   SmallSet<const Value *, 16> KnownPoison;
2133   SmallVector<const Instruction*, 16> Worklist;
2134   Worklist.push_back(Root);
2135   while (!Worklist.empty()) {
2136     const Instruction *I = Worklist.pop_back_val();
2137 
2138     // If we know this must trigger UB on a path leading our target.
2139     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2140       return true;
2141 
2142     // If we can't analyze propagation through this instruction, just skip it
2143     // and transitive users.  Safe as false is a conservative result.
2144     if (!propagatesFullPoison(I) && I != Root)
2145       continue;
2146 
2147     if (KnownPoison.insert(I).second)
2148       for (const User *User : I->users())
2149         Worklist.push_back(cast<Instruction>(User));
2150   }
2151 
2152   // Might be non-UB, or might have a path we couldn't prove must execute on
2153   // way to exiting bb.
2154   return false;
2155 }
2156 
2157 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2158 /// down to checking that all operands are constant and listing instructions
2159 /// that may hide undef.
2160 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2161                                unsigned Depth) {
2162   if (isa<Constant>(V))
2163     return !isa<UndefValue>(V);
2164 
2165   if (Depth >= 6)
2166     return false;
2167 
2168   // Conservatively handle non-constant non-instructions. For example, Arguments
2169   // may be undef.
2170   Instruction *I = dyn_cast<Instruction>(V);
2171   if (!I)
2172     return false;
2173 
2174   // Load and return values may be undef.
2175   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2176     return false;
2177 
2178   // Optimistically handle other instructions.
2179   for (Value *Op : I->operands()) {
2180     if (!Visited.insert(Op).second)
2181       continue;
2182     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2183       return false;
2184   }
2185   return true;
2186 }
2187 
2188 /// Return true if the given value is concrete. We must prove that undef can
2189 /// never reach it.
2190 ///
2191 /// TODO: If we decide that this is a good approach to checking for undef, we
2192 /// may factor it into a common location.
2193 static bool hasConcreteDef(Value *V) {
2194   SmallPtrSet<Value*, 8> Visited;
2195   Visited.insert(V);
2196   return hasConcreteDefImpl(V, Visited, 0);
2197 }
2198 
2199 /// Return true if this IV has any uses other than the (soon to be rewritten)
2200 /// loop exit test.
2201 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2202   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2203   Value *IncV = Phi->getIncomingValue(LatchIdx);
2204 
2205   for (User *U : Phi->users())
2206     if (U != Cond && U != IncV) return false;
2207 
2208   for (User *U : IncV->users())
2209     if (U != Cond && U != Phi) return false;
2210   return true;
2211 }
2212 
2213 /// Return true if the given phi is a "counter" in L.  A counter is an
2214 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2215 /// step of 1.  Note that L must have exactly one latch.
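     ///
     /// E.g. (illustrative):
     ///   %i = phi i32 [ %start, %preheader ], [ %i.next, %latch ]
     ///   %i.next = add i32 %i, 1
     /// is a counter; a phi with step 2, or a loop-variant step, is not.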
2216 static bool isLoopCounter(PHINode* Phi, Loop *L,
2217                           ScalarEvolution *SE) {
2218   assert(Phi->getParent() == L->getHeader());
2219   assert(L->getLoopLatch());
2220 
2221   if (!SE->isSCEVable(Phi->getType()))
2222     return false;
2223 
2224   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2225   if (!AR || AR->getLoop() != L || !AR->isAffine())
2226     return false;
2227 
2228   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2229   if (!Step || !Step->isOne())
2230     return false;
2231 
2232   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2233   Value *IncV = Phi->getIncomingValue(LatchIdx);
2234   return (getLoopPhiForCounter(IncV, L) == Phi);
2235 }
2236 
2237 /// Search the loop header for a loop counter (an add rec w/step of one)
2238 /// suitable for use by LFTR.  If multiple counters are available, select the
2239 /// "best" one based on profitability heuristics.
2240 ///
2241 /// BECount may be an i8* pointer type. The pointer difference is already
2242 /// valid count without scaling the address stride, so it remains a pointer
2243 /// expression as far as SCEV is concerned.
2244 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2245                                 const SCEV *BECount,
2246                                 ScalarEvolution *SE, DominatorTree *DT) {
2247   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2248 
2249   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2250 
2251   // Loop over all of the PHI nodes, looking for a simple counter.
2252   PHINode *BestPhi = nullptr;
2253   const SCEV *BestInit = nullptr;
2254   BasicBlock *LatchBlock = L->getLoopLatch();
2255   assert(LatchBlock && "Must be in simplified form");
2256   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2257 
2258   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2259     PHINode *Phi = cast<PHINode>(I);
2260     if (!isLoopCounter(Phi, L, SE))
2261       continue;
2262 
2263     // Avoid comparing an integer IV against a pointer Limit.
2264     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2265       continue;
2266 
2267     const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2268 
2269     // AR may be a pointer type, while BECount is an integer type.
2270     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2271     // AR may not be a narrower type, or we may never exit.
2272     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2273     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2274       continue;
2275 
2276     // Avoid reusing a potentially undef value to compute other values that may
2277     // have originally had a concrete definition.
2278     if (!hasConcreteDef(Phi)) {
2279       // We explicitly allow unknown phis as long as they are already used by
2280       // the loop exit test.  This is legal since performing LFTR could not
2281       // increase the number of undef users.
2282       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
2283       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
2284           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
2285         continue;
2286     }
2287 
2288     // Avoid introducing undefined behavior due to poison which didn't exist in
2289     // the original program.  (Annoyingly, the rules for poison and undef
2290     // propagation are distinct, so this does NOT cover the undef case above.)
2291     // We have to ensure that we don't introduce UB by introducing a use on an
2292     // iteration where said IV produces poison.  Our strategy here differs for
2293     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
2294     // see code in linearFunctionTestReplace.  For pointers, we restrict
2295     // transforms as there is no good way to reinfer inbounds once lost.
2296     if (!Phi->getType()->isIntegerTy() &&
2297         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2298       continue;
2299 
2300     const SCEV *Init = AR->getStart();
2301 
2302     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2303       // Don't force a live loop counter if another IV can be used.
2304       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2305         continue;
2306 
2307       // Prefer to count-from-zero. This is a more "canonical" counter form. It
2308       // also prefers integer to pointer IVs.
2309       if (BestInit->isZero() != Init->isZero()) {
2310         if (BestInit->isZero())
2311           continue;
2312       }
2313       // If two IVs both count from zero or both count from nonzero then the
2314       // narrower is likely a dead phi that has been widened. Use the wider phi
2315       // to allow the other to be eliminated.
2316       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2317         continue;
2318     }
2319     BestPhi = Phi;
2320     BestInit = Init;
2321   }
2322   return BestPhi;
2323 }
2324 
2325 /// Insert an IR expression which computes the value held by the IV IndVar
2326 /// (which must be a loop counter w/unit stride) after the backedge of loop L
2327 /// is taken ExitCount times.
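     ///
     /// E.g. (illustrative): for a unit-stride IV with start S, the
     /// pre-increment limit is S + ExitCount and the post-increment limit is
     /// S + ExitCount + 1, both computed with 2's complement wrap.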
2328 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2329                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
2330                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2331   assert(isLoopCounter(IndVar, L, SE));
2332   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2333   const SCEV *IVInit = AR->getStart();
2334 
2335   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2336   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2337   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2338   // the existing GEPs whenever possible.
2339   if (IndVar->getType()->isPointerTy() &&
2340       !ExitCount->getType()->isPointerTy()) {
2341     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2342     // signed value. ExitCount on the other hand represents the loop trip count,
2343     // which is an unsigned value. FindLoopCounter only allows induction
2344     // variables that have a positive unit stride of one. This means we don't
2345     // have to handle the case of negative offsets (yet) and just need to zero
2346     // extend ExitCount.
2347     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2348     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2349     if (UsePostInc)
2350       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2351 
2352     // Expand the code for the iteration count.
2353     assert(SE->isLoopInvariant(IVOffset, L) &&
2354            "Computed iteration count is not loop invariant!");
2355 
2356     // We could handle pointer IVs other than i8*, but we need to compensate for
2357     // gep index scaling.
2358     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2359                              cast<PointerType>(IndVar->getType())
2360                                  ->getElementType())->isOne() &&
2361            "unit stride pointer IV must be i8*");
2362 
2363     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2364     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2365     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2366   } else {
2367     // In any other case, convert both IVInit and ExitCount to integers before
2368     // comparing. This may result in SCEV expansion of pointers, but in practice
2369     // SCEV will fold the pointer arithmetic away as such:
2370     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2371     //
2372     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2373     // for simple memset-style loops.
2374     //
2375     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2376     // were generated on top of case #2, which is not expected.
2377 
2378     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2379     // For unit stride, IVCount = Start + ExitCount with 2's complement
2380     // overflow.
2381 
2382     // For integer IVs, truncate the IV before computing IVInit + BECount,
2383     // unless we know a priori that the limit must be a constant when evaluated
2384     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2385     // of the IV in the loop over a (potentially) expensive expansion of the
2386     // widened exit count add(zext(add)) expression.
2387     if (SE->getTypeSizeInBits(IVInit->getType())
2388         > SE->getTypeSizeInBits(ExitCount->getType())) {
2389       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2390         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2391       else
2392         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2393     }
2394 
2395     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2396 
2397     if (UsePostInc)
2398       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2399 
2400     // Expand the code for the iteration count.
2401     assert(SE->isLoopInvariant(IVLimit, L) &&
2402            "Computed iteration count is not loop invariant!");
2403     // Ensure that we generate the same type as IndVar, or a smaller integer
2404     // type. In the presence of null pointer values, we have an integer type
2405     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2406     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2407       IndVar->getType() : ExitCount->getType();
2408     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2409     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2410   }
2411 }
2412 
2413 /// This method rewrites the exit condition of the loop to be a canonical !=
2414 /// comparison against the incremented loop induction variable.  This pass is
2415 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2416 /// determine a loop-invariant trip count of the loop, which is actually a much
2417 /// broader range than just linear tests.
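     ///
     /// E.g. (illustrative): a latch exit "br (icmp slt i32 %iv, %n)" may be
     /// rewritten as "br (icmp ne i32 %iv.next, %limit)" with a loop-invariant
     /// %limit produced by genLoopLimit.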
2418 bool IndVarSimplify::
2419 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2420                           const SCEV *ExitCount,
2421                           PHINode *IndVar, SCEVExpander &Rewriter) {
2422   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2423   assert(isLoopCounter(IndVar, L, SE));
2424   Instruction * const IncVar =
2425     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2426 
2427   // Initialize CmpIndVar to the preincremented IV.
2428   Value *CmpIndVar = IndVar;
2429   bool UsePostInc = false;
2430 
2431   // If the exiting block is the same as the backedge block, we prefer to
2432   // compare against the post-incremented value, otherwise we must compare
2433   // against the preincremented value.
2434   if (ExitingBB == L->getLoopLatch()) {
2435     // For pointer IVs, we chose not to strip inbounds, which requires us
2436     // not to add a potentially UB-introducing use.  We need to either a) show
2437     // the loop test we're modifying is already in post-inc form, or b) show
2438     // that adding a use must not introduce UB.
2439     bool SafeToPostInc =
2440         IndVar->getType()->isIntegerTy() ||
2441         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2442         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2443     if (SafeToPostInc) {
2444       UsePostInc = true;
2445       CmpIndVar = IncVar;
2446     }
2447   }
2448 
2449   // It may be necessary to drop nowrap flags on the incrementing instruction
2450   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2451   // case the increment might have previously been poison on the last iteration
2452   // only) or if LFTR switches to a different IV that was previously dynamically
2453   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2454   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2455   // check), because the pre-inc addrec flags may be adopted from the original
2456   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2457   // TODO: This handling is inaccurate for one case: If we switch to a
2458   // dynamically dead IV that wraps on the first loop iteration only, which is
2459   // not covered by the post-inc addrec. (If the new IV was not dynamically
2460   // dead, it could not be poison on the first iteration in the first place.)
2461   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2462     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2463     if (BO->hasNoUnsignedWrap())
2464       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2465     if (BO->hasNoSignedWrap())
2466       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2467   }
2468 
2469   Value *ExitCnt = genLoopLimit(
2470       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2471   assert(ExitCnt->getType()->isPointerTy() ==
2472              IndVar->getType()->isPointerTy() &&
2473          "genLoopLimit missed a cast");
2474 
2475   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2476   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2477   ICmpInst::Predicate P;
2478   if (L->contains(BI->getSuccessor(0)))
2479     P = ICmpInst::ICMP_NE;
2480   else
2481     P = ICmpInst::ICMP_EQ;
2482 
2483   IRBuilder<> Builder(BI);
2484 
2485   // The new loop exit condition should reuse the debug location of the
2486   // original loop exit condition.
2487   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2488     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2489 
2490   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2491   // avoid the expensive expansion of the limit expression in the wider type,
2492   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2493   // since we know (from the exit count bitwidth), that we can't self-wrap in
2494   // the narrower type.
2495   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2496   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2497   if (CmpIndVarSize > ExitCntSize) {
2498     assert(!CmpIndVar->getType()->isPointerTy() &&
2499            !ExitCnt->getType()->isPointerTy());
2500 
2501     // Before resorting to actually inserting the truncate, use the same
2502     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2503     // the other side of the comparison instead.  We still evaluate the limit
2504     // in the narrower bitwidth; we just prefer a zext/sext outside the loop to
2505     // a truncate within it.
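         //
         // E.g. (illustrative): if %iv is the i64 zext of an i32 recurrence
         // and the exit count is i32, then zext(trunc(%iv)) == %iv, so we can
         // zext the i32 limit once outside the loop instead of truncating %iv
         // on every iteration.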
2506     bool Extended = false;
2507     const SCEV *IV = SE->getSCEV(CmpIndVar);
2508     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2509                                                   ExitCnt->getType());
2510     const SCEV *ZExtTrunc =
2511       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2512 
2513     if (ZExtTrunc == IV) {
2514       Extended = true;
2515       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2516                                    "wide.trip.count");
2517     } else {
2518       const SCEV *SExtTrunc =
2519         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2520       if (SExtTrunc == IV) {
2521         Extended = true;
2522         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2523                                      "wide.trip.count");
2524       }
2525     }
2526 
2527     if (Extended) {
2528       bool Discard;
2529       L->makeLoopInvariant(ExitCnt, Discard);
2530     } else
2531       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2532                                       "lftr.wideiv");
2533   }
2534   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2535                     << "      LHS:" << *CmpIndVar << '\n'
2536                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2537                     << "\n"
2538                     << "      RHS:\t" << *ExitCnt << "\n"
2539                     << "ExitCount:\t" << *ExitCount << "\n"
2540                     << "  was: " << *BI->getCondition() << "\n");
2541 
2542   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2543   Value *OrigCond = BI->getCondition();
2544   // It's tempting to use replaceAllUsesWith here to fully replace the old
2545   // comparison, but that's not immediately safe, since users of the old
2546   // comparison may not be dominated by the new comparison. Instead, just
2547   // update the branch to use the new comparison; in the common case this
2548   // will make the old comparison dead.
2549   BI->setCondition(Cond);
2550   DeadInsts.push_back(OrigCond);
2551 
2552   ++NumLFTR;
2553   return true;
2554 }
2555 
2556 //===----------------------------------------------------------------------===//
2557 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2558 //===----------------------------------------------------------------------===//
2559 
2560 /// If there's a single exit block, sink any loop-invariant values that
2561 /// were defined in the preheader but not used inside the loop into the
2562 /// exit block to reduce register pressure in the loop.
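///
/// For example (illustrative IR, names invented):
///   preheader:
///     %inv = mul i32 %a, %b   ; loop invariant, no uses inside the loop
///     br label %loop
///   ...
///   exit:
///     %use = add i32 %inv, 1
/// Sinking %inv into %exit keeps it out of a register across the loop body.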
2563 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2564   BasicBlock *ExitBlock = L->getExitBlock();
2565   if (!ExitBlock) return false;
2566 
2567   BasicBlock *Preheader = L->getLoopPreheader();
2568   if (!Preheader) return false;
2569 
2570   bool MadeAnyChanges = false;
2571   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2572   BasicBlock::iterator I(Preheader->getTerminator());
2573   while (I != Preheader->begin()) {
2574     --I;
2575     // Scan backwards; new instructions land at the end of the preheader.
2576     if (isa<PHINode>(I))
2577       break;
2578 
2579     // Don't move instructions which might have side effects, since the side
2580     // effects need to complete before instructions inside the loop.  Also don't
2581     // move instructions which might read memory, since the loop may modify
2582     // memory. Note that it's okay if the instruction might have undefined
2583     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2584     // block.
2585     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2586       continue;
2587 
2588     // Skip debug info intrinsics.
2589     if (isa<DbgInfoIntrinsic>(I))
2590       continue;
2591 
2592     // Skip eh pad instructions.
2593     if (I->isEHPad())
2594       continue;
2595 
2596     // Don't sink allocas: we never want to sink static allocas out of the
2597     // entry block, and correctly sinking dynamic allocas requires
2598     // checks for stacksave/stackrestore intrinsics.
2599     // FIXME: Refactor this check somehow?
2600     if (isa<AllocaInst>(I))
2601       continue;
2602 
2603     // Determine if there is a use in or before the loop (direct or
2604     // otherwise).
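    // Note (illustrative): a use in a PHI is attributed to the incoming block
    // rather than to the PHI's parent.  E.g. for an LCSSA phi in the exit
    // block,
    //   %lcssa = phi i32 [ %inv, %loop.latch ]
    // the use of %inv counts as a use in %loop.latch, i.e. inside the loop.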
2605     bool UsedInLoop = false;
2606     for (Use &U : I->uses()) {
2607       Instruction *User = cast<Instruction>(U.getUser());
2608       BasicBlock *UseBB = User->getParent();
2609       if (PHINode *P = dyn_cast<PHINode>(User)) {
2610         unsigned i =
2611           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2612         UseBB = P->getIncomingBlock(i);
2613       }
2614       if (UseBB == Preheader || L->contains(UseBB)) {
2615         UsedInLoop = true;
2616         break;
2617       }
2618     }
2619 
2620     // If there is, the def must remain in the preheader.
2621     if (UsedInLoop)
2622       continue;
2623 
2624     // Otherwise, sink it to the exit block.
2625     Instruction *ToMove = &*I;
2626     bool Done = false;
2627 
2628     if (I != Preheader->begin()) {
2629       // Skip debug info intrinsics.
2630       do {
2631         --I;
2632       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2633 
2634       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2635         Done = true;
2636     } else {
2637       Done = true;
2638     }
2639 
2640     MadeAnyChanges = true;
2641     ToMove->moveBefore(*ExitBlock, InsertPt);
2642     if (Done) break;
2643     InsertPt = ToMove->getIterator();
2644   }
2645 
2646   return MadeAnyChanges;
2647 }
2648 
2649 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2650   SmallVector<BasicBlock*, 16> ExitingBlocks;
2651   L->getExitingBlocks(ExitingBlocks);
2652 
2653   // Form an expression for the maximum exit count possible for this loop. We
2654   // merge the max and exact information to approximate a version of
2655   // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2656   // TODO: factor this out as a version of getConstantMaxBackedgeTakenCount which
2657   // isn't guaranteed to return a constant.
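  // Illustrative example: with a constant max backedge-taken count of 100 and
  // a single computable exit count %n, the merged expression is umin(100, %n);
  // no exit can be taken on a later iteration than that bound.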
2658   SmallVector<const SCEV*, 4> ExitCounts;
2659   const SCEV *MaxConstEC = SE->getConstantMaxBackedgeTakenCount(L);
2660   if (!isa<SCEVCouldNotCompute>(MaxConstEC))
2661     ExitCounts.push_back(MaxConstEC);
2662   for (BasicBlock *ExitingBB : ExitingBlocks) {
2663     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2664     if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2665       assert(DT->dominates(ExitingBB, L->getLoopLatch()) &&
2666              "We should only have known counts for exiting blocks that "
2667              "dominate latch!");
2668       ExitCounts.push_back(ExitCount);
2669     }
2670   }
2671   if (ExitCounts.empty())
2672     return false;
2673   const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts);
2674 
2675   bool Changed = false;
2676   for (BasicBlock *ExitingBB : ExitingBlocks) {
2677     // If our exiting block exits multiple loops, we can only rewrite the
2678     // innermost one.  Otherwise, we're changing how many times the innermost
2679     // loop runs before it exits.
2680     if (LI->getLoopFor(ExitingBB) != L)
2681       continue;
2682 
2683     // Can't rewrite non-branch yet.
2684     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2685     if (!BI)
2686       continue;
2687 
2688     // If already constant, nothing to do.
2689     if (isa<Constant>(BI->getCondition()))
2690       continue;
2691 
2692     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2693     if (isa<SCEVCouldNotCompute>(ExitCount))
2694       continue;
2695 
2696     // If we know we'd exit on the first iteration, rewrite the exit to
2697     // reflect this.  This does not imply the loop must exit through this
2698     // exit; there may be an earlier one taken on the first iteration.
2699     // TODO: Given we know the backedge can't be taken, we should go ahead
2700     // and break it.  Or at least, kill all the header phis and simplify.
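    // Illustrative example (pseudo-C): in
    //   for (i = 0; i < n; i++) { ... }
    // where SCEV has proven n == 0, the 'i < n' exit has count zero, so its
    // branch is rewritten below to exit the loop unconditionally.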
2701     if (ExitCount->isZero()) {
2702       bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2703       auto *OldCond = BI->getCondition();
2704       auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) :
2705         ConstantInt::getFalse(OldCond->getType());
2706       BI->setCondition(NewCond);
2707       if (OldCond->use_empty())
2708         DeadInsts.push_back(OldCond);
2709       Changed = true;
2710       continue;
2711     }
2712 
2713     // If we end up with a pointer exit count, bail.  Note that we can end up
2714     // with a pointer exit count for one exiting block, and not for another in
2715     // the same loop.
2716     if (!ExitCount->getType()->isIntegerTy() ||
2717         !MaxExitCount->getType()->isIntegerTy())
2718       continue;
2719 
2720     Type *WiderType =
2721       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2722     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2723     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2724     assert(MaxExitCount->getType() == ExitCount->getType());
2725 
2726     // Can we prove that some other exit must be taken strictly before this
2727     // one?
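    // Illustrative example: with exits A and B, if A bounds MaxExitCount at 9
    // while B's ExitCount is 20, then MaxExitCount u< ExitCount proves B can
    // never be taken, and B's branch is folded below to stay in the loop.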
2728     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2729                                      MaxExitCount, ExitCount)) {
2730       bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2731       auto *OldCond = BI->getCondition();
2732       auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) :
2733         ConstantInt::getTrue(OldCond->getType());
2734       BI->setCondition(NewCond);
2735       if (OldCond->use_empty())
2736         DeadInsts.push_back(OldCond);
2737       Changed = true;
2738       continue;
2739     }
2740 
2741     // TODO: If we can prove that the exiting iteration is equal to the exit
2742     // count for this exit and that no previous exit opportunities exist within
2743     // the loop, then we can discharge all other exits.  (May fall out of
2744     // previous TODO.)
2745   }
2746 
2747   // Finally, see if we can rewrite our exit conditions into a loop invariant
2748   // form.  If we have a read-only loop, and we can tell that we must exit down
2749   // a path which does not need any of the values computed within the loop, we
2750   // can rewrite the loop to exit on the first iteration.  Note that this
2751   // neither a) tells us the loop must exit on the first iteration (unless
2752   // *all* exits are predicatable) nor b) tells us *which* exit might be taken.
2753   // This transformation looks a lot like a restricted form of dead loop
2754   // elimination, limited to read-only loops and without necessarily
2755   // needing to kill the loop entirely.
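  // Illustrative example (pseudo-C, assuming SCEV can compute both exit
  // counts): in a read-only loop such as
  //   for (i = 0; i != n; i++)
  //     if (i == m) break;
  // each exit test can be replaced with a loop-invariant comparison of that
  // exit's count against the exact backedge-taken count, so the exit which
  // must be taken fires on its first evaluation (see the rewrite below).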
2756   if (!LoopPredication)
2757     return Changed;
2758 
2759   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2760     return Changed;
2761 
2762   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2763   // through *explicit* control flow.  We have to eliminate the possibility of
2764   // implicit exits (see below) before we know it's truly exact.
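  // Illustrative example: a call in the loop body that may throw is an
  // implicit exit; the loop could unwind on an earlier iteration than
  // ExactBTC predicts.  The mayHaveSideEffects/mayThrow scan below rejects
  // such loops.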
2765   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2766   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2767       !SE->isLoopInvariant(ExactBTC, L) ||
2768       !isSafeToExpand(ExactBTC, *SE))
2769     return Changed;
2770 
2771   auto BadExit = [&](BasicBlock *ExitingBB) {
2772     // If our exiting block exits multiple loops, we can only rewrite the
2773     // innermost one.  Otherwise, we're changing how many times the innermost
2774     // loop runs before it exits.
2775     if (LI->getLoopFor(ExitingBB) != L)
2776       return true;
2777 
2778     // Can't rewrite non-branch yet.
2779     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2780     if (!BI)
2781       return true;
2782 
2783     // If already constant, nothing to do.
2784     if (isa<Constant>(BI->getCondition()))
2785       return true;
2786 
2787     // If the exit block has phis, we need to be able to compute the values
2788     // within the loop which contains them.  This assumes trivial LCSSA phis
2789     // have already been removed; TODO: generalize
2790     BasicBlock *ExitBlock =
2791         BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2792     if (!ExitBlock->phis().empty())
2793       return true;
2794 
2795     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2796     assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2797     if (!SE->isLoopInvariant(ExitCount, L) ||
2798         !isSafeToExpand(ExitCount, *SE))
2799       return true;
2800 
2801     return false;
2802   };
2803 
2804   // If we have any exits which can't be predicated themselves, then we can't
2805   // predicate any exit which isn't guaranteed to execute before it.  Consider
2806   // two exits (a) and (b) which would both exit on the same iteration.  If we
2807   // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2808   // we could convert a loop from exiting through (a) to one exiting through
2809   // (b).  Note that this problem exists only for exits with the same exit
2810   // count, and we could be more aggressive when exit counts are known unequal.
2811   llvm::sort(ExitingBlocks,
2812             [&](BasicBlock *A, BasicBlock *B) {
2813               // llvm::sort sorts in ascending order, so we want the inverse of
2814               // the normal dominance relation, plus a tie breaker for blocks
2815               // unordered by dominance.
2816               if (DT->properlyDominates(A, B)) return true;
2817               if (DT->properlyDominates(B, A)) return false;
2818               return A->getName() < B->getName();
2819             });
2820   // Check to see if our exiting blocks form a total order (i.e. a linear
2821   // chain of exits before the backedge).  If they don't, reasoning about
2822   // reachability is complicated and we choose not to do it for now.
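  // Illustrative counterexample: if two exiting blocks sit on opposite arms of
  // a diamond, neither dominates the other, no linear evaluation order exists,
  // and the dominance check below bails out.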
2823   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2824     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2825       return Changed;
2826 
2827   // Given our sorted total order, we know that exit[j] must be evaluated
2828   // after all exit[i] such that j > i.
2829   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2830     if (BadExit(ExitingBlocks[i])) {
2831       ExitingBlocks.resize(i);
2832       break;
2833     }
2834 
2835   if (ExitingBlocks.empty())
2836     return Changed;
2837 
2838   // We rely on not being able to reach an exiting block on a later iteration
2839   // than its statically computed exit count.  The implementation of
2840   // getExitCount currently has this invariant, but assert it here so that
2841   // breakage is obvious if this ever changes.
2842   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2843         return DT->dominates(ExitingBB, L->getLoopLatch());
2844       }));
2845 
2846   // At this point, ExitingBlocks consists of only those blocks which are
2847   // predicatable.  Given that, we know we have at least one exit we can
2848   // predicate if the loop doesn't have side effects and doesn't have any
2849   // implicit exits (because then our exact BTC isn't actually exact).
2850   // TODO: As structured, this scan is O(I^2) for loop nests.  We could
2851   // bail out for outer loops, but that seems less than ideal.  MemorySSA
2852   // can find memory writes; is that enough to cover *all* side effects of
2853   // interest here?
2854   for (BasicBlock *BB : L->blocks())
2855     for (auto &I : *BB)
2856       // TODO: use isGuaranteedToTransferExecutionToSuccessor here.
2857       if (I.mayHaveSideEffects() || I.mayThrow())
2858         return Changed;
2859 
2860   // Finally, do the actual predication for all predicatable blocks.  A couple
2861   // of notes here:
2862   // 1) We don't bother to constant fold dominated exits with identical exit
2863   //    counts; that's simply a form of CSE/equality propagation and we leave
2864   //    it for dedicated passes.
2865   // 2) We insert the comparison in the preheader; the BadExit checks above
2866   //    ensure the expanded expressions are loop invariant there.  We want
2867   //    to predicate even if the new check can't be folded away, as peeling
2868   //    or unrolling will likely reduce the cost of the check, which was
2869   //    loop-varying before this rewrite.
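  // Illustrative sketch of the emitted form (value names invented): in the
  // preheader we expand
  //   %ec  = ...this exit's count...
  //   %btc = ...exact backedge-taken count...
  // and an exiting branch 'br i1 %old, label %loop, label %exit' becomes
  //   %newcond = icmp ne iN %ec, %btc  ; eq/ne chosen by which successor
  //   br i1 %newcond, label %loop, label %exit  ; stays in the loop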
2870   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2871   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2872   Value *ExactBTCV = nullptr; // Lazily generated if needed.
2873   for (BasicBlock *ExitingBB : ExitingBlocks) {
2874     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2875 
2876     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2877     Value *NewCond;
2878     if (ExitCount == ExactBTC) {
2879       NewCond = L->contains(BI->getSuccessor(0)) ?
2880         B.getFalse() : B.getTrue();
2881     } else {
2882       Value *ECV = Rewriter.expandCodeFor(ExitCount);
2883       if (!ExactBTCV)
2884         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2885       Value *RHS = ExactBTCV;
2886       if (ECV->getType() != RHS->getType()) {
2887         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2888         ECV = B.CreateZExt(ECV, WiderTy);
2889         RHS = B.CreateZExt(RHS, WiderTy);
2890       }
2891       auto Pred = L->contains(BI->getSuccessor(0)) ?
2892         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2893       NewCond = B.CreateICmp(Pred, ECV, RHS);
2894     }
2895     Value *OldCond = BI->getCondition();
2896     BI->setCondition(NewCond);
2897     if (OldCond->use_empty())
2898       DeadInsts.push_back(OldCond);
2899     Changed = true;
2900   }
2901 
2902   return Changed;
2903 }
2904 
2905 //===----------------------------------------------------------------------===//
2906 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2907 //===----------------------------------------------------------------------===//
2908 
2909 bool IndVarSimplify::run(Loop *L) {
2910   // We need (and expect!) the incoming loop to be in LCSSA.
2911   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2912          "LCSSA required to run indvars!");
2913   bool Changed = false;
2914 
2915   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2916   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2917   //    canonicalization can be a pessimization without LSR to "clean up"
2918   //    afterwards.
2919   //  - We depend on having a preheader; in particular,
2920   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2921   //    and we're in trouble if we can't find the induction variable even when
2922   //    we've manually inserted one.
2923   //  - LFTR relies on having a single backedge.
2924   if (!L->isLoopSimplifyForm())
2925     return false;
2926 
2927   // If there are any floating-point recurrences, attempt to
2928   // transform them to use integer recurrences.
2929   Changed |= rewriteNonIntegerIVs(L);
2930 
2931 #ifndef NDEBUG
2932   // Used below for a consistency check only
2933   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2934 #endif
2935 
2936   // Create a rewriter object which we'll use to transform the code with.
2937   SCEVExpander Rewriter(*SE, DL, "indvars");
2938 #ifndef NDEBUG
2939   Rewriter.setDebugType(DEBUG_TYPE);
2940 #endif
2941 
2942   // Eliminate redundant IV users.
2943   //
2944   // Simplification works best when run before other consumers of SCEV. We
2945   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2946   // other expressions involving loop IVs have been evaluated. This helps SCEV
2947   // set no-wrap flags before normalizing sign/zero extension.
2948   Rewriter.disableCanonicalMode();
2949   Changed |= simplifyAndExtend(L, Rewriter, LI);
2950 
2951   // Check to see if we can compute the final value of any expressions
2952   // that are recurrent in the loop, and substitute the exit values from the
2953   // loop into any instructions outside of the loop that use the final values
2954   // of the current expressions.
2955   if (ReplaceExitValue != NeverRepl)
2956     Changed |= rewriteLoopExitValues(L, Rewriter);
2957 
2958   // Eliminate redundant IV cycles.
2959   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2960 
2961   Changed |= optimizeLoopExits(L, Rewriter);
2962 
2963   // If we have a trip count expression, rewrite the loop's exit condition
2964   // using it.
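  // Illustrative LFTR example (names invented): an exit test such as
  //   %cmp = icmp slt i32 %i, %len
  // is rewritten to compare the chosen IV against the expanded exit count,
  //   %exitcond = icmp ne i32 %i, %trip.count
  // which is a simpler, canonical form for later analyses.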
2965   if (!DisableLFTR) {
2966     SmallVector<BasicBlock*, 16> ExitingBlocks;
2967     L->getExitingBlocks(ExitingBlocks);
2968     for (BasicBlock *ExitingBB : ExitingBlocks) {
2969       // Can't rewrite non-branch yet.
2970       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2971         continue;
2972 
2973       // If our exiting block exits multiple loops, we can only rewrite the
2974       // innermost one.  Otherwise, we're changing how many times the innermost
2975       // loop runs before it exits.
2976       if (LI->getLoopFor(ExitingBB) != L)
2977         continue;
2978 
2979       if (!needsLFTR(L, ExitingBB))
2980         continue;
2981 
2982       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2983       if (isa<SCEVCouldNotCompute>(ExitCount))
2984         continue;
2985 
2986       // This was handled above, but as we form SCEVs, we can sometimes refine
2987       // existing ones; this allows exit counts to be folded to zero which
2988       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
2989       // until stable to handle cases like this better.
2990       if (ExitCount->isZero())
2991         continue;
2992 
2993       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2994       if (!IndVar)
2995         continue;
2996 
2997       // Avoid high cost expansions.  Note: This heuristic is questionable in
2998       // that our definition of "high cost" is not exactly principled.
2999       if (Rewriter.isHighCostExpansion(ExitCount, L))
3000         continue;
3001 
3002       // Check preconditions for proper SCEVExpander operation. SCEV does not
3003       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
3004       // any pass that uses the SCEVExpander must do it. This does not work
3005       // well for loop passes because SCEVExpander makes assumptions about
3006       // all loops, while LoopPassManager only forces the current loop to be
3007       // simplified.
3008       //
3009       // FIXME: SCEV expansion has no way to bail out, so the caller must
3010       // explicitly check any assumptions made by SCEV. Brittle.
3011       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
3012       if (!AR || AR->getLoop()->getLoopPreheader())
3013         Changed |= linearFunctionTestReplace(L, ExitingBB,
3014                                              ExitCount, IndVar,
3015                                              Rewriter);
3016     }
3017   }
3018   // Clear the rewriter cache, because values that are in the rewriter's cache
3019   // can be deleted in the loop below, causing the AssertingVH in the cache to
3020   // trigger.
3021   Rewriter.clear();
3022 
3023   // Now that we're done iterating through lists, clean up any instructions
3024   // which are now dead.
3025   while (!DeadInsts.empty())
3026     if (Instruction *Inst =
3027             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
3028       Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
3029 
3030   // The Rewriter may not be used from this point on.
3031 
3032   // Loop-invariant instructions in the preheader that aren't used in the
3033   // loop may be sunk below the loop to reduce register pressure.
3034   Changed |= sinkUnusedInvariants(L);
3035 
3036   // rewriteFirstIterationLoopExitValues does not rely on the computation of
3037   // trip count and therefore can further simplify exit values in addition to
3038   // rewriteLoopExitValues.
3039   Changed |= rewriteFirstIterationLoopExitValues(L);
3040 
3041   // Clean up dead instructions.
3042   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
3043 
3044   // Check a post-condition.
3045   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
3046          "Indvars did not preserve LCSSA!");
3047 
3048   // Verify that LFTR and any other changes have not interfered with SCEV's
3049   // ability to compute trip count.
3050 #ifndef NDEBUG
3051   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
3052     SE->forgetLoop(L);
3053     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
3054     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
3055         SE->getTypeSizeInBits(NewBECount->getType()))
3056       NewBECount = SE->getTruncateOrNoop(NewBECount,
3057                                          BackedgeTakenCount->getType());
3058     else
3059       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
3060                                                  NewBECount->getType());
3061     assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
3062   }
3063 #endif
3064 
3065   return Changed;
3066 }
3067 
3068 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
3069                                           LoopStandardAnalysisResults &AR,
3070                                           LPMUpdater &) {
3071   Function *F = L.getHeader()->getParent();
3072   const DataLayout &DL = F->getParent()->getDataLayout();
3073 
3074   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
3075   if (!IVS.run(&L))
3076     return PreservedAnalyses::all();
3077 
3078   auto PA = getLoopPassPreservedAnalyses();
3079   PA.preserveSet<CFGAnalyses>();
3080   return PA;
3081 }
3082 
3083 namespace {
3084 
3085 struct IndVarSimplifyLegacyPass : public LoopPass {
3086   static char ID; // Pass identification, replacement for typeid
3087 
3088   IndVarSimplifyLegacyPass() : LoopPass(ID) {
3089     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
3090   }
3091 
3092   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
3093     if (skipLoop(L))
3094       return false;
3095 
3096     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3097     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
3098     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3099     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
3100     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
3101     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
3102     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
3103     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
3104 
3105     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
3106     return IVS.run(L);
3107   }
3108 
3109   void getAnalysisUsage(AnalysisUsage &AU) const override {
3110     AU.setPreservesCFG();
3111     getLoopAnalysisUsage(AU);
3112   }
3113 };
3114 
3115 } // end anonymous namespace
3116 
3117 char IndVarSimplifyLegacyPass::ID = 0;
3118 
3119 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
3120                       "Induction Variable Simplification", false, false)
3121 INITIALIZE_PASS_DEPENDENCY(LoopPass)
3122 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
3123                     "Induction Variable Simplification", false, false)
3124 
3125 Pass *llvm::createIndVarSimplifyPass() {
3126   return new IndVarSimplifyLegacyPass();
3127 }
3128