xref: /llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision 6cca3ad43e6df866ec38708f2e20331a59d275b6)
//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
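//      For example (illustrative), in 'for (i = 0; i != n; ++i) {} use(i);'
//      the use of 'i' after the loop can be rewritten to 'use(n)', at which
//      point the loop computes nothing and becomes dead.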
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  bool optimizeLoopExits(Loop *L);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
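///
/// For example (illustrative IR), if SCEV expansion rewrote
///   %addr = getelementptr i32, i32* %base, i64 %iv
/// into a GEP whose pointer operand is based on a different underlying object
/// than %base, the rewrite would be rejected because the two pointer bases
/// differ.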
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
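///
/// For example, if Def reaches a PHI along two incoming blocks B1 and B2, the
/// chosen insertion point is the terminator of the nearest common dominator
/// of B1 and B2 rather than a point next to the PHI itself.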
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
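/// For example, 42.0 converts exactly to 42, whereas 0.5 (inexact) or 1e30
/// (out of i64 range) cause a 'false' return.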
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
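    // For example, 'for (double i = 0.0; i != 10.0; i += 3.0)' steps through
    // 0, 3, 6, 9, 12, ... and never lands on 10; the fp IV simply keeps
    // counting past the exit value, but an i32 IV would eventually wrap.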
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if the expansion is high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

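/// Return true if some transitive user of I inside L may have side effects,
/// i.e. I has a "hard" in-loop use that is unlikely to be optimized away
/// after exit value rewriting.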
bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L,
                                           const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
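///
/// For example (illustrative), given
///   for (i = 0, sum = 0; i != 10; ++i) sum += 4;
///   use(sum);
/// the exit value of 'sum' is the constant 40, so 'use(sum)' becomes
/// 'use(40)'; if 'sum' has no other users, the loop is left dead.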
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.  We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences).  Is there existing code we can reuse?
          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
          if (isa<SCEVCouldNotCompute>(ExitCount))
            continue;
          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
            ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
          if (isa<SCEVCouldNotCompute>(ExitValue) ||
              !SE->isLoopInvariant(ExitValue, L) ||
              !isSafeToExpand(ExitValue, *SE))
            continue;
        }

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which can not be optimized
        // away.  Avoid doing so unless we know we have a value which computes
        // the ExitValue already.  TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, queue it for deletion; deleting it
    // immediately here would invalidate iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
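///
/// For example (illustrative), in
///   while (true) { if (inv_cond) break; ... }
/// an exit PHI fed by a header PHI along the 'inv_cond' exit can take the
/// header PHI's preheader value, because that exit, if executed at all, is
/// executed on the first iteration.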
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting the exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // The LoopDeletion pass can delete a loop even when ExitingBlocks.size() > 1,
  // but we forgo the multiple-ExitingBlocks case here for simplicity.
  // TODO: If we see a test case where a loop with multiple exiting blocks can
  // be deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop-invariant value in the transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
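///
/// For example, if an i32 IV is used by both a 'sext i32 %iv to i64' and a
/// 'zext i32 %iv to i64', the first cast visited fixes IsSigned and the
/// mismatched-sign cast is simply ignored.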
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
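///
/// For example (illustrative IR), given
///   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///   %idx = sext i32 %iv to i64
/// the transform creates an i64 phi computing the same sequence and redirects
/// the users of %idx to it, eliminating the sext; any remaining narrow uses
/// receive a trunc of the wide IV instead.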
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
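///
/// For example, for 'add nsw i32 %iv, %n' where %iv is being sign extended,
/// sext(%iv + %n) == sext(%iv) + sext(%n), so if sext(%iv) is an AddRec the
/// widened sum is again an AddRec on the same loop.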
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop, with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

1346 /// If the narrow use is a compare instruction, then widen the compare
1347 /// (and possibly the other operand).  The extend operation is hoisted into the
1348 /// loop preheader as far as possible.
1349 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1350   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1351   if (!Cmp)
1352     return false;
1353 
1354   // We can legally widen the comparison in the following two cases:
1355   //
1356   //  - The signedness of the IV extension and comparison match
1357   //
1358   //  - The narrow IV is always positive (and thus its sign extension is equal
1359   //    to its zero extension).  For instance, let's say we're zero extending
1360   //    %narrow for the following use
1361   //
1362   //      icmp slt i32 %narrow, %val   ... (A)
1363   //
1364   //    and %narrow is always positive.  Then
1365   //
1366   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1367   //          == icmp slt i32 zext(%narrow), sext(%val)
1368   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1369   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1370     return false;
1371 
1372   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1373   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1374   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1375   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1376 
1377   // Widen the compare instruction.
1378   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1379   if (!InsertPt)
1380     return false;
1381   IRBuilder<> Builder(InsertPt);
1382   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1383 
1384   // Widen the other operand of the compare, if necessary.
1385   if (CastWidth < IVWidth) {
1386     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1387     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1388   }
1389   return true;
1390 }
1391 
1392 /// If the narrow use is an instruction whose two operands are the defining
1393 /// instruction of DU and a load instruction, then we have the following:
1394 /// if the load is hoisted outside the loop, we do not reach this function,
1395 /// as scalar evolution works fine in widenIVUse with variables hoisted
1396 /// outside the loop, and efficient code is subsequently generated without
1397 /// emitting truncate instructions. But when the load is not hoisted (whether
1398 /// due to a limitation in alias analysis or to a true legality constraint),
1399 /// scalar evolution cannot proceed with loop-variant values, and inefficient
1400 /// code is generated. This function handles the non-hoisted-load special
1401 /// case by making the optimization generate the same kind of code for
1402 /// hoisted and non-hoisted loads (widen the use and eliminate the extend
1403 /// instruction). This matters especially when the induction variable
1404 /// affects the addressing mode in code generation.
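     /// For illustration (hypothetical IR, assumed for exposition):
     ///   %val = load i32, i32* %p             ; not hoisted out of the loop
     ///   %add = add nsw i32 %iv, %val
     ///   %ext = sext i32 %add to i64          ; feeds an address computation
     /// Widening %add directly lets %ext be eliminated instead of forcing a
     /// trunc of the wide IV inside the loop.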
1405 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1406   Instruction *NarrowUse = DU.NarrowUse;
1407   Instruction *NarrowDef = DU.NarrowDef;
1408   Instruction *WideDef = DU.WideDef;
1409 
1410   // Handle the common case of add<nsw/nuw>
1411   const unsigned OpCode = NarrowUse->getOpcode();
1412   // Only Add/Sub/Mul instructions are supported.
1413   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1414       OpCode != Instruction::Mul)
1415     return false;
1416 
1417   // The operand that is not defined by NarrowDef of DU. Let's call it the
1418   // other operand.
1419   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1420   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1421          "bad DU");
1422 
1423   const SCEV *ExtendOperExpr = nullptr;
1424   const OverflowingBinaryOperator *OBO =
1425     cast<OverflowingBinaryOperator>(NarrowUse);
1426   ExtendKind ExtKind = getExtendKind(NarrowDef);
1427   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1428     ExtendOperExpr = SE->getSignExtendExpr(
1429       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1430   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1431     ExtendOperExpr = SE->getZeroExtendExpr(
1432       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1433   else
1434     return false;
1435 
1436   // We are interested in the other operand being a load instruction.
1437   // But we should look into relaxing this restriction later on.
1438   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1439   if (I && I->getOpcode() != Instruction::Load)
1440     return false;
1441 
1442   // Verify that the defining operand is an AddRec.
1443   const SCEV *Op1 = SE->getSCEV(WideDef);
1444   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1445   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1446     return false;
1447   // Verify that the other operand is an extend.
1448   if (ExtKind == SignExtended) {
1449     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1450       return false;
1451   } else {
1452     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1453       return false;
1454   }
1455 
1456   if (ExtKind == SignExtended) {
1457     for (Use &U : NarrowUse->uses()) {
1458       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1459       if (!User || User->getType() != WideType)
1460         return false;
1461     }
1462   } else { // ExtKind == ZeroExtended
1463     for (Use &U : NarrowUse->uses()) {
1464       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1465       if (!User || User->getType() != WideType)
1466         return false;
1467     }
1468   }
1469 
1470   return true;
1471 }
1472 
1473 /// Special case for widening with variant loads (see
1474 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1475 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1476   Instruction *NarrowUse = DU.NarrowUse;
1477   Instruction *NarrowDef = DU.NarrowDef;
1478   Instruction *WideDef = DU.WideDef;
1479 
1480   ExtendKind ExtKind = getExtendKind(NarrowDef);
1481 
1482   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1483 
1484   // Generating a widening use instruction.
1485   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1486                    ? WideDef
1487                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1488                                       ExtKind, NarrowUse);
1489   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1490                    ? WideDef
1491                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1492                                       ExtKind, NarrowUse);
1493 
1494   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1495   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1496                                         NarrowBO->getName());
1497   IRBuilder<> Builder(NarrowUse);
1498   Builder.Insert(WideBO);
1499   WideBO->copyIRFlags(NarrowBO);
1500 
1501   if (ExtKind == SignExtended)
1502     ExtendKindMap[NarrowUse] = SignExtended;
1503   else
1504     ExtendKindMap[NarrowUse] = ZeroExtended;
1505 
1506   // Update the Use.
1507   if (ExtKind == SignExtended) {
1508     for (Use &U : NarrowUse->uses()) {
1509       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1510       if (User && User->getType() == WideType) {
1511         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1512                           << *WideBO << "\n");
1513         ++NumElimExt;
1514         User->replaceAllUsesWith(WideBO);
1515         DeadInsts.emplace_back(User);
1516       }
1517     }
1518   } else { // ExtKind == ZeroExtended
1519     for (Use &U : NarrowUse->uses()) {
1520       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1521       if (User && User->getType() == WideType) {
1522         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1523                           << *WideBO << "\n");
1524         ++NumElimExt;
1525         User->replaceAllUsesWith(WideBO);
1526         DeadInsts.emplace_back(User);
1527       }
1528     }
1529   }
1530 }
1531 
1532 /// Determine whether an individual user of the narrow IV can be widened. If so,
1533 /// return the wide clone of the user.
1534 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1535   assert(ExtendKindMap.count(DU.NarrowDef) &&
1536          "Should already know the kind of extension used to widen NarrowDef");
1537 
1538   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1539   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1540     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1541       // For LCSSA phis, sink the truncate outside the loop.
1542       // After SimplifyCFG most loop exit targets have a single predecessor.
1543       // Otherwise fall back to a truncate within the loop.
1544       if (UsePhi->getNumOperands() != 1)
1545         truncateIVUse(DU, DT, LI);
1546       else {
1547         // Widening the PHI requires us to insert a trunc.  The logical place
1548         // for this trunc is in the same BB as the PHI.  This is not possible if
1549         // the BB is terminated by a catchswitch.
1550         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1551           return nullptr;
1552 
1553         PHINode *WidePhi =
1554           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1555                           UsePhi);
1556         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1557         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1558         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1559         UsePhi->replaceAllUsesWith(Trunc);
1560         DeadInsts.emplace_back(UsePhi);
1561         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1562                           << *WidePhi << "\n");
1563       }
1564       return nullptr;
1565     }
1566   }
1567 
1568   // This narrow use can be widened by a sext if it's non-negative or its narrow
1569   // def was widened by a sext. Same for zext.
1570   auto canWidenBySExt = [&]() {
1571     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1572   };
1573   auto canWidenByZExt = [&]() {
1574     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1575   };
1576 
1577   // Our raison d'etre! Eliminate sign and zero extension.
1578   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1579       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1580     Value *NewDef = DU.WideDef;
1581     if (DU.NarrowUse->getType() != WideType) {
1582       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1583       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1584       if (CastWidth < IVWidth) {
1585         // The cast isn't as wide as the IV, so insert a Trunc.
1586         IRBuilder<> Builder(DU.NarrowUse);
1587         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1588       }
1589       else {
1590         // A wider extend was hidden behind a narrower one. This may induce
1591         // another round of IV widening in which the intermediate IV becomes
1592         // dead. It should be very rare.
1593         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1594                           << " not wide enough to subsume " << *DU.NarrowUse
1595                           << "\n");
1596         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1597         NewDef = DU.NarrowUse;
1598       }
1599     }
1600     if (NewDef != DU.NarrowUse) {
1601       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1602                         << " replaced by " << *DU.WideDef << "\n");
1603       ++NumElimExt;
1604       DU.NarrowUse->replaceAllUsesWith(NewDef);
1605       DeadInsts.emplace_back(DU.NarrowUse);
1606     }
1607     // Now that the extend is gone, we want to expose its uses for potential
1608     // further simplification. We don't need to directly inform SimplifyIVUsers
1609     // of the new users, because their parent IV will be processed later as a
1610     // new loop phi. If we preserved IVUsers analysis, we would also want to
1611     // push the uses of WideDef here.
1612 
1613     // No further widening is needed. The deceased [sz]ext had done it for us.
1614     return nullptr;
1615   }
1616 
1617   // Does this user itself evaluate to a recurrence after widening?
1618   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1619   if (!WideAddRec.first)
1620     WideAddRec = getWideRecurrence(DU);
1621 
1622   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1623   if (!WideAddRec.first) {
1624     // If use is a loop condition, try to promote the condition instead of
1625     // truncating the IV first.
1626     if (widenLoopCompare(DU))
1627       return nullptr;
1628 
1629     // We are about to generate a truncate instruction that may hurt
1630     // performance because the scalar evolution expression computed earlier
1631     // in WideAddRec.first does not indicate a polynomial induction expression.
1632     // In that case, look at the operands of the use instruction to determine
1633     // if we can still widen the use instead of truncating its operand.
1634     if (widenWithVariantLoadUse(DU)) {
1635       widenWithVariantLoadUseCodegen(DU);
1636       return nullptr;
1637     }
1638 
1639     // This user does not evaluate to a recurrence after widening, so don't
1640     // follow it. Instead insert a Trunc to kill off the original use,
1641     // eventually isolating the original narrow IV so it can be removed.
1642     truncateIVUse(DU, DT, LI);
1643     return nullptr;
1644   }
1645   // Assume block terminators cannot evaluate to a recurrence. We can't
1646   // insert a Trunc after a terminator if there happens to be a critical edge.
1647   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1648          "SCEV is not expected to evaluate a block terminator");
1649 
1650   // Reuse the IV increment that SCEVExpander created as long as it dominates
1651   // NarrowUse.
1652   Instruction *WideUse = nullptr;
1653   if (WideAddRec.first == WideIncExpr &&
1654       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1655     WideUse = WideInc;
1656   else {
1657     WideUse = cloneIVUser(DU, WideAddRec.first);
1658     if (!WideUse)
1659       return nullptr;
1660   }
1661   // Evaluation of WideAddRec ensured that the narrow expression could be
1662   // extended outside the loop without overflow. This suggests that the wide use
1663   // evaluates to the same expression as the extended narrow use, but doesn't
1664   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1665   // where it fails, we simply throw away the newly created wide use.
1666   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1667     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1668                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1669                       << "\n");
1670     DeadInsts.emplace_back(WideUse);
1671     return nullptr;
1672   }
1673 
1674   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1675   // Returning WideUse pushes it on the worklist.
1676   return WideUse;
1677 }
1678 
1679 /// Add eligible users of NarrowDef to NarrowIVUsers.
1680 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1681   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1682   bool NonNegativeDef =
1683       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1684                            SE->getConstant(NarrowSCEV->getType(), 0));
1685   for (User *U : NarrowDef->users()) {
1686     Instruction *NarrowUser = cast<Instruction>(U);
1687 
1688     // Handle data flow merges and bizarre phi cycles.
1689     if (!Widened.insert(NarrowUser).second)
1690       continue;
1691 
1692     bool NonNegativeUse = false;
1693     if (!NonNegativeDef) {
1694       // We might have control-dependent range information for this context.
1695       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1696         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1697     }
1698 
1699     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1700                                NonNegativeDef || NonNegativeUse);
1701   }
1702 }
1703 
1704 /// Process a single induction variable. First use the SCEVExpander to create a
1705 /// wide induction variable that evaluates to the same recurrence as the
1706 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1707 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1708 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1709 ///
1710 /// It would be simpler to delete uses as they are processed, but we must avoid
1711 /// invalidating SCEV expressions.
1712 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1713   // Is this phi an induction variable?
1714   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1715   if (!AddRec)
1716     return nullptr;
1717 
1718   // Widen the induction variable expression.
1719   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1720                                ? SE->getSignExtendExpr(AddRec, WideType)
1721                                : SE->getZeroExtendExpr(AddRec, WideType);
1722 
1723   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1724          "Expect the new IV expression to preserve its type");
1725 
1726   // Can the IV be extended outside the loop without overflow?
1727   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1728   if (!AddRec || AddRec->getLoop() != L)
1729     return nullptr;
1730 
1731   // An AddRec must have loop-invariant operands. Since this AddRec is
1732   // materialized by a loop header phi, the expression cannot have any post-loop
1733   // operands, so they must dominate the loop header.
1734   assert(
1735       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1736       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1737       "Loop header phi recurrence inputs do not dominate the loop");
1738 
1739   // Iterate over IV uses (including transitive ones) looking for IV increments
1740   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1741   // the increment, calculate control-dependent range information based on
1742   // dominating conditions inside of the loop (e.g. a range check inside of the
1743   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1744   //
1745   // Control-dependent range information is later used to prove that a narrow
1746   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1747   // this on demand because when pushNarrowIVUsers needs this information some
1748   // of the dominating conditions might be already widened.
1749   if (UsePostIncrementRanges)
1750     calculatePostIncRanges(OrigPhi);
1751 
1752   // The rewriter provides a value for the desired IV expression. This may
1753   // either find an existing phi or materialize a new one. Either way, we
1754   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1755   // of the phi-SCC dominates the loop entry.
1756   Instruction *InsertPt = &L->getHeader()->front();
1757   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1758 
1759   // Remembering the WideIV increment generated by SCEVExpander allows
1760   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1761   // employ a general reuse mechanism because the call above is the only call to
1762   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1763   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1764     WideInc =
1765       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1766     WideIncExpr = SE->getSCEV(WideInc);
1767     // Propagate the debug location associated with the original loop increment
1768     // to the new (widened) increment.
1769     auto *OrigInc =
1770       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1771     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1772   }
1773 
1774   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1775   ++NumWidened;
1776 
1777   // Traverse the def-use chain using a worklist starting at the original IV.
1778   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1779 
1780   Widened.insert(OrigPhi);
1781   pushNarrowIVUsers(OrigPhi, WidePhi);
1782 
1783   while (!NarrowIVUsers.empty()) {
1784     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1785 
1786     // Process a def-use edge. This may replace the use, so don't hold a
1787     // use_iterator across it.
1788     Instruction *WideUse = widenIVUse(DU, Rewriter);
1789 
1790     // Follow all def-use edges from the previous narrow use.
1791     if (WideUse)
1792       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1793 
1794     // widenIVUse may have removed the def-use edge.
1795     if (DU.NarrowDef->use_empty())
1796       DeadInsts.emplace_back(DU.NarrowDef);
1797   }
1798 
1799   // Attach any debug information to the new PHI. Since OrigPhi and WidePhi
1800   // evaluate the same recurrence, we can just copy the debug info over.
1801   SmallVector<DbgValueInst *, 1> DbgValues;
1802   llvm::findDbgValues(DbgValues, OrigPhi);
1803   auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
1804                                      ValueAsMetadata::get(WidePhi));
1805   for (auto &DbgValue : DbgValues)
1806     DbgValue->setOperand(0, MDPhi);
1807   return WidePhi;
1808 }
1809 
1810 /// Calculates control-dependent range for the given def at the given context
1811 /// by looking at dominating conditions inside of the loop.
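     /// For illustration (hypothetical IR, assumed for exposition): for the def
     ///   %iv.next = add nsw i32 %iv, 1
     /// used under a dominating check 'icmp slt i32 %iv, %n', the allowed range
     /// of %iv is derived from the signed range of %n and then shifted by 1
     /// with no signed wrap to bound %iv.next at this use.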
1812 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1813                                     Instruction *NarrowUser) {
1814   using namespace llvm::PatternMatch;
1815 
1816   Value *NarrowDefLHS;
1817   const APInt *NarrowDefRHS;
1818   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1819                                  m_APInt(NarrowDefRHS))) ||
1820       !NarrowDefRHS->isNonNegative())
1821     return;
1822 
1823   auto UpdateRangeFromCondition = [&] (Value *Condition,
1824                                        bool TrueDest) {
1825     CmpInst::Predicate Pred;
1826     Value *CmpRHS;
1827     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1828                                  m_Value(CmpRHS))))
1829       return;
1830 
1831     CmpInst::Predicate P =
1832             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1833 
1834     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1835     auto CmpConstrainedLHSRange =
1836             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1837     auto NarrowDefRange =
1838             CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1839 
1840     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1841   };
1842 
1843   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1844     if (!HasGuards)
1845       return;
1846 
1847     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1848                                      Ctx->getParent()->rend())) {
1849       Value *C = nullptr;
1850       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1851         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1852     }
1853   };
1854 
1855   UpdateRangeFromGuards(NarrowUser);
1856 
1857   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1858   // If NarrowUserBB is statically unreachable, asking dominator queries may
1859   // yield surprising results. (e.g. the block may not have a dom tree node)
1860   if (!DT->isReachableFromEntry(NarrowUserBB))
1861     return;
1862 
1863   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1864        L->contains(DTB->getBlock());
1865        DTB = DTB->getIDom()) {
1866     auto *BB = DTB->getBlock();
1867     auto *TI = BB->getTerminator();
1868     UpdateRangeFromGuards(TI);
1869 
1870     auto *BI = dyn_cast<BranchInst>(TI);
1871     if (!BI || !BI->isConditional())
1872       continue;
1873 
1874     auto *TrueSuccessor = BI->getSuccessor(0);
1875     auto *FalseSuccessor = BI->getSuccessor(1);
1876 
1877     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1878       return BBE.isSingleEdge() &&
1879              DT->dominates(BBE, NarrowUser->getParent());
1880     };
1881 
1882     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1883       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1884 
1885     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1886       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1887   }
1888 }
1889 
1890 /// Calculates PostIncRangeInfos map for the given IV
1891 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1892   SmallPtrSet<Instruction *, 16> Visited;
1893   SmallVector<Instruction *, 6> Worklist;
1894   Worklist.push_back(OrigPhi);
1895   Visited.insert(OrigPhi);
1896 
1897   while (!Worklist.empty()) {
1898     Instruction *NarrowDef = Worklist.pop_back_val();
1899 
1900     for (Use &U : NarrowDef->uses()) {
1901       auto *NarrowUser = cast<Instruction>(U.getUser());
1902 
1903       // Don't go looking outside the current loop.
1904       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1905       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1906         continue;
1907 
1908       if (!Visited.insert(NarrowUser).second)
1909         continue;
1910 
1911       Worklist.push_back(NarrowUser);
1912 
1913       calculatePostIncRange(NarrowDef, NarrowUser);
1914     }
1915   }
1916 }
1917 
1918 //===----------------------------------------------------------------------===//
1919 //  Live IV Reduction - Minimize IVs live across the loop.
1920 //===----------------------------------------------------------------------===//
1921 
1922 //===----------------------------------------------------------------------===//
1923 //  Simplification of IV users based on SCEV evaluation.
1924 //===----------------------------------------------------------------------===//
1925 
1926 namespace {
1927 
1928 class IndVarSimplifyVisitor : public IVVisitor {
1929   ScalarEvolution *SE;
1930   const TargetTransformInfo *TTI;
1931   PHINode *IVPhi;
1932 
1933 public:
1934   WideIVInfo WI;
1935 
1936   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1937                         const TargetTransformInfo *TTI,
1938                         const DominatorTree *DTree)
1939     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1940     DT = DTree;
1941     WI.NarrowIV = IVPhi;
1942   }
1943 
1944   // Implement the interface used by simplifyUsersOfIV.
1945   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1946 };
1947 
1948 } // end anonymous namespace
1949 
1950 /// Iteratively perform simplification on a worklist of IV users. Each
1951 /// successive simplification may push more users which may themselves be
1952 /// candidates for simplification.
1953 ///
1954 /// Sign/Zero extend elimination is interleaved with IV simplification.
1955 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1956                                        SCEVExpander &Rewriter,
1957                                        LoopInfo *LI) {
1958   SmallVector<WideIVInfo, 8> WideIVs;
1959 
1960   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1961           Intrinsic::getName(Intrinsic::experimental_guard));
1962   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1963 
1964   SmallVector<PHINode*, 8> LoopPhis;
1965   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1966     LoopPhis.push_back(cast<PHINode>(I));
1967   }
1968   // Each round of simplification iterates through the SimplifyIVUsers worklist
1969   // for all current phis, then determines whether any IVs can be
1970   // widened. Widening adds new phis to LoopPhis, inducing another round of
1971   // simplification on the wide IVs.
1972   bool Changed = false;
1973   while (!LoopPhis.empty()) {
1974     // Evaluate as many IV expressions as possible before widening any IVs. This
1975     // forces SCEV to set no-wrap flags before evaluating sign/zero
1976     // extension. The first time SCEV attempts to normalize sign/zero extension,
1977     // the result becomes final. So for the most predictable results, we delay
1978     // evaluation of sign/zero extension until needed, and avoid running
1979     // other SCEV based analysis prior to simplifyAndExtend.
1980     do {
1981       PHINode *CurrIV = LoopPhis.pop_back_val();
1982 
1983       // Information about sign/zero extensions of CurrIV.
1984       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1985 
1986       Changed |=
1987           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1988 
1989       if (Visitor.WI.WidestNativeType) {
1990         WideIVs.push_back(Visitor.WI);
1991       }
1992     } while (!LoopPhis.empty());
1993 
1994     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1995       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1996       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1997         Changed = true;
1998         LoopPhis.push_back(WidePhi);
1999       }
2000     }
2001   }
2002   return Changed;
2003 }
2004 
2005 //===----------------------------------------------------------------------===//
2006 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
2007 //===----------------------------------------------------------------------===//
2008 
2009 /// Given a Value which is hoped to be part of an add recurrence in the given
2010 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
2011 /// that this is less general than SCEV's AddRec checking.
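     /// For illustration: given '%i.next = add i32 %i, %step' where %i is a phi
     /// in the loop header and %step is loop invariant, this returns %i; the
     /// commuted operand order is also accepted for add/sub.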
2012 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
2013   Instruction *IncI = dyn_cast<Instruction>(IncV);
2014   if (!IncI)
2015     return nullptr;
2016 
2017   switch (IncI->getOpcode()) {
2018   case Instruction::Add:
2019   case Instruction::Sub:
2020     break;
2021   case Instruction::GetElementPtr:
2022     // An IV counter must preserve its type.
2023     if (IncI->getNumOperands() == 2)
2024       break;
2025     LLVM_FALLTHROUGH;
2026   default:
2027     return nullptr;
2028   }
2029 
2030   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2031   if (Phi && Phi->getParent() == L->getHeader()) {
2032     if (L->isLoopInvariant(IncI->getOperand(1)))
2033       return Phi;
2034     return nullptr;
2035   }
2036   if (IncI->getOpcode() == Instruction::GetElementPtr)
2037     return nullptr;
2038 
2039   // Allow add/sub to be commuted.
2040   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2041   if (Phi && Phi->getParent() == L->getHeader()) {
2042     if (L->isLoopInvariant(IncI->getOperand(0)))
2043       return Phi;
2044   }
2045   return nullptr;
2046 }
2047 
2048 /// Whether the current loop exit test is based on this value.  Currently this
2049 /// is limited to a direct use in the loop condition.
2050 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2051   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2052   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2053   // TODO: Allow non-icmp loop test.
2054   if (!ICmp)
2055     return false;
2056 
2057   // TODO: Allow indirect use.
2058   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2059 }
2060 
2061 /// linearFunctionTestReplace policy. Return true unless we can show that the
2062 /// current exit test is already sufficiently canonical.
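     /// For illustration: an exit test 'icmp ne i32 %i.next, %n' whose LHS
     /// increments a header phi by a loop-invariant step is already canonical
     /// and needs no LFTR; a test using slt/ult, or one based on a value that
     /// is not a simple counter, does.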
2063 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2064   assert(L->getLoopLatch() && "Must be in simplified form");
2065 
2066   // Avoid converting a constant or loop invariant test back to a runtime
2067   // test.  This is critical for when SCEV's cached ExitCount is less precise
2068   // than the current IR (such as after we've proven a particular exit is
2069   // actually dead and thus the BE count never reaches our ExitCount.)
2070   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2071   if (L->isLoopInvariant(BI->getCondition()))
2072     return false;
2073 
2074   // Do LFTR to simplify the exit condition to an ICMP.
2075   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2076   if (!Cond)
2077     return true;
2078 
2079   // Do LFTR to simplify the exit ICMP to EQ/NE
2080   ICmpInst::Predicate Pred = Cond->getPredicate();
2081   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2082     return true;
2083 
2084   // Look for a loop invariant RHS
2085   Value *LHS = Cond->getOperand(0);
2086   Value *RHS = Cond->getOperand(1);
2087   if (!L->isLoopInvariant(RHS)) {
2088     if (!L->isLoopInvariant(LHS))
2089       return true;
2090     std::swap(LHS, RHS);
2091   }
2092   // Look for a simple IV counter LHS
2093   PHINode *Phi = dyn_cast<PHINode>(LHS);
2094   if (!Phi)
2095     Phi = getLoopPhiForCounter(LHS, L);
2096 
2097   if (!Phi)
2098     return true;
2099 
2100   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2101   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2102   if (Idx < 0)
2103     return true;
2104 
2105   // Do LFTR if the exit condition's IV is *not* a simple counter.
2106   Value *IncV = Phi->getIncomingValue(Idx);
2107   return Phi != getLoopPhiForCounter(IncV, L);
2108 }
2109 
2110 /// Return true if undefined behavior would provably be executed on the path to
2111 /// OnPathTo if Root produced a poison result.  Note that this doesn't say
2112 /// anything about whether OnPathTo is actually executed or whether Root is
2113 /// actually poison.  This can be used to assess whether a new use of Root can
2114 /// be added at a location which is control equivalent with OnPathTo (such as
2115 /// immediately before it) without introducing UB which didn't previously
2116 /// exist.  Note that a false result conveys no information.
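     /// For illustration (assumed scenario): if Root feeds, through
     /// instructions known to propagate poison, a memory access that dominates
     /// OnPathTo, then a poison Root guarantees UB before OnPathTo is reached
     /// and this returns true.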
2117 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2118                                           Instruction *OnPathTo,
2119                                           DominatorTree *DT) {
2120   // Basic approach is to assume Root is poison, propagate poison forward
2121   // through all users we can easily track, and then check whether any of those
2122   // users provably trigger UB and must execute before our exiting block might
2123   // exit.
2124 
2125   // The set of all recursive users we've visited (which are assumed to all be
2126   // poison because of said visit)
2127   SmallSet<const Value *, 16> KnownPoison;
2128   SmallVector<const Instruction*, 16> Worklist;
2129   Worklist.push_back(Root);
2130   while (!Worklist.empty()) {
2131     const Instruction *I = Worklist.pop_back_val();
2132 
2133     // If we know this must trigger UB on a path leading our target.
2134     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2135       return true;
2136 
2137     // If we can't analyze propagation through this instruction, just skip it
2138     // and transitive users.  Safe as false is a conservative result.
2139     if (!propagatesFullPoison(I) && I != Root)
2140       continue;
2141 
2142     if (KnownPoison.insert(I).second)
2143       for (const User *User : I->users())
2144         Worklist.push_back(cast<Instruction>(User));
2145   }
2146 
2147   // Might be non-UB, or might have a path we couldn't prove must execute on
2148   // way to exiting bb.
2149   return false;
2150 }
2151 
2152 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2153 /// down to checking that all operands are constant and listing instructions
2154 /// that may hide undef.
2155 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2156                                unsigned Depth) {
2157   if (isa<Constant>(V))
2158     return !isa<UndefValue>(V);
2159 
2160   if (Depth >= 6)
2161     return false;
2162 
2163   // Conservatively handle non-constant non-instructions. For example, Arguments
2164   // may be undef.
2165   Instruction *I = dyn_cast<Instruction>(V);
2166   if (!I)
2167     return false;
2168 
2169   // Load and return values may be undef.
2170   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2171     return false;
2172 
2173   // Optimistically handle other instructions.
2174   for (Value *Op : I->operands()) {
2175     if (!Visited.insert(Op).second)
2176       continue;
2177     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2178       return false;
2179   }
2180   return true;
2181 }
2182 
2183 /// Return true if the given value is concrete. We must prove that undef can
2184 /// never reach it.
2185 ///
2186 /// TODO: If we decide that this is a good approach to checking for undef, we
2187 /// may factor it into a common location.
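     /// For illustration: 'add i32 %x, 1' is concrete only if %x provably is;
     /// loads, call/invoke results, and Arguments are conservatively treated
     /// as possibly undef.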
2188 static bool hasConcreteDef(Value *V) {
2189   SmallPtrSet<Value*, 8> Visited;
2190   Visited.insert(V);
2191   return hasConcreteDefImpl(V, Visited, 0);
2192 }
2193 
2194 /// Return true if this IV has no uses other than the (soon to be rewritten)
2195 /// loop exit test.
2196 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2197   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2198   Value *IncV = Phi->getIncomingValue(LatchIdx);
2199 
2200   for (User *U : Phi->users())
2201     if (U != Cond && U != IncV) return false;
2202 
2203   for (User *U : IncV->users())
2204     if (U != Cond && U != Phi) return false;
2205   return true;
2206 }
2207 
2208 /// Return true if the given phi is a "counter" in L.  A counter is an
2209 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2210 /// step of 1.  Note that L must have exactly one latch.
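     /// For illustration: a header phi whose SCEV is the affine AddRec
     /// {%start,+,1} over L, and whose latch incoming value maps back to the
     /// same phi, qualifies; a phi stepping by 2 or by a variable amount does
     /// not.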
2211 static bool isLoopCounter(PHINode* Phi, Loop *L,
2212                           ScalarEvolution *SE) {
2213   assert(Phi->getParent() == L->getHeader());
2214   assert(L->getLoopLatch());
2215 
2216   if (!SE->isSCEVable(Phi->getType()))
2217     return false;
2218 
2219   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2220   if (!AR || AR->getLoop() != L || !AR->isAffine())
2221     return false;
2222 
2223   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2224   if (!Step || !Step->isOne())
2225     return false;
2226 
2227   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2228   Value *IncV = Phi->getIncomingValue(LatchIdx);
2229   return (getLoopPhiForCounter(IncV, L) == Phi);
2230 }
2231 
2232 /// Search the loop header for a loop counter (an add rec w/step of one)
2233 /// suitable for use by LFTR.  If multiple counters are available, select the
2234 /// "best" one based on profitability heuristics.
2235 ///
2236 /// BECount may be an i8* pointer type. The pointer difference is already
2237 /// valid count without scaling the address stride, so it remains a pointer
2238 /// expression as far as SCEV is concerned.
2239 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2240                                 const SCEV *BECount,
2241                                 ScalarEvolution *SE, DominatorTree *DT) {
2242   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2243 
2244   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2245 
2246   // Loop over all of the PHI nodes, looking for a simple counter.
2247   PHINode *BestPhi = nullptr;
2248   const SCEV *BestInit = nullptr;
2249   BasicBlock *LatchBlock = L->getLoopLatch();
2250   assert(LatchBlock && "Must be in simplified form");
2251   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2252 
2253   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2254     PHINode *Phi = cast<PHINode>(I);
2255     if (!isLoopCounter(Phi, L, SE))
2256       continue;
2257 
2258     // Avoid comparing an integer IV against a pointer Limit.
2259     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2260       continue;
2261 
2262     const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2263 
2264     // AR may be a pointer type, while BECount is an integer type.
2265     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2266     // AR may not be a narrower type, or we may never exit.
2267     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2268     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2269       continue;
2270 
2271     // Avoid reusing a potentially undef value to compute other values that may
2272     // have originally had a concrete definition.
2273     if (!hasConcreteDef(Phi)) {
2274       // We explicitly allow unknown phis as long as they are already used by
2275       // the loop exit test.  This is legal since performing LFTR could not
2276       // increase the number of undef users.
2277       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
2278       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
2279           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
2280         continue;
2281     }
2282 
2283     // Avoid introducing undefined behavior due to poison which didn't exist in
2284     // the original program.  (Annoyingly, the rules for poison and undef
2285     // propagation are distinct, so this does NOT cover the undef case above.)
2286     // We have to ensure that we don't introduce UB by introducing a use on an
2287     // iteration where said IV produces poison.  Our strategy here differs for
2288     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
2289     // see code in linearFunctionTestReplace.  For pointers, we restrict
2290     // transforms as there is no good way to reinfer inbounds once lost.
2291     if (!Phi->getType()->isIntegerTy() &&
2292         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2293       continue;
2294 
2295     const SCEV *Init = AR->getStart();
2296 
2297     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2298       // Don't force a live loop counter if another IV can be used.
2299       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2300         continue;
2301 
2302       // Prefer to count from zero. This is a more "canonical" counter form. It
2303       // also prefers integer to pointer IVs.
2304       if (BestInit->isZero() != Init->isZero()) {
2305         if (BestInit->isZero())
2306           continue;
2307       }
2308       // If two IVs both count from zero or both count from nonzero then the
2309       // narrower is likely a dead phi that has been widened. Use the wider phi
2310       // to allow the other to be eliminated.
2311       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2312         continue;
2313     }
2314     BestPhi = Phi;
2315     BestInit = Init;
2316   }
2317   return BestPhi;
2318 }
2319 
2320 /// Insert an IR expression which computes the value held by the IV IndVar
2321 /// (which must be a loop counter w/unit stride) after the backedge of loop L
2322 /// is taken ExitCount times.
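     /// For illustration (hypothetical numbers): with IVInit = %base and
     /// ExitCount = 100, the pre-increment limit is %base + 100 and the
     /// post-increment limit is %base + 101; 2's complement wrap in this sum
     /// is acceptable because the rewritten exit test is eq/ne.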
2323 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2324                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
2325                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2326   assert(isLoopCounter(IndVar, L, SE));
2327   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2328   const SCEV *IVInit = AR->getStart();
2329 
2330   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2331   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2332   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2333   // the existing GEPs whenever possible.
2334   if (IndVar->getType()->isPointerTy() &&
2335       !ExitCount->getType()->isPointerTy()) {
2336     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2337     // signed value. ExitCount on the other hand represents the loop trip count,
2338     // which is an unsigned value. FindLoopCounter only allows induction
2339     // variables that have a positive unit stride. This means we don't
2340     // have to handle the case of negative offsets (yet) and just need to zero
2341     // extend ExitCount.
2342     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2343     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2344     if (UsePostInc)
2345       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2346 
2347     // Expand the code for the iteration count.
2348     assert(SE->isLoopInvariant(IVOffset, L) &&
2349            "Computed iteration count is not loop invariant!");
2350 
2351     // We could handle pointer IVs other than i8*, but we need to compensate for
2352     // gep index scaling.
2353     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2354                              cast<PointerType>(IndVar->getType())
2355                                  ->getElementType())->isOne() &&
2356            "unit stride pointer IV must be i8*");
2357 
2358     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2359     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2360     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2361   } else {
2362     // In any other case, convert both IVInit and ExitCount to integers before
2363     // comparing. This may result in SCEV expansion of pointers, but in practice
2364     // SCEV will fold the pointer arithmetic away as such:
2365     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2366     //
2367     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2368     // for simple memset-style loops.
2369     //
2370     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2371     // were generated on top of case #2, which is not expected.
2372 
2373     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2374     // For unit stride, IVCount = Start + ExitCount with 2's complement
2375     // overflow.
2376 
2377     // For integer IVs, truncate the IV before computing IVInit + BECount,
2378     // unless we know a priori that the limit must be a constant when evaluated
2379     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2380     // of the IV in the loop over a (potentially) expensive expansion of the
2381     // widened exit count add(zext(add)) expression.
2382     if (SE->getTypeSizeInBits(IVInit->getType())
2383         > SE->getTypeSizeInBits(ExitCount->getType())) {
2384       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2385         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2386       else
2387         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2388     }
2389 
2390     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2391 
2392     if (UsePostInc)
2393       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2394 
2395     // Expand the code for the iteration count.
2396     assert(SE->isLoopInvariant(IVLimit, L) &&
2397            "Computed iteration count is not loop invariant!");
2398     // Ensure that we generate the same type as IndVar, or a smaller integer
2399     // type. In the presence of null pointer values, we have an integer type
2400     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2401     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2402       IndVar->getType() : ExitCount->getType();
2403     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2404     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2405   }
2406 }
2407 
2408 /// This method rewrites the exit condition of the loop to be a canonical !=
2409 /// comparison against the incremented loop induction variable.  This pass is
2410 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2411 /// determine a loop-invariant trip count of the loop, which is actually a much
2412 /// broader range than just linear tests.
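     /// For illustration (hypothetical IR): a unit-stride counter starting at 0
     /// that exits on 'icmp slt i32 %i, %n' is rewritten, assuming SCEV proves
     /// the trip count is %n, to exit on 'icmp ne i32 %i.next, %n'.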
2413 bool IndVarSimplify::
2414 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2415                           const SCEV *ExitCount,
2416                           PHINode *IndVar, SCEVExpander &Rewriter) {
2417   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2418   assert(isLoopCounter(IndVar, L, SE));
2419   Instruction * const IncVar =
2420     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2421 
2422   // Initialize CmpIndVar to the preincremented IV.
2423   Value *CmpIndVar = IndVar;
2424   bool UsePostInc = false;
2425 
2426   // If the exiting block is the same as the backedge block, we prefer to
2427   // compare against the post-incremented value, otherwise we must compare
2428   // against the preincremented value.
2429   if (ExitingBB == L->getLoopLatch()) {
2430     // For pointer IVs, we chose not to strip inbounds, which requires us not
2431     // to add a potentially UB-introducing use.  We need to either a) show
2432     // the loop test we're modifying is already in post-inc form, or b) show
2433     // that adding a use must not introduce UB.
2434     bool SafeToPostInc =
2435         IndVar->getType()->isIntegerTy() ||
2436         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2437         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2438     if (SafeToPostInc) {
2439       UsePostInc = true;
2440       CmpIndVar = IncVar;
2441     }
2442   }
2443 
2444   // It may be necessary to drop nowrap flags on the incrementing instruction
2445   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2446   // case the increment might have previously been poison on the last iteration
2447   // only) or if LFTR switches to a different IV that was previously dynamically
2448   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2449   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2450   // check), because the pre-inc addrec flags may be adopted from the original
2451   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2452   // TODO: This handling is inaccurate for one case: If we switch to a
2453   // dynamically dead IV that wraps on the first loop iteration only, which is
2454   // not covered by the post-inc addrec. (If the new IV was not dynamically
2455   // dead, it could not be poison on the first iteration in the first place.)
2456   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2457     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2458     if (BO->hasNoUnsignedWrap())
2459       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2460     if (BO->hasNoSignedWrap())
2461       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2462   }
2463 
2464   Value *ExitCnt = genLoopLimit(
2465       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2466   assert(ExitCnt->getType()->isPointerTy() ==
2467              IndVar->getType()->isPointerTy() &&
2468          "genLoopLimit missed a cast");
2469 
2470   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2471   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2472   ICmpInst::Predicate P;
2473   if (L->contains(BI->getSuccessor(0)))
2474     P = ICmpInst::ICMP_NE;
2475   else
2476     P = ICmpInst::ICMP_EQ;
2477 
2478   IRBuilder<> Builder(BI);
2479 
2480   // The new loop exit condition should reuse the debug location of the
2481   // original loop exit condition.
2482   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2483     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2484 
2485   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2486   // avoid the expensive expansion of the limit expression in the wider type,
2487   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2488   // since we know (from the exit count bitwidth), that we can't self-wrap in
2489   // the narrower type.
2490   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2491   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2492   if (CmpIndVarSize > ExitCntSize) {
2493     assert(!CmpIndVar->getType()->isPointerTy() &&
2494            !ExitCnt->getType()->isPointerTy());
2495 
2496     // Before resorting to actually inserting the truncate, use the same
2497     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2498     // the other side of the comparison instead.  We still evaluate the limit
2499     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2500     // a truncate within in.
2501     // a truncate within it.
2502     const SCEV *IV = SE->getSCEV(CmpIndVar);
2503     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2504                                                   ExitCnt->getType());
2505     const SCEV *ZExtTrunc =
2506       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2507 
2508     if (ZExtTrunc == IV) {
2509       Extended = true;
2510       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2511                                    "wide.trip.count");
2512     } else {
2513       const SCEV *SExtTrunc =
2514         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2515       if (SExtTrunc == IV) {
2516         Extended = true;
2517         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2518                                      "wide.trip.count");
2519       }
2520     }
2521 
2522     if (Extended) {
2523       bool Discard;
2524       L->makeLoopInvariant(ExitCnt, Discard);
2525     } else
2526       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2527                                       "lftr.wideiv");
2528   }
2529   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2530                     << "      LHS:" << *CmpIndVar << '\n'
2531                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2532                     << "\n"
2533                     << "      RHS:\t" << *ExitCnt << "\n"
2534                     << "ExitCount:\t" << *ExitCount << "\n"
2535                     << "  was: " << *BI->getCondition() << "\n");
2536 
2537   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2538   Value *OrigCond = BI->getCondition();
2539   // It's tempting to use replaceAllUsesWith here to fully replace the old
2540   // comparison, but that's not immediately safe, since users of the old
2541   // comparison may not be dominated by the new comparison. Instead, just
2542   // update the branch to use the new comparison; in the common case this
2543   // will make old comparison dead.
2544   BI->setCondition(Cond);
2545   DeadInsts.push_back(OrigCond);
2546 
2547   ++NumLFTR;
2548   return true;
2549 }
2550 
2551 //===----------------------------------------------------------------------===//
2552 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2553 //===----------------------------------------------------------------------===//
2554 
2555 /// If there's a single exit block, sink any loop-invariant values that
2556 /// were defined in the preheader but not used inside the loop into the
2557 /// exit block to reduce register pressure in the loop.
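     /// For example (an illustrative scenario): if LICM hoisted 't = a + b'
     /// into the preheader but 't' is only used after the loop, sinking the
     /// add into the exit block avoids keeping 't' live in a register across
     /// the entire loop body.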
2558 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2559   BasicBlock *ExitBlock = L->getExitBlock();
2560   if (!ExitBlock) return false;
2561 
2562   BasicBlock *Preheader = L->getLoopPreheader();
2563   if (!Preheader) return false;
2564 
2565   bool MadeAnyChanges = false;
2566   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2567   BasicBlock::iterator I(Preheader->getTerminator());
2568   while (I != Preheader->begin()) {
2569     --I;
2570     // New instructions were inserted at the end of the preheader; PHI nodes
     // can only appear at the start of a block, so stop at the first one.
2571     if (isa<PHINode>(I))
2572       break;
2573 
2574     // Don't move instructions which might have side effects, since the side
2575     // effects need to complete before the loop body runs.  Also don't
2576     // move instructions which might read memory, since the loop may modify
2577     // memory. Note that it's okay if the instruction might have undefined
2578     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2579     // block.
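         // For example (hypothetical): a load in the preheader whose result
         // is only used after the loop still cannot be sunk into the exit
         // block if the loop may store to the same memory.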
2580     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2581       continue;
2582 
2583     // Skip debug info intrinsics.
2584     if (isa<DbgInfoIntrinsic>(I))
2585       continue;
2586 
2587     // Skip eh pad instructions.
2588     if (I->isEHPad())
2589       continue;
2590 
2591     // Don't sink allocas: we never want to sink static allocas out of the
2592     // entry block, and correctly sinking dynamic allocas requires
2593     // checks for stacksave/stackrestore intrinsics.
2594     // FIXME: Refactor this check somehow?
2595     if (isa<AllocaInst>(I))
2596       continue;
2597 
2598     // Determine if there is a use in or before the loop (direct or
2599     // otherwise).
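         // Note that a use in a PHI node is attributed to the incoming block;
         // e.g. a header phi such as 'phi [%t, %preheader], [%x, %latch]' (a
         // hypothetical example) counts as a use of %t in the preheader.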
2600     bool UsedInLoop = false;
2601     for (Use &U : I->uses()) {
2602       Instruction *User = cast<Instruction>(U.getUser());
2603       BasicBlock *UseBB = User->getParent();
2604       if (PHINode *P = dyn_cast<PHINode>(User)) {
2605         unsigned i =
2606           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2607         UseBB = P->getIncomingBlock(i);
2608       }
2609       if (UseBB == Preheader || L->contains(UseBB)) {
2610         UsedInLoop = true;
2611         break;
2612       }
2613     }
2614 
2615     // If there is, the def must remain in the preheader.
2616     if (UsedInLoop)
2617       continue;
2618 
2619     // Otherwise, sink it to the exit block.
2620     Instruction *ToMove = &*I;
2621     bool Done = false;
2622 
2623     if (I != Preheader->begin()) {
2624       // Skip debug info intrinsics.
2625       do {
2626         --I;
2627       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2628 
2629       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2630         Done = true;
2631     } else {
2632       Done = true;
2633     }
2634 
2635     MadeAnyChanges = true;
2636     ToMove->moveBefore(*ExitBlock, InsertPt);
2637     if (Done) break;
2638     InsertPt = ToMove->getIterator();
2639   }
2640 
2641   return MadeAnyChanges;
2642 }
2643 
2644 bool IndVarSimplify::optimizeLoopExits(Loop *L) {
2645   SmallVector<BasicBlock*, 16> ExitingBlocks;
2646   L->getExitingBlocks(ExitingBlocks);
2647 
2648   // Form an expression for the maximum exit count possible for this loop. We
2649   // merge the max and exact information to approximate a version of
2650   // getMaxBackedgeTakenCount which isn't restricted to just constants.
2651   // TODO: factor this out as a version of getMaxBackedgeTakenCount which
2652   // isn't guaranteed to return a constant.
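       // For example (hypothetical): with one exit whose exact count is %n
       // and a constant max backedge-taken count of 100, the merged
       // expression below is umin(100, %n), which the per-exit proofs later
       // compare against.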
2653   SmallVector<const SCEV*, 4> ExitCounts;
2654   const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L);
2655   if (!isa<SCEVCouldNotCompute>(MaxConstEC))
2656     ExitCounts.push_back(MaxConstEC);
2657   for (BasicBlock *ExitingBB : ExitingBlocks) {
2658     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2659     if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2660       assert(DT->dominates(ExitingBB, L->getLoopLatch()) &&
2661              "We should only have known counts for exiting blocks that "
2662              "dominate latch!");
2663       ExitCounts.push_back(ExitCount);
2664     }
2665   }
2666   if (ExitCounts.empty())
2667     return false;
2668   const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts);
2669 
2670   bool Changed = false;
2671   for (BasicBlock *ExitingBB : ExitingBlocks) {
2672     // If our exiting block exits multiple loops, we can only rewrite the
2673     // innermost one.  Otherwise, we're changing how many times the innermost
2674     // loop runs before it exits.
2675     if (LI->getLoopFor(ExitingBB) != L)
2676       continue;
2677 
2678     // Can't rewrite non-branch yet.
2679     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2680     if (!BI)
2681       continue;
2682 
2683     // If already constant, nothing to do.
2684     if (isa<Constant>(BI->getCondition()))
2685       continue;
2686 
2687     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2688     if (isa<SCEVCouldNotCompute>(ExitCount))
2689       continue;
2690 
2691     // If we know we'd exit on the first iteration, rewrite the exit to
2692     // reflect this.  This does not imply the loop must exit through this
2693     // exit; there may be an earlier one taken on the first iteration.
2694     // TODO: Given we know the backedge can't be taken, we should go ahead
2695     // and break it.  Or at least, kill all the header phis and simplify.
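         // For example (hypothetical): when this exit's count folds to zero,
         // the branch condition is replaced below with whichever constant
         // takes the exit edge, and the old condition is queued for deletion.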
2696     if (ExitCount->isZero()) {
2697       bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2698       auto *OldCond = BI->getCondition();
2699       auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) :
2700         ConstantInt::getFalse(OldCond->getType());
2701       BI->setCondition(NewCond);
2702       if (OldCond->use_empty())
2703         DeadInsts.push_back(OldCond);
2704       Changed = true;
2705       continue;
2706     }
2707 
2708     // If we end up with a pointer exit count, bail.  Note that we can end up
2709     // with a pointer exit count for one exiting block, and not for another in
2710     // the same loop.
2711     if (!ExitCount->getType()->isIntegerTy() ||
2712         !MaxExitCount->getType()->isIntegerTy())
2713       continue;
2714 
2715     Type *WiderType =
2716       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2717     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2718     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2719     assert(MaxExitCount->getType() == ExitCount->getType());
2720 
2721     // Can we prove that some other exit must be taken strictly before this
2722     // one?  TODO: handle cases where ule is known, and equality is covered
2723     // by a dominating exit
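         // For example (hypothetical): if this exit's count is %n but another
         // exit bounds the loop to at most %m iterations with %m u< %n proven
         // on entry, this exit can never be taken, so its test folds to the
         // constant that stays in the loop.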
2724     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2725                                      MaxExitCount, ExitCount)) {
2726       bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2727       auto *OldCond = BI->getCondition();
2728       auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) :
2729         ConstantInt::getTrue(OldCond->getType());
2730       BI->setCondition(NewCond);
2731       if (OldCond->use_empty())
2732         DeadInsts.push_back(OldCond);
2733       Changed = true;
2734       continue;
2735     }
2736 
2737     // TODO: If we can prove that the exiting iteration is equal to the exit
2738     // count for this exit and that no previous exit opportunities exist within
2739     // the loop, then we can discharge all other exits.  (May fall out of
2740     // previous TODO.)
2741 
2742     // TODO: If we can't prove any relation between our exit count and the
2743     // loop's exit count, but taking this exit doesn't require actually running
2744     // the loop (i.e. no side effects, no computed values used in exit), then
2745     // we can replace the exit test with a loop invariant test which exits on
2746     // the first iteration.
2747   }
2748   return Changed;
2749 }
2750 
2751 //===----------------------------------------------------------------------===//
2752 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2753 //===----------------------------------------------------------------------===//
2754 
2755 bool IndVarSimplify::run(Loop *L) {
2756   // We need (and expect!) the incoming loop to be in LCSSA.
2757   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2758          "LCSSA required to run indvars!");
2759   bool Changed = false;
2760 
2761   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2762   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2763   //    canonicalization can be a pessimization without LSR to "clean up"
2764   //    afterwards.
2765   //  - We depend on having a preheader; in particular,
2766   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2767   //    and we're in trouble if we can't find the induction variable even when
2768   //    we've manually inserted one.
2769   //  - LFTR relies on having a single backedge.
2770   if (!L->isLoopSimplifyForm())
2771     return false;
2772 
2773   // If there are any floating-point recurrences, attempt to
2774   // transform them to use integer recurrences.
2775   Changed |= rewriteNonIntegerIVs(L);
2776 
2777 #ifndef NDEBUG
2778   // Used below for a consistency check only
2779   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2780 #endif
2781 
2782   // Create a rewriter object which we'll use to transform the code with.
2783   SCEVExpander Rewriter(*SE, DL, "indvars");
2784 #ifndef NDEBUG
2785   Rewriter.setDebugType(DEBUG_TYPE);
2786 #endif
2787 
2788   // Eliminate redundant IV users.
2789   //
2790   // Simplification works best when run before other consumers of SCEV. We
2791   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2792   // other expressions involving loop IVs have been evaluated. This helps SCEV
2793   // set no-wrap flags before normalizing sign/zero extension.
2794   Rewriter.disableCanonicalMode();
2795   Changed |= simplifyAndExtend(L, Rewriter, LI);
2796 
2797   // Check to see if we can compute the final value of any expressions
2798   // that are recurrent in the loop, and substitute the exit values from the
2799   // loop into any instructions outside of the loop that use the final values
2800   // of the current expressions.
2801   if (ReplaceExitValue != NeverRepl)
2802     Changed |= rewriteLoopExitValues(L, Rewriter);
2803 
2804   // Eliminate redundant IV cycles.
2805   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2806 
2807   Changed |= optimizeLoopExits(L);
2808 
2809   // If we have a trip count expression, rewrite the loop's exit condition
2810   // using it.
2811   if (!DisableLFTR) {
2812     SmallVector<BasicBlock*, 16> ExitingBlocks;
2813     L->getExitingBlocks(ExitingBlocks);
2814     for (BasicBlock *ExitingBB : ExitingBlocks) {
2815       // Can't rewrite non-branch yet.
2816       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2817         continue;
2818 
2819       // If our exiting block exits multiple loops, we can only rewrite the
2820       // innermost one.  Otherwise, we're changing how many times the innermost
2821       // loop runs before it exits.
2822       if (LI->getLoopFor(ExitingBB) != L)
2823         continue;
2824 
2825       if (!needsLFTR(L, ExitingBB))
2826         continue;
2827 
2828       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2829       if (isa<SCEVCouldNotCompute>(ExitCount))
2830         continue;
2831 
2832       // This was handled above, but as we form SCEVs, we can sometimes refine
2833       // existing ones; this lets exit counts fold to zero here even though
2834       // they didn't when optimizeLoopExits saw them.  Arguably, we should iterate
2835       // until stable to handle cases like this better.
2836       if (ExitCount->isZero())
2837         continue;
2838 
2839       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2840       if (!IndVar)
2841         continue;
2842 
2843       // Avoid high cost expansions.  Note: This heuristic is questionable in
2844       // that our definition of "high cost" is not exactly principled.
2845       if (Rewriter.isHighCostExpansion(ExitCount, L))
2846         continue;
2847 
2848       // Check preconditions for proper SCEVExpander operation. SCEV does not
2849       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2850       // any pass that uses the SCEVExpander must do it. This does not work
2851       // well for loop passes because SCEVExpander makes assumptions about
2852       // all loops, while LoopPassManager only forces the current loop to be
2853       // simplified.
2854       //
2855       // FIXME: SCEV expansion has no way to bail out, so the caller must
2856       // explicitly check any assumptions made by SCEV. Brittle.
2857       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2858       if (!AR || AR->getLoop()->getLoopPreheader())
2859         Changed |= linearFunctionTestReplace(L, ExitingBB,
2860                                              ExitCount, IndVar,
2861                                              Rewriter);
2862     }
2863   }
2864   // Clear the rewriter cache, because values that are in the rewriter's cache
2865   // can be deleted in the loop below, causing the AssertingVH in the cache to
2866   // trigger.
2867   Rewriter.clear();
2868 
2869   // Now that we're done iterating through lists, clean up any instructions
2870   // which are now dead.
2871   while (!DeadInsts.empty())
2872     if (Instruction *Inst =
2873             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2874       Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
2875 
2876   // The Rewriter may not be used from this point on.
2877 
2878   // Loop-invariant instructions in the preheader that aren't used in the
2879   // loop may be sunk below the loop to reduce register pressure.
2880   Changed |= sinkUnusedInvariants(L);
2881 
2882   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2883   // trip count and therefore can further simplify exit values in addition to
2884   // rewriteLoopExitValues.
2885   Changed |= rewriteFirstIterationLoopExitValues(L);
2886 
2887   // Clean up dead instructions.
2888   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2889 
2890   // Check a post-condition.
2891   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2892          "Indvars did not preserve LCSSA!");
2893 
2894   // Verify that LFTR and any other changes have not interfered with SCEV's
2895   // ability to compute the trip count.
2896 #ifndef NDEBUG
2897   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2898     SE->forgetLoop(L);
2899     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2900     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2901         SE->getTypeSizeInBits(NewBECount->getType()))
2902       NewBECount = SE->getTruncateOrNoop(NewBECount,
2903                                          BackedgeTakenCount->getType());
2904     else
2905       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2906                                                  NewBECount->getType());
2907     assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2908   }
2909 #endif
2910 
2911   return Changed;
2912 }
2913 
2914 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2915                                           LoopStandardAnalysisResults &AR,
2916                                           LPMUpdater &) {
2917   Function *F = L.getHeader()->getParent();
2918   const DataLayout &DL = F->getParent()->getDataLayout();
2919 
2920   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2921   if (!IVS.run(&L))
2922     return PreservedAnalyses::all();
2923 
2924   auto PA = getLoopPassPreservedAnalyses();
2925   PA.preserveSet<CFGAnalyses>();
2926   return PA;
2927 }
2928 
2929 namespace {
2930 
2931 struct IndVarSimplifyLegacyPass : public LoopPass {
2932   static char ID; // Pass identification, replacement for typeid
2933 
2934   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2935     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2936   }
2937 
2938   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2939     if (skipLoop(L))
2940       return false;
2941 
2942     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2943     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2944     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2945     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2946     auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
2947     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2948     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2949     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2950 
2951     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2952     return IVS.run(L);
2953   }
2954 
2955   void getAnalysisUsage(AnalysisUsage &AU) const override {
2956     AU.setPreservesCFG();
2957     getLoopAnalysisUsage(AU);
2958   }
2959 };
2960 
2961 } // end anonymous namespace
2962 
2963 char IndVarSimplifyLegacyPass::ID = 0;
2964 
2965 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2966                       "Induction Variable Simplification", false, false)
2967 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2968 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2969                     "Induction Variable Simplification", false, false)
2970 
2971 Pass *llvm::createIndVarSimplifyPass() {
2972   return new IndVarSimplifyLegacyPass();
2973 }
2974