//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
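//      For example (illustrative):
//        's = 0; for (i = 0; i != 10; ++i) s += 2; use(s);'
//      becomes 'use(20);', leaving the loop with no remaining purpose.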
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));
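
// For example (illustrative): passing '-replexitval=never' to opt disables
// exit value replacement entirely, while '-replexitval=always' rewrites every
// exit value that can legally be expanded.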

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *BackedgeTakenCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}
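
// For example (illustrative): replacing a value computed as 'gep %a, %i' with
// an expansion of the form 'gep %b, %j' is rejected when getPointerBase()
// yields distinct bases for %a and %b; expansions that keep the same base are
// accepted.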

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}
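
// For example (illustrative): if Def reaches a PHI from reachable incoming
// blocks %b1 and %b2, the insertion point starts at the terminator of their
// nearest common dominator and is then walked up the dominator tree until it
// lands in the same loop as Def.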

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}
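
// For example (illustrative): an APFloat holding 3.0 converts exactly, setting
// IntVal = 3 and returning true; 3.5 fails the isExact check, and values
// outside the 64-bit signed range fail the opOK check, so both return false.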

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add, or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if the expansion is high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L,
                                           const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}
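
// For example (illustrative): if an in-loop value I transitively feeds a store
// inside the loop, the store's side effect makes I a "hard" use; if every
// transitive in-loop user is side-effect free, I is likely to die once its
// exit value is computed outside the loop.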

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
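///
/// For example (illustrative):
///   int s = 0;
///   for (int i = 0; i != 10; ++i) s += 2;
///   use(s);
/// Here s follows the recurrence {0,+,2}, so its exit value after 10
/// iterations is 20, and the use outside the loop can be rewritten to that
/// constant.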
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an instruction, it cannot be
        // defined in the loop, so skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (!SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which cannot be optimized
        // away.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop-invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
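///
/// For example (illustrative):
///   i = 0;
///   do { if (c) break; ++i; } while (...);   // 'c' is loop invariant
///   use(i);
/// The exiting block dominates the latch, so if the 'c' exit is ever taken it
/// is taken on the first iteration, and 'i' along that exit must be its
/// initial value 0.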
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // The LoopDeletion pass can delete loops even when ExitingBlocks.size() > 1,
  // but we forgo the multiple-ExitingBlocks case here for simplicity.
  // TODO: If we see a testcase where a loop with multiple exiting blocks can
  // be deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
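
// For example (illustrative): on a target where i64 is legal, seeing
// '%e = sext i32 %iv to i64' records i64 as the widest native type with
// IsSigned = true; a later 'zext i32 %iv to i64' user is then ignored because
// its signedness disagrees with the first extend seen.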

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
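///
/// For example (an illustrative sketch; the .wide names are hypothetical):
///   %iv  = phi i32 [ 0, %preheader ], [ %iv.inc, %latch ]
///   %ext = sext i32 %iv to i64
///   use(%ext)
/// becomes
///   %iv.wide = phi i64 [ 0, %preheader ], [ %iv.inc.wide, %latch ]
///   use(%iv.wide)
/// with no sign extend left in the loop.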
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}
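
// For example (illustrative): for '%u = add nsw i32 %iv, %k' where %iv was
// sign extended, we form 'SCEV(%iv.wide) + sext(%k)'; if %k is loop invariant
// the sum is still an AddRec on this loop, so the add can be performed in the
// wide type.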

/// Is this instruction potentially interesting for further simplification
/// after widening its type? In other words, can the extend be safely hoisted
/// out of the loop with SCEV reducing the value to a recurrence on the same
/// loop? If so, return the extended recurrence and the kind of extension used.
/// Otherwise return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}
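
// For example (an illustrative sketch; %t is a hypothetical name): a use like
// '%r = urem i32 %iv, %n' that cannot be widened keeps its narrow type; we
// emit '%t = trunc i64 %iv.wide to i32' and rewrite the use to
// '%r = urem i32 %t, %n', so the narrow IV itself can die.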

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand).  The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}
1370 
1371 /// If the narrow use is an instruction whose two operands are the defining
1372 /// instruction of DU and a load instruction, then we have the following:
1373 /// if the load is hoisted outside the loop, we never reach this function,
1374 /// because scalar evolution analysis works fine in widenIVUse with variables
1375 /// hoisted outside the loop, and efficient code is subsequently generated
1376 /// without emitting truncate instructions. But when the load is not hoisted
1377 /// (whether due to a limitation in alias analysis or due to a true legality
1378 /// constraint), scalar evolution cannot proceed with loop-variant values and
1379 /// inefficient code is generated. This function handles the non-hoisted-load
1380 /// special case by making the optimization generate the same kind of code for
1381 /// hoisted and non-hoisted loads (widen the use and eliminate the sign/zero
1382 /// extend instruction). This special case matters especially when the
1383 /// induction variables affect the addressing mode in code generation.
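     /// An illustrative sketch (hypothetical IR, not from a particular test):
     ///   %val = load i32, i32* %p           ; not hoistable out of the loop
     ///   %add = add nsw i32 %iv, %val
     ///   %idx = sext i32 %add to i64        ; feeds an addressing computation
     /// If every user of %add is such a sext to the wide type, we can instead
     /// compute 'add nsw i64 %wide.iv, sext(%val)' and drop the sexts entirely.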
1384 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1385   Instruction *NarrowUse = DU.NarrowUse;
1386   Instruction *NarrowDef = DU.NarrowDef;
1387   Instruction *WideDef = DU.WideDef;
1388 
1389   // Handle the common case of add<nsw/nuw>
1390   const unsigned OpCode = NarrowUse->getOpcode();
1391   // Only Add/Sub/Mul instructions are supported.
1392   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1393       OpCode != Instruction::Mul)
1394     return false;
1395 
1396   // The operand that is not defined by NarrowDef of DU. Let's call it the
1397   // other operand.
1398   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1399   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1400          "bad DU");
1401 
1402   const SCEV *ExtendOperExpr = nullptr;
1403   const OverflowingBinaryOperator *OBO =
1404     cast<OverflowingBinaryOperator>(NarrowUse);
1405   ExtendKind ExtKind = getExtendKind(NarrowDef);
1406   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1407     ExtendOperExpr = SE->getSignExtendExpr(
1408       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1409   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1410     ExtendOperExpr = SE->getZeroExtendExpr(
1411       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1412   else
1413     return false;
1414 
1415   // We are interested in the other operand being a load instruction.
1416   // But we should look into relaxing this restriction later on.
1417   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1418   if (I && I->getOpcode() != Instruction::Load)
1419     return false;
1420 
1421   // Verify that the defining operand is an AddRec.
1422   const SCEV *Op1 = SE->getSCEV(WideDef);
1423   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1424   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1425     return false;
1426   // Verify that the other operand is an extend.
1427   if (ExtKind == SignExtended) {
1428     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1429       return false;
1430   } else {
1431     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1432       return false;
1433   }
1434 
1435   if (ExtKind == SignExtended) {
1436     for (Use &U : NarrowUse->uses()) {
1437       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1438       if (!User || User->getType() != WideType)
1439         return false;
1440     }
1441   } else { // ExtKind == ZeroExtended
1442     for (Use &U : NarrowUse->uses()) {
1443       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1444       if (!User || User->getType() != WideType)
1445         return false;
1446     }
1447   }
1448 
1449   return true;
1450 }
1451 
1452 /// Special case for widening with variant loads (see
1453 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1454 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1455   Instruction *NarrowUse = DU.NarrowUse;
1456   Instruction *NarrowDef = DU.NarrowDef;
1457   Instruction *WideDef = DU.WideDef;
1458 
1459   ExtendKind ExtKind = getExtendKind(NarrowDef);
1460 
1461   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1462 
1463   // Generate the widened use instruction.
1464   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1465                    ? WideDef
1466                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1467                                       ExtKind, NarrowUse);
1468   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1469                    ? WideDef
1470                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1471                                       ExtKind, NarrowUse);
1472 
1473   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1474   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1475                                         NarrowBO->getName());
1476   IRBuilder<> Builder(NarrowUse);
1477   Builder.Insert(WideBO);
1478   WideBO->copyIRFlags(NarrowBO);
1479 
1480   if (ExtKind == SignExtended)
1481     ExtendKindMap[NarrowUse] = SignExtended;
1482   else
1483     ExtendKindMap[NarrowUse] = ZeroExtended;
1484 
1485   // Update the Use.
1486   if (ExtKind == SignExtended) {
1487     for (Use &U : NarrowUse->uses()) {
1488       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1489       if (User && User->getType() == WideType) {
1490         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1491                           << *WideBO << "\n");
1492         ++NumElimExt;
1493         User->replaceAllUsesWith(WideBO);
1494         DeadInsts.emplace_back(User);
1495       }
1496     }
1497   } else { // ExtKind == ZeroExtended
1498     for (Use &U : NarrowUse->uses()) {
1499       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1500       if (User && User->getType() == WideType) {
1501         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1502                           << *WideBO << "\n");
1503         ++NumElimExt;
1504         User->replaceAllUsesWith(WideBO);
1505         DeadInsts.emplace_back(User);
1506       }
1507     }
1508   }
1509 }
1510 
1511 /// Determine whether an individual user of the narrow IV can be widened. If so,
1512 /// return the wide clone of the user.
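     /// The central case (a sketch with hypothetical value names): given a wide
     /// IV %wide.iv known to equal sext(%iv), a user
     ///   %s = sext i32 %iv to i64
     /// is replaced outright by %wide.iv and queued for deletion; no new
     /// instructions are needed at all.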
1513 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1514   assert(ExtendKindMap.count(DU.NarrowDef) &&
1515          "Should already know the kind of extension used to widen NarrowDef");
1516 
1517   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1518   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1519     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1520       // For LCSSA phis, sink the truncate outside the loop.
1521       // After SimplifyCFG most loop exit targets have a single predecessor.
1522       // Otherwise fall back to a truncate within the loop.
1523       if (UsePhi->getNumOperands() != 1)
1524         truncateIVUse(DU, DT, LI);
1525       else {
1526         // Widening the PHI requires us to insert a trunc.  The logical place
1527         // for this trunc is in the same BB as the PHI.  This is not possible if
1528         // the BB is terminated by a catchswitch.
1529         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1530           return nullptr;
1531 
1532         PHINode *WidePhi =
1533           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1534                           UsePhi);
1535         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1536         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1537         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1538         UsePhi->replaceAllUsesWith(Trunc);
1539         DeadInsts.emplace_back(UsePhi);
1540         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1541                           << *WidePhi << "\n");
1542       }
1543       return nullptr;
1544     }
1545   }
1546 
1547   // This narrow use can be widened by a sext if it's non-negative or its narrow
1548   // def was widened by a sext. Same for zext.
1549   auto canWidenBySExt = [&]() {
1550     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1551   };
1552   auto canWidenByZExt = [&]() {
1553     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1554   };
1555 
1556   // Our raison d'etre! Eliminate sign and zero extension.
1557   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1558       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1559     Value *NewDef = DU.WideDef;
1560     if (DU.NarrowUse->getType() != WideType) {
1561       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1562       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1563       if (CastWidth < IVWidth) {
1564         // The cast isn't as wide as the IV, so insert a Trunc.
1565         IRBuilder<> Builder(DU.NarrowUse);
1566         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1567       }
1568       else {
1569         // A wider extend was hidden behind a narrower one. This may induce
1570         // another round of IV widening in which the intermediate IV becomes
1571         // dead. It should be very rare.
1572         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1573                           << " not wide enough to subsume " << *DU.NarrowUse
1574                           << "\n");
1575         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1576         NewDef = DU.NarrowUse;
1577       }
1578     }
1579     if (NewDef != DU.NarrowUse) {
1580       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1581                         << " replaced by " << *DU.WideDef << "\n");
1582       ++NumElimExt;
1583       DU.NarrowUse->replaceAllUsesWith(NewDef);
1584       DeadInsts.emplace_back(DU.NarrowUse);
1585     }
1586     // Now that the extend is gone, we want to expose its uses for potential
1587     // further simplification. We don't need to directly inform SimplifyIVUsers
1588     // of the new users, because their parent IV will be processed later as a
1589     // new loop phi. If we preserved IVUsers analysis, we would also want to
1590     // push the uses of WideDef here.
1591 
1592     // No further widening is needed. The deceased [sz]ext had done it for us.
1593     return nullptr;
1594   }
1595 
1596   // Does this user itself evaluate to a recurrence after widening?
1597   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1598   if (!WideAddRec.first)
1599     WideAddRec = getWideRecurrence(DU);
1600 
1601   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1602   if (!WideAddRec.first) {
1603     // If use is a loop condition, try to promote the condition instead of
1604     // truncating the IV first.
1605     if (widenLoopCompare(DU))
1606       return nullptr;
1607 
1608     // At this point we are about to generate a truncate instruction that may
1609     // hurt performance because the scalar evolution expression computed earlier
1610     // in WideAddRec.first does not indicate a polynomial induction expression.
1611     // In that case, look at the operands of the use instruction to determine
1612     // if we can still widen the use instead of truncating its operand.
1613     if (widenWithVariantLoadUse(DU)) {
1614       widenWithVariantLoadUseCodegen(DU);
1615       return nullptr;
1616     }
1617 
1618     // This user does not evaluate to a recurrence after widening, so don't
1619     // follow it. Instead insert a Trunc to kill off the original use,
1620     // eventually isolating the original narrow IV so it can be removed.
1621     truncateIVUse(DU, DT, LI);
1622     return nullptr;
1623   }
1624   // Assume block terminators cannot evaluate to a recurrence. We can't
1625   // insert a Trunc after a terminator if there happens to be a critical edge.
1626   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1627          "SCEV is not expected to evaluate a block terminator");
1628 
1629   // Reuse the IV increment that SCEVExpander created as long as it dominates
1630   // NarrowUse.
1631   Instruction *WideUse = nullptr;
1632   if (WideAddRec.first == WideIncExpr &&
1633       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1634     WideUse = WideInc;
1635   else {
1636     WideUse = cloneIVUser(DU, WideAddRec.first);
1637     if (!WideUse)
1638       return nullptr;
1639   }
1640   // Evaluation of WideAddRec ensured that the narrow expression could be
1641   // extended outside the loop without overflow. This suggests that the wide use
1642   // evaluates to the same expression as the extended narrow use, but doesn't
1643   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1644   // where it fails, we simply throw away the newly created wide use.
1645   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1646     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1647                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1648                       << "\n");
1649     DeadInsts.emplace_back(WideUse);
1650     return nullptr;
1651   }
1652 
1653   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1654   // Returning WideUse pushes it on the worklist.
1655   return WideUse;
1656 }
1657 
1658 /// Add eligible users of NarrowDef to NarrowIVUsers.
1659 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1660   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1661   bool NonNegativeDef =
1662       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1663                            SE->getConstant(NarrowSCEV->getType(), 0));
1664   for (User *U : NarrowDef->users()) {
1665     Instruction *NarrowUser = cast<Instruction>(U);
1666 
1667     // Handle data flow merges and bizarre phi cycles.
1668     if (!Widened.insert(NarrowUser).second)
1669       continue;
1670 
1671     bool NonNegativeUse = false;
1672     if (!NonNegativeDef) {
1673       // We might have control-dependent range information for this context.
1674       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1675         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1676     }
1677 
1678     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1679                                NonNegativeDef || NonNegativeUse);
1680   }
1681 }
1682 
1683 /// Process a single induction variable. First use the SCEVExpander to create a
1684 /// wide induction variable that evaluates to the same recurrence as the
1685 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1686 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1687 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1688 ///
1689 /// It would be simpler to delete uses as they are processed, but we must avoid
1690 /// invalidating SCEV expressions.
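     /// For instance (a hypothetical sketch): widening a canonical i32 counter to
     /// i64 first materializes
     ///   %wide.iv = phi i64 [ %wide.iv.next, %latch ], [ 0, %preheader ]
     ///   %wide.iv.next = add nuw nsw i64 %wide.iv, 1
     /// via SCEVExpander, after which the worklist rewrites each narrow use in
     /// terms of the wide phi or its increment.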
1691 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1692   // Is this phi an induction variable?
1693   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1694   if (!AddRec)
1695     return nullptr;
1696 
1697   // Widen the induction variable expression.
1698   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1699                                ? SE->getSignExtendExpr(AddRec, WideType)
1700                                : SE->getZeroExtendExpr(AddRec, WideType);
1701 
1702   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1703          "Expect the new IV expression to preserve its type");
1704 
1705   // Can the IV be extended outside the loop without overflow?
1706   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1707   if (!AddRec || AddRec->getLoop() != L)
1708     return nullptr;
1709 
1710   // An AddRec must have loop-invariant operands. Since this AddRec is
1711   // materialized by a loop header phi, the expression cannot have any post-loop
1712   // operands, so they must dominate the loop header.
1713   assert(
1714       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1715       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1716       "Loop header phi recurrence inputs do not dominate the loop");
1717 
1718   // Iterate over IV uses (including transitive ones) looking for IV increments
1719   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1720   // the increment, calculate control-dependent range information based on
1721   // dominating conditions inside the loop (e.g. a range check inside the
1722   // loop). Calculated ranges are stored in the PostIncRangeInfos map.
1723   //
1724   // Control-dependent range information is later used to prove that a narrow
1725   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1726   // this on demand because when pushNarrowIVUsers needs this information some
1727   // of the dominating conditions might be already widened.
1728   if (UsePostIncrementRanges)
1729     calculatePostIncRanges(OrigPhi);
1730 
1731   // The rewriter provides a value for the desired IV expression. This may
1732   // either find an existing phi or materialize a new one. Either way, we
1733   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1734   // of the phi-SCC dominates the loop entry.
1735   Instruction *InsertPt = &L->getHeader()->front();
1736   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1737 
1738   // Remembering the WideIV increment generated by SCEVExpander allows
1739   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1740   // employ a general reuse mechanism because the call above is the only call to
1741   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1742   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1743     WideInc =
1744       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1745     WideIncExpr = SE->getSCEV(WideInc);
1746     // Propagate the debug location associated with the original loop increment
1747     // to the new (widened) increment.
1748     auto *OrigInc =
1749       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1750     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1751   }
1752 
1753   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1754   ++NumWidened;
1755 
1756   // Traverse the def-use chain using a worklist starting at the original IV.
1757   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1758 
1759   Widened.insert(OrigPhi);
1760   pushNarrowIVUsers(OrigPhi, WidePhi);
1761 
1762   while (!NarrowIVUsers.empty()) {
1763     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1764 
1765     // Process a def-use edge. This may replace the use, so don't hold a
1766     // use_iterator across it.
1767     Instruction *WideUse = widenIVUse(DU, Rewriter);
1768 
1769     // Follow all def-use edges from the previous narrow use.
1770     if (WideUse)
1771       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1772 
1773     // widenIVUse may have removed the def-use edge.
1774     if (DU.NarrowDef->use_empty())
1775       DeadInsts.emplace_back(DU.NarrowDef);
1776   }
1777 
1778   // Attach any debug information to the new PHI. Since OrigPhi and WidePhi
1779   // evaluate the same recurrence, we can just copy the debug info over.
1780   SmallVector<DbgValueInst *, 1> DbgValues;
1781   llvm::findDbgValues(DbgValues, OrigPhi);
1782   auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
1783                                      ValueAsMetadata::get(WidePhi));
1784   for (auto &DbgValue : DbgValues)
1785     DbgValue->setOperand(0, MDPhi);
1786   return WidePhi;
1787 }
1788 
1789 /// Calculates the control-dependent range for the given def at the given
1790 /// context by looking at dominating conditions inside the loop.
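     /// For example (an illustrative sketch): given %iv.next = add nsw i32 %iv, 1,
     /// a dominating condition 'icmp sge i32 %iv, 0' constrains %iv to
     /// [0, INT_MAX], so the nsw add of +1 places %iv.next in [1, INT_MAX],
     /// i.e. provably non-negative at that use.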
1791 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1792                                     Instruction *NarrowUser) {
1793   using namespace llvm::PatternMatch;
1794 
1795   Value *NarrowDefLHS;
1796   const APInt *NarrowDefRHS;
1797   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1798                                  m_APInt(NarrowDefRHS))) ||
1799       !NarrowDefRHS->isNonNegative())
1800     return;
1801 
1802   auto UpdateRangeFromCondition = [&] (Value *Condition,
1803                                        bool TrueDest) {
1804     CmpInst::Predicate Pred;
1805     Value *CmpRHS;
1806     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1807                                  m_Value(CmpRHS))))
1808       return;
1809 
1810     CmpInst::Predicate P =
1811             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1812 
1813     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1814     auto CmpConstrainedLHSRange =
1815             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1816     auto NarrowDefRange =
1817             CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1818 
1819     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1820   };
1821 
1822   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1823     if (!HasGuards)
1824       return;
1825 
1826     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1827                                      Ctx->getParent()->rend())) {
1828       Value *C = nullptr;
1829       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1830         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1831     }
1832   };
1833 
1834   UpdateRangeFromGuards(NarrowUser);
1835 
1836   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1837   // If NarrowUserBB is statically unreachable, asking dominator queries may
1838   // yield surprising results (e.g. the block may not have a dom tree node).
1839   if (!DT->isReachableFromEntry(NarrowUserBB))
1840     return;
1841 
1842   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1843        L->contains(DTB->getBlock());
1844        DTB = DTB->getIDom()) {
1845     auto *BB = DTB->getBlock();
1846     auto *TI = BB->getTerminator();
1847     UpdateRangeFromGuards(TI);
1848 
1849     auto *BI = dyn_cast<BranchInst>(TI);
1850     if (!BI || !BI->isConditional())
1851       continue;
1852 
1853     auto *TrueSuccessor = BI->getSuccessor(0);
1854     auto *FalseSuccessor = BI->getSuccessor(1);
1855 
1856     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1857       return BBE.isSingleEdge() &&
1858              DT->dominates(BBE, NarrowUser->getParent());
1859     };
1860 
1861     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1862       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1863 
1864     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1865       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1866   }
1867 }
1868 
1869 /// Calculates the PostIncRangeInfos map for the given IV.
1870 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1871   SmallPtrSet<Instruction *, 16> Visited;
1872   SmallVector<Instruction *, 6> Worklist;
1873   Worklist.push_back(OrigPhi);
1874   Visited.insert(OrigPhi);
1875 
1876   while (!Worklist.empty()) {
1877     Instruction *NarrowDef = Worklist.pop_back_val();
1878 
1879     for (Use &U : NarrowDef->uses()) {
1880       auto *NarrowUser = cast<Instruction>(U.getUser());
1881 
1882       // Don't go looking outside the current loop.
1883       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1884       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1885         continue;
1886 
1887       if (!Visited.insert(NarrowUser).second)
1888         continue;
1889 
1890       Worklist.push_back(NarrowUser);
1891 
1892       calculatePostIncRange(NarrowDef, NarrowUser);
1893     }
1894   }
1895 }
1896 
1897 //===----------------------------------------------------------------------===//
1898 //  Live IV Reduction - Minimize IVs live across the loop.
1899 //===----------------------------------------------------------------------===//
1900 
1901 //===----------------------------------------------------------------------===//
1902 //  Simplification of IV users based on SCEV evaluation.
1903 //===----------------------------------------------------------------------===//
1904 
1905 namespace {
1906 
1907 class IndVarSimplifyVisitor : public IVVisitor {
1908   ScalarEvolution *SE;
1909   const TargetTransformInfo *TTI;
1910   PHINode *IVPhi;
1911 
1912 public:
1913   WideIVInfo WI;
1914 
1915   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1916                         const TargetTransformInfo *TTI,
1917                         const DominatorTree *DTree)
1918     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1919     DT = DTree;
1920     WI.NarrowIV = IVPhi;
1921   }
1922 
1923   // Implement the interface used by simplifyUsersOfIV.
1924   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1925 };
1926 
1927 } // end anonymous namespace
1928 
1929 /// Iteratively perform simplification on a worklist of IV users. Each
1930 /// successive simplification may push more users which may themselves be
1931 /// candidates for simplification.
1932 ///
1933 /// Sign/Zero extend elimination is interleaved with IV simplification.
1934 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1935                                        SCEVExpander &Rewriter,
1936                                        LoopInfo *LI) {
1937   SmallVector<WideIVInfo, 8> WideIVs;
1938 
1939   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1940           Intrinsic::getName(Intrinsic::experimental_guard));
1941   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1942 
1943   SmallVector<PHINode*, 8> LoopPhis;
1944   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1945     LoopPhis.push_back(cast<PHINode>(I));
1946   }
1947   // Each round of simplification iterates through the SimplifyIVUsers worklist
1948   // for all current phis, then determines whether any IVs can be
1949   // widened. Widening adds new phis to LoopPhis, inducing another round of
1950   // simplification on the wide IVs.
1951   bool Changed = false;
1952   while (!LoopPhis.empty()) {
1953     // Evaluate as many IV expressions as possible before widening any IVs. This
1954     // forces SCEV to set no-wrap flags before evaluating sign/zero
1955     // extension. The first time SCEV attempts to normalize sign/zero extension,
1956     // the result becomes final. So for the most predictable results, we delay
1957     // evaluation of sign/zero extend evaluation until needed, and avoid running
1958     // other SCEV based analysis prior to simplifyAndExtend.
1959     do {
1960       PHINode *CurrIV = LoopPhis.pop_back_val();
1961 
1962       // Information about sign/zero extensions of CurrIV.
1963       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1964 
1965       Changed |=
1966           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1967 
1968       if (Visitor.WI.WidestNativeType) {
1969         WideIVs.push_back(Visitor.WI);
1970       }
1971     } while(!LoopPhis.empty());
1972 
1973     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1974       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1975       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1976         Changed = true;
1977         LoopPhis.push_back(WidePhi);
1978       }
1979     }
1980   }
1981   return Changed;
1982 }
1983 
1984 //===----------------------------------------------------------------------===//
1985 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1986 //===----------------------------------------------------------------------===//
1987 
1988 /// Given a Value which is hoped to be part of an add recurrence in the given
1989 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1990 /// that this is less general than SCEV's AddRec checking.
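     /// E.g. (a sketch): for '%inc = add i32 %phi, %step' where %phi is a header
     /// phi of L and %step is loop-invariant, this returns %phi; add/sub operands
     /// may be commuted, and a two-operand GEP is accepted as a pointer increment.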
1991 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1992   Instruction *IncI = dyn_cast<Instruction>(IncV);
1993   if (!IncI)
1994     return nullptr;
1995 
1996   switch (IncI->getOpcode()) {
1997   case Instruction::Add:
1998   case Instruction::Sub:
1999     break;
2000   case Instruction::GetElementPtr:
2001     // An IV counter must preserve its type.
2002     if (IncI->getNumOperands() == 2)
2003       break;
2004     LLVM_FALLTHROUGH;
2005   default:
2006     return nullptr;
2007   }
2008 
2009   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2010   if (Phi && Phi->getParent() == L->getHeader()) {
2011     if (L->isLoopInvariant(IncI->getOperand(1)))
2012       return Phi;
2013     return nullptr;
2014   }
2015   if (IncI->getOpcode() == Instruction::GetElementPtr)
2016     return nullptr;
2017 
2018   // Allow add/sub to be commuted.
2019   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2020   if (Phi && Phi->getParent() == L->getHeader()) {
2021     if (L->isLoopInvariant(IncI->getOperand(0)))
2022       return Phi;
2023   }
2024   return nullptr;
2025 }
2026 
2027 /// Given a loop with one backedge and one exit, return the ICmpInst
2028 /// controlling the sole loop exit.  There is no guarantee that the exiting
2029 /// block is also the latch.
2030 static ICmpInst *getLoopTest(Loop *L, BasicBlock *ExitingBB) {
2031 
2032   BasicBlock *LatchBlock = L->getLoopLatch();
2033   // Don't bother with LFTR if the loop is not properly simplified.
2034   if (!LatchBlock)
2035     return nullptr;
2036 
2037   BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2038   assert(BI && "expected exit branch");
2039 
2040   return dyn_cast<ICmpInst>(BI->getCondition());
2041 }
2042 
2043 /// linearFunctionTestReplace policy. Return true unless we can show that the
2044 /// current exit test is already sufficiently canonical.
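     /// For example (an illustrative sketch): an exit test of the form
     ///   %cmp = icmp slt i32 %iv, %n
     /// is not canonical (slt rather than eq/ne against a simple counter), so
     /// this returns true and LFTR will rewrite the test.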
2045 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2046   // Do LFTR to simplify the exit condition to an ICMP.
2047   ICmpInst *Cond = getLoopTest(L, ExitingBB);
2048   if (!Cond)
2049     return true;
2050 
2051   // Do LFTR to simplify the exit ICMP to EQ/NE
2052   ICmpInst::Predicate Pred = Cond->getPredicate();
2053   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2054     return true;
2055 
2056   // Look for a loop invariant RHS
2057   Value *LHS = Cond->getOperand(0);
2058   Value *RHS = Cond->getOperand(1);
2059   if (!L->isLoopInvariant(RHS)) {
2060     if (!L->isLoopInvariant(LHS))
2061       return true;
2062     std::swap(LHS, RHS);
2063   }
2064   // Look for a simple IV counter LHS
2065   PHINode *Phi = dyn_cast<PHINode>(LHS);
2066   if (!Phi)
2067     Phi = getLoopPhiForCounter(LHS, L);
2068 
2069   if (!Phi)
2070     return true;
2071 
2072   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2073   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2074   if (Idx < 0)
2075     return true;
2076 
2077   // Do LFTR if the exit condition's IV is *not* a simple counter.
2078   Value *IncV = Phi->getIncomingValue(Idx);
2079   return Phi != getLoopPhiForCounter(IncV, L);
2080 }
2081 
2082 /// Return true if undefined behavior would provably be executed on the path to
2083 /// OnPathTo if Root produced a poison result.  Note that this doesn't say
2084 /// anything about whether OnPathTo is actually executed or whether Root is
2085 /// actually poison.  This can be used to assess whether a new use of Root can
2086 /// be added at a location which is control equivalent with OnPathTo (such as
2087 /// immediately before it) without introducing UB which didn't previously
2088 /// exist.  Note that a false result conveys no information.
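     /// A sketch of the intuition: if we assume Root is poison, propagate that
     /// through its users, and find a user that must trigger UB (e.g. a memory
     /// access through a pointer derived from Root) and must execute before
     /// OnPathTo, then the original program already had UB on that path.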
2089 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2090                                           Instruction *OnPathTo,
2091                                           DominatorTree *DT) {
2092   // Basic approach is to assume Root is poison, propagate poison forward
2093   // through all users we can easily track, and then check whether any of those
2094   // users are provably UB and must execute before our exiting block might
2095   // exit.
2096 
2097   // The set of all recursive users we've visited (which are assumed to all be
2098   // poison because of said visit)
2099   SmallSet<const Value *, 16> KnownPoison;
2100   SmallVector<const Instruction*, 16> Worklist;
2101   Worklist.push_back(Root);
2102   while (!Worklist.empty()) {
2103     const Instruction *I = Worklist.pop_back_val();
2104 
2105     // If we know this must trigger UB on a path leading to our target.
2106     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2107       return true;
2108 
2109     // If we can't analyze propagation through this instruction, just skip it
2110     // and transitive users.  Safe as false is a conservative result.
2111     if (!propagatesFullPoison(I) && I != Root)
2112       continue;
2113 
2114     if (KnownPoison.insert(I).second)
2115       for (const User *User : I->users())
2116         Worklist.push_back(cast<Instruction>(User));
2117   }
2118 
2119   // Might be non-UB, or might have a path we couldn't prove must execute on
2120   // way to exiting bb.
2121   return false;
2122 }
2123 
2124 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2125 /// down to checking that all operands are constant and listing instructions
2126 /// that may hide undef.
2127 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2128                                unsigned Depth) {
2129   if (isa<Constant>(V))
2130     return !isa<UndefValue>(V);
2131 
2132   if (Depth >= 6)
2133     return false;
2134 
2135   // Conservatively handle non-constant non-instructions. For example, Arguments
2136   // may be undef.
2137   Instruction *I = dyn_cast<Instruction>(V);
2138   if (!I)
2139     return false;
2140 
2141   // Load and return values may be undef.
2142   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2143     return false;
2144 
2145   // Optimistically handle other instructions.
2146   for (Value *Op : I->operands()) {
2147     if (!Visited.insert(Op).second)
2148       continue;
2149     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2150       return false;
2151   }
2152   return true;
2153 }
2154 
2155 /// Return true if the given value is concrete. We must prove that undef can
2156 /// never reach it.
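     ///
     /// E.g. (a sketch): 'add i32 %x, 1' is not concrete when %x is a function
     /// argument (arguments may be undef), whereas a value built entirely from
     /// non-undef constants is.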
2157 ///
2158 /// TODO: If we decide that this is a good approach to checking for undef, we
2159 /// may factor it into a common location.
2160 static bool hasConcreteDef(Value *V) {
2161   SmallPtrSet<Value*, 8> Visited;
2162   Visited.insert(V);
2163   return hasConcreteDefImpl(V, Visited, 0);
2164 }
2165 
2166 /// Return true if this IV has any uses other than the (soon to be rewritten)
2167 /// loop exit test.
2168 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2169   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2170   Value *IncV = Phi->getIncomingValue(LatchIdx);
2171 
2172   for (User *U : Phi->users())
2173     if (U != Cond && U != IncV) return false;
2174 
2175   for (User *U : IncV->users())
2176     if (U != Cond && U != Phi) return false;
2177   return true;
2178 }
2179 
2180 /// Return true if the given phi is a "counter" in L.  A counter is an
2181 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2182 /// step of 1.  Note that L must have exactly one latch.
2183 static bool isLoopCounter(PHINode* Phi, Loop *L,
2184                           ScalarEvolution *SE) {
2185   assert(Phi->getParent() == L->getHeader());
2186   assert(L->getLoopLatch());
2187 
2188   if (!SE->isSCEVable(Phi->getType()))
2189     return false;
2190 
2191   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2192   if (!AR || AR->getLoop() != L || !AR->isAffine())
2193     return false;
2194 
2195   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2196   if (!Step || !Step->isOne())
2197     return false;
2198 
2199   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2200   Value *IncV = Phi->getIncomingValue(LatchIdx);
2201   return (getLoopPhiForCounter(IncV, L) == Phi);
2202 }
2203 
2204 /// Search the loop header for a loop counter (an add rec w/step of one)
2205 /// suitable for use by LFTR.  If multiple counters are available, select the
2206 /// "best" one based on profitability heuristics.
2207 ///
2208 /// BECount may be an i8* pointer type. The pointer difference is already
2209 /// valid count without scaling the address stride, so it remains a pointer
2210 /// expression as far as SCEV is concerned.
2211 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2212                                 const SCEV *BECount,
2213                                 ScalarEvolution *SE, DominatorTree *DT) {
2214   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2215 
2216   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2217 
2218   // Loop over all of the PHI nodes, looking for a simple counter.
2219   PHINode *BestPhi = nullptr;
2220   const SCEV *BestInit = nullptr;
2221   BasicBlock *LatchBlock = L->getLoopLatch();
2222   assert(LatchBlock && "needsLFTR should guarantee a loop latch");
2223   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2224 
2225   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2226     PHINode *Phi = cast<PHINode>(I);
2227     if (!isLoopCounter(Phi, L, SE))
2228       continue;
2229 
2230     // Avoid comparing an integer IV against a pointer Limit.
2231     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2232       continue;
2233 
2234     const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2235 
2236     // AR may be a pointer type, while BECount is an integer type.
2237     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2238     // AR may not be a narrower type, or we may never exit.
2239     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2240     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2241       continue;
2242 
2243     // Avoid reusing a potentially undef value to compute other values that may
2244     // have originally had a concrete definition.
2245     if (!hasConcreteDef(Phi)) {
2246       // We explicitly allow unknown phis as long as they are already used by
2247       // the loop test. In this case we assume that performing LFTR could not
2248       // increase the number of undef users.
2249       // TODO: Generalize this to allow *any* loop exit which is known to
2250       // execute on each iteration
2251       if (L->getExitingBlock())
2252         if (ICmpInst *Cond = getLoopTest(L, ExitingBB))
2253           if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) &&
2254               Phi != getLoopPhiForCounter(Cond->getOperand(1), L))
2255             continue;
2256     }
2257 
2258     // Avoid introducing undefined behavior due to poison which didn't exist in
2259     // the original program.  (Annoyingly, the rules for poison and undef
2260     // propagation are distinct, so this does NOT cover the undef case above.)
2261     // We have to ensure that we don't introduce UB by introducing a use on an
2262     // iteration where said IV produces poison.  Our strategy here differs for
2263     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
2264     // see code in linearFunctionTestReplace.  For pointers, we restrict
2265     // transforms as there is no good way to reinfer inbounds once lost.
2266     if (!Phi->getType()->isIntegerTy() &&
2267         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2268       continue;
2269 
2270     const SCEV *Init = AR->getStart();
2271 
2272     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2273       // Don't force a live loop counter if another IV can be used.
2274       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2275         continue;
2276 
2277       // Prefer to count-from-zero. This is a more "canonical" counter form. It
2278       // also prefers integer to pointer IVs.
2279       if (BestInit->isZero() != Init->isZero()) {
2280         if (BestInit->isZero())
2281           continue;
2282       }
2283       // If two IVs both count from zero or both count from nonzero then the
2284       // narrower is likely a dead phi that has been widened. Use the wider phi
2285       // to allow the other to be eliminated.
2286       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2287         continue;
2288     }
2289     BestPhi = Phi;
2290     BestInit = Init;
2291   }
2292   return BestPhi;
2293 }
2294 
2295 /// Insert an IR expression which computes the value held by the IV IndVar
2296 /// (which must be a loop counter w/unit stride) after the backedge of loop L
2297 /// is taken IVCount times.
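     /// For instance (a sketch): with a counter {%start,+,1} and IVCount = %n,
     /// the integer case below expands code for '%start + %n'; the pointer case
     /// instead emits a 'getelementptr i8, i8* %base, <zext %n>' in the
     /// preheader.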
2298 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2299                            const SCEV *IVCount, Loop *L,
2300                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2301   assert(isLoopCounter(IndVar, L, SE));
2302   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2303   const SCEV *IVInit = AR->getStart();
2304 
2305   // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
2306   // finds a valid pointer IV. Zero extend BECount in order to materialize a
2307   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2308   // the existing GEPs whenever possible.
2309   if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
2310     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2311     // signed value. IVCount on the other hand represents the loop trip count,
2312     // which is an unsigned value. FindLoopCounter only allows induction
2313     // variables that have a positive unit stride of one. This means we don't
2314     // have to handle the case of negative offsets (yet) and just need to zero
2315     // extend IVCount.
2316     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2317     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
2318 
2319     // Expand the code for the iteration count.
2320     assert(SE->isLoopInvariant(IVOffset, L) &&
2321            "Computed iteration count is not loop invariant!");
2322     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2323     Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
2324 
2325     Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
2326     assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
2327     // We could handle pointer IVs other than i8*, but we need to compensate for
2328     // gep index scaling.
2329     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2330                              cast<PointerType>(GEPBase->getType())
2331                                  ->getElementType())->isOne() &&
2332            "unit stride pointer IV must be i8*");
2333 
2334     IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
2335     return Builder.CreateGEP(GEPBase->getType()->getPointerElementType(),
2336                              GEPBase, GEPOffset, "lftr.limit");
2337   } else {
2338     // In any other case, convert both IVInit and IVCount to integers before
2339     // comparing. This may result in SCEV expansion of pointers, but in practice
2340     // SCEV will fold the pointer arithmetic away as such:
2341     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2342     //
2343     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2344     // for simple memset-style loops.
2345     //
2346     // IVInit integer and IVCount pointer would only occur if a canonical IV
2347     // were generated on top of case #2, which is not expected.
2348 
2349     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2350     const SCEV *IVLimit = nullptr;
2351     // For unit stride, IVCount = Start + BECount with 2's complement overflow.
2352     // For non-zero Start, compute IVCount here.
2353     if (AR->getStart()->isZero())
2354       IVLimit = IVCount;
2355     else {
2356       const SCEV *IVInit = AR->getStart();
2357 
2358       // For integer IVs, truncate the IV before computing IVInit + BECount.
2359       if (SE->getTypeSizeInBits(IVInit->getType())
2360           > SE->getTypeSizeInBits(IVCount->getType()))
2361         IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
2362 
2363       IVLimit = SE->getAddExpr(IVInit, IVCount);
2364     }
2365     // Expand the code for the iteration count.
2366     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2367     IRBuilder<> Builder(BI);
2368     assert(SE->isLoopInvariant(IVLimit, L) &&
2369            "Computed iteration count is not loop invariant!");
2370     // Ensure that we generate the same type as IndVar, or a smaller integer
2371     // type. In the presence of null pointer values, we have an integer type
2372     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2373     Type *LimitTy = IVCount->getType()->isPointerTy() ?
2374       IndVar->getType() : IVCount->getType();
2375     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2376   }
2377 }
2378 
2379 /// This method rewrites the exit condition of the loop to be a canonical !=
2380 /// comparison against the incremented loop induction variable.  This pass is
2381 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2382 /// determine a loop-invariant trip count of the loop, which is actually a much
2383 /// broader range than just linear tests.
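     /// A sketch of the rewrite (hypothetical names), in the common case where
     /// the latch is the exiting block:
     ///   %cmp = icmp slt i32 %iv, %len
     /// becomes
     ///   %exitcond = icmp ne i32 %iv.next, %lftr.limit
     /// where %lftr.limit is the loop-invariant limit produced by genLoopLimit.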
2384 bool IndVarSimplify::
2385 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2386                           const SCEV *BackedgeTakenCount,
2387                           PHINode *IndVar, SCEVExpander &Rewriter) {
2388   assert(isLoopCounter(IndVar, L, SE));
2389   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2390   Instruction * const IncVar =
2391     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2392 
2393   // Initialize CmpIndVar and IVCount to their preincremented values.
2394   Value *CmpIndVar = IndVar;
2395   const SCEV *IVCount = BackedgeTakenCount;
2396 
2397   // If the exiting block is the same as the backedge block, we prefer to
2398   // compare against the post-incremented value, otherwise we must compare
2399   // against the preincremented value.
2400   if (ExitingBB == L->getLoopLatch()) {
2401     bool SafeToPostInc = IndVar->getType()->isIntegerTy();
2402     if (!SafeToPostInc) {
2403       // For pointer IVs, we chose not to strip inbounds, which requires us
2404       // not to add a potentially UB-introducing use.  We need to either a) show
2405       // the loop test we're modifying is already in post-inc form, or b) show
2406       // that adding a use must not introduce UB.
2407       ICmpInst *LoopTest = getLoopTest(L, ExitingBB);
2408       SafeToPostInc = LoopTest->getOperand(0) == IncVar ||
2409         LoopTest->getOperand(1) == IncVar ||
2410         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2411     }
2412     if (SafeToPostInc) {
2413       // Add one to the "backedge-taken" count to get the trip count.
2414       // This addition may overflow, which is valid as long as the comparison
2415       // is truncated to BackedgeTakenCount->getType().
2416       IVCount = SE->getAddExpr(BackedgeTakenCount,
2417                                SE->getOne(BackedgeTakenCount->getType()));
2418       // The BackedgeTaken expression contains the number of times that the
2419       // backedge branches to the loop header.  This is one less than the
2420       // number of times the loop executes, so use the incremented indvar.
2421       CmpIndVar = IncVar;
2422     }
2423   }
2424 
2425   // It may be necessary to drop nowrap flags on the incrementing instruction
2426   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2427   // case the increment might have previously been poison on the last iteration
2428   // only) or if LFTR switches to a different IV that was previously dynamically
2429   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2430   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2431   // check), because the pre-inc addrec flags may be adopted from the original
2432   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2433   // TODO: This handling is inaccurate for one case: If we switch to a
2434   // dynamically dead IV that wraps on the first loop iteration only, which is
2435   // not covered by the post-inc addrec. (If the new IV was not dynamically
2436   // dead, it could not be poison on the first iteration in the first place.)
2437   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2438     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2439     if (BO->hasNoUnsignedWrap())
2440       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2441     if (BO->hasNoSignedWrap())
2442       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2443   }
2444 
2445   Value *ExitCnt = genLoopLimit(IndVar, ExitingBB, IVCount, L, Rewriter, SE);
2446   assert(ExitCnt->getType()->isPointerTy() ==
2447              IndVar->getType()->isPointerTy() &&
2448          "genLoopLimit missed a cast");
2449 
2450   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2451   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2452   ICmpInst::Predicate P;
2453   if (L->contains(BI->getSuccessor(0)))
2454     P = ICmpInst::ICMP_NE;
2455   else
2456     P = ICmpInst::ICMP_EQ;
2457 
2458   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2459                     << "      LHS:" << *CmpIndVar << '\n'
2460                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2461                     << "\n"
2462                     << "      RHS:\t" << *ExitCnt << "\n"
2463                     << "  IVCount:\t" << *IVCount << "\n");
2464 
2465   IRBuilder<> Builder(BI);
2466 
2467   // The new loop exit condition should reuse the debug location of the
2468   // original loop exit condition.
2469   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2470     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2471 
2472   // LFTR can ignore IV overflow and truncate to the width of
2473   // BECount. This avoids materializing the add(zext(add)) expression.
2474   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2475   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2476   if (CmpIndVarSize > ExitCntSize) {
2477     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2478     const SCEV *ARStart = AR->getStart();
2479     const SCEV *ARStep = AR->getStepRecurrence(*SE);
2480     // For constant IVCount, avoid truncation.
2481     if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
2482       const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
2483       APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
2484       // Note that the post-inc value of BackedgeTakenCount may have overflowed
2485       // above such that IVCount is now zero.
2486       if (IVCount != BackedgeTakenCount && Count == 0) {
2487         Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
2488         ++Count;
2489       }
2490       else
2491         Count = Count.zext(CmpIndVarSize);
2492       APInt NewLimit;
2493       if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
2494         NewLimit = Start - Count;
2495       else
2496         NewLimit = Start + Count;
2497       ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
2498 
2499       LLVM_DEBUG(dbgs() << "  Widen RHS:\t" << *ExitCnt << "\n");
2500     } else {
2501       // We try to extend trip count first. If that doesn't work we truncate IV.
2502       // Zext(trunc(IV)) == IV implies equivalence of the following two:
2503       // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
2504       // one of the two holds, extend the trip count, otherwise we truncate IV.
2505       bool Extended = false;
2506       const SCEV *IV = SE->getSCEV(CmpIndVar);
2507       const SCEV *ZExtTrunc =
2508            SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2509                                                      ExitCnt->getType()),
2510                                  CmpIndVar->getType());
2511 
2512       if (ZExtTrunc == IV) {
2513         Extended = true;
2514         ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2515                                      "wide.trip.count");
2516       } else {
2517         const SCEV *SExtTrunc =
2518           SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2519                                                     ExitCnt->getType()),
2520                                 CmpIndVar->getType());
2521         if (SExtTrunc == IV) {
2522           Extended = true;
2523           ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2524                                        "wide.trip.count");
2525         }
2526       }
2527 
2528       if (!Extended)
2529         CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2530                                         "lftr.wideiv");
2531     }
2532   }
2533   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2534   Value *OrigCond = BI->getCondition();
2535   // It's tempting to use replaceAllUsesWith here to fully replace the old
2536   // comparison, but that's not immediately safe, since users of the old
2537   // comparison may not be dominated by the new comparison. Instead, just
2538   // update the branch to use the new comparison; in the common case this
2539   // will make old comparison dead.
2540   BI->setCondition(Cond);
2541   DeadInsts.push_back(OrigCond);
2542 
2543   ++NumLFTR;
2544   return true;
2545 }
2546 
2547 //===----------------------------------------------------------------------===//
2548 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2549 //===----------------------------------------------------------------------===//
2550 
2551 /// If there's a single exit block, sink any loop-invariant values that
2552 /// were defined in the preheader but not used inside the loop into the
2553 /// exit block to reduce register pressure in the loop.
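     /// For example (a sketch): a preheader computation used only after the loop,
     /// such as
     ///   %init = add i32 %a, %b
     /// with its sole use in the exit block, is moved into the exit block so it
     /// does not hold a register across the entire loop body.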
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // Stop once we reach the PHI nodes, which always sit at the top of the
    // block; any newly created instructions were inserted at the end of the
    // preheader and have been scanned already.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before the instructions inside the loop run.
    // Also don't move instructions which might read memory, since the loop
    // may modify memory. Note that it's okay if the instruction might have
    // undefined behavior: LoopSimplify guarantees that the preheader
    // dominates the exit block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip EH pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink allocas: we never want to sink static allocas out of the
    // entry block, and correctly sinking dynamic allocas requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
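      // A use in a PHI is attributed to the corresponding incoming block
      // rather than to the block containing the PHI itself.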
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is such a use, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Step to the previous instruction, skipping debug info intrinsics; if
      // only debug intrinsics remain above us, we are done scanning.
      do {
        --I;
      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    // Insert the next sunk instruction before this one so that sunk
    // instructions keep their original relative order in the exit block.
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");
  bool Changed = false;

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
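  // Illustrative example (loop shape assumed): a loop such as
  //   for (float F = 0.0f; F != 100.0f; F += 1.0f) ...
  // can have its float recurrence replaced by an i32 induction variable.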
  Changed |= rewriteNonIntegerIVs(L);

  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if this loop has a computable loop-invariant execution count.
  // If so, we can compute the final value of any expressions that are
  // recurrent in the loop, and substitute the exit values from the loop into
  // any instructions outside of the loop that use those final values.
  if (ReplaceExitValue != NeverRepl &&
      !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    Changed |= rewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    // For the moment, we only do LFTR for single-exit loops. The code is
    // structured as it is in the expectation of generalization to multi-exit
    // loops in the near future. See D62625 for context.
    SmallVector<BasicBlock *, 16> ExitingBlocks;
    if (auto *ExitingBB = L->getExitingBlock())
      ExitingBlocks.push_back(ExitingBB);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch exit terminators yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *BETakenCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(BETakenCount))
        continue;

      // A zero count means this exit is taken on the first iteration; it
      // would be better to fold the exit condition to true (TODO: do so!).
      if (BETakenCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, BETakenCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high-cost expansions. Note: this heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(BETakenCount, L))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify; instead,
      // any pass that uses the SCEVExpander must enforce them itself. This
      // does not work well for loop passes, because SCEVExpander makes
      // assumptions about all loops, while LoopPassManager only forces the
      // current loop to be simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
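      //
      // Only proceed when the count is not an addrec, or when the addrec's
      // loop has a preheader that the expander can safely hoist code into.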
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BETakenCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             BETakenCount, IndVar, Rewriter);
    }
  }

  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the trip count
  // computation and therefore can further simplify exit values in addition
  // to rewriteLoopExitValues.
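  // (Illustrative: when a loop exit is controlled by a loop-invariant
  // condition, that exit can only be taken on the first iteration, so an
  // exit phi's value can be rewritten to the corresponding value from the
  // preheader.)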
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute the trip count.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    // Compare the stale and recomputed counts at a common width by
    // truncating the wider one to the narrower type.
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}
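
// For reference, and assuming a standard LLVM build of `opt`, the pass can
// be exercised in isolation with the new pass manager:
//   opt -passes=indvars -S input.ll
// or with the legacy pass manager:
//   opt -indvars -S input.ll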

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}