//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
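/// For example, if FromVal was a GEP off of one base pointer and the expanded
/// ToVal ends up as a GEP off of a different SCEV pointer base, the rewrite is
/// rejected: the final address of a GEP chain must stay inbounds relative to a
/// single underlying object.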
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
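/// For example, when Def reaches a PHI from two reachable predecessor blocks,
/// the chosen point is the terminator of their nearest common dominator,
/// subsequently hoisted to the outermost block that is still inside Def's
/// loop.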
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
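/// For example, 42.0 converts exactly to 42, whereas 0.5 (inexact) or a value
/// outside the signed 64-bit range is rejected.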
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
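/// If the floating-point PHI has other uses besides the exit test, those uses
/// are rewritten through a single sitofp of the new integer IV.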
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp.  If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find the new predicate for the integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;
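    // For example, with InitValue == 0, ExitValue == 10 and IncValue == 3,
    // Leftover == 10 % 3 == 1: an equality-tested integer IV would step
    // 0, 3, 6, 9, 12, ... and never equal 10, so the transform must be
    // rejected.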

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert the new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if expanding the exit value is expected to be high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

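/// Conservatively determine whether \p I has a transitive user inside \p L
/// that is unlikely to be optimized away: currently, any in-loop user that may
/// have side effects counts as such a "hard" use.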
bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to the worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
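/// For example, a loop whose body only performs "sum += 4" with a computable
/// trip count N can have the LCSSA PHI for sum in the exit block replaced by
/// an out-of-loop expansion of "sum.start + 4*N", potentially leaving the loop
/// dead.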
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an instruction (and hence cannot
        // be something computed inside the loop), skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (!SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which can not be optimized
        // away.  Avoid doing so unless we know we have a value which computes
        // the ExitValue already.  TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is now dead, schedule it for deletion; deleting it
    // immediately could invalidate iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop-invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
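/// For example, if an early exit is guarded by a loop-invariant condition and
/// its exit PHI receives a header PHI %iv, the exit can only be taken on the
/// first iteration, so the exit PHI may take %iv's preheader value directly.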
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get the condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting the exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
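/// The requirements mirror those of the LoopDeletion pass: a preheader, a
/// single exiting/exit block pair, exit PHI inputs that are (or are about to
/// be rewritten to be) loop invariant, and no side-effecting instructions in
/// the loop body.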
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // The LoopDeletion pass can delete loops even when ExitingBlocks.size() > 1,
  // but we skip the multiple-ExitingBlocks case here for simplicity.
  // TODO: If we see a testcase where a loop with multiple ExitingBlocks can be
  // deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in the transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by visitIVCast and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
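/// For example, given a narrow IV "%iv = phi i32" whose value feeds a
/// "sext i32 ... to i64", WidenIV introduces "%iv.wide = phi i64", redirects
/// users of the extend to %iv.wide, and inserts truncs back to i32 only at
/// the uses that cannot be widened.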
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

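  // Record a control-dependent range for a (Def, UseI) pair, intersecting it
  // with any previously recorded range; repeated updates can only narrow it.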
  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to a SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
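/// For instance, if the narrow use is "add nsw i32 %iv, %x" and the IV was
/// sign-extended, then sext(%iv + %x) == sext(%iv) + sext(%x), so widening %x
/// with a sext yields an AddRec on the same loop for the wide add.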
1227 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1228   // Handle the common case of add<nsw/nuw>
1229   const unsigned OpCode = DU.NarrowUse->getOpcode();
1230   // Only Add/Sub/Mul instructions supported yet.
1231   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1232       OpCode != Instruction::Mul)
1233     return {nullptr, Unknown};
1234 
1235   // One operand (NarrowDef) has already been extended to WideDef. Now determine
1236   // if extending the other will lead to a recurrence.
1237   const unsigned ExtendOperIdx =
1238       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1239   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1240 
1241   const SCEV *ExtendOperExpr = nullptr;
1242   const OverflowingBinaryOperator *OBO =
1243     cast<OverflowingBinaryOperator>(DU.NarrowUse);
1244   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1245   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1246     ExtendOperExpr = SE->getSignExtendExpr(
1247       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1248   else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1249     ExtendOperExpr = SE->getZeroExtendExpr(
1250       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1251   else
1252     return {nullptr, Unknown};
1253 
1254   // When creating this SCEV expr, don't apply the current operations NSW or NUW
1255   // flags. This instruction may be guarded by control flow that the no-wrap
1256   // behavior depends on. Non-control-equivalent instructions can be mapped to
1257   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1258   // semantics to those operations.
1259   const SCEV *lhs = SE->getSCEV(DU.WideDef);
1260   const SCEV *rhs = ExtendOperExpr;
1261 
1262   // Let's swap operands to the initial order for the case of non-commutative
1263   // operations, like SUB. See PR21014.
1264   if (ExtendOperIdx == 0)
1265     std::swap(lhs, rhs);
1266   const SCEVAddRecExpr *AddRec =
1267       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1268 
1269   if (!AddRec || AddRec->getLoop() != L)
1270     return {nullptr, Unknown};
1271 
1272   return {AddRec, ExtKind};
1273 }
1274 
1275 /// Is this instruction potentially interesting for further simplification after
1276 /// widening it's type? In other words, can the extend be safely hoisted out of
1277 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
1278 /// so, return the extended recurrence and the kind of extension used. Otherwise
1279 /// return {nullptr, Unknown}.
1280 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1281   if (!SE->isSCEVable(DU.NarrowUse->getType()))
1282     return {nullptr, Unknown};
1283 
1284   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1285   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1286       SE->getTypeSizeInBits(WideType)) {
1287     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1288     // index. So don't follow this use.
1289     return {nullptr, Unknown};
1290   }
1291 
1292   const SCEV *WideExpr;
1293   ExtendKind ExtKind;
1294   if (DU.NeverNegative) {
1295     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1296     if (isa<SCEVAddRecExpr>(WideExpr))
1297       ExtKind = SignExtended;
1298     else {
1299       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1300       ExtKind = ZeroExtended;
1301     }
1302   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1303     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1304     ExtKind = SignExtended;
1305   } else {
1306     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1307     ExtKind = ZeroExtended;
1308   }
1309   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1310   if (!AddRec || AddRec->getLoop() != L)
1311     return {nullptr, Unknown};
1312   return {AddRec, ExtKind};
1313 }
1314 
1315 /// This IV user cannot be widen. Replace this use of the original narrow IV
1316 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1317 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1318   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1319   if (!InsertPt)
1320     return;
1321   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1322                     << *DU.NarrowUse << "\n");
1323   IRBuilder<> Builder(InsertPt);
1324   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1325   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1326 }
1327 
1328 /// If the narrow use is a compare instruction, then widen the compare
1329 //  (and possibly the other operand).  The extend operation is hoisted into the
1330 // loop preheader as far as possible.
1331 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1332   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1333   if (!Cmp)
1334     return false;
1335 
1336   // We can legally widen the comparison in the following two cases:
1337   //
1338   //  - The signedness of the IV extension and comparison match
1339   //
1340   //  - The narrow IV is never negative (and thus its sign extension is equal
1341   //    to its zero extension).  For instance, let's say we're zero extending
1342   //    %narrow for the following use
1343   //
1344   //      icmp slt i32 %narrow, %val   ... (A)
1345   //
1346   //    and %narrow is never negative.  Then
1347   //
1348   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1349   //          == icmp slt i32 zext(%narrow), sext(%val)
1350   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1351   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1352     return false;
1353 
1354   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1355   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1356   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1357   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1358 
1359   // Widen the compare instruction.
1360   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1361   if (!InsertPt)
1362     return false;
1363   IRBuilder<> Builder(InsertPt);
1364   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1365 
1366   // Widen the other operand of the compare, if necessary.
1367   if (CastWidth < IVWidth) {
1368     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1369     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1370   }
1371   return true;
1372 }
1373 
1374 /// If the narrow use is an instruction whose two operands are the defining
1375 /// instruction of DU and a load instruction, then we have the following:
1376 /// if the load is hoisted outside the loop, we do not reach this function, as
1377 /// scalar evolution analysis works fine in widenIVUse with variables hoisted
1378 /// outside the loop, and efficient code is subsequently generated without
1379 /// emitting truncate instructions. But when the load is not hoisted (whether
1380 /// due to a limitation in alias analysis or a true legality constraint),
1381 /// scalar evolution cannot proceed with loop-variant values, and inefficient
1382 /// code is generated. This function handles the non-hoisted load as a special
1383 /// case, making the optimization generate the same type of code for hoisted
1384 /// and non-hoisted loads (widen the use and eliminate the sign/zero extend
1385 /// instruction). This special case matters especially when the induction
1386 /// variable affects the addressing mode in code generation.
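///
/// A hedged sketch of the shape this matches (hypothetical IR): with a
/// loop-variant, non-hoistable load feeding an nsw add of the narrow IV,
///
///   %ld  = load i32, i32* %p
///   %add = add nsw i32 %iv, %ld
///   %ext = sext i32 %add to i64    ; every user of %add is such a sext
///
/// the codegen part can instead produce
///
///   %ld.ext   = sext i32 %ld to i64
///   %add.wide = add nsw i64 %iv.wide, %ld.ext
///
/// and replace each %ext with %add.wide.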
1387 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1388   Instruction *NarrowUse = DU.NarrowUse;
1389   Instruction *NarrowDef = DU.NarrowDef;
1390   Instruction *WideDef = DU.WideDef;
1391 
1392   // Handle the common case of add<nsw/nuw>
1393   const unsigned OpCode = NarrowUse->getOpcode();
1394   // Only Add/Sub/Mul instructions are supported.
1395   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1396       OpCode != Instruction::Mul)
1397     return false;
1398 
1399   // The operand that is not defined by NarrowDef of DU. Let's call it the
1400   // other operand.
1401   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1402   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1403          "bad DU");
1404 
1405   const SCEV *ExtendOperExpr = nullptr;
1406   const OverflowingBinaryOperator *OBO =
1407     cast<OverflowingBinaryOperator>(NarrowUse);
1408   ExtendKind ExtKind = getExtendKind(NarrowDef);
1409   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1410     ExtendOperExpr = SE->getSignExtendExpr(
1411       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1412   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1413     ExtendOperExpr = SE->getZeroExtendExpr(
1414       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1415   else
1416     return false;
1417 
1418   // We are interested in the other operand being a load instruction.
1419   // However, we should look into relaxing this restriction later on.
1420   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1421   if (I && I->getOpcode() != Instruction::Load)
1422     return false;
1423 
1424   // Verify that the defining operand is an AddRec.
1425   const SCEV *Op1 = SE->getSCEV(WideDef);
1426   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1427   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1428     return false;
1429   // Verify that the other operand is an extend.
1430   if (ExtKind == SignExtended) {
1431     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1432       return false;
1433   } else {
1434     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1435       return false;
1436   }
1437 
1438   if (ExtKind == SignExtended) {
1439     for (Use &U : NarrowUse->uses()) {
1440       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1441       if (!User || User->getType() != WideType)
1442         return false;
1443     }
1444   } else { // ExtKind == ZeroExtended
1445     for (Use &U : NarrowUse->uses()) {
1446       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1447       if (!User || User->getType() != WideType)
1448         return false;
1449     }
1450   }
1451 
1452   return true;
1453 }
1454 
1455 /// Special case for widening with variant loads (see
1456 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1457 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1458   Instruction *NarrowUse = DU.NarrowUse;
1459   Instruction *NarrowDef = DU.NarrowDef;
1460   Instruction *WideDef = DU.WideDef;
1461 
1462   ExtendKind ExtKind = getExtendKind(NarrowDef);
1463 
1464   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1465 
1466   // Generate the widened use instruction.
1467   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1468                    ? WideDef
1469                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1470                                       ExtKind, NarrowUse);
1471   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1472                    ? WideDef
1473                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1474                                       ExtKind, NarrowUse);
1475 
1476   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1477   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1478                                         NarrowBO->getName());
1479   IRBuilder<> Builder(NarrowUse);
1480   Builder.Insert(WideBO);
1481   WideBO->copyIRFlags(NarrowBO);
1482 
1483   if (ExtKind == SignExtended)
1484     ExtendKindMap[NarrowUse] = SignExtended;
1485   else
1486     ExtendKindMap[NarrowUse] = ZeroExtended;
1487 
1488   // Update the Use.
1489   if (ExtKind == SignExtended) {
1490     for (Use &U : NarrowUse->uses()) {
1491       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1492       if (User && User->getType() == WideType) {
1493         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1494                           << *WideBO << "\n");
1495         ++NumElimExt;
1496         User->replaceAllUsesWith(WideBO);
1497         DeadInsts.emplace_back(User);
1498       }
1499     }
1500   } else { // ExtKind == ZeroExtended
1501     for (Use &U : NarrowUse->uses()) {
1502       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1503       if (User && User->getType() == WideType) {
1504         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1505                           << *WideBO << "\n");
1506         ++NumElimExt;
1507         User->replaceAllUsesWith(WideBO);
1508         DeadInsts.emplace_back(User);
1509       }
1510     }
1511   }
1512 }
1513 
1514 /// Determine whether an individual user of the narrow IV can be widened. If so,
1515 /// return the wide clone of the user.
1516 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1517   assert(ExtendKindMap.count(DU.NarrowDef) &&
1518          "Should already know the kind of extension used to widen NarrowDef");
1519 
1520   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1521   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1522     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1523       // For LCSSA phis, sink the truncate outside the loop.
1524       // After SimplifyCFG most loop exit targets have a single predecessor.
1525       // Otherwise fall back to a truncate within the loop.
1526       if (UsePhi->getNumOperands() != 1)
1527         truncateIVUse(DU, DT, LI);
1528       else {
1529         // Widening the PHI requires us to insert a trunc.  The logical place
1530         // for this trunc is in the same BB as the PHI.  This is not possible if
1531         // the BB is terminated by a catchswitch.
1532         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1533           return nullptr;
1534 
1535         PHINode *WidePhi =
1536           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1537                           UsePhi);
1538         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1539         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1540         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1541         UsePhi->replaceAllUsesWith(Trunc);
1542         DeadInsts.emplace_back(UsePhi);
1543         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1544                           << *WidePhi << "\n");
1545       }
1546       return nullptr;
1547     }
1548   }
1549 
1550   // This narrow use can be widened by a sext if it's non-negative or its narrow
1551   // def was widened by a sext. Same for zext.
1552   auto canWidenBySExt = [&]() {
1553     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1554   };
1555   auto canWidenByZExt = [&]() {
1556     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1557   };
1558 
1559   // Our raison d'etre! Eliminate sign and zero extension.
1560   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1561       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1562     Value *NewDef = DU.WideDef;
1563     if (DU.NarrowUse->getType() != WideType) {
1564       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1565       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1566       if (CastWidth < IVWidth) {
1567         // The cast isn't as wide as the IV, so insert a Trunc.
1568         IRBuilder<> Builder(DU.NarrowUse);
1569         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1570       } else {
1572         // A wider extend was hidden behind a narrower one. This may induce
1573         // another round of IV widening in which the intermediate IV becomes
1574         // dead. It should be very rare.
1575         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1576                           << " not wide enough to subsume " << *DU.NarrowUse
1577                           << "\n");
1578         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1579         NewDef = DU.NarrowUse;
1580       }
1581     }
1582     if (NewDef != DU.NarrowUse) {
1583       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1584                         << " replaced by " << *DU.WideDef << "\n");
1585       ++NumElimExt;
1586       DU.NarrowUse->replaceAllUsesWith(NewDef);
1587       DeadInsts.emplace_back(DU.NarrowUse);
1588     }
1589     // Now that the extend is gone, we want to expose its uses for potential
1590     // further simplification. We don't need to directly inform SimplifyIVUsers
1591     // of the new users, because their parent IV will be processed later as a
1592     // new loop phi. If we preserved IVUsers analysis, we would also want to
1593     // push the uses of WideDef here.
1594 
1595     // No further widening is needed. The deceased [sz]ext had done it for us.
1596     return nullptr;
1597   }
1598 
1599   // Does this user itself evaluate to a recurrence after widening?
1600   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1601   if (!WideAddRec.first)
1602     WideAddRec = getWideRecurrence(DU);
1603 
1604   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1605   if (!WideAddRec.first) {
1606     // If use is a loop condition, try to promote the condition instead of
1607     // truncating the IV first.
1608     if (widenLoopCompare(DU))
1609       return nullptr;
1610 
1611     // At this point, we are about to generate a truncate instruction that may
1612     // hurt performance because the scalar evolution expression computed earlier
1613     // in WideAddRec.first does not indicate a polynomial induction expression.
1614     // In that case, look at the operands of the use instruction to determine
1615     // if we can still widen the use instead of truncating its operand.
1616     if (widenWithVariantLoadUse(DU)) {
1617       widenWithVariantLoadUseCodegen(DU);
1618       return nullptr;
1619     }
1620 
1621     // This user does not evaluate to a recurrence after widening, so don't
1622     // follow it. Instead insert a Trunc to kill off the original use,
1623     // eventually isolating the original narrow IV so it can be removed.
1624     truncateIVUse(DU, DT, LI);
1625     return nullptr;
1626   }
1627   // Assume block terminators cannot evaluate to a recurrence. We can't
1628   // insert a Trunc after a terminator if there happens to be a critical edge.
1629   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1630          "SCEV is not expected to evaluate a block terminator");
1631 
1632   // Reuse the IV increment that SCEVExpander created as long as it dominates
1633   // NarrowUse.
1634   Instruction *WideUse = nullptr;
1635   if (WideAddRec.first == WideIncExpr &&
1636       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1637     WideUse = WideInc;
1638   else {
1639     WideUse = cloneIVUser(DU, WideAddRec.first);
1640     if (!WideUse)
1641       return nullptr;
1642   }
1643   // Evaluation of WideAddRec ensured that the narrow expression could be
1644   // extended outside the loop without overflow. This suggests that the wide use
1645   // evaluates to the same expression as the extended narrow use, but doesn't
1646   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1647   // where it fails, we simply throw away the newly created wide use.
1648   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1649     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1650                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1651                       << "\n");
1652     DeadInsts.emplace_back(WideUse);
1653     return nullptr;
1654   }
1655 
1656   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1657   // Returning WideUse pushes it on the worklist.
1658   return WideUse;
1659 }
1660 
1661 /// Add eligible users of NarrowDef to NarrowIVUsers.
1662 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1663   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1664   bool NonNegativeDef =
1665       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1666                            SE->getConstant(NarrowSCEV->getType(), 0));
1667   for (User *U : NarrowDef->users()) {
1668     Instruction *NarrowUser = cast<Instruction>(U);
1669 
1670     // Handle data flow merges and bizarre phi cycles.
1671     if (!Widened.insert(NarrowUser).second)
1672       continue;
1673 
1674     bool NonNegativeUse = false;
1675     if (!NonNegativeDef) {
1676       // We might have control-dependent range information for this context.
1677       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1678         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1679     }
1680 
1681     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1682                                NonNegativeDef || NonNegativeUse);
1683   }
1684 }
1685 
1686 /// Process a single induction variable. First use the SCEVExpander to create a
1687 /// wide induction variable that evaluates to the same recurrence as the
1688 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1689 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1690 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1691 ///
1692 /// It would be simpler to delete uses as they are processed, but we must avoid
1693 /// invalidating SCEV expressions.
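///
/// As a rough source-level illustration (hypothetical C, not from a test):
/// in a loop such as
///
///   for (int32_t i = 0; i < n; ++i)
///     sum += a[(int64_t)i];
///
/// the i32 recurrence is replaced by an i64 one, so the per-iteration sign
/// extension of i folds away.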
1694 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1695   // Is this phi an induction variable?
1696   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1697   if (!AddRec)
1698     return nullptr;
1699 
1700   // Widen the induction variable expression.
1701   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1702                                ? SE->getSignExtendExpr(AddRec, WideType)
1703                                : SE->getZeroExtendExpr(AddRec, WideType);
1704 
1705   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1706          "Expect the new IV expression to preserve its type");
1707 
1708   // Can the IV be extended outside the loop without overflow?
1709   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1710   if (!AddRec || AddRec->getLoop() != L)
1711     return nullptr;
1712 
1713   // An AddRec must have loop-invariant operands. Since this AddRec is
1714   // materialized by a loop header phi, the expression cannot have any post-loop
1715   // operands, so they must dominate the loop header.
1716   assert(
1717       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1718       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1719       "Loop header phi recurrence inputs do not dominate the loop");
1720 
1721   // Iterate over IV uses (including transitive ones) looking for IV increments
1722   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1723   // the increment, calculate control-dependent range information based on
1724   // dominating conditions inside of the loop (e.g. a range check inside of the
1725   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1726   //
1727   // Control-dependent range information is later used to prove that a narrow
1728   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1729   // this on demand because when pushNarrowIVUsers needs this information some
1730   // of the dominating conditions might be already widened.
1731   if (UsePostIncrementRanges)
1732     calculatePostIncRanges(OrigPhi);
1733 
1734   // The rewriter provides a value for the desired IV expression. This may
1735   // either find an existing phi or materialize a new one. Either way, we
1736   // expect a well-formed cyclic phi-with-increments, i.e. any operand not part
1737   // of the phi-SCC dominates the loop entry.
1738   Instruction *InsertPt = &L->getHeader()->front();
1739   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1740 
1741   // Remembering the WideIV increment generated by SCEVExpander allows
1742   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1743   // employ a general reuse mechanism because the call above is the only call to
1744   // SCEVExpander. From here on, we produce 1-to-1 narrow-to-wide uses.
1745   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1746     WideInc =
1747       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1748     WideIncExpr = SE->getSCEV(WideInc);
1749     // Propagate the debug location associated with the original loop increment
1750     // to the new (widened) increment.
1751     auto *OrigInc =
1752       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1753     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1754   }
1755 
1756   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1757   ++NumWidened;
1758 
1759   // Traverse the def-use chain using a worklist starting at the original IV.
1760   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1761 
1762   Widened.insert(OrigPhi);
1763   pushNarrowIVUsers(OrigPhi, WidePhi);
1764 
1765   while (!NarrowIVUsers.empty()) {
1766     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1767 
1768     // Process a def-use edge. This may replace the use, so don't hold a
1769     // use_iterator across it.
1770     Instruction *WideUse = widenIVUse(DU, Rewriter);
1771 
1772     // Follow all def-use edges from the previous narrow use.
1773     if (WideUse)
1774       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1775 
1776     // widenIVUse may have removed the def-use edge.
1777     if (DU.NarrowDef->use_empty())
1778       DeadInsts.emplace_back(DU.NarrowDef);
1779   }
1780 
1781   // Attach any debug information to the new PHI. Since OrigPhi and WidePhi
1782   // evaluate the same recurrence, we can just copy the debug info over.
1783   SmallVector<DbgValueInst *, 1> DbgValues;
1784   llvm::findDbgValues(DbgValues, OrigPhi);
1785   auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
1786                                      ValueAsMetadata::get(WidePhi));
1787   for (auto &DbgValue : DbgValues)
1788     DbgValue->setOperand(0, MDPhi);
1789   return WidePhi;
1790 }
1791 
1792 /// Calculates the control-dependent range for the given def at the given
1793 /// context by looking at dominating conditions inside of the loop.
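///
/// For instance (a hedged sketch, hypothetical IR): for the increment
///
///   %iv.next = add nsw i32 %iv, 1
///
/// whose user is only reached under a dominating check 'icmp sge i32 %iv, 0',
/// the context range of %iv.next becomes roughly [1, INT_MAX], which is enough
/// to treat %iv.next as non-negative there (see pushNarrowIVUsers).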
1794 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1795                                     Instruction *NarrowUser) {
1796   using namespace llvm::PatternMatch;
1797 
1798   Value *NarrowDefLHS;
1799   const APInt *NarrowDefRHS;
1800   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1801                                  m_APInt(NarrowDefRHS))) ||
1802       !NarrowDefRHS->isNonNegative())
1803     return;
1804 
1805   auto UpdateRangeFromCondition = [&] (Value *Condition,
1806                                        bool TrueDest) {
1807     CmpInst::Predicate Pred;
1808     Value *CmpRHS;
1809     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1810                                  m_Value(CmpRHS))))
1811       return;
1812 
1813     CmpInst::Predicate P =
1814             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1815 
1816     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1817     auto CmpConstrainedLHSRange =
1818             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1819     auto NarrowDefRange =
1820             CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1821 
1822     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1823   };
1824 
1825   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1826     if (!HasGuards)
1827       return;
1828 
1829     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1830                                      Ctx->getParent()->rend())) {
1831       Value *C = nullptr;
1832       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1833         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1834     }
1835   };
1836 
1837   UpdateRangeFromGuards(NarrowUser);
1838 
1839   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1840   // If NarrowUserBB is statically unreachable, asking dominator queries may
1841   // yield surprising results (e.g. the block may not have a dom tree node).
1842   if (!DT->isReachableFromEntry(NarrowUserBB))
1843     return;
1844 
1845   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1846        L->contains(DTB->getBlock());
1847        DTB = DTB->getIDom()) {
1848     auto *BB = DTB->getBlock();
1849     auto *TI = BB->getTerminator();
1850     UpdateRangeFromGuards(TI);
1851 
1852     auto *BI = dyn_cast<BranchInst>(TI);
1853     if (!BI || !BI->isConditional())
1854       continue;
1855 
1856     auto *TrueSuccessor = BI->getSuccessor(0);
1857     auto *FalseSuccessor = BI->getSuccessor(1);
1858 
1859     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1860       return BBE.isSingleEdge() &&
1861              DT->dominates(BBE, NarrowUser->getParent());
1862     };
1863 
1864     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1865       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1866 
1867     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1868       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1869   }
1870 }
1871 
1872 /// Calculates the PostIncRangeInfos map for the given IV.
1873 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1874   SmallPtrSet<Instruction *, 16> Visited;
1875   SmallVector<Instruction *, 6> Worklist;
1876   Worklist.push_back(OrigPhi);
1877   Visited.insert(OrigPhi);
1878 
1879   while (!Worklist.empty()) {
1880     Instruction *NarrowDef = Worklist.pop_back_val();
1881 
1882     for (Use &U : NarrowDef->uses()) {
1883       auto *NarrowUser = cast<Instruction>(U.getUser());
1884 
1885       // Don't go looking outside the current loop.
1886       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1887       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1888         continue;
1889 
1890       if (!Visited.insert(NarrowUser).second)
1891         continue;
1892 
1893       Worklist.push_back(NarrowUser);
1894 
1895       calculatePostIncRange(NarrowDef, NarrowUser);
1896     }
1897   }
1898 }
1899 
1900 //===----------------------------------------------------------------------===//
1901 //  Live IV Reduction - Minimize IVs live across the loop.
1902 //===----------------------------------------------------------------------===//
1903 
1904 //===----------------------------------------------------------------------===//
1905 //  Simplification of IV users based on SCEV evaluation.
1906 //===----------------------------------------------------------------------===//
1907 
1908 namespace {
1909 
1910 class IndVarSimplifyVisitor : public IVVisitor {
1911   ScalarEvolution *SE;
1912   const TargetTransformInfo *TTI;
1913   PHINode *IVPhi;
1914 
1915 public:
1916   WideIVInfo WI;
1917 
1918   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1919                         const TargetTransformInfo *TTI,
1920                         const DominatorTree *DTree)
1921     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1922     DT = DTree;
1923     WI.NarrowIV = IVPhi;
1924   }
1925 
1926   // Implement the interface used by simplifyUsersOfIV.
1927   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1928 };
1929 
1930 } // end anonymous namespace
1931 
1932 /// Iteratively perform simplification on a worklist of IV users. Each
1933 /// successive simplification may push more users which may themselves be
1934 /// candidates for simplification.
1935 ///
1936 /// Sign/Zero extend elimination is interleaved with IV simplification.
1937 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1938                                        SCEVExpander &Rewriter,
1939                                        LoopInfo *LI) {
1940   SmallVector<WideIVInfo, 8> WideIVs;
1941 
1942   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1943           Intrinsic::getName(Intrinsic::experimental_guard));
1944   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1945 
1946   SmallVector<PHINode*, 8> LoopPhis;
1947   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1948     LoopPhis.push_back(cast<PHINode>(I));
1949   }
1950   // Each round of simplification iterates through the SimplifyIVUsers worklist
1951   // for all current phis, then determines whether any IVs can be
1952   // widened. Widening adds new phis to LoopPhis, inducing another round of
1953   // simplification on the wide IVs.
1954   bool Changed = false;
1955   while (!LoopPhis.empty()) {
1956     // Evaluate as many IV expressions as possible before widening any IVs. This
1957     // forces SCEV to set no-wrap flags before evaluating sign/zero
1958     // extension. The first time SCEV attempts to normalize sign/zero extension,
1959     // the result becomes final. So for the most predictable results, we delay
1960     // evaluation of sign/zero extend evaluation until needed, and avoid running
1961     // other SCEV based analysis prior to simplifyAndExtend.
1962     do {
1963       PHINode *CurrIV = LoopPhis.pop_back_val();
1964 
1965       // Information about sign/zero extensions of CurrIV.
1966       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1967 
1968       Changed |=
1969           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1970 
1971       if (Visitor.WI.WidestNativeType) {
1972         WideIVs.push_back(Visitor.WI);
1973       }
1974     } while(!LoopPhis.empty());
1975 
1976     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1977       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1978       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1979         Changed = true;
1980         LoopPhis.push_back(WidePhi);
1981       }
1982     }
1983   }
1984   return Changed;
1985 }
1986 
1987 //===----------------------------------------------------------------------===//
1988 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1989 //===----------------------------------------------------------------------===//
1990 
1991 /// Given a Value which is hoped to be part of an add recurrence in the given
1992 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1993 /// that this is less general than SCEV's AddRec checking.
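///
/// For instance (illustrative): given '%inc = add i32 %phi, %step' with %phi
/// placed in the loop header and %step loop-invariant, this returns %phi.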
1994 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1995   Instruction *IncI = dyn_cast<Instruction>(IncV);
1996   if (!IncI)
1997     return nullptr;
1998 
1999   switch (IncI->getOpcode()) {
2000   case Instruction::Add:
2001   case Instruction::Sub:
2002     break;
2003   case Instruction::GetElementPtr:
2004     // An IV counter must preserve its type.
2005     if (IncI->getNumOperands() == 2)
2006       break;
2007     LLVM_FALLTHROUGH;
2008   default:
2009     return nullptr;
2010   }
2011 
2012   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2013   if (Phi && Phi->getParent() == L->getHeader()) {
2014     if (L->isLoopInvariant(IncI->getOperand(1)))
2015       return Phi;
2016     return nullptr;
2017   }
2018   if (IncI->getOpcode() == Instruction::GetElementPtr)
2019     return nullptr;
2020 
2021   // Allow add/sub to be commuted.
2022   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2023   if (Phi && Phi->getParent() == L->getHeader()) {
2024     if (L->isLoopInvariant(IncI->getOperand(0)))
2025       return Phi;
2026   }
2027   return nullptr;
2028 }
2029 
2030 /// Given a loop with one backedge and one exit, return the ICmpInst
2031 /// controlling the sole loop exit.  There is no guarantee that the exiting
2032 /// block is also the latch.
2033 static ICmpInst *getLoopTest(Loop *L, BasicBlock *ExitingBB) {
2035   BasicBlock *LatchBlock = L->getLoopLatch();
2036   // Don't bother with LFTR if the loop is not properly simplified.
2037   if (!LatchBlock)
2038     return nullptr;
2039 
2040   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2041   return dyn_cast<ICmpInst>(BI->getCondition());
2042 }
2043 
2044 /// linearFunctionTestReplace policy. Return true unless we can show that the
2045 /// current exit test is already sufficiently canonical.
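///
/// For example (illustrative): an exit test of the form
///
///   %exitcond = icmp ne i64 %iv.next, %limit   ; %limit loop-invariant
///
/// on a simple counter is already canonical, so no rewrite is needed; a test
/// such as 'icmp slt i64 %iv, %n' is not, and gets rewritten.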
2046 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2047   // Avoid converting a constant or loop invariant test back to a runtime
2048   // test.  This is critical for when SCEV's cached ExitCount is less precise
2049   // than the current IR (such as after we've proven a particular exit is
2050   // actually dead and thus the BE count never reaches our ExitCount).
2051   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2052   if (L->isLoopInvariant(BI->getCondition()))
2053     return false;
2054 
2055   // Do LFTR to simplify the exit condition to an ICMP.
2056   ICmpInst *Cond = getLoopTest(L, ExitingBB);
2057   if (!Cond)
2058     return true;
2059 
2060   // Do LFTR to simplify the exit ICMP to EQ/NE
2061   ICmpInst::Predicate Pred = Cond->getPredicate();
2062   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2063     return true;
2064 
2065   // Look for a loop invariant RHS
2066   Value *LHS = Cond->getOperand(0);
2067   Value *RHS = Cond->getOperand(1);
2068   if (!L->isLoopInvariant(RHS)) {
2069     if (!L->isLoopInvariant(LHS))
2070       return true;
2071     std::swap(LHS, RHS);
2072   }
2073   // Look for a simple IV counter LHS
2074   PHINode *Phi = dyn_cast<PHINode>(LHS);
2075   if (!Phi)
2076     Phi = getLoopPhiForCounter(LHS, L);
2077 
2078   if (!Phi)
2079     return true;
2080 
2081   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2082   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2083   if (Idx < 0)
2084     return true;
2085 
2086   // Do LFTR if the exit condition's IV is *not* a simple counter.
2087   Value *IncV = Phi->getIncomingValue(Idx);
2088   return Phi != getLoopPhiForCounter(IncV, L);
2089 }
2090 
2091 /// Return true if undefined behavior would provably be executed on the path to
2092 /// OnPathTo if Root produced a poison result.  Note that this doesn't say
2093 /// anything about whether OnPathTo is actually executed or whether Root is
2094 /// actually poison.  This can be used to assess whether a new use of Root can
2095 /// be added at a location which is control equivalent with OnPathTo (such as
2096 /// immediately before it) without introducing UB which didn't previously
2097 /// exist.  Note that a false result conveys no information.
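///
/// A hedged example (hypothetical IR): if Root feeds the pointer operand of a
/// store that dominates OnPathTo,
///
///   %p = getelementptr i8, i8* %base, i64 %root
///   store i8 0, i8* %p    ; UB when %p is poison
///
/// then poison in %root implies UB provably executes before OnPathTo is
/// reached, so this returns true.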
2098 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2099                                           Instruction *OnPathTo,
2100                                           DominatorTree *DT) {
2101   // The basic approach is to assume Root is poison, propagate poison forward
2102   // through all users we can easily track, and then check whether any of those
2103   // users provably trigger UB and must execute before our exiting block might
2104   // exit.
2105 
2106   // The set of all recursive users we've visited (which are assumed to all be
2107   // poison because of said visit)
2108   SmallSet<const Value *, 16> KnownPoison;
2109   SmallVector<const Instruction*, 16> Worklist;
2110   Worklist.push_back(Root);
2111   while (!Worklist.empty()) {
2112     const Instruction *I = Worklist.pop_back_val();
2113 
2114     // If we know this must trigger UB on a path leading to our target.
2115     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2116       return true;
2117 
2118     // If we can't analyze propagation through this instruction, just skip it
2119     // and transitive users.  Safe, as false is a conservative result.
2120     if (!propagatesFullPoison(I) && I != Root)
2121       continue;
2122 
2123     if (KnownPoison.insert(I).second)
2124       for (const User *User : I->users())
2125         Worklist.push_back(cast<Instruction>(User));
2126   }
2127 
2128   // Might be non-UB, or might have a path we couldn't prove must execute on
2129   // the way to the exiting bb.
2130   return false;
2131 }
2132 
2133 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2134 /// down to checking that all operands are constant and listing instructions
2135 /// that may hide undef.
2136 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2137                                unsigned Depth) {
2138   if (isa<Constant>(V))
2139     return !isa<UndefValue>(V);
2140 
2141   if (Depth >= 6)
2142     return false;
2143 
2144   // Conservatively handle non-constant non-instructions. For example, Arguments
2145   // may be undef.
2146   Instruction *I = dyn_cast<Instruction>(V);
2147   if (!I)
2148     return false;
2149 
2150   // Load and return values may be undef.
2151   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2152     return false;
2153 
2154   // Optimistically handle other instructions.
2155   for (Value *Op : I->operands()) {
2156     if (!Visited.insert(Op).second)
2157       continue;
2158     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2159       return false;
2160   }
2161   return true;
2162 }
2163 
2164 /// Return true if the given value is concrete. We must prove that undef can
2165 /// never reach it.
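///
/// For example (illustrative): a value computed purely from non-undef
/// constants is concrete, while anything fed (even transitively) by 'undef',
/// a load, or a call result is conservatively not.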
2166 ///
2167 /// TODO: If we decide that this is a good approach to checking for undef, we
2168 /// may factor it into a common location.
2169 static bool hasConcreteDef(Value *V) {
2170   SmallPtrSet<Value*, 8> Visited;
2171   Visited.insert(V);
2172   return hasConcreteDefImpl(V, Visited, 0);
2173 }
2174 
2175 /// Return true if this IV has any uses other than the (soon to be rewritten)
2176 /// loop exit test.
2177 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2178   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2179   Value *IncV = Phi->getIncomingValue(LatchIdx);
2180 
2181   for (User *U : Phi->users())
2182     if (U != Cond && U != IncV) return false;
2183 
2184   for (User *U : IncV->users())
2185     if (U != Cond && U != Phi) return false;
2186   return true;
2187 }
2188 
2189 /// Return true if the given phi is a "counter" in L.  A counter is an
2190 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2191 /// step of 1.  Note that L must have exactly one latch.
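///
/// For instance (hypothetical IR), the accepted shape is:
///
///   %iv      = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i64 %iv, 1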
2192 static bool isLoopCounter(PHINode* Phi, Loop *L,
2193                           ScalarEvolution *SE) {
2194   assert(Phi->getParent() == L->getHeader());
2195   assert(L->getLoopLatch());
2196 
2197   if (!SE->isSCEVable(Phi->getType()))
2198     return false;
2199 
2200   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2201   if (!AR || AR->getLoop() != L || !AR->isAffine())
2202     return false;
2203 
2204   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2205   if (!Step || !Step->isOne())
2206     return false;
2207 
2208   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2209   Value *IncV = Phi->getIncomingValue(LatchIdx);
2210   return (getLoopPhiForCounter(IncV, L) == Phi);
2211 }
2212 
2213 /// Search the loop header for a loop counter (an add rec w/step of one)
2214 /// suitable for use by LFTR.  If multiple counters are available, select the
2215 /// "best" one based on profitability heuristics.
2216 ///
2217 /// BECount may be an i8* pointer type. The pointer difference is already
2218 /// valid count without scaling the address stride, so it remains a pointer
2219 /// expression as far as SCEV is concerned.
2220 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2221                                 const SCEV *BECount,
2222                                 ScalarEvolution *SE, DominatorTree *DT) {
2223   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2224 
2225   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2226 
2227   // Loop over all of the PHI nodes, looking for a simple counter.
2228   PHINode *BestPhi = nullptr;
2229   const SCEV *BestInit = nullptr;
2230   BasicBlock *LatchBlock = L->getLoopLatch();
2231   assert(LatchBlock && "needsLFTR should guarantee a loop latch");
2232   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2233 
2234   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2235     PHINode *Phi = cast<PHINode>(I);
2236     if (!isLoopCounter(Phi, L, SE))
2237       continue;
2238 
2239     // Avoid comparing an integer IV against a pointer Limit.
2240     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2241       continue;
2242 
2243     const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2244 
2245     // AR may be a pointer type, while BECount is an integer type.
2246     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2247     // AR may not be a narrower type, or we may never exit.
2248     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2249     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2250       continue;
2251 
2252     // Avoid reusing a potentially undef value to compute other values that may
2253     // have originally had a concrete definition.
2254     if (!hasConcreteDef(Phi)) {
2255       // We explicitly allow unknown phis as long as they are already used by
2256       // the loop exit test.  This is legal since performing LFTR could not
2257       // increase the number of undef users.
2258       if (ICmpInst *Cond = getLoopTest(L, ExitingBB))
2259         if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) &&
2260             Phi != getLoopPhiForCounter(Cond->getOperand(1), L))
2261           continue;
2262     }
2263 
2264     // Avoid introducing undefined behavior due to poison which didn't exist in
2265     // the original program.  (Annoyingly, the rules for poison and undef
2266     // propagation are distinct, so this does NOT cover the undef case above.)
2267     // We have to ensure that we don't introduce UB by introducing a use on an
2268     // iteration where said IV produces poison.  Our strategy here differs for
2269     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
2270     // see code in linearFunctionTestReplace.  For pointers, we restrict
2271     // transforms as there is no good way to reinfer inbounds once lost.
2272     if (!Phi->getType()->isIntegerTy() &&
2273         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2274       continue;
2275 
2276     const SCEV *Init = AR->getStart();
2277 
2278     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2279       // Don't force a live loop counter if another IV can be used.
2280       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2281         continue;
2282 
2283       // Prefer to count-from-zero. This is a more "canonical" counter form. It
2284       // also prefers integer to pointer IVs.
2285       if (BestInit->isZero() != Init->isZero()) {
2286         if (BestInit->isZero())
2287           continue;
2288       }
2289       // If two IVs both count from zero or both count from nonzero then the
2290       // narrower is likely a dead phi that has been widened. Use the wider phi
2291       // to allow the other to be eliminated.
2292       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2293         continue;
2294     }
2295     BestPhi = Phi;
2296     BestInit = Init;
2297   }
2298   return BestPhi;
2299 }
2300 
2301 /// Insert an IR expression which computes the value held by the IV IndVar
2302 /// (which must be a loop counter w/unit stride) after the backedge of loop L
2303 /// is taken IVCount times.
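///
/// In the common integer case this amounts to materializing IVInit + IVCount;
/// e.g. (hedged, hypothetical values) a counter starting at %start in a loop
/// with count %c yields the limit '%start + %c' for the exit compare.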
2304 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2305                            const SCEV *IVCount, Loop *L,
2306                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2307   assert(isLoopCounter(IndVar, L, SE));
2308   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2309   const SCEV *IVInit = AR->getStart();
2310 
2311   // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
2312   // finds a valid pointer IV. Zero extend BECount in order to materialize a
2313   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2314   // the existing GEPs whenever possible.
2315   if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
2316     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2317     // signed value. IVCount on the other hand represents the loop trip count,
2318     // which is an unsigned value. FindLoopCounter only allows induction
2319     // variables that have a positive unit stride of one. This means we don't
2320     // have to handle the case of negative offsets (yet) and just need to zero
2321     // extend IVCount.
2322     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2323     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
2324 
2325     // Expand the code for the iteration count.
2326     assert(SE->isLoopInvariant(IVOffset, L) &&
2327            "Computed iteration count is not loop invariant!");
2328     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2329     Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
2330 
2331     Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
2332     assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
2333     // We could handle pointer IVs other than i8*, but we need to compensate for
2334     // gep index scaling.
2335     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2336                              cast<PointerType>(GEPBase->getType())
2337                                  ->getElementType())->isOne() &&
2338            "unit stride pointer IV must be i8*");
2339 
2340     IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
2341     return Builder.CreateGEP(GEPBase->getType()->getPointerElementType(),
2342                              GEPBase, GEPOffset, "lftr.limit");
2343   } else {
2344     // In any other case, convert both IVInit and IVCount to integers before
2345     // comparing. This may result in SCEV expansion of pointers, but in practice
2346     // SCEV will fold the pointer arithmetic away as such:
2347     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2348     //
2349     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2350     // for simple memset-style loops.
2351     //
2352     // IVInit integer and IVCount pointer would only occur if a canonical IV
2353     // were generated on top of case #2, which is not expected.
2354 
2355     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2356     const SCEV *IVLimit = nullptr;
2357     // For unit stride, IVCount = Start + BECount with 2's complement overflow.
2358     // For non-zero Start, compute IVCount here.
2359     if (AR->getStart()->isZero())
2360       IVLimit = IVCount;
2361     else {
2362       const SCEV *IVInit = AR->getStart();
2363 
2364       // For integer IVs, truncate the IV before computing IVInit + BECount.
2365       if (SE->getTypeSizeInBits(IVInit->getType())
2366           > SE->getTypeSizeInBits(IVCount->getType()))
2367         IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
2368 
2369       IVLimit = SE->getAddExpr(IVInit, IVCount);
2370     }
2371     // Expand the code for the iteration count.
2372     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2373     IRBuilder<> Builder(BI);
2374     assert(SE->isLoopInvariant(IVLimit, L) &&
2375            "Computed iteration count is not loop invariant!");
2376     // Ensure that we generate the same type as IndVar, or a smaller integer
2377     // type. In the presence of null pointer values, we have an integer type
2378     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2379     Type *LimitTy = IVCount->getType()->isPointerTy() ?
2380       IndVar->getType() : IVCount->getType();
2381     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2382   }
2383 }
2384 
2385 /// This method rewrites the exit condition of the loop to be a canonical !=
2386 /// comparison against the incremented loop induction variable.  This pass is
2387 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2388 /// determine a loop-invariant trip count of the loop, which is actually a much
2389 /// broader range than just linear tests.
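///
/// For instance (an illustrative, source-level sketch): the exit test of
///
///   for (i = 0; i < n; ++i) ...
///
/// is rewritten to compare with != once SCEV establishes the loop runs
/// exactly n times, effectively giving 'for (i = 0; i != n; ++i)'.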
2390 bool IndVarSimplify::
2391 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2392                           const SCEV *ExitCount,
2393                           PHINode *IndVar, SCEVExpander &Rewriter) {
2394   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2395   assert(isLoopCounter(IndVar, L, SE));
2396   Instruction * const IncVar =
2397     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2398 
2399   // Initialize CmpIndVar and IVCount to their preincremented values.
2400   Value *CmpIndVar = IndVar;
2401   const SCEV *IVCount = ExitCount;
2402 
2403   // If the exiting block is the same as the backedge block, we prefer to
2404   // compare against the post-incremented value, otherwise we must compare
2405   // against the preincremented value.
2406   if (ExitingBB == L->getLoopLatch()) {
2407     bool SafeToPostInc = IndVar->getType()->isIntegerTy();
2408     if (!SafeToPostInc) {
2409       // For pointer IVs, we chose not to strip inbounds, which requires us not
2410       // to add a potentially UB-introducing use.  We need to either a) show
2411       // the loop test we're modifying is already in post-inc form, or b) show
2412       // that adding a use must not introduce UB.
2413       if (ICmpInst *LoopTest = getLoopTest(L, ExitingBB))
2414         SafeToPostInc = LoopTest->getOperand(0) == IncVar ||
2415           LoopTest->getOperand(1) == IncVar;
2416       if (!SafeToPostInc)
2417         SafeToPostInc =
2418           mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2419     }
2420     if (SafeToPostInc) {
2421       // Add one to the "backedge-taken" count to get the trip count.
2422       // This addition may overflow, which is valid as long as the comparison
2423       // is truncated to ExitCount->getType().
2424       IVCount = SE->getAddExpr(ExitCount,
2425                                SE->getOne(ExitCount->getType()));
2426       // The BackedgeTaken expression contains the number of times that the
2427       // backedge branches to the loop header.  This is one less than the
2428       // number of times the loop executes, so use the incremented indvar.
2429       CmpIndVar = IncVar;
2430     }
2431   }
2432 
2433   // It may be necessary to drop nowrap flags on the incrementing instruction
2434   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2435   // case the increment might have previously been poison on the last iteration
2436   // only) or if LFTR switches to a different IV that was previously dynamically
2437   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2438   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2439   // check), because the pre-inc addrec flags may be adopted from the original
2440   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2441   // TODO: This handling is inaccurate for one case: If we switch to a
2442   // dynamically dead IV that wraps on the first loop iteration only, which is
2443   // not covered by the post-inc addrec. (If the new IV was not dynamically
2444   // dead, it could not be poison on the first iteration in the first place.)
2445   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2446     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2447     if (BO->hasNoUnsignedWrap())
2448       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2449     if (BO->hasNoSignedWrap())
2450       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2451   }
2452 
2453   Value *ExitCnt = genLoopLimit(IndVar, ExitingBB, IVCount, L, Rewriter, SE);
2454   assert(ExitCnt->getType()->isPointerTy() ==
2455              IndVar->getType()->isPointerTy() &&
2456          "genLoopLimit missed a cast");
2457 
2458   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2459   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2460   ICmpInst::Predicate P;
2461   if (L->contains(BI->getSuccessor(0)))
2462     P = ICmpInst::ICMP_NE;
2463   else
2464     P = ICmpInst::ICMP_EQ;
2465 
2466   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2467                     << "      LHS:" << *CmpIndVar << '\n'
2468                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2469                     << "\n"
2470                     << "      RHS:\t" << *ExitCnt << "\n"
2471                     << "  IVCount:\t" << *IVCount << "\n");
2472 
2473   IRBuilder<> Builder(BI);
2474 
2475   // The new loop exit condition should reuse the debug location of the
2476   // original loop exit condition.
2477   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2478     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2479 
2480   // LFTR can ignore IV overflow and truncate to the width of
2481   // BECount. This avoids materializing the add(zext(add)) expression.
2482   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2483   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2484   if (CmpIndVarSize > ExitCntSize) {
2485     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2486     const SCEV *ARStart = AR->getStart();
2487     const SCEV *ARStep = AR->getStepRecurrence(*SE);
2488     // For constant IVCount, avoid truncation.
2489     if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
2490       const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
2491       APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
2492       // Note that the post-inc value of ExitCount may have overflowed
2493       // above such that IVCount is now zero.
2494       if (IVCount != ExitCount && Count == 0) {
2495         Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
2496         ++Count;
2497       } else
2499         Count = Count.zext(CmpIndVarSize);
2500       APInt NewLimit;
2501       if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
2502         NewLimit = Start - Count;
2503       else
2504         NewLimit = Start + Count;
2505       ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
2506 
2507       LLVM_DEBUG(dbgs() << "  Widen RHS:\t" << *ExitCnt << "\n");
2508     } else {
2509       // We try to extend the trip count first; if that doesn't work, we
2510       // truncate the IV. Zext(trunc(IV)) == IV implies equivalence of
2511       // Trunc(IV) == ExitCnt and IV == zext(ExitCnt); similarly for sext. If
2512       // one of the two holds, extend the trip count, otherwise truncate the IV.
2513       bool Extended = false;
2514       const SCEV *IV = SE->getSCEV(CmpIndVar);
2515       const SCEV *ZExtTrunc =
2516            SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2517                                                      ExitCnt->getType()),
2518                                  CmpIndVar->getType());
2519 
2520       if (ZExtTrunc == IV) {
2521         Extended = true;
2522         ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2523                                      "wide.trip.count");
2524       } else {
2525         const SCEV *SExtTrunc =
2526           SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2527                                                     ExitCnt->getType()),
2528                                 CmpIndVar->getType());
2529         if (SExtTrunc == IV) {
2530           Extended = true;
2531           ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2532                                        "wide.trip.count");
2533         }
2534       }
2535 
2536       if (!Extended)
2537         CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2538                                         "lftr.wideiv");
2539     }
2540   }
2541   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2542   Value *OrigCond = BI->getCondition();
2543   // It's tempting to use replaceAllUsesWith here to fully replace the old
2544   // comparison, but that's not immediately safe, since users of the old
2545   // comparison may not be dominated by the new comparison. Instead, just
2546   // update the branch to use the new comparison; in the common case this
2547   // will make the old comparison dead.
2548   BI->setCondition(Cond);
2549   DeadInsts.push_back(OrigCond);
2550 
2551   ++NumLFTR;
2552   return true;
2553 }
2554 
2555 //===----------------------------------------------------------------------===//
2556 //  sinkUnusedInvariants. A late subpass to clean up loop preheaders.
2557 //===----------------------------------------------------------------------===//
2558 
2559 /// If there's a single exit block, sink any loop-invariant values that
2560 /// were defined in the preheader but not used inside the loop into the
2561 /// exit block to reduce register pressure in the loop.
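/// For example (a hedged sketch; names are illustrative):
///   preheader:
///     %inv = add i64 %a, %b      ; loop-invariant, unused inside the loop
///     br label %header
///   ...
///   exit:
///     %use = mul i64 %inv, 2
/// Here %inv can be moved from the preheader into the exit block, so no
/// register must hold its value for the duration of the loop.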
2562 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2563   BasicBlock *ExitBlock = L->getExitBlock();
2564   if (!ExitBlock) return false;
2565 
2566   BasicBlock *Preheader = L->getLoopPreheader();
2567   if (!Preheader) return false;
2568 
2569   bool MadeAnyChanges = false;
2570   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2571   BasicBlock::iterator I(Preheader->getTerminator());
2572   while (I != Preheader->begin()) {
2573     --I;
2574     // New instructions were inserted at the end of the preheader; once we reach the leading PHIs we are done.
2575     if (isa<PHINode>(I))
2576       break;
2577 
2578     // Don't move instructions which might have side effects, since those
2579     // effects must complete before the instructions inside the loop execute.
2580     // Also don't move instructions which might read memory, since the loop
2581     // may modify memory. Note that it's okay if the instruction might have
2582     // undefined behavior: LoopSimplify guarantees that the preheader
2583     // dominates the exit block.
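    // E.g. (illustrative): a load in the preheader must stay, since a store
    // inside the loop may clobber the loaded address, whereas a udiv that
    // could trap may still be sunk: the preheader dominates the exit block,
    // so the division executes on every path reaching the exit either way.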
2584     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2585       continue;
2586 
2587     // Skip debug info intrinsics.
2588     if (isa<DbgInfoIntrinsic>(I))
2589       continue;
2590 
2591     // Skip EH pad instructions.
2592     if (I->isEHPad())
2593       continue;
2594 
2595     // Don't sink allocas: we never want to sink static allocas out of the
2596     // entry block, and correctly sinking dynamic allocas requires
2597     // checks for stacksave/stackrestore intrinsics.
2598     // FIXME: Refactor this check somehow?
2599     if (isa<AllocaInst>(I))
2600       continue;
2601 
2602     // Determine if there is a use in or before the loop (direct or
2603     // otherwise).
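    // For PHI users, the use is attributed to the incoming block rather than
    // the block containing the PHI. E.g. (illustrative):
    //   exit: %p = phi i64 [ %v, %latch ]
    // here the use of %v in %p counts as a use in %latch, i.e. in the loop.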
2604     bool UsedInLoop = false;
2605     for (Use &U : I->uses()) {
2606       Instruction *User = cast<Instruction>(U.getUser());
2607       BasicBlock *UseBB = User->getParent();
2608       if (PHINode *P = dyn_cast<PHINode>(User)) {
2609         unsigned i =
2610           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2611         UseBB = P->getIncomingBlock(i);
2612       }
2613       if (UseBB == Preheader || L->contains(UseBB)) {
2614         UsedInLoop = true;
2615         break;
2616       }
2617     }
2618 
2619     // If there is, the def must remain in the preheader.
2620     if (UsedInLoop)
2621       continue;
2622 
2623     // Otherwise, sink it to the exit block.
2624     Instruction *ToMove = &*I;
2625     bool Done = false;
2626 
2627     if (I != Preheader->begin()) {
2628       // Skip debug info intrinsics.
2629       do {
2630         --I;
2631       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2632 
2633       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2634         Done = true;
2635     } else {
2636       Done = true;
2637     }
2638 
2639     MadeAnyChanges = true;
2640     ToMove->moveBefore(*ExitBlock, InsertPt);
2641     if (Done) break;
2642     InsertPt = ToMove->getIterator();
2643   }
2644 
2645   return MadeAnyChanges;
2646 }
2647 
2648 //===----------------------------------------------------------------------===//
2649 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2650 //===----------------------------------------------------------------------===//
2651 
2652 bool IndVarSimplify::run(Loop *L) {
2653   // We need (and expect!) the incoming loop to be in LCSSA.
2654   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2655          "LCSSA required to run indvars!");
2656   bool Changed = false;
2657 
2658   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2659   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2660   //    canonicalization can be a pessimization without LSR to "clean up"
2661   //    afterwards.
2662   //  - We depend on having a preheader; in particular,
2663   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2664   //    and we're in trouble if we can't find the induction variable even when
2665   //    we've manually inserted one.
2666   //  - LFTR relies on having a single backedge.
2667   if (!L->isLoopSimplifyForm())
2668     return false;
2669 
2670   // If there are any floating-point recurrences, attempt to
2671   // transform them to use integer recurrences.
2672   Changed |= rewriteNonIntegerIVs(L);
2673 
2674   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2675 
2676   // Create a rewriter object which we'll use to transform the code with.
2677   SCEVExpander Rewriter(*SE, DL, "indvars");
2678 #ifndef NDEBUG
2679   Rewriter.setDebugType(DEBUG_TYPE);
2680 #endif
2681 
2682   // Eliminate redundant IV users.
2683   //
2684   // Simplification works best when run before other consumers of SCEV. We
2685   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2686   // other expressions involving loop IVs have been evaluated. This helps SCEV
2687   // set no-wrap flags before normalizing sign/zero extension.
2688   Rewriter.disableCanonicalMode();
2689   Changed |= simplifyAndExtend(L, Rewriter, LI);
2690 
2691   // Check whether this loop has a computable loop-invariant execution count.
2692   // If so, we can compute the final value of any expression that is
2693   // recurrent in the loop, and substitute the exit values from the loop
2694   // into any instructions outside of the loop that use those final
2695   // values.
2696   //
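  // For instance (an illustrative sketch): in
  //   for (i = 0; i != n; ++i) sum += 2;
  //   use(sum);
  // the exit value of 'sum' is the loop-invariant expression 2*n, so the
  // use after the loop can be rewritten to 2*n directly.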
2697   if (ReplaceExitValue != NeverRepl &&
2698       !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2699     Changed |= rewriteLoopExitValues(L, Rewriter);
2700 
2701   // Eliminate redundant IV cycles.
2702   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2703 
2704   // If we have a trip count expression, rewrite the loop's exit condition
2705   // using it.
2706   if (!DisableLFTR) {
2707     SmallVector<BasicBlock*, 16> ExitingBlocks;
2708     L->getExitingBlocks(ExitingBlocks);
2709     for (BasicBlock *ExitingBB : ExitingBlocks) {
2710       // Can't rewrite non-branch terminators yet.
2711       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2712         continue;
2713 
2714       // If our exiting block exits multiple loops, we can only rewrite the
2715       // innermost one.  Otherwise, we're changing how many times the innermost
2716       // loop runs before it exits.
2717       if (LI->getLoopFor(ExitingBB) != L)
2718         continue;
2719 
2720       if (!needsLFTR(L, ExitingBB))
2721         continue;
2722 
2723       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2724       if (isa<SCEVCouldNotCompute>(ExitCount))
2725         continue;
2726 
2727       // If we know we'd exit on the first iteration, rewrite the exit to
2728       // reflect this.  This does not imply the loop must exit through this
2729       // exit; there may be an earlier one taken on the first iteration.
2730       // TODO: Given we know the backedge can't be taken, we should go ahead
2731       // and break it.  Or at least, kill all the header phis and simplify.
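      // Sketch of this rewrite (illustrative): given
      //   br i1 %cond, label %exit, label %latch
      // where SCEV proves this exit count is zero, %cond is replaced by the
      // constant true, so control leaves the loop here on the first
      // iteration.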
2732       if (ExitCount->isZero()) {
2733         auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2734         bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2735         auto *NewCond = ExitIfTrue ?
2736           ConstantInt::getTrue(BI->getCondition()->getType()) :
2737           ConstantInt::getFalse(BI->getCondition()->getType());
2738         BI->setCondition(NewCond);
2739         Changed = true;
2740         continue;
2741       }
2742 
2743       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2744       if (!IndVar)
2745         continue;
2746 
2747       // Avoid high cost expansions.  Note: This heuristic is questionable in
2748       // that our definition of "high cost" is not exactly principled.
2749       if (Rewriter.isHighCostExpansion(ExitCount, L))
2750         continue;
2751 
2752       // Check preconditions for proper SCEVExpander operation. SCEV does not
2753       // express SCEVExpander's dependencies, such as LoopSimplify; instead,
2754       // any pass using SCEVExpander must enforce them itself. This does not work
2755       // well for loop passes because SCEVExpander makes assumptions about
2756       // all loops, while LoopPassManager only forces the current loop to be
2757       // simplified.
2758       //
2759       // FIXME: SCEV expansion has no way to bail out, so the caller must
2760       // explicitly check any assumptions made by SCEV. Brittle.
2761       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2762       if (!AR || AR->getLoop()->getLoopPreheader())
2763         Changed |= linearFunctionTestReplace(L, ExitingBB,
2764                                              ExitCount, IndVar,
2765                                              Rewriter);
2766     }
2767   }
2768   // Clear the rewriter cache, because values that are in the rewriter's cache
2769   // can be deleted in the loop below, causing the AssertingVH in the cache to
2770   // trigger.
2771   Rewriter.clear();
2772 
2773   // Now that we're done iterating through lists, clean up any instructions
2774   // which are now dead.
2775   while (!DeadInsts.empty())
2776     if (Instruction *Inst =
2777             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2778       Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
2779 
2780   // The Rewriter may not be used from this point on.
2781 
2782   // Loop-invariant instructions in the preheader that aren't used in the
2783   // loop may be sunk below the loop to reduce register pressure.
2784   Changed |= sinkUnusedInvariants(L);
2785 
2786   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2787   // the trip count and can therefore simplify exit values beyond what
2788   // rewriteLoopExitValues achieves.
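  // E.g. (a hedged sketch): if an exit's condition is loop-invariant, that
  // exit can only be taken on the first iteration, so an LCSSA phi in the
  // exit block can take the corresponding first-iteration value, such as a
  // header phi's incoming value from the preheader.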
2789   Changed |= rewriteFirstIterationLoopExitValues(L);
2790 
2791   // Clean up dead instructions.
2792   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2793 
2794   // Check a post-condition.
2795   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2796          "Indvars did not preserve LCSSA!");
2797 
2798   // Verify that LFTR and any other changes have not interfered with SCEV's
2799   // ability to compute the trip count.
2800 #ifndef NDEBUG
2801   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2802     SE->forgetLoop(L);
2803     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2804     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2805         SE->getTypeSizeInBits(NewBECount->getType()))
2806       NewBECount = SE->getTruncateOrNoop(NewBECount,
2807                                          BackedgeTakenCount->getType());
2808     else
2809       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2810                                                  NewBECount->getType());
2811     assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2812   }
2813 #endif
2814 
2815   return Changed;
2816 }
2817 
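// A typical standalone invocation of this pass (assuming the usual 'opt'
// command-line conventions):
//   opt -passes=indvars -S input.ll    ; new pass manager
//   opt -indvars -S input.ll           ; legacy pass manager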
2818 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2819                                           LoopStandardAnalysisResults &AR,
2820                                           LPMUpdater &) {
2821   Function *F = L.getHeader()->getParent();
2822   const DataLayout &DL = F->getParent()->getDataLayout();
2823 
2824   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2825   if (!IVS.run(&L))
2826     return PreservedAnalyses::all();
2827 
2828   auto PA = getLoopPassPreservedAnalyses();
2829   PA.preserveSet<CFGAnalyses>();
2830   return PA;
2831 }
2832 
2833 namespace {
2834 
2835 struct IndVarSimplifyLegacyPass : public LoopPass {
2836   static char ID; // Pass identification, replacement for typeid
2837 
2838   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2839     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2840   }
2841 
2842   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2843     if (skipLoop(L))
2844       return false;
2845 
2846     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2847     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2848     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2849     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2850     auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
2851     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2852     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2853     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2854 
2855     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2856     return IVS.run(L);
2857   }
2858 
2859   void getAnalysisUsage(AnalysisUsage &AU) const override {
2860     AU.setPreservesCFG();
2861     getLoopAnalysisUsage(AU);
2862   }
2863 };
2864 
2865 } // end anonymous namespace
2866 
2867 char IndVarSimplifyLegacyPass::ID = 0;
2868 
2869 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2870                       "Induction Variable Simplification", false, false)
2871 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2872 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2873                     "Induction Variable Simplification", false, false)
2874 
2875 Pass *llvm::createIndVarSimplifyPass() {
2876   return new IndVarSimplifyLegacyPass();
2877 }
2878