xref: /llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision e862e78b63f918c000ce65f9c475730e673a4966)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/MemorySSA.h"
42 #include "llvm/Analysis/MemorySSAUpdater.h"
43 #include "llvm/Analysis/ScalarEvolution.h"
44 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
45 #include "llvm/Analysis/TargetLibraryInfo.h"
46 #include "llvm/Analysis/TargetTransformInfo.h"
47 #include "llvm/Analysis/ValueTracking.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/ConstantRange.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Operator.h"
64 #include "llvm/IR/PassManager.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/Use.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/IR/ValueHandle.h"
71 #include "llvm/InitializePasses.h"
72 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/CommandLine.h"
75 #include "llvm/Support/Compiler.h"
76 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/ErrorHandling.h"
78 #include "llvm/Support/MathExtras.h"
79 #include "llvm/Support/raw_ostream.h"
80 #include "llvm/Transforms/Scalar.h"
81 #include "llvm/Transforms/Scalar/LoopPassManager.h"
82 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
83 #include "llvm/Transforms/Utils/Local.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
86 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
87 #include <cassert>
88 #include <cstdint>
89 #include <utility>
90 
91 using namespace llvm;
92 
93 #define DEBUG_TYPE "indvars"
94 
95 STATISTIC(NumWidened     , "Number of indvars widened");
96 STATISTIC(NumReplaced    , "Number of exit values replaced");
97 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
98 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
99 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
100 
101 // Trip count verification can be enabled by default under NDEBUG if we
102 // implement a strong expression equivalence checker in SCEV. Until then, we
103 // use the verify-indvars flag, which may assert in some cases.
104 static cl::opt<bool> VerifyIndvars(
105     "verify-indvars", cl::Hidden,
106     cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
107              "effect in release builds. (Note: this adds additional SCEV "
108              "queries potentially changing the analysis result)"));
109 
110 static cl::opt<ReplaceExitVal> ReplaceExitValue(
111     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
112     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
113     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
114                clEnumValN(OnlyCheapRepl, "cheap",
115                           "only replace exit value when the cost is cheap"),
116                clEnumValN(NoHardUse, "noharduse",
117                           "only replace exit values when loop def likely dead"),
118                clEnumValN(AlwaysRepl, "always",
119                           "always replace exit value whenever possible")));
120 
121 static cl::opt<bool> UsePostIncrementRanges(
122   "indvars-post-increment-ranges", cl::Hidden,
123   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
124   cl::init(true));
125 
126 static cl::opt<bool>
127 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
128             cl::desc("Disable Linear Function Test Replace optimization"));
129 
130 static cl::opt<bool>
131 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
132                 cl::desc("Predicate conditions in read only loops"));
133 
134 namespace {
135 
136 struct RewritePhi;
137 
138 class IndVarSimplify {
139   LoopInfo *LI;
140   ScalarEvolution *SE;
141   DominatorTree *DT;
142   const DataLayout &DL;
143   TargetLibraryInfo *TLI;
144   const TargetTransformInfo *TTI;
145   std::unique_ptr<MemorySSAUpdater> MSSAU;
146 
147   SmallVector<WeakTrackingVH, 16> DeadInsts;
148 
149   bool handleFloatingPointIV(Loop *L, PHINode *PH);
150   bool rewriteNonIntegerIVs(Loop *L);
151 
152   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
153   /// Try to eliminate loop exits based on analyzeable exit counts
154   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
155   /// Try to form loop invariant tests for loop exits by changing how many
156   /// iterations of the loop run when that is unobservable.
157   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
158 
159   bool rewriteFirstIterationLoopExitValues(Loop *L);
160 
161   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
162                                  const SCEV *ExitCount,
163                                  PHINode *IndVar, SCEVExpander &Rewriter);
164 
165   bool sinkUnusedInvariants(Loop *L);
166 
167 public:
168   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
169                  const DataLayout &DL, TargetLibraryInfo *TLI,
170                  TargetTransformInfo *TTI, MemorySSA *MSSA)
171       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
172     if (MSSA)
173       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
174   }
175 
176   bool run(Loop *L);
177 };
178 
179 } // end anonymous namespace
180 
181 /// Determine the insertion point for this user. By default, insert immediately
182 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
183 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
184 /// common dominator for the incoming blocks. A nullptr can be returned if no
185 /// viable location is found: it may happen if User is a PHI and Def only comes
186 /// to this PHI from unreachable blocks.
187 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
188                                           DominatorTree *DT, LoopInfo *LI) {
189   PHINode *PHI = dyn_cast<PHINode>(User);
190   if (!PHI)
191     return User;
192 
193   Instruction *InsertPt = nullptr;
194   for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
195     if (PHI->getIncomingValue(i) != Def)
196       continue;
197 
198     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
199 
200     if (!DT->isReachableFromEntry(InsertBB))
201       continue;
202 
203     if (!InsertPt) {
204       InsertPt = InsertBB->getTerminator();
205       continue;
206     }
207     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
208     InsertPt = InsertBB->getTerminator();
209   }
210 
211   // If we have skipped all inputs, it means that Def only comes to Phi from
212   // unreachable blocks.
213   if (!InsertPt)
214     return nullptr;
215 
216   auto *DefI = dyn_cast<Instruction>(Def);
217   if (!DefI)
218     return InsertPt;
219 
220   assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
221 
222   auto *L = LI->getLoopFor(DefI->getParent());
223   assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
224 
225   for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
226     if (LI->getLoopFor(DTN->getBlock()) == L)
227       return DTN->getBlock()->getTerminator();
228 
229   llvm_unreachable("DefI dominates InsertPt!");
230 }
231 
232 //===----------------------------------------------------------------------===//
233 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
234 //===----------------------------------------------------------------------===//
235 
236 /// Convert APF to an integer, if possible.
237 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
238   bool isExact = false;
239   // See if we can convert this to an int64_t
240   uint64_t UIntVal;
241   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
242                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
243       !isExact)
244     return false;
245   IntVal = UIntVal;
246   return true;
247 }
248 
249 /// If the loop has floating induction variable then insert corresponding
250 /// integer induction variable if possible.
251 /// For example,
252 /// for(double i = 0; i < 10000; ++i)
253 ///   bar(i)
254 /// is converted into
255 /// for(int i = 0; i < 10000; ++i)
256 ///   bar((double)i);
257 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
258   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
259   unsigned BackEdge     = IncomingEdge^1;
260 
261   // Check incoming value.
262   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
263 
264   int64_t InitValue;
265   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
266     return false;
267 
268   // Check IV increment. Reject this PN if increment operation is not
269   // an add or increment value can not be represented by an integer.
270   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
271   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
272 
273   // If this is not an add of the PHI with a constantfp, or if the constant fp
274   // is not an integer, bail out.
275   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
276   int64_t IncValue;
277   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
278       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
279     return false;
280 
281   // Check Incr uses. One user is PN and the other user is an exit condition
282   // used by the conditional terminator.
283   Value::user_iterator IncrUse = Incr->user_begin();
284   Instruction *U1 = cast<Instruction>(*IncrUse++);
285   if (IncrUse == Incr->user_end()) return false;
286   Instruction *U2 = cast<Instruction>(*IncrUse++);
287   if (IncrUse != Incr->user_end()) return false;
288 
289   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
290   // only used by a branch, we can't transform it.
291   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
292   if (!Compare)
293     Compare = dyn_cast<FCmpInst>(U2);
294   if (!Compare || !Compare->hasOneUse() ||
295       !isa<BranchInst>(Compare->user_back()))
296     return false;
297 
298   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
299 
300   // We need to verify that the branch actually controls the iteration count
301   // of the loop.  If not, the new IV can overflow and no one will notice.
302   // The branch block must be in the loop and one of the successors must be out
303   // of the loop.
304   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
305   if (!L->contains(TheBr->getParent()) ||
306       (L->contains(TheBr->getSuccessor(0)) &&
307        L->contains(TheBr->getSuccessor(1))))
308     return false;
309 
310   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
311   // transform it.
312   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
313   int64_t ExitValue;
314   if (ExitValueVal == nullptr ||
315       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
316     return false;
317 
318   // Find new predicate for integer comparison.
319   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
320   switch (Compare->getPredicate()) {
321   default: return false;  // Unknown comparison.
322   case CmpInst::FCMP_OEQ:
323   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
324   case CmpInst::FCMP_ONE:
325   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
326   case CmpInst::FCMP_OGT:
327   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
328   case CmpInst::FCMP_OGE:
329   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
330   case CmpInst::FCMP_OLT:
331   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
332   case CmpInst::FCMP_OLE:
333   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
334   }
335 
336   // We convert the floating point induction variable to a signed i32 value if
337   // we can.  This is only safe if the comparison will not overflow in a way
338   // that won't be trapped by the integer equivalent operations.  Check for this
339   // now.
340   // TODO: We could use i64 if it is native and the range requires it.
341 
342   // The start/stride/exit values must all fit in signed i32.
343   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
344     return false;
345 
346   // If not actually striding (add x, 0.0), avoid touching the code.
347   if (IncValue == 0)
348     return false;
349 
350   // Positive and negative strides have different safety conditions.
351   if (IncValue > 0) {
352     // If we have a positive stride, we require the init to be less than the
353     // exit value.
354     if (InitValue >= ExitValue)
355       return false;
356 
357     uint32_t Range = uint32_t(ExitValue-InitValue);
358     // Check for infinite loop, either:
359     // while (i <= Exit) or until (i > Exit)
360     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
361       if (++Range == 0) return false;  // Range overflows.
362     }
363 
364     unsigned Leftover = Range % uint32_t(IncValue);
365 
366     // If this is an equality comparison, we require that the strided value
367     // exactly land on the exit value, otherwise the IV condition will wrap
368     // around and do things the fp IV wouldn't.
369     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
370         Leftover != 0)
371       return false;
372 
373     // If the stride would wrap around the i32 before exiting, we can't
374     // transform the IV.
375     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
376       return false;
377   } else {
378     // If we have a negative stride, we require the init to be greater than the
379     // exit value.
380     if (InitValue <= ExitValue)
381       return false;
382 
383     uint32_t Range = uint32_t(InitValue-ExitValue);
384     // Check for infinite loop, either:
385     // while (i >= Exit) or until (i < Exit)
386     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
387       if (++Range == 0) return false;  // Range overflows.
388     }
389 
390     unsigned Leftover = Range % uint32_t(-IncValue);
391 
392     // If this is an equality comparison, we require that the strided value
393     // exactly land on the exit value, otherwise the IV condition will wrap
394     // around and do things the fp IV wouldn't.
395     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
396         Leftover != 0)
397       return false;
398 
399     // If the stride would wrap around the i32 before exiting, we can't
400     // transform the IV.
401     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
402       return false;
403   }
404 
405   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
406 
407   // Insert new integer induction variable.
408   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
409   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
410                       PN->getIncomingBlock(IncomingEdge));
411 
412   Value *NewAdd =
413     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
414                               Incr->getName()+".int", Incr);
415   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
416 
417   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
418                                       ConstantInt::get(Int32Ty, ExitValue),
419                                       Compare->getName());
420 
421   // In the following deletions, PN may become dead and may be deleted.
422   // Use a WeakTrackingVH to observe whether this happens.
423   WeakTrackingVH WeakPH = PN;
424 
425   // Delete the old floating point exit comparison.  The branch starts using the
426   // new comparison.
427   NewCompare->takeName(Compare);
428   Compare->replaceAllUsesWith(NewCompare);
429   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
430 
431   // Delete the old floating point increment.
432   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
433   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
434 
435   // If the FP induction variable still has uses, this is because something else
436   // in the loop uses its value.  In order to canonicalize the induction
437   // variable, we chose to eliminate the IV and rewrite it in terms of an
438   // int->fp cast.
439   //
440   // We give preference to sitofp over uitofp because it is faster on most
441   // platforms.
442   if (WeakPH) {
443     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
444                                  &*PN->getParent()->getFirstInsertionPt());
445     PN->replaceAllUsesWith(Conv);
446     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
447   }
448   return true;
449 }
450 
451 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
452   // First step.  Check to see if there are any floating-point recurrences.
453   // If there are, change them into integer recurrences, permitting analysis by
454   // the SCEV routines.
455   BasicBlock *Header = L->getHeader();
456 
457   SmallVector<WeakTrackingVH, 8> PHIs;
458   for (PHINode &PN : Header->phis())
459     PHIs.push_back(&PN);
460 
461   bool Changed = false;
462   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
463     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
464       Changed |= handleFloatingPointIV(L, PN);
465 
466   // If the loop previously had floating-point IV, ScalarEvolution
467   // may not have been able to compute a trip count. Now that we've done some
468   // re-writing, the trip count may be computable.
469   if (Changed)
470     SE->forgetLoop(L);
471   return Changed;
472 }
473 
474 //===---------------------------------------------------------------------===//
475 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
476 // they will exit at the first iteration.
477 //===---------------------------------------------------------------------===//
478 
479 /// Check to see if this loop has loop invariant conditions which lead to loop
480 /// exits. If so, we know that if the exit path is taken, it is at the first
481 /// loop iteration. This lets us predict exit values of PHI nodes that live in
482 /// loop header.
483 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
484   // Verify the input to the pass is already in LCSSA form.
485   assert(L->isLCSSAForm(*DT));
486 
487   SmallVector<BasicBlock *, 8> ExitBlocks;
488   L->getUniqueExitBlocks(ExitBlocks);
489 
490   bool MadeAnyChanges = false;
491   for (auto *ExitBB : ExitBlocks) {
492     // If there are no more PHI nodes in this exit block, then no more
493     // values defined inside the loop are used on this path.
494     for (PHINode &PN : ExitBB->phis()) {
495       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
496            IncomingValIdx != E; ++IncomingValIdx) {
497         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
498 
499         // Can we prove that the exit must run on the first iteration if it
500         // runs at all?  (i.e. early exits are fine for our purposes, but
501         // traces which lead to this exit being taken on the 2nd iteration
502         // aren't.)  Note that this is about whether the exit branch is
503         // executed, not about whether it is taken.
504         if (!L->getLoopLatch() ||
505             !DT->dominates(IncomingBB, L->getLoopLatch()))
506           continue;
507 
508         // Get condition that leads to the exit path.
509         auto *TermInst = IncomingBB->getTerminator();
510 
511         Value *Cond = nullptr;
512         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
513           // Must be a conditional branch, otherwise the block
514           // should not be in the loop.
515           Cond = BI->getCondition();
516         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
517           Cond = SI->getCondition();
518         else
519           continue;
520 
521         if (!L->isLoopInvariant(Cond))
522           continue;
523 
524         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
525 
526         // Only deal with PHIs in the loop header.
527         if (!ExitVal || ExitVal->getParent() != L->getHeader())
528           continue;
529 
530         // If ExitVal is a PHI on the loop header, then we know its
531         // value along this exit because the exit can only be taken
532         // on the first iteration.
533         auto *LoopPreheader = L->getLoopPreheader();
534         assert(LoopPreheader && "Invalid loop");
535         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
536         if (PreheaderIdx != -1) {
537           assert(ExitVal->getParent() == L->getHeader() &&
538                  "ExitVal must be in loop header");
539           MadeAnyChanges = true;
540           PN.setIncomingValue(IncomingValIdx,
541                               ExitVal->getIncomingValue(PreheaderIdx));
542         }
543       }
544     }
545   }
546   return MadeAnyChanges;
547 }
548 
549 //===----------------------------------------------------------------------===//
550 //  IV Widening - Extend the width of an IV to cover its widest uses.
551 //===----------------------------------------------------------------------===//
552 
553 namespace {
554 
555 // Collect information about induction variables that are used by sign/zero
556 // extend operations. This information is recorded by CollectExtend and provides
557 // the input to WidenIV.
558 struct WideIVInfo {
559   PHINode *NarrowIV = nullptr;
560 
561   // Widest integer type created [sz]ext
562   Type *WidestNativeType = nullptr;
563 
564   // Was a sext user seen before a zext?
565   bool IsSigned = false;
566 };
567 
568 } // end anonymous namespace
569 
570 /// Update information about the induction variable that is extended by this
571 /// sign or zero extend operation. This is used to determine the final width of
572 /// the IV before actually widening it.
573 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
574                         const TargetTransformInfo *TTI) {
575   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
576   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
577     return;
578 
579   Type *Ty = Cast->getType();
580   uint64_t Width = SE->getTypeSizeInBits(Ty);
581   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
582     return;
583 
584   // Check that `Cast` actually extends the induction variable (we rely on this
585   // later).  This takes care of cases where `Cast` is extending a truncation of
586   // the narrow induction variable, and thus can end up being narrower than the
587   // "narrow" induction variable.
588   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
589   if (NarrowIVWidth >= Width)
590     return;
591 
592   // Cast is either an sext or zext up to this point.
593   // We should not widen an indvar if arithmetics on the wider indvar are more
594   // expensive than those on the narrower indvar. We check only the cost of ADD
595   // because at least an ADD is required to increment the induction variable. We
596   // could compute more comprehensively the cost of all instructions on the
597   // induction variable when necessary.
598   if (TTI &&
599       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
600           TTI->getArithmeticInstrCost(Instruction::Add,
601                                       Cast->getOperand(0)->getType())) {
602     return;
603   }
604 
605   if (!WI.WidestNativeType) {
606     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
607     WI.IsSigned = IsSigned;
608     return;
609   }
610 
611   // We extend the IV to satisfy the sign of its first user, arbitrarily.
612   if (WI.IsSigned != IsSigned)
613     return;
614 
615   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
616     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
617 }
618 
619 namespace {
620 
621 /// Record a link in the Narrow IV def-use chain along with the WideIV that
622 /// computes the same value as the Narrow IV def.  This avoids caching Use*
623 /// pointers.
624 struct NarrowIVDefUse {
625   Instruction *NarrowDef = nullptr;
626   Instruction *NarrowUse = nullptr;
627   Instruction *WideDef = nullptr;
628 
629   // True if the narrow def is never negative.  Tracking this information lets
630   // us use a sign extension instead of a zero extension or vice versa, when
631   // profitable and legal.
632   bool NeverNegative = false;
633 
634   NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
635                  bool NeverNegative)
636       : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
637         NeverNegative(NeverNegative) {}
638 };
639 
640 /// The goal of this transform is to remove sign and zero extends without
641 /// creating any new induction variables. To do this, it creates a new phi of
642 /// the wider type and redirects all users, either removing extends or inserting
643 /// truncs whenever we stop propagating the type.
644 class WidenIV {
645   // Parameters
646   PHINode *OrigPhi;
647   Type *WideType;
648 
649   // Context
650   LoopInfo        *LI;
651   Loop            *L;
652   ScalarEvolution *SE;
653   DominatorTree   *DT;
654 
655   // Does the module have any calls to the llvm.experimental.guard intrinsic
656   // at all? If not we can avoid scanning instructions looking for guards.
657   bool HasGuards;
658 
659   // Result
660   PHINode *WidePhi = nullptr;
661   Instruction *WideInc = nullptr;
662   const SCEV *WideIncExpr = nullptr;
663   SmallVectorImpl<WeakTrackingVH> &DeadInsts;
664 
665   SmallPtrSet<Instruction *,16> Widened;
666   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
667 
668   enum ExtendKind { ZeroExtended, SignExtended, Unknown };
669 
670   // A map tracking the kind of extension used to widen each narrow IV
671   // and narrow IV user.
672   // Key: pointer to a narrow IV or IV user.
673   // Value: the kind of extension used to widen this Instruction.
674   DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
675 
676   using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
677 
678   // A map with control-dependent ranges for post increment IV uses. The key is
679   // a pair of IV def and a use of this def denoting the context. The value is
680   // a ConstantRange representing possible values of the def at the given
681   // context.
682   DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
683 
684   Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
685                                               Instruction *UseI) {
686     DefUserPair Key(Def, UseI);
687     auto It = PostIncRangeInfos.find(Key);
688     return It == PostIncRangeInfos.end()
689                ? Optional<ConstantRange>(None)
690                : Optional<ConstantRange>(It->second);
691   }
692 
693   void calculatePostIncRanges(PHINode *OrigPhi);
694   void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
695 
696   void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
697     DefUserPair Key(Def, UseI);
698     auto It = PostIncRangeInfos.find(Key);
699     if (It == PostIncRangeInfos.end())
700       PostIncRangeInfos.insert({Key, R});
701     else
702       It->second = R.intersectWith(It->second);
703   }
704 
705 public:
706   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
707           DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
708           bool HasGuards)
709       : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
710         L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
711         HasGuards(HasGuards), DeadInsts(DI) {
712     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
713     ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
714   }
715 
716   PHINode *createWideIV(SCEVExpander &Rewriter);
717 
718 protected:
719   Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
720                           Instruction *Use);
721 
722   Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
723   Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
724                                      const SCEVAddRecExpr *WideAR);
725   Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
726 
727   ExtendKind getExtendKind(Instruction *I);
728 
729   using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
730 
731   WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
732 
733   WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
734 
735   const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
736                               unsigned OpCode) const;
737 
738   Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
739 
740   bool widenLoopCompare(NarrowIVDefUse DU);
741   bool widenWithVariantUse(NarrowIVDefUse DU);
742   void widenWithVariantUseCodegen(NarrowIVDefUse DU);
743 
744   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
745 };
746 
747 } // end anonymous namespace
748 
749 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
750                                  bool IsSigned, Instruction *Use) {
751   // Set the debug location and conservative insertion point.
752   IRBuilder<> Builder(Use);
753   // Hoist the insertion point into loop preheaders as far as possible.
754   for (const Loop *L = LI->getLoopFor(Use->getParent());
755        L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
756        L = L->getParentLoop())
757     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
758 
759   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
760                     Builder.CreateZExt(NarrowOper, WideType);
761 }
762 
763 /// Instantiate a wide operation to replace a narrow operation. This only needs
764 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
765 /// 0 for any operation we decide not to clone.
766 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
767                                   const SCEVAddRecExpr *WideAR) {
768   unsigned Opcode = DU.NarrowUse->getOpcode();
769   switch (Opcode) {
770   default:
771     return nullptr;
772   case Instruction::Add:
773   case Instruction::Mul:
774   case Instruction::UDiv:
775   case Instruction::Sub:
776     return cloneArithmeticIVUser(DU, WideAR);
777 
778   case Instruction::And:
779   case Instruction::Or:
780   case Instruction::Xor:
781   case Instruction::Shl:
782   case Instruction::LShr:
783   case Instruction::AShr:
784     return cloneBitwiseIVUser(DU);
785   }
786 }
787 
788 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
789   Instruction *NarrowUse = DU.NarrowUse;
790   Instruction *NarrowDef = DU.NarrowDef;
791   Instruction *WideDef = DU.WideDef;
792 
793   LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
794 
795   // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
796   // about the narrow operand yet so must insert a [sz]ext. It is probably loop
797   // invariant and will be folded or hoisted. If it actually comes from a
798   // widened IV, it should be removed during a future call to widenIVUse.
799   bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
800   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
801                    ? WideDef
802                    : createExtendInst(NarrowUse->getOperand(0), WideType,
803                                       IsSigned, NarrowUse);
804   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
805                    ? WideDef
806                    : createExtendInst(NarrowUse->getOperand(1), WideType,
807                                       IsSigned, NarrowUse);
808 
809   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
810   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
811                                         NarrowBO->getName());
812   IRBuilder<> Builder(NarrowUse);
813   Builder.Insert(WideBO);
814   WideBO->copyIRFlags(NarrowBO);
815   return WideBO;
816 }
817 
818 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
819                                             const SCEVAddRecExpr *WideAR) {
820   Instruction *NarrowUse = DU.NarrowUse;
821   Instruction *NarrowDef = DU.NarrowDef;
822   Instruction *WideDef = DU.WideDef;
823 
824   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
825 
826   unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
827 
828   // We're trying to find X such that
829   //
830   //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
831   //
832   // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
833   // and check using SCEV if any of them are correct.
834 
835   // Returns true if extending NonIVNarrowDef according to `SignExt` is a
836   // correct solution to X.
837   auto GuessNonIVOperand = [&](bool SignExt) {
838     const SCEV *WideLHS;
839     const SCEV *WideRHS;
840 
841     auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
842       if (SignExt)
843         return SE->getSignExtendExpr(S, Ty);
844       return SE->getZeroExtendExpr(S, Ty);
845     };
846 
847     if (IVOpIdx == 0) {
848       WideLHS = SE->getSCEV(WideDef);
849       const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
850       WideRHS = GetExtend(NarrowRHS, WideType);
851     } else {
852       const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
853       WideLHS = GetExtend(NarrowLHS, WideType);
854       WideRHS = SE->getSCEV(WideDef);
855     }
856 
857     // WideUse is "WideDef `op.wide` X" as described in the comment.
858     const SCEV *WideUse = nullptr;
859 
860     switch (NarrowUse->getOpcode()) {
861     default:
862       llvm_unreachable("No other possibility!");
863 
864     case Instruction::Add:
865       WideUse = SE->getAddExpr(WideLHS, WideRHS);
866       break;
867 
868     case Instruction::Mul:
869       WideUse = SE->getMulExpr(WideLHS, WideRHS);
870       break;
871 
872     case Instruction::UDiv:
873       WideUse = SE->getUDivExpr(WideLHS, WideRHS);
874       break;
875 
876     case Instruction::Sub:
877       WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
878       break;
879     }
880 
881     return WideUse == WideAR;
882   };
883 
884   bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
885   if (!GuessNonIVOperand(SignExtend)) {
886     SignExtend = !SignExtend;
887     if (!GuessNonIVOperand(SignExtend))
888       return nullptr;
889   }
890 
891   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
892                    ? WideDef
893                    : createExtendInst(NarrowUse->getOperand(0), WideType,
894                                       SignExtend, NarrowUse);
895   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
896                    ? WideDef
897                    : createExtendInst(NarrowUse->getOperand(1), WideType,
898                                       SignExtend, NarrowUse);
899 
900   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
901   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
902                                         NarrowBO->getName());
903 
904   IRBuilder<> Builder(NarrowUse);
905   Builder.Insert(WideBO);
906   WideBO->copyIRFlags(NarrowBO);
907   return WideBO;
908 }
909 
910 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
911   auto It = ExtendKindMap.find(I);
912   assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
913   return It->second;
914 }
915 
916 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
917                                      unsigned OpCode) const {
918   if (OpCode == Instruction::Add)
919     return SE->getAddExpr(LHS, RHS);
920   if (OpCode == Instruction::Sub)
921     return SE->getMinusSCEV(LHS, RHS);
922   if (OpCode == Instruction::Mul)
923     return SE->getMulExpr(LHS, RHS);
924 
925   llvm_unreachable("Unsupported opcode.");
926 }
927 
928 /// No-wrap operations can transfer sign extension of their result to their
929 /// operands. Generate the SCEV value for the widened operation without
930 /// actually modifying the IR yet. If the expression after extending the
931 /// operands is an AddRec for this loop, return the AddRec and the kind of
932 /// extension used.
933 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
934   // Handle the common case of add<nsw/nuw>
935   const unsigned OpCode = DU.NarrowUse->getOpcode();
936   // Only Add/Sub/Mul instructions supported yet.
937   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
938       OpCode != Instruction::Mul)
939     return {nullptr, Unknown};
940 
941   // One operand (NarrowDef) has already been extended to WideDef. Now determine
942   // if extending the other will lead to a recurrence.
943   const unsigned ExtendOperIdx =
944       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
945   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
946 
947   const SCEV *ExtendOperExpr = nullptr;
948   const OverflowingBinaryOperator *OBO =
949     cast<OverflowingBinaryOperator>(DU.NarrowUse);
950   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
951   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
952     ExtendOperExpr = SE->getSignExtendExpr(
953       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
954   else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
955     ExtendOperExpr = SE->getZeroExtendExpr(
956       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
957   else
958     return {nullptr, Unknown};
959 
960   // When creating this SCEV expr, don't apply the current operations NSW or NUW
961   // flags. This instruction may be guarded by control flow that the no-wrap
962   // behavior depends on. Non-control-equivalent instructions can be mapped to
963   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
964   // semantics to those operations.
965   const SCEV *lhs = SE->getSCEV(DU.WideDef);
966   const SCEV *rhs = ExtendOperExpr;
967 
968   // Let's swap operands to the initial order for the case of non-commutative
969   // operations, like SUB. See PR21014.
970   if (ExtendOperIdx == 0)
971     std::swap(lhs, rhs);
972   const SCEVAddRecExpr *AddRec =
973       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
974 
975   if (!AddRec || AddRec->getLoop() != L)
976     return {nullptr, Unknown};
977 
978   return {AddRec, ExtKind};
979 }
980 
981 /// Is this instruction potentially interesting for further simplification after
982 /// widening it's type? In other words, can the extend be safely hoisted out of
983 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
984 /// so, return the extended recurrence and the kind of extension used. Otherwise
985 /// return {nullptr, Unknown}.
986 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
987   if (!SE->isSCEVable(DU.NarrowUse->getType()))
988     return {nullptr, Unknown};
989 
990   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
991   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
992       SE->getTypeSizeInBits(WideType)) {
993     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
994     // index. So don't follow this use.
995     return {nullptr, Unknown};
996   }
997 
998   const SCEV *WideExpr;
999   ExtendKind ExtKind;
1000   if (DU.NeverNegative) {
1001     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1002     if (isa<SCEVAddRecExpr>(WideExpr))
1003       ExtKind = SignExtended;
1004     else {
1005       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1006       ExtKind = ZeroExtended;
1007     }
1008   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1009     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1010     ExtKind = SignExtended;
1011   } else {
1012     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1013     ExtKind = ZeroExtended;
1014   }
1015   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1016   if (!AddRec || AddRec->getLoop() != L)
1017     return {nullptr, Unknown};
1018   return {AddRec, ExtKind};
1019 }
1020 
1021 /// This IV user cannot be widened. Replace this use of the original narrow IV
1022 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1023 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1024   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1025   if (!InsertPt)
1026     return;
1027   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1028                     << *DU.NarrowUse << "\n");
1029   IRBuilder<> Builder(InsertPt);
1030   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1031   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1032 }
1033 
1034 /// If the narrow use is a compare instruction, then widen the compare
1035 //  (and possibly the other operand).  The extend operation is hoisted into the
1036 // loop preheader as far as possible.
1037 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1038   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1039   if (!Cmp)
1040     return false;
1041 
1042   // We can legally widen the comparison in the following two cases:
1043   //
1044   //  - The signedness of the IV extension and comparison match
1045   //
1046   //  - The narrow IV is always positive (and thus its sign extension is equal
1047   //    to its zero extension).  For instance, let's say we're zero extending
1048   //    %narrow for the following use
1049   //
1050   //      icmp slt i32 %narrow, %val   ... (A)
1051   //
1052   //    and %narrow is always positive.  Then
1053   //
1054   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1055   //          == icmp slt i32 zext(%narrow), sext(%val)
1056   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1057   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1058     return false;
1059 
1060   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1061   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1062   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1063   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1064 
1065   // Widen the compare instruction.
1066   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1067   if (!InsertPt)
1068     return false;
1069   IRBuilder<> Builder(InsertPt);
1070   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1071 
1072   // Widen the other operand of the compare, if necessary.
1073   if (CastWidth < IVWidth) {
1074     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1075     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1076   }
1077   return true;
1078 }
1079 
1080 // The widenIVUse avoids generating trunc by evaluating the use as AddRec, this
1081 // will not work when:
1082 //    1) SCEV traces back to an instruction inside the loop that SCEV can not
1083 // expand, eg. add %indvar, (load %addr)
1084 //    2) SCEV finds a loop variant, eg. add %indvar, %loopvariant
1085 // While SCEV fails to avoid trunc, we can still try to use instruction
1086 // combining approach to prove trunc is not required. This can be further
1087 // extended with other instruction combining checks, but for now we handle the
1088 // following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext")
1089 //
1090 // Src:
1091 //   %c = sub nsw %b, %indvar
1092 //   %d = sext %c to i64
1093 // Dst:
1094 //   %indvar.ext1 = sext %indvar to i64
1095 //   %m = sext %b to i64
1096 //   %d = sub nsw i64 %m, %indvar.ext1
1097 // Therefore, as long as the result of add/sub/mul is extended to wide type, no
1098 // trunc is required regardless of how %b is generated. This pattern is common
1099 // when calculating address in 64 bit architecture
1100 bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) {
1101   Instruction *NarrowUse = DU.NarrowUse;
1102   Instruction *NarrowDef = DU.NarrowDef;
1103   Instruction *WideDef = DU.WideDef;
1104 
1105   // Handle the common case of add<nsw/nuw>
1106   const unsigned OpCode = NarrowUse->getOpcode();
1107   // Only Add/Sub/Mul instructions are supported.
1108   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1109       OpCode != Instruction::Mul)
1110     return false;
1111 
1112   // The operand that is not defined by NarrowDef of DU. Let's call it the
1113   // other operand.
1114   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1115   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1116          "bad DU");
1117 
1118   const SCEV *ExtendOperExpr = nullptr;
1119   const OverflowingBinaryOperator *OBO =
1120     cast<OverflowingBinaryOperator>(NarrowUse);
1121   ExtendKind ExtKind = getExtendKind(NarrowDef);
1122   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1123     ExtendOperExpr = SE->getSignExtendExpr(
1124       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1125   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1126     ExtendOperExpr = SE->getZeroExtendExpr(
1127       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1128   else
1129     return false;
1130 
1131   // Verifying that Defining operand is an AddRec
1132   const SCEV *Op1 = SE->getSCEV(WideDef);
1133   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1134   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1135     return false;
1136   // Verifying that other operand is an Extend.
1137   if (ExtKind == SignExtended) {
1138     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1139       return false;
1140   } else {
1141     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1142       return false;
1143   }
1144 
1145   if (ExtKind == SignExtended) {
1146     for (Use &U : NarrowUse->uses()) {
1147       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1148       if (!User || User->getType() != WideType)
1149         return false;
1150     }
1151   } else { // ExtKind == ZeroExtended
1152     for (Use &U : NarrowUse->uses()) {
1153       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1154       if (!User || User->getType() != WideType)
1155         return false;
1156     }
1157   }
1158 
1159   return true;
1160 }
1161 
1162 /// Special Case for widening with loop variant (see
1163 /// WidenIV::widenWithVariant). This is the code generation part.
1164 void WidenIV::widenWithVariantUseCodegen(NarrowIVDefUse DU) {
1165   Instruction *NarrowUse = DU.NarrowUse;
1166   Instruction *NarrowDef = DU.NarrowDef;
1167   Instruction *WideDef = DU.WideDef;
1168 
1169   ExtendKind ExtKind = getExtendKind(NarrowDef);
1170 
1171   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1172 
1173   // Generating a widening use instruction.
1174   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1175                    ? WideDef
1176                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1177                                       ExtKind, NarrowUse);
1178   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1179                    ? WideDef
1180                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1181                                       ExtKind, NarrowUse);
1182 
1183   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1184   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1185                                         NarrowBO->getName());
1186   IRBuilder<> Builder(NarrowUse);
1187   Builder.Insert(WideBO);
1188   WideBO->copyIRFlags(NarrowBO);
1189 
1190   assert(ExtKind != Unknown && "Unknown ExtKind not handled");
1191 
1192   ExtendKindMap[NarrowUse] = ExtKind;
1193 
1194   for (Use &U : NarrowUse->uses()) {
1195     Instruction *User = nullptr;
1196     if (ExtKind == SignExtended)
1197       User = dyn_cast<SExtInst>(U.getUser());
1198     else
1199       User = dyn_cast<ZExtInst>(U.getUser());
1200     if (User && User->getType() == WideType) {
1201       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1202                         << *WideBO << "\n");
1203       ++NumElimExt;
1204       User->replaceAllUsesWith(WideBO);
1205       DeadInsts.emplace_back(User);
1206     }
1207   }
1208 }
1209 
1210 /// Determine whether an individual user of the narrow IV can be widened. If so,
1211 /// return the wide clone of the user.
1212 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1213   assert(ExtendKindMap.count(DU.NarrowDef) &&
1214          "Should already know the kind of extension used to widen NarrowDef");
1215 
1216   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1217   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1218     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1219       // For LCSSA phis, sink the truncate outside the loop.
1220       // After SimplifyCFG most loop exit targets have a single predecessor.
1221       // Otherwise fall back to a truncate within the loop.
1222       if (UsePhi->getNumOperands() != 1)
1223         truncateIVUse(DU, DT, LI);
1224       else {
1225         // Widening the PHI requires us to insert a trunc.  The logical place
1226         // for this trunc is in the same BB as the PHI.  This is not possible if
1227         // the BB is terminated by a catchswitch.
1228         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1229           return nullptr;
1230 
1231         PHINode *WidePhi =
1232           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1233                           UsePhi);
1234         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1235         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1236         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1237         UsePhi->replaceAllUsesWith(Trunc);
1238         DeadInsts.emplace_back(UsePhi);
1239         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1240                           << *WidePhi << "\n");
1241       }
1242       return nullptr;
1243     }
1244   }
1245 
1246   // This narrow use can be widened by a sext if it's non-negative or its narrow
1247   // def was widended by a sext. Same for zext.
1248   auto canWidenBySExt = [&]() {
1249     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1250   };
1251   auto canWidenByZExt = [&]() {
1252     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1253   };
1254 
1255   // Our raison d'etre! Eliminate sign and zero extension.
1256   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1257       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1258     Value *NewDef = DU.WideDef;
1259     if (DU.NarrowUse->getType() != WideType) {
1260       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1261       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1262       if (CastWidth < IVWidth) {
1263         // The cast isn't as wide as the IV, so insert a Trunc.
1264         IRBuilder<> Builder(DU.NarrowUse);
1265         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1266       }
1267       else {
1268         // A wider extend was hidden behind a narrower one. This may induce
1269         // another round of IV widening in which the intermediate IV becomes
1270         // dead. It should be very rare.
1271         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1272                           << " not wide enough to subsume " << *DU.NarrowUse
1273                           << "\n");
1274         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1275         NewDef = DU.NarrowUse;
1276       }
1277     }
1278     if (NewDef != DU.NarrowUse) {
1279       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1280                         << " replaced by " << *DU.WideDef << "\n");
1281       ++NumElimExt;
1282       DU.NarrowUse->replaceAllUsesWith(NewDef);
1283       DeadInsts.emplace_back(DU.NarrowUse);
1284     }
1285     // Now that the extend is gone, we want to expose it's uses for potential
1286     // further simplification. We don't need to directly inform SimplifyIVUsers
1287     // of the new users, because their parent IV will be processed later as a
1288     // new loop phi. If we preserved IVUsers analysis, we would also want to
1289     // push the uses of WideDef here.
1290 
1291     // No further widening is needed. The deceased [sz]ext had done it for us.
1292     return nullptr;
1293   }
1294 
1295   // Does this user itself evaluate to a recurrence after widening?
1296   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1297   if (!WideAddRec.first)
1298     WideAddRec = getWideRecurrence(DU);
1299 
1300   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1301   if (!WideAddRec.first) {
1302     // If use is a loop condition, try to promote the condition instead of
1303     // truncating the IV first.
1304     if (widenLoopCompare(DU))
1305       return nullptr;
1306 
1307     // We are here about to generate a truncate instruction that may hurt
1308     // performance because the scalar evolution expression computed earlier
1309     // in WideAddRec.first does not indicate a polynomial induction expression.
1310     // In that case, look at the operands of the use instruction to determine
1311     // if we can still widen the use instead of truncating its operand.
1312     if (widenWithVariantUse(DU)) {
1313       widenWithVariantUseCodegen(DU);
1314       return nullptr;
1315     }
1316 
1317     // This user does not evaluate to a recurrence after widening, so don't
1318     // follow it. Instead insert a Trunc to kill off the original use,
1319     // eventually isolating the original narrow IV so it can be removed.
1320     truncateIVUse(DU, DT, LI);
1321     return nullptr;
1322   }
1323   // Assume block terminators cannot evaluate to a recurrence. We can't to
1324   // insert a Trunc after a terminator if there happens to be a critical edge.
1325   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1326          "SCEV is not expected to evaluate a block terminator");
1327 
1328   // Reuse the IV increment that SCEVExpander created as long as it dominates
1329   // NarrowUse.
1330   Instruction *WideUse = nullptr;
1331   if (WideAddRec.first == WideIncExpr &&
1332       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1333     WideUse = WideInc;
1334   else {
1335     WideUse = cloneIVUser(DU, WideAddRec.first);
1336     if (!WideUse)
1337       return nullptr;
1338   }
1339   // Evaluation of WideAddRec ensured that the narrow expression could be
1340   // extended outside the loop without overflow. This suggests that the wide use
1341   // evaluates to the same expression as the extended narrow use, but doesn't
1342   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1343   // where it fails, we simply throw away the newly created wide use.
1344   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1345     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1346                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1347                       << "\n");
1348     DeadInsts.emplace_back(WideUse);
1349     return nullptr;
1350   }
1351 
1352   // if we reached this point then we are going to replace
1353   // DU.NarrowUse with WideUse. Reattach DbgValue then.
1354   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1355 
1356   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1357   // Returning WideUse pushes it on the worklist.
1358   return WideUse;
1359 }
1360 
1361 /// Add eligible users of NarrowDef to NarrowIVUsers.
1362 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1363   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1364   bool NonNegativeDef =
1365       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1366                            SE->getConstant(NarrowSCEV->getType(), 0));
1367   for (User *U : NarrowDef->users()) {
1368     Instruction *NarrowUser = cast<Instruction>(U);
1369 
1370     // Handle data flow merges and bizarre phi cycles.
1371     if (!Widened.insert(NarrowUser).second)
1372       continue;
1373 
1374     bool NonNegativeUse = false;
1375     if (!NonNegativeDef) {
1376       // We might have a control-dependent range information for this context.
1377       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1378         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1379     }
1380 
1381     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1382                                NonNegativeDef || NonNegativeUse);
1383   }
1384 }
1385 
1386 /// Process a single induction variable. First use the SCEVExpander to create a
1387 /// wide induction variable that evaluates to the same recurrence as the
1388 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1389 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1390 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1391 ///
1392 /// It would be simpler to delete uses as they are processed, but we must avoid
1393 /// invalidating SCEV expressions.
1394 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1395   // Is this phi an induction variable?
1396   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1397   if (!AddRec)
1398     return nullptr;
1399 
1400   // Widen the induction variable expression.
1401   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1402                                ? SE->getSignExtendExpr(AddRec, WideType)
1403                                : SE->getZeroExtendExpr(AddRec, WideType);
1404 
1405   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1406          "Expect the new IV expression to preserve its type");
1407 
1408   // Can the IV be extended outside the loop without overflow?
1409   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1410   if (!AddRec || AddRec->getLoop() != L)
1411     return nullptr;
1412 
1413   // An AddRec must have loop-invariant operands. Since this AddRec is
1414   // materialized by a loop header phi, the expression cannot have any post-loop
1415   // operands, so they must dominate the loop header.
1416   assert(
1417       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1418       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1419       "Loop header phi recurrence inputs do not dominate the loop");
1420 
1421   // Iterate over IV uses (including transitive ones) looking for IV increments
1422   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1423   // the increment calculate control-dependent range information basing on
1424   // dominating conditions inside of the loop (e.g. a range check inside of the
1425   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1426   //
1427   // Control-dependent range information is later used to prove that a narrow
1428   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1429   // this on demand because when pushNarrowIVUsers needs this information some
1430   // of the dominating conditions might be already widened.
1431   if (UsePostIncrementRanges)
1432     calculatePostIncRanges(OrigPhi);
1433 
1434   // The rewriter provides a value for the desired IV expression. This may
1435   // either find an existing phi or materialize a new one. Either way, we
1436   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1437   // of the phi-SCC dominates the loop entry.
1438   Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
1439   Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
1440   // If the wide phi is not a phi node, for example a cast node, like bitcast,
1441   // inttoptr, ptrtoint, just skip for now.
1442   if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
1443     // if the cast node is an inserted instruction without any user, we should
1444     // remove it to make sure the pass don't touch the function as we can not
1445     // wide the phi.
1446     if (ExpandInst->hasNUses(0) &&
1447         Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
1448       DeadInsts.emplace_back(ExpandInst);
1449     return nullptr;
1450   }
1451 
1452   // Remembering the WideIV increment generated by SCEVExpander allows
1453   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1454   // employ a general reuse mechanism because the call above is the only call to
1455   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1456   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1457     WideInc =
1458       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1459     WideIncExpr = SE->getSCEV(WideInc);
1460     // Propagate the debug location associated with the original loop increment
1461     // to the new (widened) increment.
1462     auto *OrigInc =
1463       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1464     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1465   }
1466 
1467   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1468   ++NumWidened;
1469 
1470   // Traverse the def-use chain using a worklist starting at the original IV.
1471   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
1472 
1473   Widened.insert(OrigPhi);
1474   pushNarrowIVUsers(OrigPhi, WidePhi);
1475 
1476   while (!NarrowIVUsers.empty()) {
1477     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1478 
1479     // Process a def-use edge. This may replace the use, so don't hold a
1480     // use_iterator across it.
1481     Instruction *WideUse = widenIVUse(DU, Rewriter);
1482 
1483     // Follow all def-use edges from the previous narrow use.
1484     if (WideUse)
1485       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1486 
1487     // widenIVUse may have removed the def-use edge.
1488     if (DU.NarrowDef->use_empty())
1489       DeadInsts.emplace_back(DU.NarrowDef);
1490   }
1491 
1492   // Attach any debug information to the new PHI.
1493   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1494 
1495   return WidePhi;
1496 }
1497 
1498 /// Calculates control-dependent range for the given def at the given context
1499 /// by looking at dominating conditions inside of the loop
1500 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1501                                     Instruction *NarrowUser) {
1502   using namespace llvm::PatternMatch;
1503 
1504   Value *NarrowDefLHS;
1505   const APInt *NarrowDefRHS;
1506   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1507                                  m_APInt(NarrowDefRHS))) ||
1508       !NarrowDefRHS->isNonNegative())
1509     return;
1510 
1511   auto UpdateRangeFromCondition = [&] (Value *Condition,
1512                                        bool TrueDest) {
1513     CmpInst::Predicate Pred;
1514     Value *CmpRHS;
1515     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1516                                  m_Value(CmpRHS))))
1517       return;
1518 
1519     CmpInst::Predicate P =
1520             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1521 
1522     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1523     auto CmpConstrainedLHSRange =
1524             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1525     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1526         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1527 
1528     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1529   };
1530 
1531   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1532     if (!HasGuards)
1533       return;
1534 
1535     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1536                                      Ctx->getParent()->rend())) {
1537       Value *C = nullptr;
1538       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1539         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1540     }
1541   };
1542 
1543   UpdateRangeFromGuards(NarrowUser);
1544 
1545   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1546   // If NarrowUserBB is statically unreachable asking dominator queries may
1547   // yield surprising results. (e.g. the block may not have a dom tree node)
1548   if (!DT->isReachableFromEntry(NarrowUserBB))
1549     return;
1550 
1551   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1552        L->contains(DTB->getBlock());
1553        DTB = DTB->getIDom()) {
1554     auto *BB = DTB->getBlock();
1555     auto *TI = BB->getTerminator();
1556     UpdateRangeFromGuards(TI);
1557 
1558     auto *BI = dyn_cast<BranchInst>(TI);
1559     if (!BI || !BI->isConditional())
1560       continue;
1561 
1562     auto *TrueSuccessor = BI->getSuccessor(0);
1563     auto *FalseSuccessor = BI->getSuccessor(1);
1564 
1565     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1566       return BBE.isSingleEdge() &&
1567              DT->dominates(BBE, NarrowUser->getParent());
1568     };
1569 
1570     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1571       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1572 
1573     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1574       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1575   }
1576 }
1577 
1578 /// Calculates PostIncRangeInfos map for the given IV
1579 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1580   SmallPtrSet<Instruction *, 16> Visited;
1581   SmallVector<Instruction *, 6> Worklist;
1582   Worklist.push_back(OrigPhi);
1583   Visited.insert(OrigPhi);
1584 
1585   while (!Worklist.empty()) {
1586     Instruction *NarrowDef = Worklist.pop_back_val();
1587 
1588     for (Use &U : NarrowDef->uses()) {
1589       auto *NarrowUser = cast<Instruction>(U.getUser());
1590 
1591       // Don't go looking outside the current loop.
1592       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1593       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1594         continue;
1595 
1596       if (!Visited.insert(NarrowUser).second)
1597         continue;
1598 
1599       Worklist.push_back(NarrowUser);
1600 
1601       calculatePostIncRange(NarrowDef, NarrowUser);
1602     }
1603   }
1604 }
1605 
1606 //===----------------------------------------------------------------------===//
1607 //  Live IV Reduction - Minimize IVs live across the loop.
1608 //===----------------------------------------------------------------------===//
1609 
1610 //===----------------------------------------------------------------------===//
1611 //  Simplification of IV users based on SCEV evaluation.
1612 //===----------------------------------------------------------------------===//
1613 
1614 namespace {
1615 
1616 class IndVarSimplifyVisitor : public IVVisitor {
1617   ScalarEvolution *SE;
1618   const TargetTransformInfo *TTI;
1619   PHINode *IVPhi;
1620 
1621 public:
1622   WideIVInfo WI;
1623 
1624   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1625                         const TargetTransformInfo *TTI,
1626                         const DominatorTree *DTree)
1627     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1628     DT = DTree;
1629     WI.NarrowIV = IVPhi;
1630   }
1631 
1632   // Implement the interface used by simplifyUsersOfIV.
1633   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1634 };
1635 
1636 } // end anonymous namespace
1637 
1638 /// Iteratively perform simplification on a worklist of IV users. Each
1639 /// successive simplification may push more users which may themselves be
1640 /// candidates for simplification.
1641 ///
1642 /// Sign/Zero extend elimination is interleaved with IV simplification.
1643 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1644                                        SCEVExpander &Rewriter,
1645                                        LoopInfo *LI) {
1646   SmallVector<WideIVInfo, 8> WideIVs;
1647 
1648   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1649           Intrinsic::getName(Intrinsic::experimental_guard));
1650   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1651 
1652   SmallVector<PHINode*, 8> LoopPhis;
1653   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1654     LoopPhis.push_back(cast<PHINode>(I));
1655   }
1656   // Each round of simplification iterates through the SimplifyIVUsers worklist
1657   // for all current phis, then determines whether any IVs can be
1658   // widened. Widening adds new phis to LoopPhis, inducing another round of
1659   // simplification on the wide IVs.
1660   bool Changed = false;
1661   while (!LoopPhis.empty()) {
1662     // Evaluate as many IV expressions as possible before widening any IVs. This
1663     // forces SCEV to set no-wrap flags before evaluating sign/zero
1664     // extension. The first time SCEV attempts to normalize sign/zero extension,
1665     // the result becomes final. So for the most predictable results, we delay
1666     // evaluation of sign/zero extend evaluation until needed, and avoid running
1667     // other SCEV based analysis prior to simplifyAndExtend.
1668     do {
1669       PHINode *CurrIV = LoopPhis.pop_back_val();
1670 
1671       // Information about sign/zero extensions of CurrIV.
1672       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1673 
1674       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
1675                                    &Visitor);
1676 
1677       if (Visitor.WI.WidestNativeType) {
1678         WideIVs.push_back(Visitor.WI);
1679       }
1680     } while(!LoopPhis.empty());
1681 
1682     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1683       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1684       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1685         Changed = true;
1686         LoopPhis.push_back(WidePhi);
1687       }
1688     }
1689   }
1690   return Changed;
1691 }
1692 
1693 //===----------------------------------------------------------------------===//
1694 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1695 //===----------------------------------------------------------------------===//
1696 
1697 /// Given an Value which is hoped to be part of an add recurance in the given
1698 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1699 /// that this is less general than SCEVs AddRec checking.
1700 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1701   Instruction *IncI = dyn_cast<Instruction>(IncV);
1702   if (!IncI)
1703     return nullptr;
1704 
1705   switch (IncI->getOpcode()) {
1706   case Instruction::Add:
1707   case Instruction::Sub:
1708     break;
1709   case Instruction::GetElementPtr:
1710     // An IV counter must preserve its type.
1711     if (IncI->getNumOperands() == 2)
1712       break;
1713     LLVM_FALLTHROUGH;
1714   default:
1715     return nullptr;
1716   }
1717 
1718   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1719   if (Phi && Phi->getParent() == L->getHeader()) {
1720     if (L->isLoopInvariant(IncI->getOperand(1)))
1721       return Phi;
1722     return nullptr;
1723   }
1724   if (IncI->getOpcode() == Instruction::GetElementPtr)
1725     return nullptr;
1726 
1727   // Allow add/sub to be commuted.
1728   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1729   if (Phi && Phi->getParent() == L->getHeader()) {
1730     if (L->isLoopInvariant(IncI->getOperand(0)))
1731       return Phi;
1732   }
1733   return nullptr;
1734 }
1735 
1736 /// Whether the current loop exit test is based on this value.  Currently this
1737 /// is limited to a direct use in the loop condition.
1738 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
1739   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1740   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1741   // TODO: Allow non-icmp loop test.
1742   if (!ICmp)
1743     return false;
1744 
1745   // TODO: Allow indirect use.
1746   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
1747 }
1748 
1749 /// linearFunctionTestReplace policy. Return true unless we can show that the
1750 /// current exit test is already sufficiently canonical.
1751 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
1752   assert(L->getLoopLatch() && "Must be in simplified form");
1753 
1754   // Avoid converting a constant or loop invariant test back to a runtime
1755   // test.  This is critical for when SCEV's cached ExitCount is less precise
1756   // than the current IR (such as after we've proven a particular exit is
1757   // actually dead and thus the BE count never reaches our ExitCount.)
1758   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1759   if (L->isLoopInvariant(BI->getCondition()))
1760     return false;
1761 
1762   // Do LFTR to simplify the exit condition to an ICMP.
1763   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1764   if (!Cond)
1765     return true;
1766 
1767   // Do LFTR to simplify the exit ICMP to EQ/NE
1768   ICmpInst::Predicate Pred = Cond->getPredicate();
1769   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1770     return true;
1771 
1772   // Look for a loop invariant RHS
1773   Value *LHS = Cond->getOperand(0);
1774   Value *RHS = Cond->getOperand(1);
1775   if (!L->isLoopInvariant(RHS)) {
1776     if (!L->isLoopInvariant(LHS))
1777       return true;
1778     std::swap(LHS, RHS);
1779   }
1780   // Look for a simple IV counter LHS
1781   PHINode *Phi = dyn_cast<PHINode>(LHS);
1782   if (!Phi)
1783     Phi = getLoopPhiForCounter(LHS, L);
1784 
1785   if (!Phi)
1786     return true;
1787 
1788   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1789   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1790   if (Idx < 0)
1791     return true;
1792 
1793   // Do LFTR if the exit condition's IV is *not* a simple counter.
1794   Value *IncV = Phi->getIncomingValue(Idx);
1795   return Phi != getLoopPhiForCounter(IncV, L);
1796 }
1797 
1798 /// Return true if undefined behavior would provable be executed on the path to
1799 /// OnPathTo if Root produced a posion result.  Note that this doesn't say
1800 /// anything about whether OnPathTo is actually executed or whether Root is
1801 /// actually poison.  This can be used to assess whether a new use of Root can
1802 /// be added at a location which is control equivalent with OnPathTo (such as
1803 /// immediately before it) without introducing UB which didn't previously
1804 /// exist.  Note that a false result conveys no information.
1805 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
1806                                           Instruction *OnPathTo,
1807                                           DominatorTree *DT) {
1808   // Basic approach is to assume Root is poison, propagate poison forward
1809   // through all users we can easily track, and then check whether any of those
1810   // users are provable UB and must execute before out exiting block might
1811   // exit.
1812 
1813   // The set of all recursive users we've visited (which are assumed to all be
1814   // poison because of said visit)
1815   SmallSet<const Value *, 16> KnownPoison;
1816   SmallVector<const Instruction*, 16> Worklist;
1817   Worklist.push_back(Root);
1818   while (!Worklist.empty()) {
1819     const Instruction *I = Worklist.pop_back_val();
1820 
1821     // If we know this must trigger UB on a path leading our target.
1822     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
1823       return true;
1824 
1825     // If we can't analyze propagation through this instruction, just skip it
1826     // and transitive users.  Safe as false is a conservative result.
1827     if (!propagatesPoison(cast<Operator>(I)) && I != Root)
1828       continue;
1829 
1830     if (KnownPoison.insert(I).second)
1831       for (const User *User : I->users())
1832         Worklist.push_back(cast<Instruction>(User));
1833   }
1834 
1835   // Might be non-UB, or might have a path we couldn't prove must execute on
1836   // way to exiting bb.
1837   return false;
1838 }
1839 
1840 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1841 /// down to checking that all operands are constant and listing instructions
1842 /// that may hide undef.
1843 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1844                                unsigned Depth) {
1845   if (isa<Constant>(V))
1846     return !isa<UndefValue>(V);
1847 
1848   if (Depth >= 6)
1849     return false;
1850 
1851   // Conservatively handle non-constant non-instructions. For example, Arguments
1852   // may be undef.
1853   Instruction *I = dyn_cast<Instruction>(V);
1854   if (!I)
1855     return false;
1856 
1857   // Load and return values may be undef.
1858   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1859     return false;
1860 
1861   // Optimistically handle other instructions.
1862   for (Value *Op : I->operands()) {
1863     if (!Visited.insert(Op).second)
1864       continue;
1865     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1866       return false;
1867   }
1868   return true;
1869 }
1870 
1871 /// Return true if the given value is concrete. We must prove that undef can
1872 /// never reach it.
1873 ///
1874 /// TODO: If we decide that this is a good approach to checking for undef, we
1875 /// may factor it into a common location.
1876 static bool hasConcreteDef(Value *V) {
1877   SmallPtrSet<Value*, 8> Visited;
1878   Visited.insert(V);
1879   return hasConcreteDefImpl(V, Visited, 0);
1880 }
1881 
1882 /// Return true if this IV has any uses other than the (soon to be rewritten)
1883 /// loop exit test.
1884 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1885   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1886   Value *IncV = Phi->getIncomingValue(LatchIdx);
1887 
1888   for (User *U : Phi->users())
1889     if (U != Cond && U != IncV) return false;
1890 
1891   for (User *U : IncV->users())
1892     if (U != Cond && U != Phi) return false;
1893   return true;
1894 }
1895 
1896 /// Return true if the given phi is a "counter" in L.  A counter is an
1897 /// add recurance (of integer or pointer type) with an arbitrary start, and a
1898 /// step of 1.  Note that L must have exactly one latch.
1899 static bool isLoopCounter(PHINode* Phi, Loop *L,
1900                           ScalarEvolution *SE) {
1901   assert(Phi->getParent() == L->getHeader());
1902   assert(L->getLoopLatch());
1903 
1904   if (!SE->isSCEVable(Phi->getType()))
1905     return false;
1906 
1907   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1908   if (!AR || AR->getLoop() != L || !AR->isAffine())
1909     return false;
1910 
1911   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1912   if (!Step || !Step->isOne())
1913     return false;
1914 
1915   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
1916   Value *IncV = Phi->getIncomingValue(LatchIdx);
1917   return (getLoopPhiForCounter(IncV, L) == Phi);
1918 }
1919 
1920 /// Search the loop header for a loop counter (anadd rec w/step of one)
1921 /// suitable for use by LFTR.  If multiple counters are available, select the
1922 /// "best" one based profitable heuristics.
1923 ///
1924 /// BECount may be an i8* pointer type. The pointer difference is already
1925 /// valid count without scaling the address stride, so it remains a pointer
1926 /// expression as far as SCEV is concerned.
1927 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
1928                                 const SCEV *BECount,
1929                                 ScalarEvolution *SE, DominatorTree *DT) {
1930   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1931 
1932   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
1933 
1934   // Loop over all of the PHI nodes, looking for a simple counter.
1935   PHINode *BestPhi = nullptr;
1936   const SCEV *BestInit = nullptr;
1937   BasicBlock *LatchBlock = L->getLoopLatch();
1938   assert(LatchBlock && "Must be in simplified form");
1939   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1940 
1941   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1942     PHINode *Phi = cast<PHINode>(I);
1943     if (!isLoopCounter(Phi, L, SE))
1944       continue;
1945 
1946     // Avoid comparing an integer IV against a pointer Limit.
1947     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1948       continue;
1949 
1950     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1951 
1952     // AR may be a pointer type, while BECount is an integer type.
1953     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1954     // AR may not be a narrower type, or we may never exit.
1955     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1956     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
1957       continue;
1958 
1959     // Avoid reusing a potentially undef value to compute other values that may
1960     // have originally had a concrete definition.
1961     if (!hasConcreteDef(Phi)) {
1962       // We explicitly allow unknown phis as long as they are already used by
1963       // the loop exit test.  This is legal since performing LFTR could not
1964       // increase the number of undef users.
1965       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
1966       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
1967           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
1968         continue;
1969     }
1970 
1971     // Avoid introducing undefined behavior due to poison which didn't exist in
1972     // the original program.  (Annoyingly, the rules for poison and undef
1973     // propagation are distinct, so this does NOT cover the undef case above.)
1974     // We have to ensure that we don't introduce UB by introducing a use on an
1975     // iteration where said IV produces poison.  Our strategy here differs for
1976     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
1977     // see code in linearFunctionTestReplace.  For pointers, we restrict
1978     // transforms as there is no good way to reinfer inbounds once lost.
1979     if (!Phi->getType()->isIntegerTy() &&
1980         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
1981       continue;
1982 
1983     const SCEV *Init = AR->getStart();
1984 
1985     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
1986       // Don't force a live loop counter if another IV can be used.
1987       if (AlmostDeadIV(Phi, LatchBlock, Cond))
1988         continue;
1989 
1990       // Prefer to count-from-zero. This is a more "canonical" counter form. It
1991       // also prefers integer to pointer IVs.
1992       if (BestInit->isZero() != Init->isZero()) {
1993         if (BestInit->isZero())
1994           continue;
1995       }
1996       // If two IVs both count from zero or both count from nonzero then the
1997       // narrower is likely a dead phi that has been widened. Use the wider phi
1998       // to allow the other to be eliminated.
1999       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2000         continue;
2001     }
2002     BestPhi = Phi;
2003     BestInit = Init;
2004   }
2005   return BestPhi;
2006 }
2007 
2008 /// Insert an IR expression which computes the value held by the IV IndVar
2009 /// (which must be an loop counter w/unit stride) after the backedge of loop L
2010 /// is taken ExitCount times.
2011 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2012                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
2013                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2014   assert(isLoopCounter(IndVar, L, SE));
2015   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2016   const SCEV *IVInit = AR->getStart();
2017 
2018   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2019   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2020   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2021   // the existing GEPs whenever possible.
2022   if (IndVar->getType()->isPointerTy() &&
2023       !ExitCount->getType()->isPointerTy()) {
2024     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2025     // signed value. ExitCount on the other hand represents the loop trip count,
2026     // which is an unsigned value. FindLoopCounter only allows induction
2027     // variables that have a positive unit stride of one. This means we don't
2028     // have to handle the case of negative offsets (yet) and just need to zero
2029     // extend ExitCount.
2030     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2031     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2032     if (UsePostInc)
2033       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2034 
2035     // Expand the code for the iteration count.
2036     assert(SE->isLoopInvariant(IVOffset, L) &&
2037            "Computed iteration count is not loop invariant!");
2038 
2039     // We could handle pointer IVs other than i8*, but we need to compensate for
2040     // gep index scaling.
2041     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2042                              cast<PointerType>(IndVar->getType())
2043                                  ->getElementType())->isOne() &&
2044            "unit stride pointer IV must be i8*");
2045 
2046     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2047     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2048     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2049   } else {
2050     // In any other case, convert both IVInit and ExitCount to integers before
2051     // comparing. This may result in SCEV expansion of pointers, but in practice
2052     // SCEV will fold the pointer arithmetic away as such:
2053     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2054     //
2055     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2056     // for simple memset-style loops.
2057     //
2058     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2059     // were generated on top of case #2, which is not expected.
2060 
2061     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2062     // For unit stride, IVCount = Start + ExitCount with 2's complement
2063     // overflow.
2064 
2065     // For integer IVs, truncate the IV before computing IVInit + BECount,
2066     // unless we know apriori that the limit must be a constant when evaluated
2067     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2068     // of the IV in the loop over a (potentially) expensive expansion of the
2069     // widened exit count add(zext(add)) expression.
2070     if (SE->getTypeSizeInBits(IVInit->getType())
2071         > SE->getTypeSizeInBits(ExitCount->getType())) {
2072       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2073         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2074       else
2075         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2076     }
2077 
2078     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2079 
2080     if (UsePostInc)
2081       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2082 
2083     // Expand the code for the iteration count.
2084     assert(SE->isLoopInvariant(IVLimit, L) &&
2085            "Computed iteration count is not loop invariant!");
2086     // Ensure that we generate the same type as IndVar, or a smaller integer
2087     // type. In the presence of null pointer values, we have an integer type
2088     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2089     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2090       IndVar->getType() : ExitCount->getType();
2091     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2092     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2093   }
2094 }
2095 
2096 /// This method rewrites the exit condition of the loop to be a canonical !=
2097 /// comparison against the incremented loop induction variable.  This pass is
2098 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2099 /// determine a loop-invariant trip count of the loop, which is actually a much
2100 /// broader range than just linear tests.
2101 bool IndVarSimplify::
2102 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2103                           const SCEV *ExitCount,
2104                           PHINode *IndVar, SCEVExpander &Rewriter) {
2105   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2106   assert(isLoopCounter(IndVar, L, SE));
2107   Instruction * const IncVar =
2108     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2109 
2110   // Initialize CmpIndVar to the preincremented IV.
2111   Value *CmpIndVar = IndVar;
2112   bool UsePostInc = false;
2113 
2114   // If the exiting block is the same as the backedge block, we prefer to
2115   // compare against the post-incremented value, otherwise we must compare
2116   // against the preincremented value.
2117   if (ExitingBB == L->getLoopLatch()) {
2118     // For pointer IVs, we chose to not strip inbounds which requires us not
2119     // to add a potentially UB introducing use.  We need to either a) show
2120     // the loop test we're modifying is already in post-inc form, or b) show
2121     // that adding a use must not introduce UB.
2122     bool SafeToPostInc =
2123         IndVar->getType()->isIntegerTy() ||
2124         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2125         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2126     if (SafeToPostInc) {
2127       UsePostInc = true;
2128       CmpIndVar = IncVar;
2129     }
2130   }
2131 
2132   // It may be necessary to drop nowrap flags on the incrementing instruction
2133   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2134   // case the increment might have previously been poison on the last iteration
2135   // only) or if LFTR switches to a different IV that was previously dynamically
2136   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2137   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2138   // check), because the pre-inc addrec flags may be adopted from the original
2139   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2140   // TODO: This handling is inaccurate for one case: If we switch to a
2141   // dynamically dead IV that wraps on the first loop iteration only, which is
2142   // not covered by the post-inc addrec. (If the new IV was not dynamically
2143   // dead, it could not be poison on the first iteration in the first place.)
2144   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2145     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2146     if (BO->hasNoUnsignedWrap())
2147       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2148     if (BO->hasNoSignedWrap())
2149       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2150   }
2151 
2152   Value *ExitCnt = genLoopLimit(
2153       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2154   assert(ExitCnt->getType()->isPointerTy() ==
2155              IndVar->getType()->isPointerTy() &&
2156          "genLoopLimit missed a cast");
2157 
2158   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2159   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2160   ICmpInst::Predicate P;
2161   if (L->contains(BI->getSuccessor(0)))
2162     P = ICmpInst::ICMP_NE;
2163   else
2164     P = ICmpInst::ICMP_EQ;
2165 
2166   IRBuilder<> Builder(BI);
2167 
2168   // The new loop exit condition should reuse the debug location of the
2169   // original loop exit condition.
2170   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2171     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2172 
2173   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2174   // avoid the expensive expansion of the limit expression in the wider type,
2175   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2176   // since we know (from the exit count bitwidth), that we can't self-wrap in
2177   // the narrower type.
2178   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2179   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2180   if (CmpIndVarSize > ExitCntSize) {
2181     assert(!CmpIndVar->getType()->isPointerTy() &&
2182            !ExitCnt->getType()->isPointerTy());
2183 
2184     // Before resorting to actually inserting the truncate, use the same
2185     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2186     // the other side of the comparison instead.  We still evaluate the limit
2187     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2188     // a truncate within in.
2189     bool Extended = false;
2190     const SCEV *IV = SE->getSCEV(CmpIndVar);
2191     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2192                                                   ExitCnt->getType());
2193     const SCEV *ZExtTrunc =
2194       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2195 
2196     if (ZExtTrunc == IV) {
2197       Extended = true;
2198       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2199                                    "wide.trip.count");
2200     } else {
2201       const SCEV *SExtTrunc =
2202         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2203       if (SExtTrunc == IV) {
2204         Extended = true;
2205         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2206                                      "wide.trip.count");
2207       }
2208     }
2209 
2210     if (Extended) {
2211       bool Discard;
2212       L->makeLoopInvariant(ExitCnt, Discard);
2213     } else
2214       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2215                                       "lftr.wideiv");
2216   }
2217   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2218                     << "      LHS:" << *CmpIndVar << '\n'
2219                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2220                     << "\n"
2221                     << "      RHS:\t" << *ExitCnt << "\n"
2222                     << "ExitCount:\t" << *ExitCount << "\n"
2223                     << "  was: " << *BI->getCondition() << "\n");
2224 
2225   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2226   Value *OrigCond = BI->getCondition();
2227   // It's tempting to use replaceAllUsesWith here to fully replace the old
2228   // comparison, but that's not immediately safe, since users of the old
2229   // comparison may not be dominated by the new comparison. Instead, just
2230   // update the branch to use the new comparison; in the common case this
2231   // will make old comparison dead.
2232   BI->setCondition(Cond);
2233   DeadInsts.emplace_back(OrigCond);
2234 
2235   ++NumLFTR;
2236   return true;
2237 }
2238 
2239 //===----------------------------------------------------------------------===//
2240 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2241 //===----------------------------------------------------------------------===//
2242 
2243 /// If there's a single exit block, sink any loop-invariant values that
2244 /// were defined in the preheader but not used inside the loop into the
2245 /// exit block to reduce register pressure in the loop.
2246 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2247   BasicBlock *ExitBlock = L->getExitBlock();
2248   if (!ExitBlock) return false;
2249 
2250   BasicBlock *Preheader = L->getLoopPreheader();
2251   if (!Preheader) return false;
2252 
2253   bool MadeAnyChanges = false;
2254   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2255   BasicBlock::iterator I(Preheader->getTerminator());
2256   while (I != Preheader->begin()) {
2257     --I;
2258     // New instructions were inserted at the end of the preheader.
2259     if (isa<PHINode>(I))
2260       break;
2261 
2262     // Don't move instructions which might have side effects, since the side
2263     // effects need to complete before instructions inside the loop.  Also don't
2264     // move instructions which might read memory, since the loop may modify
2265     // memory. Note that it's okay if the instruction might have undefined
2266     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2267     // block.
2268     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2269       continue;
2270 
2271     // Skip debug info intrinsics.
2272     if (isa<DbgInfoIntrinsic>(I))
2273       continue;
2274 
2275     // Skip eh pad instructions.
2276     if (I->isEHPad())
2277       continue;
2278 
2279     // Don't sink alloca: we never want to sink static alloca's out of the
2280     // entry block, and correctly sinking dynamic alloca's requires
2281     // checks for stacksave/stackrestore intrinsics.
2282     // FIXME: Refactor this check somehow?
2283     if (isa<AllocaInst>(I))
2284       continue;
2285 
2286     // Determine if there is a use in or before the loop (direct or
2287     // otherwise).
2288     bool UsedInLoop = false;
2289     for (Use &U : I->uses()) {
2290       Instruction *User = cast<Instruction>(U.getUser());
2291       BasicBlock *UseBB = User->getParent();
2292       if (PHINode *P = dyn_cast<PHINode>(User)) {
2293         unsigned i =
2294           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2295         UseBB = P->getIncomingBlock(i);
2296       }
2297       if (UseBB == Preheader || L->contains(UseBB)) {
2298         UsedInLoop = true;
2299         break;
2300       }
2301     }
2302 
2303     // If there is, the def must remain in the preheader.
2304     if (UsedInLoop)
2305       continue;
2306 
2307     // Otherwise, sink it to the exit block.
2308     Instruction *ToMove = &*I;
2309     bool Done = false;
2310 
2311     if (I != Preheader->begin()) {
2312       // Skip debug info intrinsics.
2313       do {
2314         --I;
2315       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2316 
2317       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2318         Done = true;
2319     } else {
2320       Done = true;
2321     }
2322 
2323     MadeAnyChanges = true;
2324     ToMove->moveBefore(*ExitBlock, InsertPt);
2325     if (Done) break;
2326     InsertPt = ToMove->getIterator();
2327   }
2328 
2329   return MadeAnyChanges;
2330 }
2331 
2332 // Returns true if the condition of \p BI being checked is invariant and can be
2333 // proved to be trivially true.
2334 static bool isTrivialCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE,
2335                           bool ProvingLoopExit) {
2336   ICmpInst::Predicate Pred;
2337   Value *LHS, *RHS;
2338   using namespace PatternMatch;
2339   BasicBlock *TrueSucc, *FalseSucc;
2340   if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
2341                       m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
2342     return false;
2343 
2344   assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
2345          "Not a loop exit!");
2346 
2347   // 'LHS pred RHS' should now mean that we stay in loop.
2348   if (L->contains(FalseSucc))
2349     Pred = CmpInst::getInversePredicate(Pred);
2350 
2351   // If we are proving loop exit, invert the predicate.
2352   if (ProvingLoopExit)
2353     Pred = CmpInst::getInversePredicate(Pred);
2354 
2355   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
2356   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
2357   // Can we prove it to be trivially true?
2358   if (SE->isKnownPredicate(Pred, LHSS, RHSS))
2359     return true;
2360 
2361   return false;
2362 }
2363 
2364 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2365   SmallVector<BasicBlock*, 16> ExitingBlocks;
2366   L->getExitingBlocks(ExitingBlocks);
2367 
2368   // Remove all exits which aren't both rewriteable and execute on every
2369   // iteration.
2370   auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2371     // If our exitting block exits multiple loops, we can only rewrite the
2372     // innermost one.  Otherwise, we're changing how many times the innermost
2373     // loop runs before it exits.
2374     if (LI->getLoopFor(ExitingBB) != L)
2375       return true;
2376 
2377     // Can't rewrite non-branch yet.
2378     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2379     if (!BI)
2380       return true;
2381 
2382     // If already constant, nothing to do.
2383     if (isa<Constant>(BI->getCondition()))
2384       return true;
2385 
2386     // Likewise, the loop latch must be dominated by the exiting BB.
2387     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
2388       return true;
2389 
2390     return false;
2391   });
2392   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2393 
2394   if (ExitingBlocks.empty())
2395     return false;
2396 
2397   // Get a symbolic upper bound on the loop backedge taken count.
2398   const SCEV *MaxExitCount = SE->computeMaxBackedgeTakenCount(L);
2399   if (isa<SCEVCouldNotCompute>(MaxExitCount))
2400     return false;
2401 
2402   // Visit our exit blocks in order of dominance. We know from the fact that
2403   // all exits must dominate the latch, so there is a total dominance order
2404   // between them.
2405   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
2406                // std::sort sorts in ascending order, so we want the inverse of
2407                // the normal dominance relation.
2408                if (A == B) return false;
2409                if (DT->properlyDominates(A, B))
2410                  return true;
2411                else {
2412                  assert(DT->properlyDominates(B, A) &&
2413                         "expected total dominance order!");
2414                  return false;
2415                }
2416   });
2417 #ifdef ASSERT
2418   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2419     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2420   }
2421 #endif
2422 
2423   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2424     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2425     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2426     auto *OldCond = BI->getCondition();
2427     auto *NewCond = ConstantInt::get(OldCond->getType(),
2428                                      IsTaken ? ExitIfTrue : !ExitIfTrue);
2429     BI->setCondition(NewCond);
2430     if (OldCond->use_empty())
2431       DeadInsts.emplace_back(OldCond);
2432   };
2433 
2434   bool Changed = false;
2435   SmallSet<const SCEV*, 8> DominatingExitCounts;
2436   for (BasicBlock *ExitingBB : ExitingBlocks) {
2437     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2438     if (isa<SCEVCouldNotCompute>(ExitCount)) {
2439       // Okay, we do not know the exit count here. Can we at least prove that it
2440       // will remain the same within iteration space?
2441       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2442       if (isTrivialCond(L, BI, SE, false)) {
2443         FoldExit(ExitingBB, false);
2444         Changed = true;
2445       }
2446       if (isTrivialCond(L, BI, SE, true)) {
2447         FoldExit(ExitingBB, true);
2448         Changed = true;
2449       }
2450       continue;
2451     }
2452 
2453     // If we know we'd exit on the first iteration, rewrite the exit to
2454     // reflect this.  This does not imply the loop must exit through this
2455     // exit; there may be an earlier one taken on the first iteration.
2456     // TODO: Given we know the backedge can't be taken, we should go ahead
2457     // and break it.  Or at least, kill all the header phis and simplify.
2458     if (ExitCount->isZero()) {
2459       FoldExit(ExitingBB, true);
2460       Changed = true;
2461       continue;
2462     }
2463 
2464     // If we end up with a pointer exit count, bail.  Note that we can end up
2465     // with a pointer exit count for one exiting block, and not for another in
2466     // the same loop.
2467     if (!ExitCount->getType()->isIntegerTy() ||
2468         !MaxExitCount->getType()->isIntegerTy())
2469       continue;
2470 
2471     Type *WiderType =
2472       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2473     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2474     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2475     assert(MaxExitCount->getType() == ExitCount->getType());
2476 
2477     // Can we prove that some other exit must be taken strictly before this
2478     // one?
2479     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2480                                      MaxExitCount, ExitCount)) {
2481       FoldExit(ExitingBB, false);
2482       Changed = true;
2483       continue;
2484     }
2485 
2486     // As we run, keep track of which exit counts we've encountered.  If we
2487     // find a duplicate, we've found an exit which would have exited on the
2488     // exiting iteration, but (from the visit order) strictly follows another
2489     // which does the same and is thus dead.
2490     if (!DominatingExitCounts.insert(ExitCount).second) {
2491       FoldExit(ExitingBB, false);
2492       Changed = true;
2493       continue;
2494     }
2495 
2496     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
2497     // here.  If we kept track of the min of dominanting exits so far, we could
2498     // discharge exits with EC >= MDEC. This is less powerful than the existing
2499     // transform (since later exits aren't considered), but potentially more
2500     // powerful for any case where SCEV can prove a >=u b, but neither a == b
2501     // or a >u b.  Such a case is not currently known.
2502   }
2503   return Changed;
2504 }
2505 
2506 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2507   SmallVector<BasicBlock*, 16> ExitingBlocks;
2508   L->getExitingBlocks(ExitingBlocks);
2509 
2510   // Finally, see if we can rewrite our exit conditions into a loop invariant
2511   // form. If we have a read-only loop, and we can tell that we must exit down
2512   // a path which does not need any of the values computed within the loop, we
2513   // can rewrite the loop to exit on the first iteration.  Note that this
2514   // doesn't either a) tell us the loop exits on the first iteration (unless
2515   // *all* exits are predicateable) or b) tell us *which* exit might be taken.
2516   // This transformation looks a lot like a restricted form of dead loop
2517   // elimination, but restricted to read-only loops and without neccesssarily
2518   // needing to kill the loop entirely.
2519   if (!LoopPredication)
2520     return false;
2521 
2522   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2523     return false;
2524 
2525   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2526   // through *explicit* control flow.  We have to eliminate the possibility of
2527   // implicit exits (see below) before we know it's truly exact.
2528   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2529   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2530       !SE->isLoopInvariant(ExactBTC, L) ||
2531       !isSafeToExpand(ExactBTC, *SE))
2532     return false;
2533 
2534   // If we end up with a pointer exit count, bail.  It may be unsized.
2535   if (!ExactBTC->getType()->isIntegerTy())
2536     return false;
2537 
2538   auto BadExit = [&](BasicBlock *ExitingBB) {
2539     // If our exiting block exits multiple loops, we can only rewrite the
2540     // innermost one.  Otherwise, we're changing how many times the innermost
2541     // loop runs before it exits.
2542     if (LI->getLoopFor(ExitingBB) != L)
2543       return true;
2544 
2545     // Can't rewrite non-branch yet.
2546     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2547     if (!BI)
2548       return true;
2549 
2550     // If already constant, nothing to do.
2551     if (isa<Constant>(BI->getCondition()))
2552       return true;
2553 
2554     // If the exit block has phis, we need to be able to compute the values
2555     // within the loop which contains them.  This assumes trivially lcssa phis
2556     // have already been removed; TODO: generalize
2557     BasicBlock *ExitBlock =
2558     BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2559     if (!ExitBlock->phis().empty())
2560       return true;
2561 
2562     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2563     assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2564     if (!SE->isLoopInvariant(ExitCount, L) ||
2565         !isSafeToExpand(ExitCount, *SE))
2566       return true;
2567 
2568     // If we end up with a pointer exit count, bail.  It may be unsized.
2569     if (!ExitCount->getType()->isIntegerTy())
2570       return true;
2571 
2572     return false;
2573   };
2574 
2575   // If we have any exits which can't be predicated themselves, than we can't
2576   // predicate any exit which isn't guaranteed to execute before it.  Consider
2577   // two exits (a) and (b) which would both exit on the same iteration.  If we
2578   // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
2579   // we could convert a loop from exiting through (a) to one exiting through
2580   // (b).  Note that this problem exists only for exits with the same exit
2581   // count, and we could be more aggressive when exit counts are known inequal.
2582   llvm::sort(ExitingBlocks,
2583             [&](BasicBlock *A, BasicBlock *B) {
2584               // std::sort sorts in ascending order, so we want the inverse of
2585               // the normal dominance relation, plus a tie breaker for blocks
2586               // unordered by dominance.
2587               if (DT->properlyDominates(A, B)) return true;
2588               if (DT->properlyDominates(B, A)) return false;
2589               return A->getName() < B->getName();
2590             });
2591   // Check to see if our exit blocks are a total order (i.e. a linear chain of
2592   // exits before the backedge).  If they aren't, reasoning about reachability
2593   // is complicated and we choose not to for now.
2594   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2595     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2596       return false;
2597 
2598   // Given our sorted total order, we know that exit[j] must be evaluated
2599   // after all exit[i] such j > i.
2600   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2601     if (BadExit(ExitingBlocks[i])) {
2602       ExitingBlocks.resize(i);
2603       break;
2604     }
2605 
2606   if (ExitingBlocks.empty())
2607     return false;
2608 
2609   // We rely on not being able to reach an exiting block on a later iteration
2610   // then it's statically compute exit count.  The implementaton of
2611   // getExitCount currently has this invariant, but assert it here so that
2612   // breakage is obvious if this ever changes..
2613   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2614         return DT->dominates(ExitingBB, L->getLoopLatch());
2615       }));
2616 
2617   // At this point, ExitingBlocks consists of only those blocks which are
2618   // predicatable.  Given that, we know we have at least one exit we can
2619   // predicate if the loop is doesn't have side effects and doesn't have any
2620   // implicit exits (because then our exact BTC isn't actually exact).
2621   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
2622   // suggestions on how to improve this?  I can obviously bail out for outer
2623   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
2624   // is that enough for *all* side effects?
2625   for (BasicBlock *BB : L->blocks())
2626     for (auto &I : *BB)
2627       // TODO:isGuaranteedToTransfer
2628       if (I.mayHaveSideEffects() || I.mayThrow())
2629         return false;
2630 
2631   bool Changed = false;
2632   // Finally, do the actual predication for all predicatable blocks.  A couple
2633   // of notes here:
2634   // 1) We don't bother to constant fold dominated exits with identical exit
2635   //    counts; that's simply a form of CSE/equality propagation and we leave
2636   //    it for dedicated passes.
2637   // 2) We insert the comparison at the branch.  Hoisting introduces additional
2638   //    legality constraints and we leave that to dedicated logic.  We want to
2639   //    predicate even if we can't insert a loop invariant expression as
2640   //    peeling or unrolling will likely reduce the cost of the otherwise loop
2641   //    varying check.
2642   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2643   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2644   Value *ExactBTCV = nullptr; // Lazily generated if needed.
2645   for (BasicBlock *ExitingBB : ExitingBlocks) {
2646     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2647 
2648     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2649     Value *NewCond;
2650     if (ExitCount == ExactBTC) {
2651       NewCond = L->contains(BI->getSuccessor(0)) ?
2652         B.getFalse() : B.getTrue();
2653     } else {
2654       Value *ECV = Rewriter.expandCodeFor(ExitCount);
2655       if (!ExactBTCV)
2656         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2657       Value *RHS = ExactBTCV;
2658       if (ECV->getType() != RHS->getType()) {
2659         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2660         ECV = B.CreateZExt(ECV, WiderTy);
2661         RHS = B.CreateZExt(RHS, WiderTy);
2662       }
2663       auto Pred = L->contains(BI->getSuccessor(0)) ?
2664         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2665       NewCond = B.CreateICmp(Pred, ECV, RHS);
2666     }
2667     Value *OldCond = BI->getCondition();
2668     BI->setCondition(NewCond);
2669     if (OldCond->use_empty())
2670       DeadInsts.emplace_back(OldCond);
2671     Changed = true;
2672   }
2673 
2674   return Changed;
2675 }
2676 
2677 //===----------------------------------------------------------------------===//
2678 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2679 //===----------------------------------------------------------------------===//
2680 
2681 bool IndVarSimplify::run(Loop *L) {
2682   // We need (and expect!) the incoming loop to be in LCSSA.
2683   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2684          "LCSSA required to run indvars!");
2685 
2686   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2687   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2688   //    canonicalization can be a pessimization without LSR to "clean up"
2689   //    afterwards.
2690   //  - We depend on having a preheader; in particular,
2691   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2692   //    and we're in trouble if we can't find the induction variable even when
2693   //    we've manually inserted one.
2694   //  - LFTR relies on having a single backedge.
2695   if (!L->isLoopSimplifyForm())
2696     return false;
2697 
2698 #ifndef NDEBUG
2699   // Used below for a consistency check only
2700   // Note: Since the result returned by ScalarEvolution may depend on the order
2701   // in which previous results are added to its cache, the call to
2702   // getBackedgeTakenCount() may change following SCEV queries.
2703   const SCEV *BackedgeTakenCount;
2704   if (VerifyIndvars)
2705     BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2706 #endif
2707 
2708   bool Changed = false;
2709   // If there are any floating-point recurrences, attempt to
2710   // transform them to use integer recurrences.
2711   Changed |= rewriteNonIntegerIVs(L);
2712 
2713   // Create a rewriter object which we'll use to transform the code with.
2714   SCEVExpander Rewriter(*SE, DL, "indvars");
2715 #ifndef NDEBUG
2716   Rewriter.setDebugType(DEBUG_TYPE);
2717 #endif
2718 
2719   // Eliminate redundant IV users.
2720   //
2721   // Simplification works best when run before other consumers of SCEV. We
2722   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2723   // other expressions involving loop IVs have been evaluated. This helps SCEV
2724   // set no-wrap flags before normalizing sign/zero extension.
2725   Rewriter.disableCanonicalMode();
2726   Changed |= simplifyAndExtend(L, Rewriter, LI);
2727 
2728   // Check to see if we can compute the final value of any expressions
2729   // that are recurrent in the loop, and substitute the exit values from the
2730   // loop into any instructions outside of the loop that use the final values
2731   // of the current expressions.
2732   if (ReplaceExitValue != NeverRepl) {
2733     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
2734                                              ReplaceExitValue, DeadInsts)) {
2735       NumReplaced += Rewrites;
2736       Changed = true;
2737     }
2738   }
2739 
2740   // Eliminate redundant IV cycles.
2741   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2742 
2743   // Try to eliminate loop exits based on analyzeable exit counts
2744   if (optimizeLoopExits(L, Rewriter))  {
2745     Changed = true;
2746     // Given we've changed exit counts, notify SCEV
2747     SE->forgetLoop(L);
2748   }
2749 
2750   // Try to form loop invariant tests for loop exits by changing how many
2751   // iterations of the loop run when that is unobservable.
2752   if (predicateLoopExits(L, Rewriter)) {
2753     Changed = true;
2754     // Given we've changed exit counts, notify SCEV
2755     SE->forgetLoop(L);
2756   }
2757 
2758   // If we have a trip count expression, rewrite the loop's exit condition
2759   // using it.
2760   if (!DisableLFTR) {
2761     BasicBlock *PreHeader = L->getLoopPreheader();
2762     BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
2763 
2764     SmallVector<BasicBlock*, 16> ExitingBlocks;
2765     L->getExitingBlocks(ExitingBlocks);
2766     for (BasicBlock *ExitingBB : ExitingBlocks) {
2767       // Can't rewrite non-branch yet.
2768       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2769         continue;
2770 
2771       // If our exitting block exits multiple loops, we can only rewrite the
2772       // innermost one.  Otherwise, we're changing how many times the innermost
2773       // loop runs before it exits.
2774       if (LI->getLoopFor(ExitingBB) != L)
2775         continue;
2776 
2777       if (!needsLFTR(L, ExitingBB))
2778         continue;
2779 
2780       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2781       if (isa<SCEVCouldNotCompute>(ExitCount))
2782         continue;
2783 
2784       // This was handled above, but as we form SCEVs, we can sometimes refine
2785       // existing ones; this allows exit counts to be folded to zero which
2786       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
2787       // until stable to handle cases like this better.
2788       if (ExitCount->isZero())
2789         continue;
2790 
2791       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2792       if (!IndVar)
2793         continue;
2794 
2795       // Avoid high cost expansions.  Note: This heuristic is questionable in
2796       // that our definition of "high cost" is not exactly principled.
2797       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2798                                        TTI, PreHeaderBR))
2799         continue;
2800 
2801       // Check preconditions for proper SCEVExpander operation. SCEV does not
2802       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2803       // any pass that uses the SCEVExpander must do it. This does not work
2804       // well for loop passes because SCEVExpander makes assumptions about
2805       // all loops, while LoopPassManager only forces the current loop to be
2806       // simplified.
2807       //
2808       // FIXME: SCEV expansion has no way to bail out, so the caller must
2809       // explicitly check any assumptions made by SCEV. Brittle.
2810       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2811       if (!AR || AR->getLoop()->getLoopPreheader())
2812         Changed |= linearFunctionTestReplace(L, ExitingBB,
2813                                              ExitCount, IndVar,
2814                                              Rewriter);
2815     }
2816   }
2817   // Clear the rewriter cache, because values that are in the rewriter's cache
2818   // can be deleted in the loop below, causing the AssertingVH in the cache to
2819   // trigger.
2820   Rewriter.clear();
2821 
2822   // Now that we're done iterating through lists, clean up any instructions
2823   // which are now dead.
2824   while (!DeadInsts.empty())
2825     if (Instruction *Inst =
2826             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2827       Changed |=
2828           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2829 
2830   // The Rewriter may not be used from this point on.
2831 
2832   // Loop-invariant instructions in the preheader that aren't used in the
2833   // loop may be sunk below the loop to reduce register pressure.
2834   Changed |= sinkUnusedInvariants(L);
2835 
2836   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2837   // trip count and therefore can further simplify exit values in addition to
2838   // rewriteLoopExitValues.
2839   Changed |= rewriteFirstIterationLoopExitValues(L);
2840 
2841   // Clean up dead instructions.
2842   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2843 
2844   // Check a post-condition.
2845   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2846          "Indvars did not preserve LCSSA!");
2847 
2848   // Verify that LFTR, and any other change have not interfered with SCEV's
2849   // ability to compute trip count.  We may have *changed* the exit count, but
2850   // only by reducing it.
2851 #ifndef NDEBUG
2852   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2853     SE->forgetLoop(L);
2854     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2855     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2856         SE->getTypeSizeInBits(NewBECount->getType()))
2857       NewBECount = SE->getTruncateOrNoop(NewBECount,
2858                                          BackedgeTakenCount->getType());
2859     else
2860       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2861                                                  NewBECount->getType());
2862     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2863                                  NewBECount) && "indvars must preserve SCEV");
2864   }
2865   if (VerifyMemorySSA && MSSAU)
2866     MSSAU->getMemorySSA()->verifyMemorySSA();
2867 #endif
2868 
2869   return Changed;
2870 }
2871 
2872 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2873                                           LoopStandardAnalysisResults &AR,
2874                                           LPMUpdater &) {
2875   Function *F = L.getHeader()->getParent();
2876   const DataLayout &DL = F->getParent()->getDataLayout();
2877 
2878   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA);
2879   if (!IVS.run(&L))
2880     return PreservedAnalyses::all();
2881 
2882   auto PA = getLoopPassPreservedAnalyses();
2883   PA.preserveSet<CFGAnalyses>();
2884   if (AR.MSSA)
2885     PA.preserve<MemorySSAAnalysis>();
2886   return PA;
2887 }
2888 
2889 namespace {
2890 
2891 struct IndVarSimplifyLegacyPass : public LoopPass {
2892   static char ID; // Pass identification, replacement for typeid
2893 
2894   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2895     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2896   }
2897 
2898   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2899     if (skipLoop(L))
2900       return false;
2901 
2902     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2903     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2904     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2905     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2906     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2907     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2908     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2909     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2910     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2911     MemorySSA *MSSA = nullptr;
2912     if (MSSAAnalysis)
2913       MSSA = &MSSAAnalysis->getMSSA();
2914 
2915     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA);
2916     return IVS.run(L);
2917   }
2918 
2919   void getAnalysisUsage(AnalysisUsage &AU) const override {
2920     AU.setPreservesCFG();
2921     AU.addPreserved<MemorySSAWrapperPass>();
2922     getLoopAnalysisUsage(AU);
2923   }
2924 };
2925 
2926 } // end anonymous namespace
2927 
2928 char IndVarSimplifyLegacyPass::ID = 0;
2929 
2930 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2931                       "Induction Variable Simplification", false, false)
2932 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2933 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2934                     "Induction Variable Simplification", false, false)
2935 
2936 Pass *llvm::createIndVarSimplifyPass() {
2937   return new IndVarSimplifyLegacyPass();
2938 }
2939