1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
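//      As an illustrative sketch (not a specific test case), in
//        'int s = 0; for (i = 0; i != n; ++i) s += 4;  use(s);'
//      the exit value of 's' is '4 * n', so the use outside the loop can be
//      rewritten to compute '4 * n' directly, making the loop dead if it has
//      no other side effects.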
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/MemorySSA.h"
42 #include "llvm/Analysis/MemorySSAUpdater.h"
43 #include "llvm/Analysis/ScalarEvolution.h"
44 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
45 #include "llvm/Analysis/TargetLibraryInfo.h"
46 #include "llvm/Analysis/TargetTransformInfo.h"
47 #include "llvm/Analysis/ValueTracking.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/ConstantRange.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Operator.h"
64 #include "llvm/IR/PassManager.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/Use.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/IR/ValueHandle.h"
71 #include "llvm/InitializePasses.h"
72 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/CommandLine.h"
75 #include "llvm/Support/Compiler.h"
76 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/ErrorHandling.h"
78 #include "llvm/Support/MathExtras.h"
79 #include "llvm/Support/raw_ostream.h"
80 #include "llvm/Transforms/Scalar.h"
81 #include "llvm/Transforms/Scalar/LoopPassManager.h"
82 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
83 #include "llvm/Transforms/Utils/Local.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
86 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
87 #include <cassert>
88 #include <cstdint>
89 #include <utility>
90 
91 using namespace llvm;
92 
93 #define DEBUG_TYPE "indvars"
94 
95 STATISTIC(NumWidened     , "Number of indvars widened");
96 STATISTIC(NumReplaced    , "Number of exit values replaced");
97 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
98 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
99 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
100 
101 // Trip count verification can be enabled by default under NDEBUG if we
102 // implement a strong expression equivalence checker in SCEV. Until then, we
103 // use the verify-indvars flag, which may assert in some cases.
104 static cl::opt<bool> VerifyIndvars(
105     "verify-indvars", cl::Hidden,
106     cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
107              "effect in release builds. (Note: this adds additional SCEV "
108              "queries potentially changing the analysis result)"));
109 
110 static cl::opt<ReplaceExitVal> ReplaceExitValue(
111     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
112     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
113     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
114                clEnumValN(OnlyCheapRepl, "cheap",
115                           "only replace exit value when the cost is cheap"),
116                clEnumValN(NoHardUse, "noharduse",
117                           "only replace exit values when loop def likely dead"),
118                clEnumValN(AlwaysRepl, "always",
119                           "always replace exit value whenever possible")));
120 
121 static cl::opt<bool> UsePostIncrementRanges(
122   "indvars-post-increment-ranges", cl::Hidden,
123   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
124   cl::init(true));
125 
126 static cl::opt<bool>
127 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
128             cl::desc("Disable Linear Function Test Replace optimization"));
129 
130 static cl::opt<bool>
131 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
132                 cl::desc("Predicate conditions in read only loops"));
133 
134 static cl::opt<bool>
135 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
136                 cl::desc("Allow widening of indvars to eliminate s/zext"));
137 
138 namespace {
139 
140 struct RewritePhi;
141 
142 class IndVarSimplify {
143   LoopInfo *LI;
144   ScalarEvolution *SE;
145   DominatorTree *DT;
146   const DataLayout &DL;
147   TargetLibraryInfo *TLI;
148   const TargetTransformInfo *TTI;
149   std::unique_ptr<MemorySSAUpdater> MSSAU;
150 
151   SmallVector<WeakTrackingVH, 16> DeadInsts;
152 
153   bool handleFloatingPointIV(Loop *L, PHINode *PH);
154   bool rewriteNonIntegerIVs(Loop *L);
155 
156   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
157   /// Try to eliminate loop exits based on analyzable exit counts.
158   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
159   /// Try to form loop invariant tests for loop exits by changing how many
160   /// iterations of the loop run when that is unobservable.
161   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
162 
163   bool rewriteFirstIterationLoopExitValues(Loop *L);
164 
165   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
166                                  const SCEV *ExitCount,
167                                  PHINode *IndVar, SCEVExpander &Rewriter);
168 
169   bool sinkUnusedInvariants(Loop *L);
170 
171 public:
172   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
173                  const DataLayout &DL, TargetLibraryInfo *TLI,
174                  TargetTransformInfo *TTI, MemorySSA *MSSA)
175       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
176     if (MSSA)
177       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
178   }
179 
180   bool run(Loop *L);
181 };
182 
183 } // end anonymous namespace
184 
185 /// Determine the insertion point for this user. By default, insert immediately
186 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
187 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
188 /// common dominator for the incoming blocks. A nullptr can be returned if no
189 /// viable location is found: it may happen if User is a PHI and Def only comes
190 /// to this PHI from unreachable blocks.
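/// As a hypothetical sketch: if Def reaches a PHI from two reachable incoming
/// blocks %bb1 and %bb2 (names illustrative), the chosen insertion point is
/// the terminator of the nearest common dominator of %bb1 and %bb2.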
191 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
192                                           DominatorTree *DT, LoopInfo *LI) {
193   PHINode *PHI = dyn_cast<PHINode>(User);
194   if (!PHI)
195     return User;
196 
197   Instruction *InsertPt = nullptr;
198   for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
199     if (PHI->getIncomingValue(i) != Def)
200       continue;
201 
202     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
203 
204     if (!DT->isReachableFromEntry(InsertBB))
205       continue;
206 
207     if (!InsertPt) {
208       InsertPt = InsertBB->getTerminator();
209       continue;
210     }
211     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
212     InsertPt = InsertBB->getTerminator();
213   }
214 
215   // If we have skipped all inputs, it means that Def only comes to Phi from
216   // unreachable blocks.
217   if (!InsertPt)
218     return nullptr;
219 
220   auto *DefI = dyn_cast<Instruction>(Def);
221   if (!DefI)
222     return InsertPt;
223 
224   assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
225 
226   auto *L = LI->getLoopFor(DefI->getParent());
227   assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
228 
229   for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
230     if (LI->getLoopFor(DTN->getBlock()) == L)
231       return DTN->getBlock()->getTerminator();
232 
233   llvm_unreachable("DefI dominates InsertPt!");
234 }
235 
236 //===----------------------------------------------------------------------===//
237 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
238 //===----------------------------------------------------------------------===//
239 
240 /// Convert APF to an integer, if possible.
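/// For example (illustrative values only): 42.0 converts exactly and yields
/// IntVal == 42, while 3.5 is rejected because the round-toward-zero
/// conversion is inexact, and values outside the signed 64-bit range are
/// rejected as well.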
241 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
242   bool isExact = false;
243   // See if we can convert this to an int64_t
244   uint64_t UIntVal;
245   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
246                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
247       !isExact)
248     return false;
249   IntVal = UIntVal;
250   return true;
251 }
252 
253 /// If the loop has a floating-point induction variable, insert a
254 /// corresponding integer induction variable if possible.
255 /// For example,
256 /// for(double i = 0; i < 10000; ++i)
257 ///   bar(i)
258 /// is converted into
259 /// for(int i = 0; i < 10000; ++i)
260 ///   bar((double)i);
261 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
262   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
263   unsigned BackEdge     = IncomingEdge^1;
264 
265   // Check incoming value.
266   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
267 
268   int64_t InitValue;
269   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
270     return false;
271 
272   // Check the IV increment. Reject this PN if the increment operation is not
273   // an add or if the increment value cannot be represented by an integer.
274   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
275   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
276 
277   // If this is not an add of the PHI with a constantfp, or if the constant fp
278   // is not an integer, bail out.
279   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
280   int64_t IncValue;
281   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
282       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
283     return false;
284 
285   // Check Incr uses. One user is PN and the other user is an exit condition
286   // used by the conditional terminator.
287   Value::user_iterator IncrUse = Incr->user_begin();
288   Instruction *U1 = cast<Instruction>(*IncrUse++);
289   if (IncrUse == Incr->user_end()) return false;
290   Instruction *U2 = cast<Instruction>(*IncrUse++);
291   if (IncrUse != Incr->user_end()) return false;
292 
293   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
294   // only used by a branch, we can't transform it.
295   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
296   if (!Compare)
297     Compare = dyn_cast<FCmpInst>(U2);
298   if (!Compare || !Compare->hasOneUse() ||
299       !isa<BranchInst>(Compare->user_back()))
300     return false;
301 
302   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
303 
304   // We need to verify that the branch actually controls the iteration count
305   // of the loop.  If not, the new IV can overflow and no one will notice.
306   // The branch block must be in the loop and one of the successors must be out
307   // of the loop.
308   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
309   if (!L->contains(TheBr->getParent()) ||
310       (L->contains(TheBr->getSuccessor(0)) &&
311        L->contains(TheBr->getSuccessor(1))))
312     return false;
313 
314   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
315   // transform it.
316   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
317   int64_t ExitValue;
318   if (ExitValueVal == nullptr ||
319       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
320     return false;
321 
322   // Find new predicate for integer comparison.
323   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
324   switch (Compare->getPredicate()) {
325   default: return false;  // Unknown comparison.
326   case CmpInst::FCMP_OEQ:
327   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
328   case CmpInst::FCMP_ONE:
329   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
330   case CmpInst::FCMP_OGT:
331   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
332   case CmpInst::FCMP_OGE:
333   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
334   case CmpInst::FCMP_OLT:
335   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
336   case CmpInst::FCMP_OLE:
337   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
338   }
339 
340   // We convert the floating point induction variable to a signed i32 value if
341   // we can.  This is only safe if the comparison will not overflow in a way
342   // that won't be trapped by the integer equivalent operations.  Check for this
343   // now.
344   // TODO: We could use i64 if it is native and the range requires it.
345 
346   // The start/stride/exit values must all fit in signed i32.
347   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
348     return false;
349 
350   // If not actually striding (add x, 0.0), avoid touching the code.
351   if (IncValue == 0)
352     return false;
353 
354   // Positive and negative strides have different safety conditions.
355   if (IncValue > 0) {
356     // If we have a positive stride, we require the init to be less than the
357     // exit value.
358     if (InitValue >= ExitValue)
359       return false;
360 
361     uint32_t Range = uint32_t(ExitValue-InitValue);
362     // Check for infinite loop, either:
363     // while (i <= Exit) or until (i > Exit)
364     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
365       if (++Range == 0) return false;  // Range overflows.
366     }
367 
368     unsigned Leftover = Range % uint32_t(IncValue);
369 
370     // If this is an equality comparison, we require that the strided value
371     // exactly land on the exit value, otherwise the IV condition will wrap
372     // around and do things the fp IV wouldn't.
373     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
374         Leftover != 0)
375       return false;
376 
377     // If the stride would wrap around the i32 before exiting, we can't
378     // transform the IV.
379     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
380       return false;
381   } else {
382     // If we have a negative stride, we require the init to be greater than the
383     // exit value.
384     if (InitValue <= ExitValue)
385       return false;
386 
387     uint32_t Range = uint32_t(InitValue-ExitValue);
388     // Check for infinite loop, either:
389     // while (i >= Exit) or until (i < Exit)
390     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
391       if (++Range == 0) return false;  // Range overflows.
392     }
393 
394     unsigned Leftover = Range % uint32_t(-IncValue);
395 
396     // If this is an equality comparison, we require that the strided value
397     // exactly land on the exit value, otherwise the IV condition will wrap
398     // around and do things the fp IV wouldn't.
399     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
400         Leftover != 0)
401       return false;
402 
403     // If the stride would wrap around the i32 before exiting, we can't
404     // transform the IV.
405     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
406       return false;
407   }
408 
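  // Worked example (illustrative values only): InitValue = 0, IncValue = 3,
  // ExitValue = 10 with an equality predicate gives Range = 10 and
  // Leftover = 10 % 3 = 1, so we bailed out above: an integer IV stepping
  // 0, 3, 6, 9, 12, ... never compares equal to 10.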
409   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
410 
411   // Insert new integer induction variable.
412   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
413   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
414                       PN->getIncomingBlock(IncomingEdge));
415 
416   Value *NewAdd =
417     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
418                               Incr->getName()+".int", Incr);
419   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
420 
421   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
422                                       ConstantInt::get(Int32Ty, ExitValue),
423                                       Compare->getName());
424 
425   // In the following deletions, PN may become dead and may be deleted.
426   // Use a WeakTrackingVH to observe whether this happens.
427   WeakTrackingVH WeakPH = PN;
428 
429   // Delete the old floating point exit comparison.  The branch starts using the
430   // new comparison.
431   NewCompare->takeName(Compare);
432   Compare->replaceAllUsesWith(NewCompare);
433   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
434 
435   // Delete the old floating point increment.
436   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
437   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
438 
439   // If the FP induction variable still has uses, this is because something else
440   // in the loop uses its value.  In order to canonicalize the induction
441   // variable, we chose to eliminate the IV and rewrite it in terms of an
442   // int->fp cast.
443   //
444   // We give preference to sitofp over uitofp because it is faster on most
445   // platforms.
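  // Sketch of the rewrite (names illustrative): remaining uses of the FP phi
  // are redirected to
  //   %indvar.conv = sitofp i32 %iv.int to double
  // inserted at the first insertion point of the loop header.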
446   if (WeakPH) {
447     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
448                                  &*PN->getParent()->getFirstInsertionPt());
449     PN->replaceAllUsesWith(Conv);
450     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
451   }
452   return true;
453 }
454 
455 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
456   // First step.  Check to see if there are any floating-point recurrences.
457   // If there are, change them into integer recurrences, permitting analysis by
458   // the SCEV routines.
459   BasicBlock *Header = L->getHeader();
460 
461   SmallVector<WeakTrackingVH, 8> PHIs;
462   for (PHINode &PN : Header->phis())
463     PHIs.push_back(&PN);
464 
465   bool Changed = false;
466   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
467     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
468       Changed |= handleFloatingPointIV(L, PN);
469 
470   // If the loop previously had a floating-point IV, ScalarEvolution
471   // may not have been able to compute a trip count. Now that we've done some
472   // rewriting, the trip count may be computable.
473   if (Changed)
474     SE->forgetLoop(L);
475   return Changed;
476 }
477 
478 //===---------------------------------------------------------------------===//
479 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
480 // they will exit at the first iteration.
481 //===---------------------------------------------------------------------===//
482 
483 /// Check to see if this loop has loop invariant conditions which lead to loop
484 /// exits. If so, we know that if the exit path is taken, it is at the first
485 /// loop iteration. This lets us predict exit values of PHI nodes that live in
486 /// loop header.
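/// As an illustrative sketch: in
///   'for (i = 0; ; ++i) { if (c) break; ... }'
/// where 'c' is loop-invariant, an LCSSA phi in the exit block that receives
/// the header phi 'i' can instead receive i's preheader value (0), because
/// the exit, if taken at all, is taken on the first iteration.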
487 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
488   // Verify the input to the pass is already in LCSSA form.
489   assert(L->isLCSSAForm(*DT));
490 
491   SmallVector<BasicBlock *, 8> ExitBlocks;
492   L->getUniqueExitBlocks(ExitBlocks);
493 
494   bool MadeAnyChanges = false;
495   for (auto *ExitBB : ExitBlocks) {
496     // If there are no more PHI nodes in this exit block, then no more
497     // values defined inside the loop are used on this path.
498     for (PHINode &PN : ExitBB->phis()) {
499       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
500            IncomingValIdx != E; ++IncomingValIdx) {
501         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
502 
503         // Can we prove that the exit must run on the first iteration if it
504         // runs at all?  (i.e. early exits are fine for our purposes, but
505         // traces which lead to this exit being taken on the 2nd iteration
506         // aren't.)  Note that this is about whether the exit branch is
507         // executed, not about whether it is taken.
508         if (!L->getLoopLatch() ||
509             !DT->dominates(IncomingBB, L->getLoopLatch()))
510           continue;
511 
512         // Get condition that leads to the exit path.
513         auto *TermInst = IncomingBB->getTerminator();
514 
515         Value *Cond = nullptr;
516         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
517           // Must be a conditional branch, otherwise the block
518           // should not be in the loop.
519           Cond = BI->getCondition();
520         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
521           Cond = SI->getCondition();
522         else
523           continue;
524 
525         if (!L->isLoopInvariant(Cond))
526           continue;
527 
528         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
529 
530         // Only deal with PHIs in the loop header.
531         if (!ExitVal || ExitVal->getParent() != L->getHeader())
532           continue;
533 
534         // If ExitVal is a PHI on the loop header, then we know its
535         // value along this exit because the exit can only be taken
536         // on the first iteration.
537         auto *LoopPreheader = L->getLoopPreheader();
538         assert(LoopPreheader && "Invalid loop");
539         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
540         if (PreheaderIdx != -1) {
541           assert(ExitVal->getParent() == L->getHeader() &&
542                  "ExitVal must be in loop header");
543           MadeAnyChanges = true;
544           PN.setIncomingValue(IncomingValIdx,
545                               ExitVal->getIncomingValue(PreheaderIdx));
546         }
547       }
548     }
549   }
550   return MadeAnyChanges;
551 }
552 
553 //===----------------------------------------------------------------------===//
554 //  IV Widening - Extend the width of an IV to cover its widest uses.
555 //===----------------------------------------------------------------------===//
556 
557 namespace {
558 
559 // Collect information about induction variables that are used by sign/zero
560 // extend operations. This information is recorded by CollectExtend and provides
561 // the input to WidenIV.
562 struct WideIVInfo {
563   PHINode *NarrowIV = nullptr;
564 
565   // Widest integer type created by a [sz]ext.
566   Type *WidestNativeType = nullptr;
567 
568   // Was a sext user seen before a zext?
569   bool IsSigned = false;
570 };
571 
572 } // end anonymous namespace
573 
574 /// Update information about the induction variable that is extended by this
575 /// sign or zero extend operation. This is used to determine the final width of
576 /// the IV before actually widening it.
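/// For example (hypothetical, assuming i64 is a legal integer type): if an
/// i32 IV is first seen feeding a 'sext ... to i64', WidestNativeType becomes
/// i64 with IsSigned = true; a later zext user is then ignored because its
/// signedness does not match.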
577 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
578                         const TargetTransformInfo *TTI) {
579   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
580   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
581     return;
582 
583   Type *Ty = Cast->getType();
584   uint64_t Width = SE->getTypeSizeInBits(Ty);
585   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
586     return;
587 
588   // Check that `Cast` actually extends the induction variable (we rely on this
589   // later).  This takes care of cases where `Cast` is extending a truncation of
590   // the narrow induction variable, and thus can end up being narrower than the
591   // "narrow" induction variable.
592   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
593   if (NarrowIVWidth >= Width)
594     return;
595 
596   // Cast is either an sext or zext up to this point.
597   // We should not widen an indvar if arithmetic on the wider indvar is more
598   // expensive than on the narrower indvar. We check only the cost of ADD
599   // because at least an ADD is required to increment the induction variable. We
600   // could compute the cost of all instructions on the induction variable more
601   // comprehensively when necessary.
602   if (TTI &&
603       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
604           TTI->getArithmeticInstrCost(Instruction::Add,
605                                       Cast->getOperand(0)->getType())) {
606     return;
607   }
608 
609   if (!WI.WidestNativeType) {
610     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
611     WI.IsSigned = IsSigned;
612     return;
613   }
614 
615   // We extend the IV to satisfy the sign of its first user, arbitrarily.
616   if (WI.IsSigned != IsSigned)
617     return;
618 
619   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
620     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
621 }
622 
623 namespace {
624 
625 /// Record a link in the Narrow IV def-use chain along with the WideIV that
626 /// computes the same value as the Narrow IV def.  This avoids caching Use*
627 /// pointers.
628 struct NarrowIVDefUse {
629   Instruction *NarrowDef = nullptr;
630   Instruction *NarrowUse = nullptr;
631   Instruction *WideDef = nullptr;
632 
633   // True if the narrow def is never negative.  Tracking this information lets
634   // us use a sign extension instead of a zero extension or vice versa, when
635   // profitable and legal.
636   bool NeverNegative = false;
637 
638   NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
639                  bool NeverNegative)
640       : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
641         NeverNegative(NeverNegative) {}
642 };
643 
644 /// The goal of this transform is to remove sign and zero extends without
645 /// creating any new induction variables. To do this, it creates a new phi of
646 /// the wider type and redirects all users, either removing extends or inserting
647 /// truncs whenever we stop propagating the type.
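/// A minimal sketch of the intended effect (illustrative IR, not from a test):
///   %iv     = phi i32 [ 0, %ph ], [ %iv.next, %loop ]
///   %iv.ext = sext i32 %iv to i64
///   %gep    = getelementptr i64, i64* %p, i64 %iv.ext
/// becomes a single i64 recurrence feeding the gep directly, with no sext:
///   %iv.wide = phi i64 [ 0, %ph ], [ %iv.wide.next, %loop ]
///   %gep     = getelementptr i64, i64* %p, i64 %iv.wide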
648 class WidenIV {
649   // Parameters
650   PHINode *OrigPhi;
651   Type *WideType;
652 
653   // Context
654   LoopInfo        *LI;
655   Loop            *L;
656   ScalarEvolution *SE;
657   DominatorTree   *DT;
658 
659   // Does the module have any calls to the llvm.experimental.guard intrinsic
660   // at all? If not we can avoid scanning instructions looking for guards.
661   bool HasGuards;
662 
663   // Result
664   PHINode *WidePhi = nullptr;
665   Instruction *WideInc = nullptr;
666   const SCEV *WideIncExpr = nullptr;
667   SmallVectorImpl<WeakTrackingVH> &DeadInsts;
668 
669   SmallPtrSet<Instruction *,16> Widened;
670   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
671 
672   enum ExtendKind { ZeroExtended, SignExtended, Unknown };
673 
674   // A map tracking the kind of extension used to widen each narrow IV
675   // and narrow IV user.
676   // Key: pointer to a narrow IV or IV user.
677   // Value: the kind of extension used to widen this Instruction.
678   DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
679 
680   using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
681 
682   // A map with control-dependent ranges for post increment IV uses. The key is
683   // a pair of an IV def and a use of this def denoting the context. The value is
684   // a ConstantRange representing possible values of the def at the given
685   // context.
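  // Illustrative example: for a use dominated by a guard such as
  // 'if (%narrow.def u< 16)', calculatePostIncRange may record the range
  // [0, 16) for that (def, use) pair, which later lets pushNarrowIVUsers
  // treat the use as never-negative.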
686   DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
687 
688   Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
689                                               Instruction *UseI) {
690     DefUserPair Key(Def, UseI);
691     auto It = PostIncRangeInfos.find(Key);
692     return It == PostIncRangeInfos.end()
693                ? Optional<ConstantRange>(None)
694                : Optional<ConstantRange>(It->second);
695   }
696 
697   void calculatePostIncRanges(PHINode *OrigPhi);
698   void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
699 
700   void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
701     DefUserPair Key(Def, UseI);
702     auto It = PostIncRangeInfos.find(Key);
703     if (It == PostIncRangeInfos.end())
704       PostIncRangeInfos.insert({Key, R});
705     else
706       It->second = R.intersectWith(It->second);
707   }
708 
709 public:
710   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
711           DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
712           bool HasGuards)
713       : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
714         L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
715         HasGuards(HasGuards), DeadInsts(DI) {
716     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
717     ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
718   }
719 
720   PHINode *createWideIV(SCEVExpander &Rewriter);
721 
722 protected:
723   Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
724                           Instruction *Use);
725 
726   Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
727   Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
728                                      const SCEVAddRecExpr *WideAR);
729   Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
730 
731   ExtendKind getExtendKind(Instruction *I);
732 
733   using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
734 
735   WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
736 
737   WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
738 
739   const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
740                               unsigned OpCode) const;
741 
742   Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
743 
744   bool widenLoopCompare(NarrowIVDefUse DU);
745   bool widenWithVariantUse(NarrowIVDefUse DU);
746 
747   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
748 };
749 
750 } // end anonymous namespace
751 
752 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
753                                  bool IsSigned, Instruction *Use) {
754   // Set the debug location and conservative insertion point.
755   IRBuilder<> Builder(Use);
756   // Hoist the insertion point into loop preheaders as far as possible.
757   for (const Loop *L = LI->getLoopFor(Use->getParent());
758        L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
759        L = L->getParentLoop())
760     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
761 
762   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
763                     Builder.CreateZExt(NarrowOper, WideType);
764 }
765 
766 /// Instantiate a wide operation to replace a narrow operation. This only needs
767 /// to handle operations that can evaluate to SCEVAddRec. It can safely return
768 /// nullptr for any operation we decide not to clone.
769 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
770                                   const SCEVAddRecExpr *WideAR) {
771   unsigned Opcode = DU.NarrowUse->getOpcode();
772   switch (Opcode) {
773   default:
774     return nullptr;
775   case Instruction::Add:
776   case Instruction::Mul:
777   case Instruction::UDiv:
778   case Instruction::Sub:
779     return cloneArithmeticIVUser(DU, WideAR);
780 
781   case Instruction::And:
782   case Instruction::Or:
783   case Instruction::Xor:
784   case Instruction::Shl:
785   case Instruction::LShr:
786   case Instruction::AShr:
787     return cloneBitwiseIVUser(DU);
788   }
789 }
790 
791 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
792   Instruction *NarrowUse = DU.NarrowUse;
793   Instruction *NarrowDef = DU.NarrowDef;
794   Instruction *WideDef = DU.WideDef;
795 
796   LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
797 
798   // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
799   // about the narrow operand yet so must insert a [sz]ext. It is probably loop
800   // invariant and will be folded or hoisted. If it actually comes from a
801   // widened IV, it should be removed during a future call to widenIVUse.
802   bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
803   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
804                    ? WideDef
805                    : createExtendInst(NarrowUse->getOperand(0), WideType,
806                                       IsSigned, NarrowUse);
807   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
808                    ? WideDef
809                    : createExtendInst(NarrowUse->getOperand(1), WideType,
810                                       IsSigned, NarrowUse);
811 
812   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
813   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
814                                         NarrowBO->getName());
815   IRBuilder<> Builder(NarrowUse);
816   Builder.Insert(WideBO);
817   WideBO->copyIRFlags(NarrowBO);
818   return WideBO;
819 }
820 
821 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
822                                             const SCEVAddRecExpr *WideAR) {
823   Instruction *NarrowUse = DU.NarrowUse;
824   Instruction *NarrowDef = DU.NarrowDef;
825   Instruction *WideDef = DU.WideDef;
826 
827   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
828 
829   unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
830 
831   // We're trying to find X such that
832   //
833   //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
834   //
835   // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
836   // and check using SCEV if any of them are correct.
837 
838   // Returns true if extending NonIVNarrowDef according to `SignExt` is a
839   // correct solution to X.
840   auto GuessNonIVOperand = [&](bool SignExt) {
841     const SCEV *WideLHS;
842     const SCEV *WideRHS;
843 
844     auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
845       if (SignExt)
846         return SE->getSignExtendExpr(S, Ty);
847       return SE->getZeroExtendExpr(S, Ty);
848     };
849 
850     if (IVOpIdx == 0) {
851       WideLHS = SE->getSCEV(WideDef);
852       const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
853       WideRHS = GetExtend(NarrowRHS, WideType);
854     } else {
855       const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
856       WideLHS = GetExtend(NarrowLHS, WideType);
857       WideRHS = SE->getSCEV(WideDef);
858     }
859 
860     // WideUse is "WideDef `op.wide` X" as described in the comment.
861     const SCEV *WideUse = nullptr;
862 
863     switch (NarrowUse->getOpcode()) {
864     default:
865       llvm_unreachable("No other possibility!");
866 
867     case Instruction::Add:
868       WideUse = SE->getAddExpr(WideLHS, WideRHS);
869       break;
870 
871     case Instruction::Mul:
872       WideUse = SE->getMulExpr(WideLHS, WideRHS);
873       break;
874 
875     case Instruction::UDiv:
876       WideUse = SE->getUDivExpr(WideLHS, WideRHS);
877       break;
878 
879     case Instruction::Sub:
880       WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
881       break;
882     }
883 
884     return WideUse == WideAR;
885   };
886 
887   bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
888   if (!GuessNonIVOperand(SignExtend)) {
889     SignExtend = !SignExtend;
890     if (!GuessNonIVOperand(SignExtend))
891       return nullptr;
892   }
893 
894   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
895                    ? WideDef
896                    : createExtendInst(NarrowUse->getOperand(0), WideType,
897                                       SignExtend, NarrowUse);
898   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
899                    ? WideDef
900                    : createExtendInst(NarrowUse->getOperand(1), WideType,
901                                       SignExtend, NarrowUse);
902 
903   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
904   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
905                                         NarrowBO->getName());
906 
907   IRBuilder<> Builder(NarrowUse);
908   Builder.Insert(WideBO);
909   WideBO->copyIRFlags(NarrowBO);
910   return WideBO;
911 }
912 
913 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
914   auto It = ExtendKindMap.find(I);
915   assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
916   return It->second;
917 }
918 
919 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
920                                      unsigned OpCode) const {
921   if (OpCode == Instruction::Add)
922     return SE->getAddExpr(LHS, RHS);
923   if (OpCode == Instruction::Sub)
924     return SE->getMinusSCEV(LHS, RHS);
925   if (OpCode == Instruction::Mul)
926     return SE->getMulExpr(LHS, RHS);
927 
928   llvm_unreachable("Unsupported opcode.");
929 }
930 
931 /// No-wrap operations can transfer sign extension of their result to their
932 /// operands. Generate the SCEV value for the widened operation without
933 /// actually modifying the IR yet. If the expression after extending the
934 /// operands is an AddRec for this loop, return the AddRec and the kind of
935 /// extension used.
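/// The underlying identity (stated informally for the signed case) is:
///   sext(X op Y) == sext(X) op sext(Y)  when 'op' is an add/sub/mul with nsw,
/// and likewise zext for nuw, which is what allows pushing the extension from
/// the result onto the operands.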
936 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
937   // Handle the common case of add<nsw/nuw>
938   const unsigned OpCode = DU.NarrowUse->getOpcode();
939   // Only Add/Sub/Mul instructions supported yet.
940   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
941       OpCode != Instruction::Mul)
942     return {nullptr, Unknown};
943 
944   // One operand (NarrowDef) has already been extended to WideDef. Now determine
945   // if extending the other will lead to a recurrence.
946   const unsigned ExtendOperIdx =
947       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
948   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
949 
950   const SCEV *ExtendOperExpr = nullptr;
951   const OverflowingBinaryOperator *OBO =
952     cast<OverflowingBinaryOperator>(DU.NarrowUse);
953   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
954   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
955     ExtendOperExpr = SE->getSignExtendExpr(
956       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
957   else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
958     ExtendOperExpr = SE->getZeroExtendExpr(
959       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
960   else
961     return {nullptr, Unknown};
962 
963   // When creating this SCEV expr, don't apply the current operation's NSW or NUW
964   // flags. This instruction may be guarded by control flow that the no-wrap
965   // behavior depends on. Non-control-equivalent instructions can be mapped to
966   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
967   // semantics to those operations.
968   const SCEV *lhs = SE->getSCEV(DU.WideDef);
969   const SCEV *rhs = ExtendOperExpr;
970 
971   // Let's swap operands to the initial order for the case of non-commutative
972   // operations, like SUB. See PR21014.
973   if (ExtendOperIdx == 0)
974     std::swap(lhs, rhs);
975   const SCEVAddRecExpr *AddRec =
976       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
977 
978   if (!AddRec || AddRec->getLoop() != L)
979     return {nullptr, Unknown};
980 
981   return {AddRec, ExtKind};
982 }
983 
984 /// Is this instruction potentially interesting for further simplification after
985 /// widening its type? In other words, can the extend be safely hoisted out of
986 /// the loop, with SCEV reducing the value to a recurrence on the same loop? If
987 /// so, return the extended recurrence and the kind of extension used. Otherwise
988 /// return {nullptr, Unknown}.
989 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
990   if (!SE->isSCEVable(DU.NarrowUse->getType()))
991     return {nullptr, Unknown};
992 
993   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
994   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
995       SE->getTypeSizeInBits(WideType)) {
996     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
997     // index. So don't follow this use.
998     return {nullptr, Unknown};
999   }
1000 
1001   const SCEV *WideExpr;
1002   ExtendKind ExtKind;
1003   if (DU.NeverNegative) {
1004     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1005     if (isa<SCEVAddRecExpr>(WideExpr))
1006       ExtKind = SignExtended;
1007     else {
1008       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1009       ExtKind = ZeroExtended;
1010     }
1011   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1012     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1013     ExtKind = SignExtended;
1014   } else {
1015     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1016     ExtKind = ZeroExtended;
1017   }
1018   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1019   if (!AddRec || AddRec->getLoop() != L)
1020     return {nullptr, Unknown};
1021   return {AddRec, ExtKind};
1022 }
1023 
1024 /// This IV user cannot be widened. Replace this use of the original narrow IV
1025 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
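/// For instance (illustrative names): a use '%u = urem i32 %narrow.iv, %x'
/// that cannot be widened is rewritten as
///   %t = trunc i64 %wide.iv to i32
///   %u = urem i32 %t, %x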
1026 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1027   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1028   if (!InsertPt)
1029     return;
1030   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1031                     << *DU.NarrowUse << "\n");
1032   IRBuilder<> Builder(InsertPt);
1033   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1034   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1035 }
1036 
1037 /// If the narrow use is a compare instruction, then widen the compare
1038 /// (and possibly the other operand). The extend operation is hoisted into the
1039 /// loop preheader as far as possible.
1040 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1041   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1042   if (!Cmp)
1043     return false;
1044 
1045   // We can legally widen the comparison in the following two cases:
1046   //
1047   //  - The signedness of the IV extension and comparison match
1048   //
1049   //  - The narrow IV is always positive (and thus its sign extension is equal
1050   //    to its zero extension).  For instance, let's say we're zero extending
1051   //    %narrow for the following use
1052   //
1053   //      icmp slt i32 %narrow, %val   ... (A)
1054   //
1055   //    and %narrow is always positive.  Then
1056   //
1057   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1058   //          == icmp slt i32 zext(%narrow), sext(%val)
1059   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1060   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1061     return false;
1062 
1063   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1064   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1065   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1066   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1067 
1068   // Widen the compare instruction.
1069   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1070   if (!InsertPt)
1071     return false;
1072   IRBuilder<> Builder(InsertPt);
1073   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1074 
1075   // Widen the other operand of the compare, if necessary.
1076   if (CastWidth < IVWidth) {
1077     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1078     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1079   }
1080   return true;
1081 }
1082 
1083 // widenIVUse avoids generating a trunc by evaluating the use as an AddRec; this
1084 // will not work when:
1085 //    1) SCEV traces back to an instruction inside the loop that SCEV cannot
1086 // expand, e.g. add %indvar, (load %addr)
1087 //    2) SCEV finds a loop variant, e.g. add %indvar, %loopvariant
1088 // When SCEV fails to avoid the trunc, we can still try an instruction-combining
1089 // approach to prove that the trunc is not required. This can be further
1090 // extended with other instruction-combining checks, but for now we handle the
1091 // following case (sub can be "add" or "mul", "nsw + sext" can be "nuw + zext"):
1092 //
1093 // Src:
1094 //   %c = sub nsw %b, %indvar
1095 //   %d = sext %c to i64
1096 // Dst:
1097 //   %indvar.ext1 = sext %indvar to i64
1098 //   %m = sext %b to i64
1099 //   %d = sub nsw i64 %m, %indvar.ext1
1100 // Therefore, as long as the result of the add/sub/mul is extended to the wide
1101 // type, no trunc is required regardless of how %b is generated. This pattern is
1102 // common when calculating addresses on 64-bit architectures.
1103 bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) {
1104   Instruction *NarrowUse = DU.NarrowUse;
1105   Instruction *NarrowDef = DU.NarrowDef;
1106   Instruction *WideDef = DU.WideDef;
1107 
1108   // Handle the common case of add<nsw/nuw>
1109   const unsigned OpCode = NarrowUse->getOpcode();
1110   // Only Add/Sub/Mul instructions are supported.
1111   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1112       OpCode != Instruction::Mul)
1113     return false;
1114 
1115   // The operand that is not defined by NarrowDef of DU. Let's call it the
1116   // other operand.
1117   assert((NarrowUse->getOperand(0) == NarrowDef ||
1118           NarrowUse->getOperand(1) == NarrowDef) &&
1119          "bad DU");
1120 
1121   const OverflowingBinaryOperator *OBO =
1122     cast<OverflowingBinaryOperator>(NarrowUse);
1123   ExtendKind ExtKind = getExtendKind(NarrowDef);
1124   bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap();
1125   bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap();
1126   if (!CanSignExtend && !CanZeroExtend)
1127     return false;
1128 
1129   // Verify that the defining operand is an AddRec.
1130   const SCEV *Op1 = SE->getSCEV(WideDef);
1131   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1132   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1133     return false;
1134 
1135   if (ExtKind == SignExtended) {
1136     for (Use &U : NarrowUse->uses()) {
1137       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1138       if (!User || User->getType() != WideType)
1139         return false;
1140     }
1141   } else { // ExtKind == ZeroExtended
1142     for (Use &U : NarrowUse->uses()) {
1143       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1144       if (!User || User->getType() != WideType)
1145         return false;
1146     }
1147   }
1148 
1149   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1150 
1151   // Generating a widening use instruction.
1152   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1153                    ? WideDef
1154                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1155                                       ExtKind, NarrowUse);
1156   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1157                    ? WideDef
1158                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1159                                       ExtKind, NarrowUse);
1160 
1161   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1162   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1163                                         NarrowBO->getName());
1164   IRBuilder<> Builder(NarrowUse);
1165   Builder.Insert(WideBO);
1166   WideBO->copyIRFlags(NarrowBO);
1167   ExtendKindMap[NarrowUse] = ExtKind;
1168 
1169   for (Use &U : NarrowUse->uses()) {
1170     Instruction *User = nullptr;
1171     if (ExtKind == SignExtended)
1172       User = cast<SExtInst>(U.getUser());
1173     else
1174       User = cast<ZExtInst>(U.getUser());
1175     assert(User->getType() == WideType && "Checked before!");
1176     LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1177                       << *WideBO << "\n");
1178     ++NumElimExt;
1179     User->replaceAllUsesWith(WideBO);
1180     DeadInsts.emplace_back(User);
1181   }
1182   return true;
1183 }
1184 
1185 /// Determine whether an individual user of the narrow IV can be widened. If so,
1186 /// return the wide clone of the user.
1187 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1188   assert(ExtendKindMap.count(DU.NarrowDef) &&
1189          "Should already know the kind of extension used to widen NarrowDef");
1190 
1191   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1192   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1193     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1194       // For LCSSA phis, sink the truncate outside the loop.
1195       // After SimplifyCFG most loop exit targets have a single predecessor.
1196       // Otherwise fall back to a truncate within the loop.
1197       if (UsePhi->getNumOperands() != 1)
1198         truncateIVUse(DU, DT, LI);
1199       else {
1200         // Widening the PHI requires us to insert a trunc.  The logical place
1201         // for this trunc is in the same BB as the PHI.  This is not possible if
1202         // the BB is terminated by a catchswitch.
1203         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1204           return nullptr;
1205 
1206         PHINode *WidePhi =
1207           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1208                           UsePhi);
1209         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1210         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1211         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1212         UsePhi->replaceAllUsesWith(Trunc);
1213         DeadInsts.emplace_back(UsePhi);
1214         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1215                           << *WidePhi << "\n");
1216       }
1217       return nullptr;
1218     }
1219   }
1220 
1221   // This narrow use can be widened by a sext if it's non-negative or its narrow
1222   // def was widened by a sext. Same for zext.
1223   auto canWidenBySExt = [&]() {
1224     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1225   };
1226   auto canWidenByZExt = [&]() {
1227     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1228   };
1229 
1230   // Our raison d'etre! Eliminate sign and zero extension.
1231   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1232       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1233     Value *NewDef = DU.WideDef;
1234     if (DU.NarrowUse->getType() != WideType) {
1235       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1236       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1237       if (CastWidth < IVWidth) {
1238         // The cast isn't as wide as the IV, so insert a Trunc.
1239         IRBuilder<> Builder(DU.NarrowUse);
1240         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1241       }
1242       else {
1243         // A wider extend was hidden behind a narrower one. This may induce
1244         // another round of IV widening in which the intermediate IV becomes
1245         // dead. It should be very rare.
1246         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1247                           << " not wide enough to subsume " << *DU.NarrowUse
1248                           << "\n");
1249         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1250         NewDef = DU.NarrowUse;
1251       }
1252     }
1253     if (NewDef != DU.NarrowUse) {
1254       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1255                         << " replaced by " << *DU.WideDef << "\n");
1256       ++NumElimExt;
1257       DU.NarrowUse->replaceAllUsesWith(NewDef);
1258       DeadInsts.emplace_back(DU.NarrowUse);
1259     }
1260     // Now that the extend is gone, we want to expose its uses for potential
1261     // further simplification. We don't need to directly inform SimplifyIVUsers
1262     // of the new users, because their parent IV will be processed later as a
1263     // new loop phi. If we preserved IVUsers analysis, we would also want to
1264     // push the uses of WideDef here.
1265 
1266     // No further widening is needed. The deceased [sz]ext had done it for us.
1267     return nullptr;
1268   }
1269 
1270   // Does this user itself evaluate to a recurrence after widening?
1271   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1272   if (!WideAddRec.first)
1273     WideAddRec = getWideRecurrence(DU);
1274 
1275   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1276   if (!WideAddRec.first) {
1277     // If use is a loop condition, try to promote the condition instead of
1278     // truncating the IV first.
1279     if (widenLoopCompare(DU))
1280       return nullptr;
1281 
1282     // We are here about to generate a truncate instruction that may hurt
1283     // performance because the scalar evolution expression computed earlier
1284     // in WideAddRec.first does not indicate a polynomial induction expression.
1285     // In that case, look at the operands of the use instruction to determine
1286     // if we can still widen the use instead of truncating its operand.
1287     if (widenWithVariantUse(DU))
1288       return nullptr;
1289 
1290     // This user does not evaluate to a recurrence after widening, so don't
1291     // follow it. Instead insert a Trunc to kill off the original use,
1292     // eventually isolating the original narrow IV so it can be removed.
1293     truncateIVUse(DU, DT, LI);
1294     return nullptr;
1295   }
1296   // Assume block terminators cannot evaluate to a recurrence. We can't
1297   // insert a Trunc after a terminator if there happens to be a critical edge.
1298   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1299          "SCEV is not expected to evaluate a block terminator");
1300 
1301   // Reuse the IV increment that SCEVExpander created as long as it dominates
1302   // NarrowUse.
1303   Instruction *WideUse = nullptr;
1304   if (WideAddRec.first == WideIncExpr &&
1305       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1306     WideUse = WideInc;
1307   else {
1308     WideUse = cloneIVUser(DU, WideAddRec.first);
1309     if (!WideUse)
1310       return nullptr;
1311   }
1312   // Evaluation of WideAddRec ensured that the narrow expression could be
1313   // extended outside the loop without overflow. This suggests that the wide use
1314   // evaluates to the same expression as the extended narrow use, but doesn't
1315   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1316   // where it fails, we simply throw away the newly created wide use.
1317   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1318     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1319                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1320                       << "\n");
1321     DeadInsts.emplace_back(WideUse);
1322     return nullptr;
1323   }
1324 
1325   // If we reached this point then we are going to replace
1326   // DU.NarrowUse with WideUse. Reattach DbgValue then.
1327   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1328 
1329   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1330   // Returning WideUse pushes it on the worklist.
1331   return WideUse;
1332 }
1333 
1334 /// Add eligible users of NarrowDef to NarrowIVUsers.
1335 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1336   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1337   bool NonNegativeDef =
1338       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1339                            SE->getZero(NarrowSCEV->getType()));
1340   for (User *U : NarrowDef->users()) {
1341     Instruction *NarrowUser = cast<Instruction>(U);
1342 
1343     // Handle data flow merges and bizarre phi cycles.
1344     if (!Widened.insert(NarrowUser).second)
1345       continue;
1346 
1347     bool NonNegativeUse = false;
1348     if (!NonNegativeDef) {
1349       // We might have control-dependent range information for this context.
1350       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1351         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1352     }
1353 
1354     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1355                                NonNegativeDef || NonNegativeUse);
1356   }
1357 }
1358 
1359 /// Process a single induction variable. First use the SCEVExpander to create a
1360 /// wide induction variable that evaluates to the same recurrence as the
1361 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1362 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1363 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1364 ///
1365 /// It would be simpler to delete uses as they are processed, but we must avoid
1366 /// invalidating SCEV expressions.
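     ///
     /// As a rough illustration (the names here are made up), a narrow IV
     ///   %iv = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
     /// whose interesting users sign- or zero-extend it to i64 is replaced by
     ///   %iv.wide = phi i64 ...
     /// and the extended uses are rewritten in terms of %iv.wide, leaving the
     /// original extends and the narrow IV dead.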
1367 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1368   // Bail if we disallowed widening.
1369   if (!AllowIVWidening)
1370     return nullptr;
1371 
1372   // Is this phi an induction variable?
1373   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1374   if (!AddRec)
1375     return nullptr;
1376 
1377   // Widen the induction variable expression.
1378   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1379                                ? SE->getSignExtendExpr(AddRec, WideType)
1380                                : SE->getZeroExtendExpr(AddRec, WideType);
1381 
1382   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1383          "Expect the new IV expression to preserve its type");
1384 
1385   // Can the IV be extended outside the loop without overflow?
1386   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1387   if (!AddRec || AddRec->getLoop() != L)
1388     return nullptr;
1389 
1390   // An AddRec must have loop-invariant operands. Since this AddRec is
1391   // materialized by a loop header phi, the expression cannot have any post-loop
1392   // operands, so they must dominate the loop header.
1393   assert(
1394       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1395       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1396       "Loop header phi recurrence inputs do not dominate the loop");
1397 
1398   // Iterate over IV uses (including transitive ones) looking for IV increments
1399   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1400   // the increment, calculate control-dependent range information based on
1401   // dominating conditions inside of the loop (e.g. a range check inside of the
1402   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1403   //
1404   // Control-dependent range information is later used to prove that a narrow
1405   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1406   // this on demand because when pushNarrowIVUsers needs this information some
1407   // of the dominating conditions might be already widened.
1408   if (UsePostIncrementRanges)
1409     calculatePostIncRanges(OrigPhi);
1410 
1411   // The rewriter provides a value for the desired IV expression. This may
1412   // either find an existing phi or materialize a new one. Either way, we
1413   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1414   // of the phi-SCC dominates the loop entry.
1415   Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
1416   Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
1417   // If the expanded value is not a phi node, for example a cast node such as
1418   // bitcast, inttoptr or ptrtoint, just skip it for now.
1419   if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
1420     // If the cast node is an inserted instruction without any users, remove it
1421     // so that the pass does not touch the function, since we cannot widen the
1422     // phi.
1423     if (ExpandInst->hasNUses(0) &&
1424         Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
1425       DeadInsts.emplace_back(ExpandInst);
1426     return nullptr;
1427   }
1428 
1429   // Remembering the WideIV increment generated by SCEVExpander allows
1430   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1431   // employ a general reuse mechanism because the call above is the only call to
1432   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1433   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1434     WideInc =
1435       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1436     WideIncExpr = SE->getSCEV(WideInc);
1437     // Propagate the debug location associated with the original loop increment
1438     // to the new (widened) increment.
1439     auto *OrigInc =
1440       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1441     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1442   }
1443 
1444   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1445   ++NumWidened;
1446 
1447   // Traverse the def-use chain using a worklist starting at the original IV.
1448   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1449 
1450   Widened.insert(OrigPhi);
1451   pushNarrowIVUsers(OrigPhi, WidePhi);
1452 
1453   while (!NarrowIVUsers.empty()) {
1454     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1455 
1456     // Process a def-use edge. This may replace the use, so don't hold a
1457     // use_iterator across it.
1458     Instruction *WideUse = widenIVUse(DU, Rewriter);
1459 
1460     // Follow all def-use edges from the previous narrow use.
1461     if (WideUse)
1462       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1463 
1464     // widenIVUse may have removed the def-use edge.
1465     if (DU.NarrowDef->use_empty())
1466       DeadInsts.emplace_back(DU.NarrowDef);
1467   }
1468 
1469   // Attach any debug information to the new PHI.
1470   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1471 
1472   return WidePhi;
1473 }
1474 
1475 /// Calculates control-dependent range for the given def at the given context
1476 /// by looking at dominating conditions inside of the loop
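     ///
     /// For example (illustrative): if NarrowDef is '%x = add nsw i32 %a, 1'
     /// and a dominating in-loop branch established '%a slt %n', then the
     /// range allowed for %a by that comparison (per SCEV's range for %n),
     /// shifted by 1, constrains %x at NarrowUser.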
1477 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1478                                     Instruction *NarrowUser) {
1479   using namespace llvm::PatternMatch;
1480 
1481   Value *NarrowDefLHS;
1482   const APInt *NarrowDefRHS;
1483   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1484                                  m_APInt(NarrowDefRHS))) ||
1485       !NarrowDefRHS->isNonNegative())
1486     return;
1487 
1488   auto UpdateRangeFromCondition = [&] (Value *Condition,
1489                                        bool TrueDest) {
1490     CmpInst::Predicate Pred;
1491     Value *CmpRHS;
1492     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1493                                  m_Value(CmpRHS))))
1494       return;
1495 
1496     CmpInst::Predicate P =
1497             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1498 
1499     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1500     auto CmpConstrainedLHSRange =
1501             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1502     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1503         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1504 
1505     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1506   };
1507 
1508   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1509     if (!HasGuards)
1510       return;
1511 
1512     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1513                                      Ctx->getParent()->rend())) {
1514       Value *C = nullptr;
1515       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1516         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1517     }
1518   };
1519 
1520   UpdateRangeFromGuards(NarrowUser);
1521 
1522   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1523   // If NarrowUserBB is statically unreachable, asking dominator queries may
1524   // yield surprising results. (e.g. the block may not have a dom tree node)
1525   if (!DT->isReachableFromEntry(NarrowUserBB))
1526     return;
1527 
1528   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1529        L->contains(DTB->getBlock());
1530        DTB = DTB->getIDom()) {
1531     auto *BB = DTB->getBlock();
1532     auto *TI = BB->getTerminator();
1533     UpdateRangeFromGuards(TI);
1534 
1535     auto *BI = dyn_cast<BranchInst>(TI);
1536     if (!BI || !BI->isConditional())
1537       continue;
1538 
1539     auto *TrueSuccessor = BI->getSuccessor(0);
1540     auto *FalseSuccessor = BI->getSuccessor(1);
1541 
1542     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1543       return BBE.isSingleEdge() &&
1544              DT->dominates(BBE, NarrowUser->getParent());
1545     };
1546 
1547     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1548       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1549 
1550     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1551       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1552   }
1553 }
1554 
1555 /// Calculates PostIncRangeInfos map for the given IV
1556 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1557   SmallPtrSet<Instruction *, 16> Visited;
1558   SmallVector<Instruction *, 6> Worklist;
1559   Worklist.push_back(OrigPhi);
1560   Visited.insert(OrigPhi);
1561 
1562   while (!Worklist.empty()) {
1563     Instruction *NarrowDef = Worklist.pop_back_val();
1564 
1565     for (Use &U : NarrowDef->uses()) {
1566       auto *NarrowUser = cast<Instruction>(U.getUser());
1567 
1568       // Don't go looking outside the current loop.
1569       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1570       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1571         continue;
1572 
1573       if (!Visited.insert(NarrowUser).second)
1574         continue;
1575 
1576       Worklist.push_back(NarrowUser);
1577 
1578       calculatePostIncRange(NarrowDef, NarrowUser);
1579     }
1580   }
1581 }
1582 
1583 //===----------------------------------------------------------------------===//
1584 //  Live IV Reduction - Minimize IVs live across the loop.
1585 //===----------------------------------------------------------------------===//
1586 
1587 //===----------------------------------------------------------------------===//
1588 //  Simplification of IV users based on SCEV evaluation.
1589 //===----------------------------------------------------------------------===//
1590 
1591 namespace {
1592 
1593 class IndVarSimplifyVisitor : public IVVisitor {
1594   ScalarEvolution *SE;
1595   const TargetTransformInfo *TTI;
1596   PHINode *IVPhi;
1597 
1598 public:
1599   WideIVInfo WI;
1600 
1601   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1602                         const TargetTransformInfo *TTI,
1603                         const DominatorTree *DTree)
1604     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1605     DT = DTree;
1606     WI.NarrowIV = IVPhi;
1607   }
1608 
1609   // Implement the interface used by simplifyUsersOfIV.
1610   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1611 };
1612 
1613 } // end anonymous namespace
1614 
1615 /// Iteratively perform simplification on a worklist of IV users. Each
1616 /// successive simplification may push more users which may themselves be
1617 /// candidates for simplification.
1618 ///
1619 /// Sign/Zero extend elimination is interleaved with IV simplification.
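     ///
     /// Concretely, each round below runs simplifyUsersOfIV over every header
     /// phi, collecting WideIVInfo for profitable sext/zext users, and then any
     /// IVs that were widened are pushed back onto the worklist for another
     /// round of simplification.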
1620 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1621                                        SCEVExpander &Rewriter,
1622                                        LoopInfo *LI) {
1623   SmallVector<WideIVInfo, 8> WideIVs;
1624 
1625   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1626           Intrinsic::getName(Intrinsic::experimental_guard));
1627   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1628 
1629   SmallVector<PHINode*, 8> LoopPhis;
1630   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1631     LoopPhis.push_back(cast<PHINode>(I));
1632   }
1633   // Each round of simplification iterates through the SimplifyIVUsers worklist
1634   // for all current phis, then determines whether any IVs can be
1635   // widened. Widening adds new phis to LoopPhis, inducing another round of
1636   // simplification on the wide IVs.
1637   bool Changed = false;
1638   while (!LoopPhis.empty()) {
1639     // Evaluate as many IV expressions as possible before widening any IVs. This
1640     // forces SCEV to set no-wrap flags before evaluating sign/zero
1641     // extension. The first time SCEV attempts to normalize sign/zero extension,
1642     // the result becomes final. So for the most predictable results, we delay
1643     // evaluation of sign/zero extension until needed, and avoid running
1644     // other SCEV based analysis prior to simplifyAndExtend.
1645     do {
1646       PHINode *CurrIV = LoopPhis.pop_back_val();
1647 
1648       // Information about sign/zero extensions of CurrIV.
1649       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1650 
1651       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
1652                                    &Visitor);
1653 
1654       if (Visitor.WI.WidestNativeType) {
1655         WideIVs.push_back(Visitor.WI);
1656       }
1657     } while (!LoopPhis.empty());
1658 
1659     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1660       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1661       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1662         Changed = true;
1663         LoopPhis.push_back(WidePhi);
1664       }
1665     }
1666   }
1667   return Changed;
1668 }
1669 
1670 //===----------------------------------------------------------------------===//
1671 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1672 //===----------------------------------------------------------------------===//
1673 
1674 /// Given a Value which is hoped to be part of an add recurrence in the given
1675 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1676 /// that this is less general than SCEV's AddRec checking.
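     ///
     /// For example (illustrative only), given
     ///   %iv.next = add i32 %iv, %step
     /// where %iv is a header phi of L and %step is loop invariant, this
     /// returns %iv.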
1677 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1678   Instruction *IncI = dyn_cast<Instruction>(IncV);
1679   if (!IncI)
1680     return nullptr;
1681 
1682   switch (IncI->getOpcode()) {
1683   case Instruction::Add:
1684   case Instruction::Sub:
1685     break;
1686   case Instruction::GetElementPtr:
1687     // An IV counter must preserve its type.
1688     if (IncI->getNumOperands() == 2)
1689       break;
1690     LLVM_FALLTHROUGH;
1691   default:
1692     return nullptr;
1693   }
1694 
1695   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1696   if (Phi && Phi->getParent() == L->getHeader()) {
1697     if (L->isLoopInvariant(IncI->getOperand(1)))
1698       return Phi;
1699     return nullptr;
1700   }
1701   if (IncI->getOpcode() == Instruction::GetElementPtr)
1702     return nullptr;
1703 
1704   // Allow add/sub to be commuted.
1705   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1706   if (Phi && Phi->getParent() == L->getHeader()) {
1707     if (L->isLoopInvariant(IncI->getOperand(0)))
1708       return Phi;
1709   }
1710   return nullptr;
1711 }
1712 
1713 /// Whether the current loop exit test is based on this value.  Currently this
1714 /// is limited to a direct use in the loop condition.
1715 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
1716   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1717   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1718   // TODO: Allow non-icmp loop test.
1719   if (!ICmp)
1720     return false;
1721 
1722   // TODO: Allow indirect use.
1723   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
1724 }
1725 
1726 /// linearFunctionTestReplace policy. Return true unless we can show that the
1727 /// current exit test is already sufficiently canonical.
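     ///
     /// For illustration only: an exit branch on
     ///   %exitcond = icmp ne i64 %iv.next, %limit
     /// where %iv.next increments a simple counter phi and %limit is loop
     /// invariant is already canonical, so no LFTR is needed.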
1728 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
1729   assert(L->getLoopLatch() && "Must be in simplified form");
1730 
1731   // Avoid converting a constant or loop invariant test back to a runtime
1732   // test.  This is critical for when SCEV's cached ExitCount is less precise
1733   // than the current IR (such as after we've proven a particular exit is
1734   // actually dead and thus the BE count never reaches our ExitCount.)
1735   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1736   if (L->isLoopInvariant(BI->getCondition()))
1737     return false;
1738 
1739   // Do LFTR to simplify the exit condition to an ICMP.
1740   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1741   if (!Cond)
1742     return true;
1743 
1744   // Do LFTR to simplify the exit ICMP to EQ/NE
1745   ICmpInst::Predicate Pred = Cond->getPredicate();
1746   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1747     return true;
1748 
1749   // Look for a loop invariant RHS
1750   Value *LHS = Cond->getOperand(0);
1751   Value *RHS = Cond->getOperand(1);
1752   if (!L->isLoopInvariant(RHS)) {
1753     if (!L->isLoopInvariant(LHS))
1754       return true;
1755     std::swap(LHS, RHS);
1756   }
1757   // Look for a simple IV counter LHS
1758   PHINode *Phi = dyn_cast<PHINode>(LHS);
1759   if (!Phi)
1760     Phi = getLoopPhiForCounter(LHS, L);
1761 
1762   if (!Phi)
1763     return true;
1764 
1765   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1766   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1767   if (Idx < 0)
1768     return true;
1769 
1770   // Do LFTR if the exit condition's IV is *not* a simple counter.
1771   Value *IncV = Phi->getIncomingValue(Idx);
1772   return Phi != getLoopPhiForCounter(IncV, L);
1773 }
1774 
1775 /// Return true if undefined behavior would provably be executed on the path to
1776 /// OnPathTo if Root produced a poison result.  Note that this doesn't say
1777 /// anything about whether OnPathTo is actually executed or whether Root is
1778 /// actually poison.  This can be used to assess whether a new use of Root can
1779 /// be added at a location which is control equivalent with OnPathTo (such as
1780 /// immediately before it) without introducing UB which didn't previously
1781 /// exist.  Note that a false result conveys no information.
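     ///
     /// For example (illustrative): if Root feeds the pointer operand of a
     /// store which dominates OnPathTo, then a poison Root would already be
     /// immediate UB on that path, so adding another use of Root near OnPathTo
     /// introduces no new UB.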
1782 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
1783                                           Instruction *OnPathTo,
1784                                           DominatorTree *DT) {
1785   // Basic approach is to assume Root is poison, propagate poison forward
1786   // through all users we can easily track, and then check whether any of those
1787   // users are provably UB and must execute before our exiting block might
1788   // exit.
1789 
1790   // The set of all recursive users we've visited (which are assumed to all be
1791   // poison because of said visit)
1792   SmallSet<const Value *, 16> KnownPoison;
1793   SmallVector<const Instruction*, 16> Worklist;
1794   Worklist.push_back(Root);
1795   while (!Worklist.empty()) {
1796     const Instruction *I = Worklist.pop_back_val();
1797 
1798     // If we know this must trigger UB on a path leading to our target.
1799     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
1800       return true;
1801 
1802     // If we can't analyze propagation through this instruction, just skip it
1803     // and transitive users.  This is safe, as false is a conservative result.
1804     if (!propagatesPoison(cast<Operator>(I)) && I != Root)
1805       continue;
1806 
1807     if (KnownPoison.insert(I).second)
1808       for (const User *User : I->users())
1809         Worklist.push_back(cast<Instruction>(User));
1810   }
1811 
1812   // Might be non-UB, or might have a path we couldn't prove must execute on
1813   // way to exiting bb.
1814   return false;
1815 }
1816 
1817 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1818 /// down to checking that all operands are constant and listing instructions
1819 /// that may hide undef.
1820 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1821                                unsigned Depth) {
1822   if (isa<Constant>(V))
1823     return !isa<UndefValue>(V);
1824 
1825   if (Depth >= 6)
1826     return false;
1827 
1828   // Conservatively handle non-constant non-instructions. For example, Arguments
1829   // may be undef.
1830   Instruction *I = dyn_cast<Instruction>(V);
1831   if (!I)
1832     return false;
1833 
1834   // Load and return values may be undef.
1835   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1836     return false;
1837 
1838   // Optimistically handle other instructions.
1839   for (Value *Op : I->operands()) {
1840     if (!Visited.insert(Op).second)
1841       continue;
1842     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1843       return false;
1844   }
1845   return true;
1846 }
1847 
1848 /// Return true if the given value is concrete. We must prove that undef can
1849 /// never reach it.
1850 ///
1851 /// TODO: If we decide that this is a good approach to checking for undef, we
1852 /// may factor it into a common location.
1853 static bool hasConcreteDef(Value *V) {
1854   SmallPtrSet<Value*, 8> Visited;
1855   Visited.insert(V);
1856   return hasConcreteDefImpl(V, Visited, 0);
1857 }
1858 
1859 /// Return true if this IV has no uses other than the (soon to be rewritten)
1860 /// loop exit test and its own increment.
1861 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1862   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1863   Value *IncV = Phi->getIncomingValue(LatchIdx);
1864 
1865   for (User *U : Phi->users())
1866     if (U != Cond && U != IncV) return false;
1867 
1868   for (User *U : IncV->users())
1869     if (U != Cond && U != Phi) return false;
1870   return true;
1871 }
1872 
1873 /// Return true if the given phi is a "counter" in L.  A counter is an
1874 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
1875 /// step of 1.  Note that L must have exactly one latch.
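     ///
     /// e.g. (illustrative) '%i = phi i64 [ 0, %ph ], [ %i.next, %latch ]' with
     /// '%i.next = add i64 %i, 1' qualifies; a phi stepping by any other
     /// constant does not.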
1876 static bool isLoopCounter(PHINode* Phi, Loop *L,
1877                           ScalarEvolution *SE) {
1878   assert(Phi->getParent() == L->getHeader());
1879   assert(L->getLoopLatch());
1880 
1881   if (!SE->isSCEVable(Phi->getType()))
1882     return false;
1883 
1884   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1885   if (!AR || AR->getLoop() != L || !AR->isAffine())
1886     return false;
1887 
1888   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1889   if (!Step || !Step->isOne())
1890     return false;
1891 
1892   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
1893   Value *IncV = Phi->getIncomingValue(LatchIdx);
1894   return (getLoopPhiForCounter(IncV, L) == Phi);
1895 }
1896 
1897 /// Search the loop header for a loop counter (an add rec w/step of one)
1898 /// suitable for use by LFTR.  If multiple counters are available, select the
1899 /// "best" one based on profitability heuristics.
1900 ///
1901 /// BECount may be an i8* pointer type. The pointer difference is already
1902 /// valid count without scaling the address stride, so it remains a pointer
1903 /// expression as far as SCEV is concerned.
1904 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
1905                                 const SCEV *BECount,
1906                                 ScalarEvolution *SE, DominatorTree *DT) {
1907   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1908 
1909   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
1910 
1911   // Loop over all of the PHI nodes, looking for a simple counter.
1912   PHINode *BestPhi = nullptr;
1913   const SCEV *BestInit = nullptr;
1914   BasicBlock *LatchBlock = L->getLoopLatch();
1915   assert(LatchBlock && "Must be in simplified form");
1916   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1917 
1918   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1919     PHINode *Phi = cast<PHINode>(I);
1920     if (!isLoopCounter(Phi, L, SE))
1921       continue;
1922 
1923     // Avoid comparing an integer IV against a pointer Limit.
1924     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1925       continue;
1926 
1927     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1928 
1929     // AR may be a pointer type, while BECount is an integer type.
1930     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1931     // AR may not be a narrower type, or we may never exit.
1932     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1933     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
1934       continue;
1935 
1936     // Avoid reusing a potentially undef value to compute other values that may
1937     // have originally had a concrete definition.
1938     if (!hasConcreteDef(Phi)) {
1939       // We explicitly allow unknown phis as long as they are already used by
1940       // the loop exit test.  This is legal since performing LFTR could not
1941       // increase the number of undef users.
1942       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
1943       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
1944           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
1945         continue;
1946     }
1947 
1948     // Avoid introducing undefined behavior due to poison which didn't exist in
1949     // the original program.  (Annoyingly, the rules for poison and undef
1950     // propagation are distinct, so this does NOT cover the undef case above.)
1951     // We have to ensure that we don't introduce UB by introducing a use on an
1952     // iteration where said IV produces poison.  Our strategy here differs for
1953     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
1954     // see code in linearFunctionTestReplace.  For pointers, we restrict
1955     // transforms as there is no good way to reinfer inbounds once lost.
1956     if (!Phi->getType()->isIntegerTy() &&
1957         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
1958       continue;
1959 
1960     const SCEV *Init = AR->getStart();
1961 
1962     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
1963       // Don't force a live loop counter if another IV can be used.
1964       if (AlmostDeadIV(Phi, LatchBlock, Cond))
1965         continue;
1966 
1967       // Prefer to count-from-zero. This is a more "canonical" counter form. It
1968       // also prefers integer to pointer IVs.
1969       if (BestInit->isZero() != Init->isZero()) {
1970         if (BestInit->isZero())
1971           continue;
1972       }
1973       // If two IVs both count from zero or both count from nonzero then the
1974       // narrower is likely a dead phi that has been widened. Use the wider phi
1975       // to allow the other to be eliminated.
1976       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
1977         continue;
1978     }
1979     BestPhi = Phi;
1980     BestInit = Init;
1981   }
1982   return BestPhi;
1983 }
1984 
1985 /// Insert an IR expression which computes the value held by the IV IndVar
1986 /// (which must be a loop counter w/unit stride) after the backedge of loop L
1987 /// is taken ExitCount times.
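     ///
     /// In rough terms, for a unit-stride counter starting at Start this emits
     /// Start + ExitCount (or Start + ExitCount + 1 when the post-incremented
     /// value is being compared), subject to the pointer/integer type
     /// adjustments handled below.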
1988 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
1989                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
1990                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
1991   assert(isLoopCounter(IndVar, L, SE));
1992   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
1993   const SCEV *IVInit = AR->getStart();
1994 
1995   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
1996   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
1997   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
1998   // the existing GEPs whenever possible.
1999   if (IndVar->getType()->isPointerTy() &&
2000       !ExitCount->getType()->isPointerTy()) {
2001     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2002     // signed value. ExitCount on the other hand represents the loop trip count,
2003     // which is an unsigned value. FindLoopCounter only allows induction
2004     // variables that have a positive unit stride of one. This means we don't
2005     // have to handle the case of negative offsets (yet) and just need to zero
2006     // extend ExitCount.
2007     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2008     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2009     if (UsePostInc)
2010       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2011 
2012     // Expand the code for the iteration count.
2013     assert(SE->isLoopInvariant(IVOffset, L) &&
2014            "Computed iteration count is not loop invariant!");
2015 
2016     // We could handle pointer IVs other than i8*, but we need to compensate for
2017     // gep index scaling.
2018     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2019                              cast<PointerType>(IndVar->getType())
2020                                  ->getElementType())->isOne() &&
2021            "unit stride pointer IV must be i8*");
2022 
2023     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2024     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2025     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2026   } else {
2027     // In any other case, convert both IVInit and ExitCount to integers before
2028     // comparing. This may result in SCEV expansion of pointers, but in practice
2029     // SCEV will fold the pointer arithmetic away as such:
2030     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2031     //
2032     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2033     // for simple memset-style loops.
2034     //
2035     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2036     // were generated on top of case #2, which is not expected.
2037 
2038     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2039     // For unit stride, IVCount = Start + ExitCount with 2's complement
2040     // overflow.
2041 
2042     // For integer IVs, truncate the IV before computing IVInit + BECount,
2043     // unless we know a priori that the limit must be a constant when evaluated
2044     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2045     // of the IV in the loop over a (potentially) expensive expansion of the
2046     // widened exit count add(zext(add)) expression.
2047     if (SE->getTypeSizeInBits(IVInit->getType())
2048         > SE->getTypeSizeInBits(ExitCount->getType())) {
2049       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2050         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2051       else
2052         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2053     }
2054 
2055     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2056 
2057     if (UsePostInc)
2058       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2059 
2060     // Expand the code for the iteration count.
2061     assert(SE->isLoopInvariant(IVLimit, L) &&
2062            "Computed iteration count is not loop invariant!");
2063     // Ensure that we generate the same type as IndVar, or a smaller integer
2064     // type. In the presence of null pointer values, we have an integer type
2065     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2066     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2067       IndVar->getType() : ExitCount->getType();
2068     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2069     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2070   }
2071 }
2072 
2073 /// This method rewrites the exit condition of the loop to be a canonical !=
2074 /// comparison against the incremented loop induction variable.  This pass is
2075 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2076 /// determine a loop-invariant trip count of the loop, which is actually a much
2077 /// broader range than just linear tests.
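     ///
     /// As an illustrative sketch only: a latch test such as
     ///   %cmp = icmp slt i32 %iv.next, %n
     /// becomes
     ///   %exitcond = icmp ne i32 %iv.next, %limit
     /// where %limit is the loop-invariant value the counter holds when the
     /// backedge has been taken ExitCount times (see genLoopLimit).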
2078 bool IndVarSimplify::
2079 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2080                           const SCEV *ExitCount,
2081                           PHINode *IndVar, SCEVExpander &Rewriter) {
2082   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2083   assert(isLoopCounter(IndVar, L, SE));
2084   Instruction * const IncVar =
2085     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2086 
2087   // Initialize CmpIndVar to the preincremented IV.
2088   Value *CmpIndVar = IndVar;
2089   bool UsePostInc = false;
2090 
2091   // If the exiting block is the same as the backedge block, we prefer to
2092   // compare against the post-incremented value, otherwise we must compare
2093   // against the preincremented value.
2094   if (ExitingBB == L->getLoopLatch()) {
2095     // For pointer IVs, we chose to not strip inbounds which requires us not
2096     // to add a potentially UB introducing use.  We need to either a) show
2097     // the loop test we're modifying is already in post-inc form, or b) show
2098     // that adding a use must not introduce UB.
2099     bool SafeToPostInc =
2100         IndVar->getType()->isIntegerTy() ||
2101         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2102         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2103     if (SafeToPostInc) {
2104       UsePostInc = true;
2105       CmpIndVar = IncVar;
2106     }
2107   }
2108 
2109   // It may be necessary to drop nowrap flags on the incrementing instruction
2110   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2111   // case the increment might have previously been poison on the last iteration
2112   // only) or if LFTR switches to a different IV that was previously dynamically
2113   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2114   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2115   // check), because the pre-inc addrec flags may be adopted from the original
2116   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2117   // TODO: This handling is inaccurate for one case: If we switch to a
2118   // dynamically dead IV that wraps on the first loop iteration only, which is
2119   // not covered by the post-inc addrec. (If the new IV was not dynamically
2120   // dead, it could not be poison on the first iteration in the first place.)
2121   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2122     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2123     if (BO->hasNoUnsignedWrap())
2124       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2125     if (BO->hasNoSignedWrap())
2126       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2127   }
2128 
2129   Value *ExitCnt = genLoopLimit(
2130       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2131   assert(ExitCnt->getType()->isPointerTy() ==
2132              IndVar->getType()->isPointerTy() &&
2133          "genLoopLimit missed a cast");
2134 
2135   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2136   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2137   ICmpInst::Predicate P;
2138   if (L->contains(BI->getSuccessor(0)))
2139     P = ICmpInst::ICMP_NE;
2140   else
2141     P = ICmpInst::ICMP_EQ;
2142 
2143   IRBuilder<> Builder(BI);
2144 
2145   // The new loop exit condition should reuse the debug location of the
2146   // original loop exit condition.
2147   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2148     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2149 
2150   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2151   // avoid the expensive expansion of the limit expression in the wider type,
2152   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2153   // since we know (from the exit count bitwidth) that we can't self-wrap in
2154   // the narrower type.
2155   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2156   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2157   if (CmpIndVarSize > ExitCntSize) {
2158     assert(!CmpIndVar->getType()->isPointerTy() &&
2159            !ExitCnt->getType()->isPointerTy());
2160 
2161     // Before resorting to actually inserting the truncate, use the same
2162     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2163     // the other side of the comparison instead.  We still evaluate the limit
2164     // in the narrower bitwidth; we just prefer a zext/sext outside the loop to
2165     // a truncate within it.
2166     bool Extended = false;
2167     const SCEV *IV = SE->getSCEV(CmpIndVar);
2168     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2169                                                   ExitCnt->getType());
2170     const SCEV *ZExtTrunc =
2171       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2172 
2173     if (ZExtTrunc == IV) {
2174       Extended = true;
2175       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2176                                    "wide.trip.count");
2177     } else {
2178       const SCEV *SExtTrunc =
2179         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2180       if (SExtTrunc == IV) {
2181         Extended = true;
2182         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2183                                      "wide.trip.count");
2184       }
2185     }
2186 
2187     if (Extended) {
2188       bool Discard;
2189       L->makeLoopInvariant(ExitCnt, Discard);
2190     } else
2191       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2192                                       "lftr.wideiv");
2193   }
2194   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2195                     << "      LHS:" << *CmpIndVar << '\n'
2196                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2197                     << "\n"
2198                     << "      RHS:\t" << *ExitCnt << "\n"
2199                     << "ExitCount:\t" << *ExitCount << "\n"
2200                     << "  was: " << *BI->getCondition() << "\n");
2201 
2202   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2203   Value *OrigCond = BI->getCondition();
2204   // It's tempting to use replaceAllUsesWith here to fully replace the old
2205   // comparison, but that's not immediately safe, since users of the old
2206   // comparison may not be dominated by the new comparison. Instead, just
2207   // update the branch to use the new comparison; in the common case this
2208   // will make the old comparison dead.
2209   BI->setCondition(Cond);
2210   DeadInsts.emplace_back(OrigCond);
2211 
2212   ++NumLFTR;
2213   return true;
2214 }
2215 
2216 //===----------------------------------------------------------------------===//
2217 //  sinkUnusedInvariants. A late subpass to clean up loop preheaders.
2218 //===----------------------------------------------------------------------===//
2219 
2220 /// If there's a single exit block, sink any loop-invariant values that
2221 /// were defined in the preheader but not used inside the loop into the
2222 /// exit block to reduce register pressure in the loop.
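     ///
     /// For example (illustrative): an address computed in the preheader whose
     /// only uses are after the loop can be moved into the exit block, so it
     /// is no longer live across the loop body.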
2223 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2224   BasicBlock *ExitBlock = L->getExitBlock();
2225   if (!ExitBlock) return false;
2226 
2227   BasicBlock *Preheader = L->getLoopPreheader();
2228   if (!Preheader) return false;
2229 
2230   bool MadeAnyChanges = false;
2231   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2232   BasicBlock::iterator I(Preheader->getTerminator());
2233   while (I != Preheader->begin()) {
2234     --I;
2235     // New instructions were inserted at the end of the preheader.
2236     if (isa<PHINode>(I))
2237       break;
2238 
2239     // Don't move instructions which might have side effects, since the side
2240     // effects need to complete before instructions inside the loop.  Also don't
2241     // move instructions which might read memory, since the loop may modify
2242     // memory. Note that it's okay if the instruction might have undefined
2243     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2244     // block.
2245     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2246       continue;
2247 
2248     // Skip debug info intrinsics.
2249     if (isa<DbgInfoIntrinsic>(I))
2250       continue;
2251 
2252     // Skip eh pad instructions.
2253     if (I->isEHPad())
2254       continue;
2255 
2256     // Don't sink alloca: we never want to sink static alloca's out of the
2257     // entry block, and correctly sinking dynamic alloca's requires
2258     // checks for stacksave/stackrestore intrinsics.
2259     // FIXME: Refactor this check somehow?
2260     if (isa<AllocaInst>(I))
2261       continue;
2262 
2263     // Determine if there is a use in or before the loop (direct or
2264     // otherwise).
2265     bool UsedInLoop = false;
2266     for (Use &U : I->uses()) {
2267       Instruction *User = cast<Instruction>(U.getUser());
2268       BasicBlock *UseBB = User->getParent();
2269       if (PHINode *P = dyn_cast<PHINode>(User)) {
2270         unsigned i =
2271           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2272         UseBB = P->getIncomingBlock(i);
2273       }
2274       if (UseBB == Preheader || L->contains(UseBB)) {
2275         UsedInLoop = true;
2276         break;
2277       }
2278     }
2279 
2280     // If there is, the def must remain in the preheader.
2281     if (UsedInLoop)
2282       continue;
2283 
2284     // Otherwise, sink it to the exit block.
2285     Instruction *ToMove = &*I;
2286     bool Done = false;
2287 
2288     if (I != Preheader->begin()) {
2289       // Skip debug info intrinsics.
2290       do {
2291         --I;
2292       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2293 
2294       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2295         Done = true;
2296     } else {
2297       Done = true;
2298     }
2299 
2300     MadeAnyChanges = true;
2301     ToMove->moveBefore(*ExitBlock, InsertPt);
2302     if (Done) break;
2303     InsertPt = ToMove->getIterator();
2304   }
2305 
2306   return MadeAnyChanges;
2307 }
2308 
2309 // Returns true if the condition of \p BI being checked is invariant and can be
2310 // proved to be trivially true during at least the first \p MaxIter iterations.
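     //
     // For example (illustrative): if the in-loop path is guarded by 'i < len'
     // and we can show this holds throughout the first MaxIter iterations (an
     // upper bound on how often the backedge is taken), then the guarded exit
     // is never taken and the caller can fold the condition away.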
2311 static bool isTrivialCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE,
2312                           bool ProvingLoopExit, const SCEV *MaxIter) {
2313   ICmpInst::Predicate Pred;
2314   Value *LHS, *RHS;
2315   using namespace PatternMatch;
2316   BasicBlock *TrueSucc, *FalseSucc;
2317   if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
2318                       m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
2319     return false;
2320 
2321   assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
2322          "Not a loop exit!");
2323 
2324   // 'LHS pred RHS' should now mean that we stay in loop.
2325   if (L->contains(FalseSucc))
2326     Pred = CmpInst::getInversePredicate(Pred);
2327 
2328   // If we are proving loop exit, invert the predicate.
2329   if (ProvingLoopExit)
2330     Pred = CmpInst::getInversePredicate(Pred);
2331 
2332   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
2333   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
2334   // Can we prove it to be trivially true?
2335   if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI))
2336     return true;
2337 
2338   if (ProvingLoopExit)
2339     return false;
2340 
2341   ICmpInst::Predicate InvariantPred;
2342   const SCEV *InvariantLHS, *InvariantRHS;
2343 
2344   // Check if there is a loop-invariant predicate equivalent to our check.
2345   if (!SE->isLoopInvariantExitCondDuringFirstIterations(
2346            Pred, LHSS, RHSS, L, BI, MaxIter, InvariantPred, InvariantLHS,
2347            InvariantRHS))
2348     return false;
2349 
2350   // Can we prove it to be trivially true?
2351   return SE->isKnownPredicateAt(InvariantPred, InvariantLHS, InvariantRHS, BI);
2352 }
2353 
2354 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2355   SmallVector<BasicBlock*, 16> ExitingBlocks;
2356   L->getExitingBlocks(ExitingBlocks);
2357 
2358   // Remove all exits which aren't both rewriteable and executed on every
2359   // iteration.
2360   auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2361     // If our exiting block exits multiple loops, we can only rewrite the
2362     // innermost one.  Otherwise, we're changing how many times the innermost
2363     // loop runs before it exits.
2364     if (LI->getLoopFor(ExitingBB) != L)
2365       return true;
2366 
2367     // Can't rewrite non-branch yet.
2368     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2369     if (!BI)
2370       return true;
2371 
2372     // If already constant, nothing to do.
2373     if (isa<Constant>(BI->getCondition()))
2374       return true;
2375 
2376     // Likewise, the loop latch must be dominated by the exiting BB.
2377     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
2378       return true;
2379 
2380     return false;
2381   });
2382   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2383 
2384   if (ExitingBlocks.empty())
2385     return false;
2386 
2387   // Get a symbolic upper bound on the loop backedge taken count.
2388   const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
2389   if (isa<SCEVCouldNotCompute>(MaxExitCount))
2390     return false;
2391 
2392   // Visit our exit blocks in order of dominance.  Since all of these exits
2393   // must dominate the latch, there is a total dominance order between
2394   // them.
2395   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
2396                // std::sort sorts in ascending order, so we want the inverse of
2397                // the normal dominance relation.
2398                if (A == B) return false;
2399                if (DT->properlyDominates(A, B))
2400                  return true;
2401                else {
2402                  assert(DT->properlyDominates(B, A) &&
2403                         "expected total dominance order!");
2404                  return false;
2405                }
2406   });
2407 #ifndef NDEBUG
2408   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2409     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2410   }
2411 #endif
2412 
2413   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2414     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2415     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2416     auto *OldCond = BI->getCondition();
2417     auto *NewCond = ConstantInt::get(OldCond->getType(),
2418                                      IsTaken ? ExitIfTrue : !ExitIfTrue);
2419     BI->setCondition(NewCond);
2420     if (OldCond->use_empty())
2421       DeadInsts.emplace_back(OldCond);
2422   };
2423 
2424   bool Changed = false;
2425   SmallSet<const SCEV*, 8> DominatingExitCounts;
2426   for (BasicBlock *ExitingBB : ExitingBlocks) {
2427     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2428     if (isa<SCEVCouldNotCompute>(ExitCount)) {
2429       // Okay, we do not know the exit count here. Can we at least prove that it
2430       // will remain the same within the iteration space?
2431       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2432       auto OptimizeCond = [&](bool Inverted) {
2433         if (isTrivialCond(L, BI, SE, Inverted, MaxExitCount)) {
2434           FoldExit(ExitingBB, Inverted);
2435           return true;
2436         }
2437         return false;
2438       };
2439       if (OptimizeCond(false) || OptimizeCond(true))
2440         Changed = true;
2441       continue;
2442     }
2443 
2444     // If we know we'd exit on the first iteration, rewrite the exit to
2445     // reflect this.  This does not imply the loop must exit through this
2446     // exit; there may be an earlier one taken on the first iteration.
2447     // TODO: Given we know the backedge can't be taken, we should go ahead
2448     // and break it.  Or at least, kill all the header phis and simplify.
2449     if (ExitCount->isZero()) {
2450       FoldExit(ExitingBB, true);
2451       Changed = true;
2452       continue;
2453     }
2454 
2455     // If we end up with a pointer exit count, bail.  Note that we can end up
2456     // with a pointer exit count for one exiting block, and not for another in
2457     // the same loop.
2458     if (!ExitCount->getType()->isIntegerTy() ||
2459         !MaxExitCount->getType()->isIntegerTy())
2460       continue;
2461 
2462     Type *WiderType =
2463       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2464     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2465     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2466     assert(MaxExitCount->getType() == ExitCount->getType());
2467 
2468     // Can we prove that some other exit must be taken strictly before this
2469     // one?
2470     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2471                                      MaxExitCount, ExitCount)) {
2472       FoldExit(ExitingBB, false);
2473       Changed = true;
2474       continue;
2475     }
2476 
2477     // As we run, keep track of which exit counts we've encountered.  If we
2478     // find a duplicate, we've found an exit which would have exited on the
2479     // exiting iteration, but (from the visit order) strictly follows another
2480     // which does the same and is thus dead.
2481     if (!DominatingExitCounts.insert(ExitCount).second) {
2482       FoldExit(ExitingBB, false);
2483       Changed = true;
2484       continue;
2485     }
2486 
2487     // TODO: There might be another opportunity to leverage SCEV's reasoning
2488     // here.  If we kept track of the min of dominating exits so far, we could
2489     // discharge exits with EC >= MDEC. This is less powerful than the existing
2490     // transform (since later exits aren't considered), but potentially more
2491     // powerful for any case where SCEV can prove a >=u b, but neither a == b
2492     // or a >u b.  Such a case is not currently known.
2493   }
2494   return Changed;
2495 }
2496 
2497 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2498   SmallVector<BasicBlock*, 16> ExitingBlocks;
2499   L->getExitingBlocks(ExitingBlocks);
2500 
2501   // Finally, see if we can rewrite our exit conditions into a loop invariant
2502   // form. If we have a read-only loop, and we can tell that we must exit down
2503   // a path which does not need any of the values computed within the loop, we
2504   // can rewrite the loop to exit on the first iteration.  Note that this
2505   // doesn't either a) tell us the loop exits on the first iteration (unless
2506   // *all* exits are predicatable) or b) tell us *which* exit might be taken.
2507   // This transformation looks a lot like a restricted form of dead loop
2508   // elimination, but restricted to read-only loops and without necessarily
2509   // needing to kill the loop entirely.
2510   if (!LoopPredication)
2511     return false;
2512 
2513   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2514     return false;
2515 
2516   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2517   // through *explicit* control flow.  We have to eliminate the possibility of
2518   // implicit exits (see below) before we know it's truly exact.
2519   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2520   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2521       !SE->isLoopInvariant(ExactBTC, L) ||
2522       !isSafeToExpand(ExactBTC, *SE))
2523     return false;
2524 
2525   // If we end up with a pointer exit count, bail.  It may be unsized.
2526   if (!ExactBTC->getType()->isIntegerTy())
2527     return false;
2528 
2529   auto BadExit = [&](BasicBlock *ExitingBB) {
2530     // If our exiting block exits multiple loops, we can only rewrite the
2531     // innermost one.  Otherwise, we're changing how many times the innermost
2532     // loop runs before it exits.
2533     if (LI->getLoopFor(ExitingBB) != L)
2534       return true;
2535 
2536     // Can't rewrite non-branch yet.
2537     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2538     if (!BI)
2539       return true;
2540 
2541     // If already constant, nothing to do.
2542     if (isa<Constant>(BI->getCondition()))
2543       return true;
2544 
2545     // If the exit block has phis, we need to be able to compute the values
2546     // within the loop which contains them.  This assumes trivially lcssa phis
2547     // have already been removed; TODO: generalize
2548     BasicBlock *ExitBlock =
2549     BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2550     if (!ExitBlock->phis().empty())
2551       return true;
2552 
2553     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2554     assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2555     if (!SE->isLoopInvariant(ExitCount, L) ||
2556         !isSafeToExpand(ExitCount, *SE))
2557       return true;
2558 
2559     // If we end up with a pointer exit count, bail.  It may be unsized.
2560     if (!ExitCount->getType()->isIntegerTy())
2561       return true;
2562 
2563     return false;
2564   };
2565 
2566   // If we have any exits which can't be predicated themselves, then we can't
2567   // predicate any exit which isn't guaranteed to execute before it.  Consider
2568   // two exits (a) and (b) which would both exit on the same iteration.  If we
2569   // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2570   // we could convert a loop from exiting through (a) to one exiting through
2571   // (b).  Note that this problem exists only for exits with the same exit
2572   // count, and we could be more aggressive when exit counts are known inequal.
2573   llvm::sort(ExitingBlocks,
2574             [&](BasicBlock *A, BasicBlock *B) {
2575               // llvm::sort sorts in ascending order, so this comparator
2576               // orders blocks with dominators first, plus a tie breaker
2577               // for blocks unordered by dominance.
2578               if (DT->properlyDominates(A, B)) return true;
2579               if (DT->properlyDominates(B, A)) return false;
2580               return A->getName() < B->getName();
2581             });
2582   // Check to see if our exiting blocks form a total order (i.e. a linear
2583   // chain of exits before the backedge).  If they don't, reasoning about
2584   // reachability is complicated and we choose not to for now.
2585   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2586     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2587       return false;
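
  // For a concrete (hypothetical) picture: with three predicatable exits E1,
  // E2, and E3 where E1 dominates E2 and E2 dominates E3, the sorted order is
  // [E1, E2, E3] and the chain check above succeeds.  If instead E2 and E3 sat
  // on the two sides of a diamond, neither dominating the other, the check
  // fails and we conservatively give up.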
2588 
2589   // Given our sorted total order, we know that exit[j] must be evaluated
2590   // after all exit[i] such that i < j.
2591   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2592     if (BadExit(ExitingBlocks[i])) {
2593       ExitingBlocks.resize(i);
2594       break;
2595     }
2596 
2597   if (ExitingBlocks.empty())
2598     return false;
2599 
2600   // We rely on not being able to reach an exiting block on a later iteration
2601   // than its statically computed exit count.  The implementation of
2602   // getExitCount currently has this invariant, but assert it here so that
2603   // breakage is obvious if this ever changes.
2604   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2605         return DT->dominates(ExitingBB, L->getLoopLatch());
2606       }));
2607 
2608   // At this point, ExitingBlocks consists of only those blocks which are
2609   // predicatable.  Given that, we know we have at least one exit we can
2610   // predicate if the loop doesn't have side effects and doesn't have any
2611   // implicit exits (because then our exact BTC isn't actually exact).
2612   // FIXME: As structured, this side-effect scan is O(I^2) for loop nests.
2613   // We could bail out for outer loops, but that seems less than ideal.
2614   // MemorySSA can find memory writes; it is unclear whether that would be
2615   // enough to cover *all* side effects.
2616   for (BasicBlock *BB : L->blocks())
2617     for (auto &I : *BB)
2618       // TODO: use isGuaranteedToTransferExecutionToSuccessor.
2619       if (I.mayHaveSideEffects() || I.mayThrow())
2620         return false;
2621 
2622   bool Changed = false;
2623   // Finally, do the actual predication for all predicatable blocks.  A couple
2624   // of notes here:
2625   // 1) We don't bother to constant fold dominated exits with identical exit
2626   //    counts; that's simply a form of CSE/equality propagation and we leave
2627   //    it for dedicated passes.
2628   // 2) We expand both the exit count and the comparison at the preheader's
2629   //    terminator (see the insert points just below); since both have been
2630   //    checked to be loop invariant and safe to expand, the rewritten
2631   //    condition is computed once before entering the loop rather than on
2632   //    every iteration.
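  //
  // As a minimal sketch of the rewrite (hypothetical IR, value names invented
  // for illustration): an exiting branch
  //   br i1 %loop.varying.cond, label %in.loop, label %exit
  // whose per-exit count expands to %ec is rewritten to
  //   br i1 (icmp ne %ec, %exact.btc), label %in.loop, label %exit
  // so the branch leaves the loop exactly when this exit is the one bounding
  // the trip count; when the exit count is literally the exact backedge-taken
  // count, the condition folds to a constant instead.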
2633   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2634   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2635   Value *ExactBTCV = nullptr; // Lazily generated if needed.
2636   for (BasicBlock *ExitingBB : ExitingBlocks) {
2637     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2638 
2639     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2640     Value *NewCond;
2641     if (ExitCount == ExactBTC) {
2642       NewCond = L->contains(BI->getSuccessor(0)) ?
2643         B.getFalse() : B.getTrue();
2644     } else {
2645       Value *ECV = Rewriter.expandCodeFor(ExitCount);
2646       if (!ExactBTCV)
2647         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2648       Value *RHS = ExactBTCV;
2649       if (ECV->getType() != RHS->getType()) {
2650         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2651         ECV = B.CreateZExt(ECV, WiderTy);
2652         RHS = B.CreateZExt(RHS, WiderTy);
2653       }
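      // Exit counts are unsigned quantities, so zero extending both sides to
      // a common width preserves the equality comparison built below.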
2654       auto Pred = L->contains(BI->getSuccessor(0)) ?
2655         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2656       NewCond = B.CreateICmp(Pred, ECV, RHS);
2657     }
2658     Value *OldCond = BI->getCondition();
2659     BI->setCondition(NewCond);
2660     if (OldCond->use_empty())
2661       DeadInsts.emplace_back(OldCond);
2662     Changed = true;
2663   }
2664 
2665   return Changed;
2666 }
2667 
2668 //===----------------------------------------------------------------------===//
2669 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2670 //===----------------------------------------------------------------------===//
2671 
2672 bool IndVarSimplify::run(Loop *L) {
2673   // We need (and expect!) the incoming loop to be in LCSSA.
2674   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2675          "LCSSA required to run indvars!");
2676 
2677   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2678   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2679   //    canonicalization can be a pessimization without LSR to "clean up"
2680   //    afterwards.
2681   //  - We depend on having a preheader; in particular,
2682   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2683   //    and we're in trouble if we can't find the induction variable even when
2684   //    we've manually inserted one.
2685   //  - LFTR relies on having a single backedge.
2686   if (!L->isLoopSimplifyForm())
2687     return false;
2688 
2689 #ifndef NDEBUG
2690   // Used below for a consistency check only
2691   // Note: Since the result returned by ScalarEvolution may depend on the order
2692   // in which previous results are added to its cache, the call to
2693   // getBackedgeTakenCount() may change following SCEV queries.
2694   const SCEV *BackedgeTakenCount;
2695   if (VerifyIndvars)
2696     BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2697 #endif
2698 
2699   bool Changed = false;
2700   // If there are any floating-point recurrences, attempt to
2701   // transform them to use integer recurrences.
2702   Changed |= rewriteNonIntegerIVs(L);
2703 
2704   // Create a rewriter object which we'll use to transform the code with.
2705   SCEVExpander Rewriter(*SE, DL, "indvars");
2706 #ifndef NDEBUG
2707   Rewriter.setDebugType(DEBUG_TYPE);
2708 #endif
2709 
2710   // Eliminate redundant IV users.
2711   //
2712   // Simplification works best when run before other consumers of SCEV. We
2713   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2714   // other expressions involving loop IVs have been evaluated. This helps SCEV
2715   // set no-wrap flags before normalizing sign/zero extension.
2716   Rewriter.disableCanonicalMode();
2717   Changed |= simplifyAndExtend(L, Rewriter, LI);
2718 
2719   // Check to see if we can compute the final value of any expressions
2720   // that are recurrent in the loop, and substitute the exit values from the
2721   // loop into any instructions outside of the loop that use the final values
2722   // of the current expressions.
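  //
  // For instance (hypothetical source, invented for illustration):
  //   for (i = 0; i < n; i++) ;
  //   use(i);
  // with n known to be non-negative, the use after the loop can be rewritten
  // to use(n), removing the out-of-loop dependence on the IV's exit value.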
2723   if (ReplaceExitValue != NeverRepl) {
2724     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
2725                                              ReplaceExitValue, DeadInsts)) {
2726       NumReplaced += Rewrites;
2727       Changed = true;
2728     }
2729   }
2730 
2731   // Eliminate redundant IV cycles.
2732   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2733 
2734   // Try to eliminate loop exits based on analyzable exit counts.
2735   if (optimizeLoopExits(L, Rewriter)) {
2736     Changed = true;
2737     // Given we've changed exit counts, notify SCEV
2738     SE->forgetLoop(L);
2739   }
2740 
2741   // Try to form loop invariant tests for loop exits by changing how many
2742   // iterations of the loop run when that is unobservable.
2743   if (predicateLoopExits(L, Rewriter)) {
2744     Changed = true;
2745     // Given we've changed exit counts, notify SCEV
2746     SE->forgetLoop(L);
2747   }
2748 
2749   // If we have a trip count expression, rewrite the loop's exit condition
2750   // using it.
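  //
  // As a loose sketch of LFTR's effect (hypothetical IR, names invented): an
  // exit test such as
  //   %cmp = icmp slt i64 %iv.next, %n
  // is replaced by a comparison of a chosen loop counter against the value it
  // takes on the exiting iteration, e.g.
  //   %cmp = icmp ne i64 %iv.next, %exit.value
  // which downstream analyses generally find easier to reason about.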
2751   if (!DisableLFTR) {
2752     BasicBlock *PreHeader = L->getLoopPreheader();
2753     BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
2754 
2755     SmallVector<BasicBlock*, 16> ExitingBlocks;
2756     L->getExitingBlocks(ExitingBlocks);
2757     for (BasicBlock *ExitingBB : ExitingBlocks) {
2758       // Can't rewrite non-branch yet.
2759       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2760         continue;
2761 
2762       // If our exiting block exits multiple loops, we can only rewrite the
2763       // innermost one.  Otherwise, we're changing how many times the innermost
2764       // loop runs before it exits.
2765       if (LI->getLoopFor(ExitingBB) != L)
2766         continue;
2767 
2768       if (!needsLFTR(L, ExitingBB))
2769         continue;
2770 
2771       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2772       if (isa<SCEVCouldNotCompute>(ExitCount))
2773         continue;
2774 
2775       // This was handled above, but as we form SCEVs, we can sometimes refine
2776       // existing ones; this allows exit counts to be folded to zero which
2777       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
2778       // until stable to handle cases like this better.
2779       if (ExitCount->isZero())
2780         continue;
2781 
2782       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2783       if (!IndVar)
2784         continue;
2785 
2786       // Avoid high cost expansions.  Note: This heuristic is questionable in
2787       // that our definition of "high cost" is not exactly principled.
2788       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2789                                        TTI, PreHeaderBR))
2790         continue;
2791 
2792       // Check preconditions for proper SCEVExpander operation. SCEV does not
2793       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2794       // any pass that uses the SCEVExpander must do it. This does not work
2795       // well for loop passes because SCEVExpander makes assumptions about
2796       // all loops, while LoopPassManager only forces the current loop to be
2797       // simplified.
2798       //
2799       // FIXME: SCEV expansion has no way to bail out, so the caller must
2800       // explicitly check any assumptions made by SCEV. Brittle.
2801       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2802       if (!AR || AR->getLoop()->getLoopPreheader())
2803         Changed |= linearFunctionTestReplace(L, ExitingBB,
2804                                              ExitCount, IndVar,
2805                                              Rewriter);
2806     }
2807   }
2808   // Clear the rewriter cache, because values that are in the rewriter's cache
2809   // can be deleted in the loop below, causing the AssertingVH in the cache to
2810   // trigger.
2811   Rewriter.clear();
2812 
2813   // Now that we're done iterating through lists, clean up any instructions
2814   // which are now dead.
2815   while (!DeadInsts.empty())
2816     if (Instruction *Inst =
2817             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2818       Changed |=
2819           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2820 
2821   // The Rewriter may not be used from this point on.
2822 
2823   // Loop-invariant instructions in the preheader that aren't used in the
2824   // loop may be sunk below the loop to reduce register pressure.
2825   Changed |= sinkUnusedInvariants(L);
2826 
2827   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2828   // trip count and therefore can further simplify exit values in addition to
2829   // rewriteLoopExitValues.
2830   Changed |= rewriteFirstIterationLoopExitValues(L);
2831 
2832   // Clean up dead instructions.
2833   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2834 
2835   // Check a post-condition.
2836   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2837          "Indvars did not preserve LCSSA!");
2838 
2839   // Verify that LFTR and any other changes have not interfered with SCEV's
2840   // ability to compute trip count.  We may have *changed* the exit count, but
2841   // only by reducing it.
2842 #ifndef NDEBUG
2843   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2844     SE->forgetLoop(L);
2845     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2846     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2847         SE->getTypeSizeInBits(NewBECount->getType()))
2848       NewBECount = SE->getTruncateOrNoop(NewBECount,
2849                                          BackedgeTakenCount->getType());
2850     else
2851       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2852                                                  NewBECount->getType());
2853     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2854                                  NewBECount) && "indvars must preserve SCEV");
2855   }
2856   if (VerifyMemorySSA && MSSAU)
2857     MSSAU->getMemorySSA()->verifyMemorySSA();
2858 #endif
2859 
2860   return Changed;
2861 }
2862 
2863 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2864                                           LoopStandardAnalysisResults &AR,
2865                                           LPMUpdater &) {
2866   Function *F = L.getHeader()->getParent();
2867   const DataLayout &DL = F->getParent()->getDataLayout();
2868 
2869   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA);
2870   if (!IVS.run(&L))
2871     return PreservedAnalyses::all();
2872 
2873   auto PA = getLoopPassPreservedAnalyses();
2874   PA.preserveSet<CFGAnalyses>();
2875   if (AR.MSSA)
2876     PA.preserve<MemorySSAAnalysis>();
2877   return PA;
2878 }
2879 
2880 namespace {
2881 
2882 struct IndVarSimplifyLegacyPass : public LoopPass {
2883   static char ID; // Pass identification, replacement for typeid
2884 
2885   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2886     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2887   }
2888 
2889   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2890     if (skipLoop(L))
2891       return false;
2892 
2893     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2894     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2895     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2896     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2897     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2898     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2899     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2900     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2901     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2902     MemorySSA *MSSA = nullptr;
2903     if (MSSAAnalysis)
2904       MSSA = &MSSAAnalysis->getMSSA();
2905 
2906     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA);
2907     return IVS.run(L);
2908   }
2909 
2910   void getAnalysisUsage(AnalysisUsage &AU) const override {
2911     AU.setPreservesCFG();
2912     AU.addPreserved<MemorySSAWrapperPass>();
2913     getLoopAnalysisUsage(AU);
2914   }
2915 };
2916 
2917 } // end anonymous namespace
2918 
2919 char IndVarSimplifyLegacyPass::ID = 0;
2920 
2921 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2922                       "Induction Variable Simplification", false, false)
2923 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2924 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2925                     "Induction Variable Simplification", false, false)
2926 
2927 Pass *llvm::createIndVarSimplifyPass() {
2928   return new IndVarSimplifyLegacyPass();
2929 }
2930