1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
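//      For example, assuming a loop of the form
//        'for (i = 0; i != n; ++i) {}  ...  use(i*2)'
//      the post-loop use of 'i*2' can be rewritten as 'n*2' computed outside
//      the loop, since the induction variable exits with the value 'n'.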
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/MemorySSA.h"
42 #include "llvm/Analysis/MemorySSAUpdater.h"
43 #include "llvm/Analysis/ScalarEvolution.h"
44 #include "llvm/Analysis/ScalarEvolutionExpander.h"
45 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
46 #include "llvm/Analysis/TargetLibraryInfo.h"
47 #include "llvm/Analysis/TargetTransformInfo.h"
48 #include "llvm/Analysis/ValueTracking.h"
49 #include "llvm/IR/BasicBlock.h"
50 #include "llvm/IR/Constant.h"
51 #include "llvm/IR/ConstantRange.h"
52 #include "llvm/IR/Constants.h"
53 #include "llvm/IR/DataLayout.h"
54 #include "llvm/IR/DerivedTypes.h"
55 #include "llvm/IR/Dominators.h"
56 #include "llvm/IR/Function.h"
57 #include "llvm/IR/IRBuilder.h"
58 #include "llvm/IR/InstrTypes.h"
59 #include "llvm/IR/Instruction.h"
60 #include "llvm/IR/Instructions.h"
61 #include "llvm/IR/IntrinsicInst.h"
62 #include "llvm/IR/Intrinsics.h"
63 #include "llvm/IR/Module.h"
64 #include "llvm/IR/Operator.h"
65 #include "llvm/IR/PassManager.h"
66 #include "llvm/IR/PatternMatch.h"
67 #include "llvm/IR/Type.h"
68 #include "llvm/IR/Use.h"
69 #include "llvm/IR/User.h"
70 #include "llvm/IR/Value.h"
71 #include "llvm/IR/ValueHandle.h"
72 #include "llvm/InitializePasses.h"
73 #include "llvm/Pass.h"
74 #include "llvm/Support/Casting.h"
75 #include "llvm/Support/CommandLine.h"
76 #include "llvm/Support/Compiler.h"
77 #include "llvm/Support/Debug.h"
78 #include "llvm/Support/ErrorHandling.h"
79 #include "llvm/Support/MathExtras.h"
80 #include "llvm/Support/raw_ostream.h"
81 #include "llvm/Transforms/Scalar.h"
82 #include "llvm/Transforms/Scalar/LoopPassManager.h"
83 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
84 #include "llvm/Transforms/Utils/Local.h"
85 #include "llvm/Transforms/Utils/LoopUtils.h"
86 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
87 #include <cassert>
88 #include <cstdint>
89 #include <utility>
90 
91 using namespace llvm;
92 
93 #define DEBUG_TYPE "indvars"
94 
95 STATISTIC(NumWidened     , "Number of indvars widened");
96 STATISTIC(NumReplaced    , "Number of exit values replaced");
97 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
98 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
99 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
100 
101 // Trip count verification can be enabled by default under NDEBUG if we
102 // implement a strong expression equivalence checker in SCEV. Until then, we
103 // use the verify-indvars flag, which may assert in some cases.
104 static cl::opt<bool> VerifyIndvars(
105     "verify-indvars", cl::Hidden,
106     cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
107              "effect in release builds. (Note: this adds additional SCEV "
108              "queries potentially changing the analysis result)"));
109 
110 static cl::opt<ReplaceExitVal> ReplaceExitValue(
111     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
112     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
113     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
114                clEnumValN(OnlyCheapRepl, "cheap",
115                           "only replace exit value when the cost is cheap"),
116                clEnumValN(NoHardUse, "noharduse",
117                           "only replace exit values when loop def likely dead"),
118                clEnumValN(AlwaysRepl, "always",
119                           "always replace exit value whenever possible")));
120 
121 static cl::opt<bool> UsePostIncrementRanges(
122   "indvars-post-increment-ranges", cl::Hidden,
123   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
124   cl::init(true));
125 
126 static cl::opt<bool>
127 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
128             cl::desc("Disable Linear Function Test Replace optimization"));
129 
130 static cl::opt<bool>
131 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
132                 cl::desc("Predicate conditions in read only loops"));
133 
134 namespace {
135 
136 struct RewritePhi;
137 
138 class IndVarSimplify {
139   LoopInfo *LI;
140   ScalarEvolution *SE;
141   DominatorTree *DT;
142   const DataLayout &DL;
143   TargetLibraryInfo *TLI;
144   const TargetTransformInfo *TTI;
145   std::unique_ptr<MemorySSAUpdater> MSSAU;
146 
147   SmallVector<WeakTrackingVH, 16> DeadInsts;
148 
149   bool handleFloatingPointIV(Loop *L, PHINode *PH);
150   bool rewriteNonIntegerIVs(Loop *L);
151 
152   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to eliminate loop exits based on analyzable exit counts.
154   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
155   /// Try to form loop invariant tests for loop exits by changing how many
156   /// iterations of the loop run when that is unobservable.
157   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
158 
159   bool rewriteFirstIterationLoopExitValues(Loop *L);
160 
161   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
162                                  const SCEV *ExitCount,
163                                  PHINode *IndVar, SCEVExpander &Rewriter);
164 
165   bool sinkUnusedInvariants(Loop *L);
166 
167 public:
168   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
169                  const DataLayout &DL, TargetLibraryInfo *TLI,
170                  TargetTransformInfo *TTI, MemorySSA *MSSA)
171       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
172     if (MSSA)
173       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
174   }
175 
176   bool run(Loop *L);
177 };
178 
179 } // end anonymous namespace
180 
181 /// Determine the insertion point for this user. By default, insert immediately
182 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
183 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
184 /// common dominator for the incoming blocks. A nullptr can be returned if no
185 /// viable location is found: it may happen if User is a PHI and Def only comes
186 /// to this PHI from unreachable blocks.
187 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
188                                           DominatorTree *DT, LoopInfo *LI) {
189   PHINode *PHI = dyn_cast<PHINode>(User);
190   if (!PHI)
191     return User;
192 
193   Instruction *InsertPt = nullptr;
194   for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
195     if (PHI->getIncomingValue(i) != Def)
196       continue;
197 
198     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
199 
200     if (!DT->isReachableFromEntry(InsertBB))
201       continue;
202 
203     if (!InsertPt) {
204       InsertPt = InsertBB->getTerminator();
205       continue;
206     }
207     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
208     InsertPt = InsertBB->getTerminator();
209   }
210 
211   // If we have skipped all inputs, it means that Def only comes to Phi from
212   // unreachable blocks.
213   if (!InsertPt)
214     return nullptr;
215 
216   auto *DefI = dyn_cast<Instruction>(Def);
217   if (!DefI)
218     return InsertPt;
219 
220   assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
221 
222   auto *L = LI->getLoopFor(DefI->getParent());
223   assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
224 
225   for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
226     if (LI->getLoopFor(DTN->getBlock()) == L)
227       return DTN->getBlock()->getTerminator();
228 
229   llvm_unreachable("DefI dominates InsertPt!");
230 }
231 
232 //===----------------------------------------------------------------------===//
233 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
234 //===----------------------------------------------------------------------===//
235 
236 /// Convert APF to an integer, if possible.
237 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
238   bool isExact = false;
239   // See if we can convert this to an int64_t
240   uint64_t UIntVal;
241   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
242                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
243       !isExact)
244     return false;
245   IntVal = UIntVal;
246   return true;
247 }
248 
/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
251 /// For example,
252 /// for(double i = 0; i < 10000; ++i)
253 ///   bar(i)
254 /// is converted into
255 /// for(int i = 0; i < 10000; ++i)
256 ///   bar((double)i);
257 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
258   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
259   unsigned BackEdge     = IncomingEdge^1;
260 
261   // Check incoming value.
262   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
263 
264   int64_t InitValue;
265   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
266     return false;
267 
  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
270   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
271   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
272 
273   // If this is not an add of the PHI with a constantfp, or if the constant fp
274   // is not an integer, bail out.
275   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
276   int64_t IncValue;
277   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
278       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
279     return false;
280 
281   // Check Incr uses. One user is PN and the other user is an exit condition
282   // used by the conditional terminator.
283   Value::user_iterator IncrUse = Incr->user_begin();
284   Instruction *U1 = cast<Instruction>(*IncrUse++);
285   if (IncrUse == Incr->user_end()) return false;
286   Instruction *U2 = cast<Instruction>(*IncrUse++);
287   if (IncrUse != Incr->user_end()) return false;
288 
289   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
290   // only used by a branch, we can't transform it.
291   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
292   if (!Compare)
293     Compare = dyn_cast<FCmpInst>(U2);
294   if (!Compare || !Compare->hasOneUse() ||
295       !isa<BranchInst>(Compare->user_back()))
296     return false;
297 
298   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
299 
300   // We need to verify that the branch actually controls the iteration count
301   // of the loop.  If not, the new IV can overflow and no one will notice.
302   // The branch block must be in the loop and one of the successors must be out
303   // of the loop.
304   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
305   if (!L->contains(TheBr->getParent()) ||
306       (L->contains(TheBr->getSuccessor(0)) &&
307        L->contains(TheBr->getSuccessor(1))))
308     return false;
309 
310   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
311   // transform it.
312   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
313   int64_t ExitValue;
314   if (ExitValueVal == nullptr ||
315       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
316     return false;
317 
318   // Find new predicate for integer comparison.
319   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
320   switch (Compare->getPredicate()) {
321   default: return false;  // Unknown comparison.
322   case CmpInst::FCMP_OEQ:
323   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
324   case CmpInst::FCMP_ONE:
325   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
326   case CmpInst::FCMP_OGT:
327   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
328   case CmpInst::FCMP_OGE:
329   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
330   case CmpInst::FCMP_OLT:
331   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
332   case CmpInst::FCMP_OLE:
333   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
334   }
335 
336   // We convert the floating point induction variable to a signed i32 value if
337   // we can.  This is only safe if the comparison will not overflow in a way
338   // that won't be trapped by the integer equivalent operations.  Check for this
339   // now.
340   // TODO: We could use i64 if it is native and the range requires it.
341 
342   // The start/stride/exit values must all fit in signed i32.
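  // For example, a loop like 'for (double d = 0; d < 3.0e9; ++d)' is left
  // untouched here because its exit value does not fit in a signed i32.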
343   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
344     return false;
345 
346   // If not actually striding (add x, 0.0), avoid touching the code.
347   if (IncValue == 0)
348     return false;
349 
350   // Positive and negative strides have different safety conditions.
351   if (IncValue > 0) {
352     // If we have a positive stride, we require the init to be less than the
353     // exit value.
354     if (InitValue >= ExitValue)
355       return false;
356 
357     uint32_t Range = uint32_t(ExitValue-InitValue);
358     // Check for infinite loop, either:
359     // while (i <= Exit) or until (i > Exit)
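    // For example, InitValue == INT32_MIN with ExitValue == INT32_MAX and an
    // inclusive exit test would cover 2^32 values; Range wraps to 0 below and
    // we conservatively bail out.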
360     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
361       if (++Range == 0) return false;  // Range overflows.
362     }
363 
364     unsigned Leftover = Range % uint32_t(IncValue);
365 
366     // If this is an equality comparison, we require that the strided value
367     // exactly land on the exit value, otherwise the IV condition will wrap
368     // around and do things the fp IV wouldn't.
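    // For example, starting at 0 with a stride of 3, an equality exit test
    // against 10 is never satisfied (0, 3, 6, 9, 12, ...), so the integer IV
    // would wrap around instead of behaving like the original fp IV.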
369     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
370         Leftover != 0)
371       return false;
372 
373     // If the stride would wrap around the i32 before exiting, we can't
374     // transform the IV.
375     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
376       return false;
377   } else {
378     // If we have a negative stride, we require the init to be greater than the
379     // exit value.
380     if (InitValue <= ExitValue)
381       return false;
382 
383     uint32_t Range = uint32_t(InitValue-ExitValue);
384     // Check for infinite loop, either:
385     // while (i >= Exit) or until (i < Exit)
386     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
387       if (++Range == 0) return false;  // Range overflows.
388     }
389 
390     unsigned Leftover = Range % uint32_t(-IncValue);
391 
392     // If this is an equality comparison, we require that the strided value
393     // exactly land on the exit value, otherwise the IV condition will wrap
394     // around and do things the fp IV wouldn't.
395     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
396         Leftover != 0)
397       return false;
398 
399     // If the stride would wrap around the i32 before exiting, we can't
400     // transform the IV.
401     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
402       return false;
403   }
404 
405   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
406 
407   // Insert new integer induction variable.
408   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
409   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
410                       PN->getIncomingBlock(IncomingEdge));
411 
412   Value *NewAdd =
413     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
414                               Incr->getName()+".int", Incr);
415   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
416 
417   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
418                                       ConstantInt::get(Int32Ty, ExitValue),
419                                       Compare->getName());
420 
421   // In the following deletions, PN may become dead and may be deleted.
422   // Use a WeakTrackingVH to observe whether this happens.
423   WeakTrackingVH WeakPH = PN;
424 
425   // Delete the old floating point exit comparison.  The branch starts using the
426   // new comparison.
427   NewCompare->takeName(Compare);
428   Compare->replaceAllUsesWith(NewCompare);
429   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
430 
431   // Delete the old floating point increment.
432   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
433   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
434 
435   // If the FP induction variable still has uses, this is because something else
436   // in the loop uses its value.  In order to canonicalize the induction
  // variable, we choose to eliminate the IV and rewrite it in terms of an
438   // int->fp cast.
439   //
440   // We give preference to sitofp over uitofp because it is faster on most
441   // platforms.
442   if (WeakPH) {
443     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
444                                  &*PN->getParent()->getFirstInsertionPt());
445     PN->replaceAllUsesWith(Conv);
446     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
447   }
448   return true;
449 }
450 
451 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
452   // First step.  Check to see if there are any floating-point recurrences.
453   // If there are, change them into integer recurrences, permitting analysis by
454   // the SCEV routines.
455   BasicBlock *Header = L->getHeader();
456 
457   SmallVector<WeakTrackingVH, 8> PHIs;
458   for (PHINode &PN : Header->phis())
459     PHIs.push_back(&PN);
460 
461   bool Changed = false;
462   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
463     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
464       Changed |= handleFloatingPointIV(L, PN);
465 
  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
469   if (Changed)
470     SE->forgetLoop(L);
471   return Changed;
472 }
473 
474 //===---------------------------------------------------------------------===//
475 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
476 // they will exit at the first iteration.
477 //===---------------------------------------------------------------------===//
478 
/// Check to see if this loop has loop-invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
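/// For instance, assuming a loop of the form
///   for (i = start; ...; ++i)
///     if (loop_invariant_cond) { use(i); break; }
/// the early exit, if taken at all, is taken on the first iteration, so the
/// value of the header phi 'i' on that exit edge is simply 'start'.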
483 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
484   // Verify the input to the pass is already in LCSSA form.
485   assert(L->isLCSSAForm(*DT));
486 
487   SmallVector<BasicBlock *, 8> ExitBlocks;
488   L->getUniqueExitBlocks(ExitBlocks);
489 
490   bool MadeAnyChanges = false;
491   for (auto *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values
    // defined inside the loop are used on this path.
494     for (PHINode &PN : ExitBB->phis()) {
495       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
496            IncomingValIdx != E; ++IncomingValIdx) {
497         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
498 
499         // Can we prove that the exit must run on the first iteration if it
500         // runs at all?  (i.e. early exits are fine for our purposes, but
501         // traces which lead to this exit being taken on the 2nd iteration
502         // aren't.)  Note that this is about whether the exit branch is
503         // executed, not about whether it is taken.
504         if (!L->getLoopLatch() ||
505             !DT->dominates(IncomingBB, L->getLoopLatch()))
506           continue;
507 
508         // Get condition that leads to the exit path.
509         auto *TermInst = IncomingBB->getTerminator();
510 
511         Value *Cond = nullptr;
512         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
513           // Must be a conditional branch, otherwise the block
514           // should not be in the loop.
515           Cond = BI->getCondition();
516         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
517           Cond = SI->getCondition();
518         else
519           continue;
520 
521         if (!L->isLoopInvariant(Cond))
522           continue;
523 
524         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
525 
526         // Only deal with PHIs in the loop header.
527         if (!ExitVal || ExitVal->getParent() != L->getHeader())
528           continue;
529 
530         // If ExitVal is a PHI on the loop header, then we know its
531         // value along this exit because the exit can only be taken
532         // on the first iteration.
533         auto *LoopPreheader = L->getLoopPreheader();
534         assert(LoopPreheader && "Invalid loop");
535         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
536         if (PreheaderIdx != -1) {
537           assert(ExitVal->getParent() == L->getHeader() &&
538                  "ExitVal must be in loop header");
539           MadeAnyChanges = true;
540           PN.setIncomingValue(IncomingValIdx,
541                               ExitVal->getIncomingValue(PreheaderIdx));
542         }
543       }
544     }
545   }
546   return MadeAnyChanges;
547 }
548 
549 //===----------------------------------------------------------------------===//
550 //  IV Widening - Extend the width of an IV to cover its widest uses.
551 //===----------------------------------------------------------------------===//
552 
553 namespace {
554 
555 // Collect information about induction variables that are used by sign/zero
556 // extend operations. This information is recorded by CollectExtend and provides
557 // the input to WidenIV.
558 struct WideIVInfo {
559   PHINode *NarrowIV = nullptr;
560 
  // Widest integer type created by a [sz]ext.
562   Type *WidestNativeType = nullptr;
563 
564   // Was a sext user seen before a zext?
565   bool IsSigned = false;
566 };
567 
568 } // end anonymous namespace
569 
570 /// Update information about the induction variable that is extended by this
571 /// sign or zero extend operation. This is used to determine the final width of
572 /// the IV before actually widening it.
573 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
574                         const TargetTransformInfo *TTI) {
575   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
576   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
577     return;
578 
579   Type *Ty = Cast->getType();
580   uint64_t Width = SE->getTypeSizeInBits(Ty);
581   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
582     return;
583 
584   // Check that `Cast` actually extends the induction variable (we rely on this
585   // later).  This takes care of cases where `Cast` is extending a truncation of
586   // the narrow induction variable, and thus can end up being narrower than the
587   // "narrow" induction variable.
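  // For example, if the narrow IV is an i64 and Cast is 'sext i16 -> i32' of a
  // truncation of that IV, the "extended" type is still narrower than the IV
  // itself and there is nothing useful to record.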
588   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
589   if (NarrowIVWidth >= Width)
590     return;
591 
592   // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute the cost of all instructions on the induction variable
  // more comprehensively when necessary.
598   if (TTI &&
599       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
600           TTI->getArithmeticInstrCost(Instruction::Add,
601                                       Cast->getOperand(0)->getType())) {
602     return;
603   }
604 
605   if (!WI.WidestNativeType) {
606     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
607     WI.IsSigned = IsSigned;
608     return;
609   }
610 
611   // We extend the IV to satisfy the sign of its first user, arbitrarily.
612   if (WI.IsSigned != IsSigned)
613     return;
614 
615   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
616     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
617 }
618 
619 namespace {
620 
621 /// Record a link in the Narrow IV def-use chain along with the WideIV that
622 /// computes the same value as the Narrow IV def.  This avoids caching Use*
623 /// pointers.
624 struct NarrowIVDefUse {
625   Instruction *NarrowDef = nullptr;
626   Instruction *NarrowUse = nullptr;
627   Instruction *WideDef = nullptr;
628 
629   // True if the narrow def is never negative.  Tracking this information lets
630   // us use a sign extension instead of a zero extension or vice versa, when
631   // profitable and legal.
632   bool NeverNegative = false;
633 
634   NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
635                  bool NeverNegative)
636       : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
637         NeverNegative(NeverNegative) {}
638 };
639 
640 /// The goal of this transform is to remove sign and zero extends without
641 /// creating any new induction variables. To do this, it creates a new phi of
642 /// the wider type and redirects all users, either removing extends or inserting
643 /// truncs whenever we stop propagating the type.
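/// For example (illustrative IR), when widening an i32 IV whose value is sign
/// extended to i64:
///
///   %iv      = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
///   %iv.next = add nsw i32 %iv, 1
///   %idx     = sext i32 %iv to i64
///
/// the transform creates a single i64 recurrence and the sext disappears:
///
///   %iv.wide      = phi i64 [ 0, %preheader ], [ %iv.wide.next, %loop ]
///   %iv.wide.next = add nsw i64 %iv.wide, 1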
644 class WidenIV {
645   // Parameters
646   PHINode *OrigPhi;
647   Type *WideType;
648 
649   // Context
650   LoopInfo        *LI;
651   Loop            *L;
652   ScalarEvolution *SE;
653   DominatorTree   *DT;
654 
655   // Does the module have any calls to the llvm.experimental.guard intrinsic
656   // at all? If not we can avoid scanning instructions looking for guards.
657   bool HasGuards;
658 
659   // Result
660   PHINode *WidePhi = nullptr;
661   Instruction *WideInc = nullptr;
662   const SCEV *WideIncExpr = nullptr;
663   SmallVectorImpl<WeakTrackingVH> &DeadInsts;
664 
665   SmallPtrSet<Instruction *,16> Widened;
666   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
667 
668   enum ExtendKind { ZeroExtended, SignExtended, Unknown };
669 
670   // A map tracking the kind of extension used to widen each narrow IV
671   // and narrow IV user.
672   // Key: pointer to a narrow IV or IV user.
673   // Value: the kind of extension used to widen this Instruction.
674   DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
675 
676   using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
677 
678   // A map with control-dependent ranges for post increment IV uses. The key is
679   // a pair of IV def and a use of this def denoting the context. The value is
680   // a ConstantRange representing possible values of the def at the given
681   // context.
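  // For example, in a use guarded by 'if (i < n)', the value of 'i + 1' at
  // that use is known to be at most 'n', which can make a sign/zero extension
  // of the post-increment value provably safe.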
682   DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
683 
684   Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
685                                               Instruction *UseI) {
686     DefUserPair Key(Def, UseI);
687     auto It = PostIncRangeInfos.find(Key);
688     return It == PostIncRangeInfos.end()
689                ? Optional<ConstantRange>(None)
690                : Optional<ConstantRange>(It->second);
691   }
692 
693   void calculatePostIncRanges(PHINode *OrigPhi);
694   void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
695 
696   void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
697     DefUserPair Key(Def, UseI);
698     auto It = PostIncRangeInfos.find(Key);
699     if (It == PostIncRangeInfos.end())
700       PostIncRangeInfos.insert({Key, R});
701     else
702       It->second = R.intersectWith(It->second);
703   }
704 
705 public:
706   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
707           DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
708           bool HasGuards)
709       : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
710         L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
711         HasGuards(HasGuards), DeadInsts(DI) {
712     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
713     ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
714   }
715 
716   PHINode *createWideIV(SCEVExpander &Rewriter);
717 
718 protected:
719   Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
720                           Instruction *Use);
721 
722   Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
723   Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
724                                      const SCEVAddRecExpr *WideAR);
725   Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
726 
727   ExtendKind getExtendKind(Instruction *I);
728 
729   using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
730 
731   WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
732 
733   WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
734 
735   const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
736                               unsigned OpCode) const;
737 
738   Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
739 
740   bool widenLoopCompare(NarrowIVDefUse DU);
741   bool widenWithVariantLoadUse(NarrowIVDefUse DU);
742   void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
743 
744   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
745 };
746 
747 } // end anonymous namespace
748 
749 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
750                                  bool IsSigned, Instruction *Use) {
751   // Set the debug location and conservative insertion point.
752   IRBuilder<> Builder(Use);
753   // Hoist the insertion point into loop preheaders as far as possible.
754   for (const Loop *L = LI->getLoopFor(Use->getParent());
755        L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
756        L = L->getParentLoop())
757     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
758 
759   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
760                     Builder.CreateZExt(NarrowOper, WideType);
761 }
762 
/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to a SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
766 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
767                                   const SCEVAddRecExpr *WideAR) {
768   unsigned Opcode = DU.NarrowUse->getOpcode();
769   switch (Opcode) {
770   default:
771     return nullptr;
772   case Instruction::Add:
773   case Instruction::Mul:
774   case Instruction::UDiv:
775   case Instruction::Sub:
776     return cloneArithmeticIVUser(DU, WideAR);
777 
778   case Instruction::And:
779   case Instruction::Or:
780   case Instruction::Xor:
781   case Instruction::Shl:
782   case Instruction::LShr:
783   case Instruction::AShr:
784     return cloneBitwiseIVUser(DU);
785   }
786 }
787 
788 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
789   Instruction *NarrowUse = DU.NarrowUse;
790   Instruction *NarrowDef = DU.NarrowDef;
791   Instruction *WideDef = DU.WideDef;
792 
793   LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
794 
795   // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
796   // about the narrow operand yet so must insert a [sz]ext. It is probably loop
797   // invariant and will be folded or hoisted. If it actually comes from a
798   // widened IV, it should be removed during a future call to widenIVUse.
799   bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
800   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
801                    ? WideDef
802                    : createExtendInst(NarrowUse->getOperand(0), WideType,
803                                       IsSigned, NarrowUse);
804   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
805                    ? WideDef
806                    : createExtendInst(NarrowUse->getOperand(1), WideType,
807                                       IsSigned, NarrowUse);
808 
809   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
810   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
811                                         NarrowBO->getName());
812   IRBuilder<> Builder(NarrowUse);
813   Builder.Insert(WideBO);
814   WideBO->copyIRFlags(NarrowBO);
815   return WideBO;
816 }
817 
818 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
819                                             const SCEVAddRecExpr *WideAR) {
820   Instruction *NarrowUse = DU.NarrowUse;
821   Instruction *NarrowDef = DU.NarrowDef;
822   Instruction *WideDef = DU.WideDef;
823 
824   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
825 
826   unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
827 
828   // We're trying to find X such that
829   //
830   //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
831   //
832   // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
833   // and check using SCEV if any of them are correct.
834 
835   // Returns true if extending NonIVNarrowDef according to `SignExt` is a
836   // correct solution to X.
837   auto GuessNonIVOperand = [&](bool SignExt) {
838     const SCEV *WideLHS;
839     const SCEV *WideRHS;
840 
841     auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
842       if (SignExt)
843         return SE->getSignExtendExpr(S, Ty);
844       return SE->getZeroExtendExpr(S, Ty);
845     };
846 
847     if (IVOpIdx == 0) {
848       WideLHS = SE->getSCEV(WideDef);
849       const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
850       WideRHS = GetExtend(NarrowRHS, WideType);
851     } else {
852       const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
853       WideLHS = GetExtend(NarrowLHS, WideType);
854       WideRHS = SE->getSCEV(WideDef);
855     }
856 
857     // WideUse is "WideDef `op.wide` X" as described in the comment.
858     const SCEV *WideUse = nullptr;
859 
860     switch (NarrowUse->getOpcode()) {
861     default:
862       llvm_unreachable("No other possibility!");
863 
864     case Instruction::Add:
865       WideUse = SE->getAddExpr(WideLHS, WideRHS);
866       break;
867 
868     case Instruction::Mul:
869       WideUse = SE->getMulExpr(WideLHS, WideRHS);
870       break;
871 
872     case Instruction::UDiv:
873       WideUse = SE->getUDivExpr(WideLHS, WideRHS);
874       break;
875 
876     case Instruction::Sub:
877       WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
878       break;
879     }
880 
881     return WideUse == WideAR;
882   };
883 
884   bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
885   if (!GuessNonIVOperand(SignExtend)) {
886     SignExtend = !SignExtend;
887     if (!GuessNonIVOperand(SignExtend))
888       return nullptr;
889   }
890 
891   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
892                    ? WideDef
893                    : createExtendInst(NarrowUse->getOperand(0), WideType,
894                                       SignExtend, NarrowUse);
895   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
896                    ? WideDef
897                    : createExtendInst(NarrowUse->getOperand(1), WideType,
898                                       SignExtend, NarrowUse);
899 
900   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
901   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
902                                         NarrowBO->getName());
903 
904   IRBuilder<> Builder(NarrowUse);
905   Builder.Insert(WideBO);
906   WideBO->copyIRFlags(NarrowBO);
907   return WideBO;
908 }
909 
910 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
911   auto It = ExtendKindMap.find(I);
912   assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
913   return It->second;
914 }
915 
916 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
917                                      unsigned OpCode) const {
918   if (OpCode == Instruction::Add)
919     return SE->getAddExpr(LHS, RHS);
920   if (OpCode == Instruction::Sub)
921     return SE->getMinusSCEV(LHS, RHS);
922   if (OpCode == Instruction::Mul)
923     return SE->getMulExpr(LHS, RHS);
924 
925   llvm_unreachable("Unsupported opcode.");
926 }
927 
928 /// No-wrap operations can transfer sign extension of their result to their
929 /// operands. Generate the SCEV value for the widened operation without
930 /// actually modifying the IR yet. If the expression after extending the
931 /// operands is an AddRec for this loop, return the AddRec and the kind of
932 /// extension used.
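/// For example, for an 'add nsw i32 %iv, %x', the no-signed-wrap flag gives
/// sext(%iv + %x) == sext(%iv) + sext(%x), so extending the non-IV operand
/// can still yield an AddRec of the wide type.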
933 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
934   // Handle the common case of add<nsw/nuw>
935   const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported so far.
937   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
938       OpCode != Instruction::Mul)
939     return {nullptr, Unknown};
940 
941   // One operand (NarrowDef) has already been extended to WideDef. Now determine
942   // if extending the other will lead to a recurrence.
943   const unsigned ExtendOperIdx =
944       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
945   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
946 
947   const SCEV *ExtendOperExpr = nullptr;
948   const OverflowingBinaryOperator *OBO =
949     cast<OverflowingBinaryOperator>(DU.NarrowUse);
950   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
951   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
952     ExtendOperExpr = SE->getSignExtendExpr(
953       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
954   else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
955     ExtendOperExpr = SE->getZeroExtendExpr(
956       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
957   else
958     return {nullptr, Unknown};
959 
  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
965   const SCEV *lhs = SE->getSCEV(DU.WideDef);
966   const SCEV *rhs = ExtendOperExpr;
967 
968   // Let's swap operands to the initial order for the case of non-commutative
969   // operations, like SUB. See PR21014.
970   if (ExtendOperIdx == 0)
971     std::swap(lhs, rhs);
972   const SCEVAddRecExpr *AddRec =
973       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
974 
975   if (!AddRec || AddRec->getLoop() != L)
976     return {nullptr, Unknown};
977 
978   return {AddRec, ExtKind};
979 }
980 
/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop, with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
986 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
987   if (!SE->isSCEVable(DU.NarrowUse->getType()))
988     return {nullptr, Unknown};
989 
990   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
991   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
992       SE->getTypeSizeInBits(WideType)) {
993     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
994     // index. So don't follow this use.
995     return {nullptr, Unknown};
996   }
997 
998   const SCEV *WideExpr;
999   ExtendKind ExtKind;
1000   if (DU.NeverNegative) {
1001     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1002     if (isa<SCEVAddRecExpr>(WideExpr))
1003       ExtKind = SignExtended;
1004     else {
1005       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1006       ExtKind = ZeroExtended;
1007     }
1008   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1009     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1010     ExtKind = SignExtended;
1011   } else {
1012     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1013     ExtKind = ZeroExtended;
1014   }
1015   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1016   if (!AddRec || AddRec->getLoop() != L)
1017     return {nullptr, Unknown};
1018   return {AddRec, ExtKind};
1019 }
1020 
1021 /// This IV user cannot be widened. Replace this use of the original narrow IV
1022 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1023 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1024   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1025   if (!InsertPt)
1026     return;
1027   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1028                     << *DU.NarrowUse << "\n");
1029   IRBuilder<> Builder(InsertPt);
1030   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1031   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1032 }
1033 
/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
1037 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1038   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1039   if (!Cmp)
1040     return false;
1041 
1042   // We can legally widen the comparison in the following two cases:
1043   //
1044   //  - The signedness of the IV extension and comparison match
1045   //
1046   //  - The narrow IV is always positive (and thus its sign extension is equal
1047   //    to its zero extension).  For instance, let's say we're zero extending
1048   //    %narrow for the following use
1049   //
1050   //      icmp slt i32 %narrow, %val   ... (A)
1051   //
1052   //    and %narrow is always positive.  Then
1053   //
1054   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1055   //          == icmp slt i32 zext(%narrow), sext(%val)
1056   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1057   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1058     return false;
1059 
1060   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1061   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1062   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1063   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1064 
1065   // Widen the compare instruction.
1066   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1067   if (!InsertPt)
1068     return false;
1069   IRBuilder<> Builder(InsertPt);
1070   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1071 
1072   // Widen the other operand of the compare, if necessary.
1073   if (CastWidth < IVWidth) {
1074     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1075     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1076   }
1077   return true;
1078 }
1079 
/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, we handle the following special
/// case: when the load is hoisted outside the loop, we never reach this
/// function, because scalar evolution analysis works fine in widenIVUse with
/// loop-invariant operands and efficient code is generated without emitting
/// truncate instructions. But when the load is not hoisted (whether due to a
/// limitation of alias analysis or a true legality constraint), scalar
/// evolution cannot proceed with a loop-variant value and inefficient code is
/// generated. This function handles that non-hoisted-load case by making the
/// optimization generate the same kind of code for hoisted and non-hoisted
/// loads (widen the use and eliminate the sign/zero extend instruction). This
/// is important especially when the induction variables affect the addressing
/// mode in code generation.
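/// A typical (illustrative) pattern is:
///   %val = load i32, i32* %p
///   %sum = add nsw i32 %iv, %val
///   %idx = sext i32 %sum to i64
/// where the load stays inside the loop; widening %sum directly lets the sext
/// be removed instead of truncating the widened IV.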
1093 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1094   Instruction *NarrowUse = DU.NarrowUse;
1095   Instruction *NarrowDef = DU.NarrowDef;
1096   Instruction *WideDef = DU.WideDef;
1097 
1098   // Handle the common case of add<nsw/nuw>
1099   const unsigned OpCode = NarrowUse->getOpcode();
1100   // Only Add/Sub/Mul instructions are supported.
1101   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1102       OpCode != Instruction::Mul)
1103     return false;
1104 
1105   // The operand that is not defined by NarrowDef of DU. Let's call it the
1106   // other operand.
1107   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1108   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1109          "bad DU");
1110 
1111   const SCEV *ExtendOperExpr = nullptr;
1112   const OverflowingBinaryOperator *OBO =
1113     cast<OverflowingBinaryOperator>(NarrowUse);
1114   ExtendKind ExtKind = getExtendKind(NarrowDef);
1115   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1116     ExtendOperExpr = SE->getSignExtendExpr(
1117       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1118   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1119     ExtendOperExpr = SE->getZeroExtendExpr(
1120       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1121   else
1122     return false;
1123 
1124   // We are interested in the other operand being a load instruction.
1125   // But, we should look into relaxing this restriction later on.
1126   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1127   if (I && I->getOpcode() != Instruction::Load)
1128     return false;
1129 
  // Verify that the defining operand is an AddRec.
1131   const SCEV *Op1 = SE->getSCEV(WideDef);
1132   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1133   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1134     return false;
  // Verify that the other operand is an extend.
1136   if (ExtKind == SignExtended) {
1137     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1138       return false;
1139   } else {
1140     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1141       return false;
1142   }
1143 
1144   if (ExtKind == SignExtended) {
1145     for (Use &U : NarrowUse->uses()) {
1146       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1147       if (!User || User->getType() != WideType)
1148         return false;
1149     }
1150   } else { // ExtKind == ZeroExtended
1151     for (Use &U : NarrowUse->uses()) {
1152       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1153       if (!User || User->getType() != WideType)
1154         return false;
1155     }
1156   }
1157 
1158   return true;
1159 }
1160 
/// Special case for widening with variant loads (see
/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1163 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1164   Instruction *NarrowUse = DU.NarrowUse;
1165   Instruction *NarrowDef = DU.NarrowDef;
1166   Instruction *WideDef = DU.WideDef;
1167 
1168   ExtendKind ExtKind = getExtendKind(NarrowDef);
1169 
1170   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1171 
1172   // Generating a widening use instruction.
1173   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1174                    ? WideDef
1175                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1176                                       ExtKind, NarrowUse);
1177   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1178                    ? WideDef
1179                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1180                                       ExtKind, NarrowUse);
1181 
1182   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1183   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1184                                         NarrowBO->getName());
1185   IRBuilder<> Builder(NarrowUse);
1186   Builder.Insert(WideBO);
1187   WideBO->copyIRFlags(NarrowBO);
1188 
1189   assert(ExtKind != Unknown && "Unknown ExtKind not handled");
1190 
1191   ExtendKindMap[NarrowUse] = ExtKind;
1192 
1193   for (Use &U : NarrowUse->uses()) {
1194     Instruction *User = nullptr;
1195     if (ExtKind == SignExtended)
1196       User = dyn_cast<SExtInst>(U.getUser());
1197     else
1198       User = dyn_cast<ZExtInst>(U.getUser());
1199     if (User && User->getType() == WideType) {
1200       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1201                         << *WideBO << "\n");
1202       ++NumElimExt;
1203       User->replaceAllUsesWith(WideBO);
1204       DeadInsts.emplace_back(User);
1205     }
1206   }
1207 }
1208 
1209 /// Determine whether an individual user of the narrow IV can be widened. If so,
1210 /// return the wide clone of the user.
1211 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1212   assert(ExtendKindMap.count(DU.NarrowDef) &&
1213          "Should already know the kind of extension used to widen NarrowDef");
1214 
1215   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1216   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1217     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1218       // For LCSSA phis, sink the truncate outside the loop.
1219       // After SimplifyCFG most loop exit targets have a single predecessor.
1220       // Otherwise fall back to a truncate within the loop.
1221       if (UsePhi->getNumOperands() != 1)
1222         truncateIVUse(DU, DT, LI);
1223       else {
1224         // Widening the PHI requires us to insert a trunc.  The logical place
1225         // for this trunc is in the same BB as the PHI.  This is not possible if
1226         // the BB is terminated by a catchswitch.
1227         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1228           return nullptr;
1229 
1230         PHINode *WidePhi =
1231           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1232                           UsePhi);
1233         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1234         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1235         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1236         UsePhi->replaceAllUsesWith(Trunc);
1237         DeadInsts.emplace_back(UsePhi);
1238         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1239                           << *WidePhi << "\n");
1240       }
1241       return nullptr;
1242     }
1243   }
1244 
  // This narrow use can be widened by a sext if it's non-negative or its narrow
  // def was widened by a sext. Same for zext.
1247   auto canWidenBySExt = [&]() {
1248     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1249   };
1250   auto canWidenByZExt = [&]() {
1251     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1252   };
1253 
1254   // Our raison d'etre! Eliminate sign and zero extension.
1255   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1256       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1257     Value *NewDef = DU.WideDef;
1258     if (DU.NarrowUse->getType() != WideType) {
1259       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1260       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1261       if (CastWidth < IVWidth) {
1262         // The cast isn't as wide as the IV, so insert a Trunc.
1263         IRBuilder<> Builder(DU.NarrowUse);
1264         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1265       }
1266       else {
1267         // A wider extend was hidden behind a narrower one. This may induce
1268         // another round of IV widening in which the intermediate IV becomes
1269         // dead. It should be very rare.
1270         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1271                           << " not wide enough to subsume " << *DU.NarrowUse
1272                           << "\n");
1273         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1274         NewDef = DU.NarrowUse;
1275       }
1276     }
1277     if (NewDef != DU.NarrowUse) {
1278       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1279                         << " replaced by " << *DU.WideDef << "\n");
1280       ++NumElimExt;
1281       DU.NarrowUse->replaceAllUsesWith(NewDef);
1282       DeadInsts.emplace_back(DU.NarrowUse);
1283     }
    // Now that the extend is gone, we want to expose its uses for potential
1285     // further simplification. We don't need to directly inform SimplifyIVUsers
1286     // of the new users, because their parent IV will be processed later as a
1287     // new loop phi. If we preserved IVUsers analysis, we would also want to
1288     // push the uses of WideDef here.
1289 
1290     // No further widening is needed. The deceased [sz]ext had done it for us.
1291     return nullptr;
1292   }
1293 
1294   // Does this user itself evaluate to a recurrence after widening?
1295   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1296   if (!WideAddRec.first)
1297     WideAddRec = getWideRecurrence(DU);
1298 
1299   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1300   if (!WideAddRec.first) {
1301     // If use is a loop condition, try to promote the condition instead of
1302     // truncating the IV first.
1303     if (widenLoopCompare(DU))
1304       return nullptr;
1305 
    // We are about to generate a truncate instruction that may hurt
1307     // performance because the scalar evolution expression computed earlier
1308     // in WideAddRec.first does not indicate a polynomial induction expression.
1309     // In that case, look at the operands of the use instruction to determine
1310     // if we can still widen the use instead of truncating its operand.
1311     if (widenWithVariantLoadUse(DU)) {
1312       widenWithVariantLoadUseCodegen(DU);
1313       return nullptr;
1314     }
1315 
1316     // This user does not evaluate to a recurrence after widening, so don't
1317     // follow it. Instead insert a Trunc to kill off the original use,
1318     // eventually isolating the original narrow IV so it can be removed.
1319     truncateIVUse(DU, DT, LI);
1320     return nullptr;
1321   }
1322   // Assume block terminators cannot evaluate to a recurrence. We can't
1323   // insert a Trunc after a terminator if there happens to be a critical edge.
1324   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1325          "SCEV is not expected to evaluate a block terminator");
1326 
1327   // Reuse the IV increment that SCEVExpander created as long as it dominates
1328   // NarrowUse.
1329   Instruction *WideUse = nullptr;
1330   if (WideAddRec.first == WideIncExpr &&
1331       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1332     WideUse = WideInc;
1333   else {
1334     WideUse = cloneIVUser(DU, WideAddRec.first);
1335     if (!WideUse)
1336       return nullptr;
1337   }
1338   // Evaluation of WideAddRec ensured that the narrow expression could be
1339   // extended outside the loop without overflow. This suggests that the wide use
1340   // evaluates to the same expression as the extended narrow use, but doesn't
1341   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1342   // where it fails, we simply throw away the newly created wide use.
1343   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1344     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1345                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1346                       << "\n");
1347     DeadInsts.emplace_back(WideUse);
1348     return nullptr;
1349   }
1350 
1351   // If we reached this point then we are going to replace
1352   // DU.NarrowUse with WideUse, so reattach any debug values now.
1353   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1354 
1355   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1356   // Returning WideUse pushes it on the worklist.
1357   return WideUse;
1358 }
1359 
1360 /// Add eligible users of NarrowDef to NarrowIVUsers.
1361 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1362   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1363   bool NonNegativeDef =
1364       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1365                            SE->getConstant(NarrowSCEV->getType(), 0));
1366   for (User *U : NarrowDef->users()) {
1367     Instruction *NarrowUser = cast<Instruction>(U);
1368 
1369     // Handle data flow merges and bizarre phi cycles.
1370     if (!Widened.insert(NarrowUser).second)
1371       continue;
1372 
1373     bool NonNegativeUse = false;
1374     if (!NonNegativeDef) {
1375       // We might have control-dependent range information for this context.
1376       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1377         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1378     }
1379 
1380     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1381                                NonNegativeDef || NonNegativeUse);
1382   }
1383 }
1384 
1385 /// Process a single induction variable. First use the SCEVExpander to create a
1386 /// wide induction variable that evaluates to the same recurrence as the
1387 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1388 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1389 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1390 ///
1391 /// It would be simpler to delete uses as they are processed, but we must avoid
1392 /// invalidating SCEV expressions.
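///
/// As a rough illustration (hypothetical source, not from a test): in a loop
/// such as
///   for (int i = 0; i < n; ++i) a[(long)i] = 0;
/// the i32 induction variable is sign-extended on every iteration; widening
/// replaces it with a single i64 IV so the per-iteration extension disappears.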
1393 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1394   // Is this phi an induction variable?
1395   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1396   if (!AddRec)
1397     return nullptr;
1398 
1399   // Widen the induction variable expression.
1400   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1401                                ? SE->getSignExtendExpr(AddRec, WideType)
1402                                : SE->getZeroExtendExpr(AddRec, WideType);
1403 
1404   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1405          "Expect the new IV expression to preserve its type");
1406 
1407   // Can the IV be extended outside the loop without overflow?
1408   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1409   if (!AddRec || AddRec->getLoop() != L)
1410     return nullptr;
1411 
1412   // An AddRec must have loop-invariant operands. Since this AddRec is
1413   // materialized by a loop header phi, the expression cannot have any post-loop
1414   // operands, so they must dominate the loop header.
1415   assert(
1416       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1417       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1418       "Loop header phi recurrence inputs do not dominate the loop");
1419 
1420   // Iterate over IV uses (including transitive ones) looking for IV increments
1421   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1422   // the increment, calculate control-dependent range information based on
1423   // dominating conditions inside of the loop (e.g. a range check inside of the
1424   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1425   //
1426   // Control-dependent range information is later used to prove that a narrow
1427   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1428   // this on demand because when pushNarrowIVUsers needs this information some
1429   // of the dominating conditions might be already widened.
1430   if (UsePostIncrementRanges)
1431     calculatePostIncRanges(OrigPhi);
1432 
1433   // The rewriter provides a value for the desired IV expression. This may
1434   // either find an existing phi or materialize a new one. Either way, we
1435   // expect a well-formed cyclic phi-with-increments, i.e. any operand not part
1436   // of the phi-SCC dominates the loop entry.
1437   Instruction *InsertPt = &L->getHeader()->front();
1438   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1439 
1440   // Remembering the WideIV increment generated by SCEVExpander allows
1441   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1442   // employ a general reuse mechanism because the call above is the only call to
1443   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1444   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1445     WideInc =
1446       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1447     WideIncExpr = SE->getSCEV(WideInc);
1448     // Propagate the debug location associated with the original loop increment
1449     // to the new (widened) increment.
1450     auto *OrigInc =
1451       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1452     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1453   }
1454 
1455   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1456   ++NumWidened;
1457 
1458   // Traverse the def-use chain using a worklist starting at the original IV.
1459   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1460 
1461   Widened.insert(OrigPhi);
1462   pushNarrowIVUsers(OrigPhi, WidePhi);
1463 
1464   while (!NarrowIVUsers.empty()) {
1465     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1466 
1467     // Process a def-use edge. This may replace the use, so don't hold a
1468     // use_iterator across it.
1469     Instruction *WideUse = widenIVUse(DU, Rewriter);
1470 
1471     // Follow all def-use edges from the previous narrow use.
1472     if (WideUse)
1473       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1474 
1475     // widenIVUse may have removed the def-use edge.
1476     if (DU.NarrowDef->use_empty())
1477       DeadInsts.emplace_back(DU.NarrowDef);
1478   }
1479 
1480   // Attach any debug information to the new PHI.
1481   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1482 
1483   return WidePhi;
1484 }
1485 
1486 /// Calculates control-dependent range for the given def at the given context
1487 /// by looking at dominating conditions inside the loop.
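///
/// A hedged example: for an increment such as
///   %inc = add nsw i32 %iv, 1
/// reached only under a dominating check like 'icmp sgt i32 %iv, -1', the
/// range computed for %inc at that use is known non-negative even when the
/// IV itself is not.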
1488 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1489                                     Instruction *NarrowUser) {
1490   using namespace llvm::PatternMatch;
1491 
1492   Value *NarrowDefLHS;
1493   const APInt *NarrowDefRHS;
1494   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1495                                  m_APInt(NarrowDefRHS))) ||
1496       !NarrowDefRHS->isNonNegative())
1497     return;
1498 
1499   auto UpdateRangeFromCondition = [&] (Value *Condition,
1500                                        bool TrueDest) {
1501     CmpInst::Predicate Pred;
1502     Value *CmpRHS;
1503     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1504                                  m_Value(CmpRHS))))
1505       return;
1506 
1507     CmpInst::Predicate P =
1508             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1509 
1510     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1511     auto CmpConstrainedLHSRange =
1512             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1513     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1514         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1515 
1516     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1517   };
1518 
1519   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1520     if (!HasGuards)
1521       return;
1522 
1523     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1524                                      Ctx->getParent()->rend())) {
1525       Value *C = nullptr;
1526       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1527         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1528     }
1529   };
1530 
1531   UpdateRangeFromGuards(NarrowUser);
1532 
1533   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1534   // If NarrowUserBB is statically unreachable, asking dominator queries may
1535   // yield surprising results. (e.g. the block may not have a dom tree node)
1536   if (!DT->isReachableFromEntry(NarrowUserBB))
1537     return;
1538 
1539   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1540        L->contains(DTB->getBlock());
1541        DTB = DTB->getIDom()) {
1542     auto *BB = DTB->getBlock();
1543     auto *TI = BB->getTerminator();
1544     UpdateRangeFromGuards(TI);
1545 
1546     auto *BI = dyn_cast<BranchInst>(TI);
1547     if (!BI || !BI->isConditional())
1548       continue;
1549 
1550     auto *TrueSuccessor = BI->getSuccessor(0);
1551     auto *FalseSuccessor = BI->getSuccessor(1);
1552 
1553     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1554       return BBE.isSingleEdge() &&
1555              DT->dominates(BBE, NarrowUser->getParent());
1556     };
1557 
1558     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1559       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1560 
1561     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1562       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1563   }
1564 }
1565 
1566 /// Calculates the PostIncRangeInfos map for the given IV.
1567 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1568   SmallPtrSet<Instruction *, 16> Visited;
1569   SmallVector<Instruction *, 6> Worklist;
1570   Worklist.push_back(OrigPhi);
1571   Visited.insert(OrigPhi);
1572 
1573   while (!Worklist.empty()) {
1574     Instruction *NarrowDef = Worklist.pop_back_val();
1575 
1576     for (Use &U : NarrowDef->uses()) {
1577       auto *NarrowUser = cast<Instruction>(U.getUser());
1578 
1579       // Don't go looking outside the current loop.
1580       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1581       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1582         continue;
1583 
1584       if (!Visited.insert(NarrowUser).second)
1585         continue;
1586 
1587       Worklist.push_back(NarrowUser);
1588 
1589       calculatePostIncRange(NarrowDef, NarrowUser);
1590     }
1591   }
1592 }
1593 
1594 //===----------------------------------------------------------------------===//
1595 //  Live IV Reduction - Minimize IVs live across the loop.
1596 //===----------------------------------------------------------------------===//
1597 
1598 //===----------------------------------------------------------------------===//
1599 //  Simplification of IV users based on SCEV evaluation.
1600 //===----------------------------------------------------------------------===//
1601 
1602 namespace {
1603 
1604 class IndVarSimplifyVisitor : public IVVisitor {
1605   ScalarEvolution *SE;
1606   const TargetTransformInfo *TTI;
1607   PHINode *IVPhi;
1608 
1609 public:
1610   WideIVInfo WI;
1611 
1612   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1613                         const TargetTransformInfo *TTI,
1614                         const DominatorTree *DTree)
1615     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1616     DT = DTree;
1617     WI.NarrowIV = IVPhi;
1618   }
1619 
1620   // Implement the interface used by simplifyUsersOfIV.
1621   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1622 };
1623 
1624 } // end anonymous namespace
1625 
1626 /// Iteratively perform simplification on a worklist of IV users. Each
1627 /// successive simplification may push more users which may themselves be
1628 /// candidates for simplification.
1629 ///
1630 /// Sign/Zero extend elimination is interleaved with IV simplification.
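///
/// Concretely, each outer round drains the current worklist of header phis
/// through simplifyUsersOfIV, collects any IVs whose users want a wider
/// native type, widens those IVs, and queues the resulting wide phis for
/// another round of simplification.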
1631 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1632                                        SCEVExpander &Rewriter,
1633                                        LoopInfo *LI) {
1634   SmallVector<WideIVInfo, 8> WideIVs;
1635 
1636   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1637           Intrinsic::getName(Intrinsic::experimental_guard));
1638   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1639 
1640   SmallVector<PHINode*, 8> LoopPhis;
1641   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1642     LoopPhis.push_back(cast<PHINode>(I));
1643   }
1644   // Each round of simplification iterates through the SimplifyIVUsers worklist
1645   // for all current phis, then determines whether any IVs can be
1646   // widened. Widening adds new phis to LoopPhis, inducing another round of
1647   // simplification on the wide IVs.
1648   bool Changed = false;
1649   while (!LoopPhis.empty()) {
1650     // Evaluate as many IV expressions as possible before widening any IVs. This
1651     // forces SCEV to set no-wrap flags before evaluating sign/zero
1652     // extension. The first time SCEV attempts to normalize sign/zero extension,
1653     // the result becomes final. So for the most predictable results, we delay
1654     // evaluation of sign/zero extension until needed, and avoid running
1655     // other SCEV based analysis prior to simplifyAndExtend.
1656     do {
1657       PHINode *CurrIV = LoopPhis.pop_back_val();
1658 
1659       // Information about sign/zero extensions of CurrIV.
1660       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1661 
1662       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
1663                                    &Visitor);
1664 
1665       if (Visitor.WI.WidestNativeType) {
1666         WideIVs.push_back(Visitor.WI);
1667       }
1668     } while (!LoopPhis.empty());
1669 
1670     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1671       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1672       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1673         Changed = true;
1674         LoopPhis.push_back(WidePhi);
1675       }
1676     }
1677   }
1678   return Changed;
1679 }
1680 
1681 //===----------------------------------------------------------------------===//
1682 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1683 //===----------------------------------------------------------------------===//
1684 
1685 /// Given a Value which is hoped to be part of an add recurrence in the given
1686 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1687 /// that this is less general than SCEV's AddRec checking.
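///
/// For instance (a simple sketch): given
///   %iv = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i32 %iv, %step        ; %step loop-invariant
/// passing %iv.next returns %iv; values not of this add/sub/GEP shape return
/// null.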
1688 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1689   Instruction *IncI = dyn_cast<Instruction>(IncV);
1690   if (!IncI)
1691     return nullptr;
1692 
1693   switch (IncI->getOpcode()) {
1694   case Instruction::Add:
1695   case Instruction::Sub:
1696     break;
1697   case Instruction::GetElementPtr:
1698     // An IV counter must preserve its type.
1699     if (IncI->getNumOperands() == 2)
1700       break;
1701     LLVM_FALLTHROUGH;
1702   default:
1703     return nullptr;
1704   }
1705 
1706   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1707   if (Phi && Phi->getParent() == L->getHeader()) {
1708     if (L->isLoopInvariant(IncI->getOperand(1)))
1709       return Phi;
1710     return nullptr;
1711   }
1712   if (IncI->getOpcode() == Instruction::GetElementPtr)
1713     return nullptr;
1714 
1715   // Allow add/sub to be commuted.
1716   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1717   if (Phi && Phi->getParent() == L->getHeader()) {
1718     if (L->isLoopInvariant(IncI->getOperand(0)))
1719       return Phi;
1720   }
1721   return nullptr;
1722 }
1723 
1724 /// Whether the current loop exit test is based on this value.  Currently this
1725 /// is limited to a direct use in the loop condition.
1726 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
1727   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1728   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1729   // TODO: Allow non-icmp loop test.
1730   if (!ICmp)
1731     return false;
1732 
1733   // TODO: Allow indirect use.
1734   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
1735 }
1736 
1737 /// linearFunctionTestReplace policy. Return true unless we can show that the
1738 /// current exit test is already sufficiently canonical.
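///
/// For example (informal): an exit test of the form
///   icmp ne i32 %iv.next, %limit
/// with a loop-invariant %limit and a simple counter %iv is already canonical,
/// so this returns false; a test such as 'icmp slt i32 %iv, %n' still needs
/// LFTR and returns true.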
1739 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
1740   assert(L->getLoopLatch() && "Must be in simplified form");
1741 
1742   // Avoid converting a constant or loop invariant test back to a runtime
1743   // test.  This is critical for when SCEV's cached ExitCount is less precise
1744   // than the current IR (such as after we've proven a particular exit is
1745   // actually dead and thus the BE count never reaches our ExitCount.)
1746   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1747   if (L->isLoopInvariant(BI->getCondition()))
1748     return false;
1749 
1750   // Do LFTR to simplify the exit condition to an ICMP.
1751   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1752   if (!Cond)
1753     return true;
1754 
1755   // Do LFTR to simplify the exit ICMP to EQ/NE
1756   ICmpInst::Predicate Pred = Cond->getPredicate();
1757   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1758     return true;
1759 
1760   // Look for a loop invariant RHS
1761   Value *LHS = Cond->getOperand(0);
1762   Value *RHS = Cond->getOperand(1);
1763   if (!L->isLoopInvariant(RHS)) {
1764     if (!L->isLoopInvariant(LHS))
1765       return true;
1766     std::swap(LHS, RHS);
1767   }
1768   // Look for a simple IV counter LHS
1769   PHINode *Phi = dyn_cast<PHINode>(LHS);
1770   if (!Phi)
1771     Phi = getLoopPhiForCounter(LHS, L);
1772 
1773   if (!Phi)
1774     return true;
1775 
1776   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1777   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1778   if (Idx < 0)
1779     return true;
1780 
1781   // Do LFTR if the exit condition's IV is *not* a simple counter.
1782   Value *IncV = Phi->getIncomingValue(Idx);
1783   return Phi != getLoopPhiForCounter(IncV, L);
1784 }
1785 
1786 /// Return true if undefined behavior would provably be executed on the path to
1787 /// OnPathTo if Root produced a poison result.  Note that this doesn't say
1788 /// anything about whether OnPathTo is actually executed or whether Root is
1789 /// actually poison.  This can be used to assess whether a new use of Root can
1790 /// be added at a location which is control equivalent with OnPathTo (such as
1791 /// immediately before it) without introducing UB which didn't previously
1792 /// exist.  Note that a false result conveys no information.
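///
/// A rough illustration (hypothetical): if Root is the divisor of a 'udiv'
/// in a block that dominates OnPathTo, then Root being poison would already
/// be immediate UB on every path reaching OnPathTo, so adding another use of
/// Root near OnPathTo cannot introduce UB that did not previously exist.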
1793 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
1794                                           Instruction *OnPathTo,
1795                                           DominatorTree *DT) {
1796   // The basic approach is to assume Root is poison, propagate poison forward
1797   // through all users we can easily track, and then check whether any of those
1798   // users are provably UB and must execute before our exiting block might
1799   // exit.
1800 
1801   // The set of all recursive users we've visited (which are assumed to all be
1802   // poison because of said visit)
1803   SmallSet<const Value *, 16> KnownPoison;
1804   SmallVector<const Instruction*, 16> Worklist;
1805   Worklist.push_back(Root);
1806   while (!Worklist.empty()) {
1807     const Instruction *I = Worklist.pop_back_val();
1808 
1809     // If we know this must trigger UB on a path leading to our target.
1810     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
1811       return true;
1812 
1813     // If we can't analyze propagation through this instruction, just skip it
1814     // and transitive users.  Safe as false is a conservative result.
1815     if (!propagatesFullPoison(I) && I != Root)
1816       continue;
1817 
1818     if (KnownPoison.insert(I).second)
1819       for (const User *User : I->users())
1820         Worklist.push_back(cast<Instruction>(User));
1821   }
1822 
1823   // Might be non-UB, or might have a path we couldn't prove must execute on
1824   // way to exiting bb.
1825   return false;
1826 }
1827 
1828 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1829 /// down to checking that all operands are constant and listing instructions
1830 /// that may hide undef.
1831 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1832                                unsigned Depth) {
1833   if (isa<Constant>(V))
1834     return !isa<UndefValue>(V);
1835 
1836   if (Depth >= 6)
1837     return false;
1838 
1839   // Conservatively handle non-constant non-instructions. For example, Arguments
1840   // may be undef.
1841   Instruction *I = dyn_cast<Instruction>(V);
1842   if (!I)
1843     return false;
1844 
1845   // Load and return values may be undef.
1846   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1847     return false;
1848 
1849   // Optimistically handle other instructions.
1850   for (Value *Op : I->operands()) {
1851     if (!Visited.insert(Op).second)
1852       continue;
1853     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1854       return false;
1855   }
1856   return true;
1857 }
1858 
1859 /// Return true if the given value is concrete. We must prove that undef can
1860 /// never reach it.
1861 ///
1862 /// TODO: If we decide that this is a good approach to checking for undef, we
1863 /// may factor it into a common location.
1864 static bool hasConcreteDef(Value *V) {
1865   SmallPtrSet<Value*, 8> Visited;
1866   Visited.insert(V);
1867   return hasConcreteDefImpl(V, Visited, 0);
1868 }
1869 
1870 /// Return true if this IV has no uses other than the (soon to be rewritten)
1871 /// loop exit test and its own increment, i.e. the IV is almost dead.
1872 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1873   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1874   Value *IncV = Phi->getIncomingValue(LatchIdx);
1875 
1876   for (User *U : Phi->users())
1877     if (U != Cond && U != IncV) return false;
1878 
1879   for (User *U : IncV->users())
1880     if (U != Cond && U != Phi) return false;
1881   return true;
1882 }
1883 
1884 /// Return true if the given phi is a "counter" in L.  A counter is an
1885 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
1886 /// step of 1.  Note that L must have exactly one latch.
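///
/// Informally: a phi whose SCEV for this loop is {%start,+,1} and whose latch
/// increment feeds back into the same phi qualifies; a step other than 1, a
/// non-affine recurrence, or a recurrence belonging to another loop does not.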
1887 static bool isLoopCounter(PHINode* Phi, Loop *L,
1888                           ScalarEvolution *SE) {
1889   assert(Phi->getParent() == L->getHeader());
1890   assert(L->getLoopLatch());
1891 
1892   if (!SE->isSCEVable(Phi->getType()))
1893     return false;
1894 
1895   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1896   if (!AR || AR->getLoop() != L || !AR->isAffine())
1897     return false;
1898 
1899   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1900   if (!Step || !Step->isOne())
1901     return false;
1902 
1903   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
1904   Value *IncV = Phi->getIncomingValue(LatchIdx);
1905   return (getLoopPhiForCounter(IncV, L) == Phi);
1906 }
1907 
1908 /// Search the loop header for a loop counter (an add rec with a step of one)
1909 /// suitable for use by LFTR.  If multiple counters are available, select the
1910 /// "best" one based on profitability heuristics.
1911 ///
1912 /// BECount may be an i8* pointer type. The pointer difference is already a
1913 /// valid count without scaling the address stride, so it remains a pointer
1914 /// expression as far as SCEV is concerned.
1915 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
1916                                 const SCEV *BECount,
1917                                 ScalarEvolution *SE, DominatorTree *DT) {
1918   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1919 
1920   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
1921 
1922   // Loop over all of the PHI nodes, looking for a simple counter.
1923   PHINode *BestPhi = nullptr;
1924   const SCEV *BestInit = nullptr;
1925   BasicBlock *LatchBlock = L->getLoopLatch();
1926   assert(LatchBlock && "Must be in simplified form");
1927   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1928 
1929   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1930     PHINode *Phi = cast<PHINode>(I);
1931     if (!isLoopCounter(Phi, L, SE))
1932       continue;
1933 
1934     // Avoid comparing an integer IV against a pointer Limit.
1935     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1936       continue;
1937 
1938     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1939 
1940     // AR may be a pointer type, while BECount is an integer type.
1941     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1942     // AR may not be a narrower type, or we may never exit.
1943     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1944     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
1945       continue;
1946 
1947     // Avoid reusing a potentially undef value to compute other values that may
1948     // have originally had a concrete definition.
1949     if (!hasConcreteDef(Phi)) {
1950       // We explicitly allow unknown phis as long as they are already used by
1951       // the loop exit test.  This is legal since performing LFTR could not
1952       // increase the number of undef users.
1953       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
1954       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
1955           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
1956         continue;
1957     }
1958 
1959     // Avoid introducing undefined behavior due to poison which didn't exist in
1960     // the original program.  (Annoyingly, the rules for poison and undef
1961     // propagation are distinct, so this does NOT cover the undef case above.)
1962     // We have to ensure that we don't introduce UB by introducing a use on an
1963     // iteration where said IV produces poison.  Our strategy here differs for
1964     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
1965     // see code in linearFunctionTestReplace.  For pointers, we restrict
1966     // transforms as there is no good way to reinfer inbounds once lost.
1967     if (!Phi->getType()->isIntegerTy() &&
1968         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
1969       continue;
1970 
1971     const SCEV *Init = AR->getStart();
1972 
1973     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
1974       // Don't force a live loop counter if another IV can be used.
1975       if (AlmostDeadIV(Phi, LatchBlock, Cond))
1976         continue;
1977 
1978       // Prefer to count from zero. This is a more "canonical" counter form. It
1979       // also prefers integer to pointer IVs.
1980       if (BestInit->isZero() != Init->isZero()) {
1981         if (BestInit->isZero())
1982           continue;
1983       }
1984       // If two IVs both count from zero or both count from nonzero then the
1985       // narrower is likely a dead phi that has been widened. Use the wider phi
1986       // to allow the other to be eliminated.
1987       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
1988         continue;
1989     }
1990     BestPhi = Phi;
1991     BestInit = Init;
1992   }
1993   return BestPhi;
1994 }
1995 
1996 /// Insert an IR expression which computes the value held by the IV IndVar
1997 /// (which must be a loop counter with unit stride) after the backedge of loop L
1998 /// is taken ExitCount times.
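///
/// As a sketch: for a unit-stride counter with start S and ExitCount N, the
/// pre-increment limit is S + N and the post-increment limit is S + N + 1
/// (modulo 2's complement wrap); pointer IVs take the equivalent GEP form.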
1999 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2000                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
2001                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2002   assert(isLoopCounter(IndVar, L, SE));
2003   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2004   const SCEV *IVInit = AR->getStart();
2005 
2006   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2007   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2008   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2009   // the existing GEPs whenever possible.
2010   if (IndVar->getType()->isPointerTy() &&
2011       !ExitCount->getType()->isPointerTy()) {
2012     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2013     // signed value. ExitCount on the other hand represents the loop trip count,
2014     // which is an unsigned value. FindLoopCounter only allows induction
2015     // variables that have a positive unit stride of one. This means we don't
2016     // have to handle the case of negative offsets (yet) and just need to zero
2017     // extend ExitCount.
2018     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2019     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2020     if (UsePostInc)
2021       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2022 
2023     // Expand the code for the iteration count.
2024     assert(SE->isLoopInvariant(IVOffset, L) &&
2025            "Computed iteration count is not loop invariant!");
2026 
2027     // We could handle pointer IVs other than i8*, but we need to compensate for
2028     // gep index scaling.
2029     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2030                              cast<PointerType>(IndVar->getType())
2031                                  ->getElementType())->isOne() &&
2032            "unit stride pointer IV must be i8*");
2033 
2034     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2035     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2036     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2037   } else {
2038     // In any other case, convert both IVInit and ExitCount to integers before
2039     // comparing. This may result in SCEV expansion of pointers, but in practice
2040     // SCEV will fold the pointer arithmetic away as such:
2041     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2042     //
2043     // Valid Cases: (1) both being integers is the most common; (2) both may
2044     // be pointers for simple memset-style loops.
2045     //
2046     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2047     // were generated on top of case #2, which is not expected.
2048 
2049     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2050     // For unit stride, IVCount = Start + ExitCount with 2's complement
2051     // overflow.
2052 
2053     // For integer IVs, truncate the IV before computing IVInit + ExitCount,
2054     // unless we know a priori that the limit must be a constant when evaluated
2055     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2056     // of the IV in the loop over a (potentially) expensive expansion of the
2057     // widened exit count add(zext(add)) expression.
2058     if (SE->getTypeSizeInBits(IVInit->getType())
2059         > SE->getTypeSizeInBits(ExitCount->getType())) {
2060       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2061         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2062       else
2063         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2064     }
2065 
2066     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2067 
2068     if (UsePostInc)
2069       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2070 
2071     // Expand the code for the iteration count.
2072     assert(SE->isLoopInvariant(IVLimit, L) &&
2073            "Computed iteration count is not loop invariant!");
2074     // Ensure that we generate the same type as IndVar, or a smaller integer
2075     // type. In the presence of null pointer values, we have an integer type
2076     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2077     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2078       IndVar->getType() : ExitCount->getType();
2079     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2080     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2081   }
2082 }
2083 
2084 /// This method rewrites the exit condition of the loop to be a canonical !=
2085 /// comparison against the incremented loop induction variable.  This pass is
2086 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2087 /// determine a loop-invariant trip count of the loop, which is actually a much
2088 /// broader range than just linear tests.
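///
/// A simple sketch of the effect (illustrative names only): an exit test like
///   %cmp = icmp slt i32 %iv, %n
/// becomes a canonical test such as
///   %exitcond = icmp ne i32 %iv.next, %limit
/// where %limit is the loop-invariant value produced by genLoopLimit above.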
2089 bool IndVarSimplify::
2090 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2091                           const SCEV *ExitCount,
2092                           PHINode *IndVar, SCEVExpander &Rewriter) {
2093   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2094   assert(isLoopCounter(IndVar, L, SE));
2095   Instruction * const IncVar =
2096     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2097 
2098   // Initialize CmpIndVar to the preincremented IV.
2099   Value *CmpIndVar = IndVar;
2100   bool UsePostInc = false;
2101 
2102   // If the exiting block is the same as the backedge block, we prefer to
2103   // compare against the post-incremented value, otherwise we must compare
2104   // against the preincremented value.
2105   if (ExitingBB == L->getLoopLatch()) {
2106     // For pointer IVs, we chose not to strip inbounds, which requires us not
2107     // to add a potentially UB introducing use.  We need to either a) show
2108     // the loop test we're modifying is already in post-inc form, or b) show
2109     // that adding a use must not introduce UB.
2110     bool SafeToPostInc =
2111         IndVar->getType()->isIntegerTy() ||
2112         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2113         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2114     if (SafeToPostInc) {
2115       UsePostInc = true;
2116       CmpIndVar = IncVar;
2117     }
2118   }
2119 
2120   // It may be necessary to drop nowrap flags on the incrementing instruction
2121   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2122   // case the increment might have previously been poison on the last iteration
2123   // only) or if LFTR switches to a different IV that was previously dynamically
2124   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2125   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2126   // check), because the pre-inc addrec flags may be adopted from the original
2127   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2128   // TODO: This handling is inaccurate for one case: If we switch to a
2129   // dynamically dead IV that wraps on the first loop iteration only, which is
2130   // not covered by the post-inc addrec. (If the new IV was not dynamically
2131   // dead, it could not be poison on the first iteration in the first place.)
2132   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2133     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2134     if (BO->hasNoUnsignedWrap())
2135       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2136     if (BO->hasNoSignedWrap())
2137       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2138   }
2139 
2140   Value *ExitCnt = genLoopLimit(
2141       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2142   assert(ExitCnt->getType()->isPointerTy() ==
2143              IndVar->getType()->isPointerTy() &&
2144          "genLoopLimit missed a cast");
2145 
2146   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2147   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2148   ICmpInst::Predicate P;
2149   if (L->contains(BI->getSuccessor(0)))
2150     P = ICmpInst::ICMP_NE;
2151   else
2152     P = ICmpInst::ICMP_EQ;
2153 
2154   IRBuilder<> Builder(BI);
2155 
2156   // The new loop exit condition should reuse the debug location of the
2157   // original loop exit condition.
2158   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2159     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2160 
2161   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2162   // avoid the expensive expansion of the limit expression in the wider type,
2163   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2164   // since we know (from the exit count bitwidth) that we can't self-wrap in
2165   // the narrower type.
2166   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2167   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2168   if (CmpIndVarSize > ExitCntSize) {
2169     assert(!CmpIndVar->getType()->isPointerTy() &&
2170            !ExitCnt->getType()->isPointerTy());
2171 
2172     // Before resorting to actually inserting the truncate, use the same
2173     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2174     // the other side of the comparison instead.  We still evaluate the limit
2175     // in the narrower bitwidth; we just prefer a zext/sext outside the loop to
2176     // a truncate inside it.
2177     bool Extended = false;
2178     const SCEV *IV = SE->getSCEV(CmpIndVar);
2179     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2180                                                   ExitCnt->getType());
2181     const SCEV *ZExtTrunc =
2182       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2183 
2184     if (ZExtTrunc == IV) {
2185       Extended = true;
2186       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2187                                    "wide.trip.count");
2188     } else {
2189       const SCEV *SExtTrunc =
2190         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2191       if (SExtTrunc == IV) {
2192         Extended = true;
2193         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2194                                      "wide.trip.count");
2195       }
2196     }
2197 
2198     if (Extended) {
2199       bool Discard;
2200       L->makeLoopInvariant(ExitCnt, Discard);
2201     } else
2202       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2203                                       "lftr.wideiv");
2204   }
2205   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2206                     << "      LHS:" << *CmpIndVar << '\n'
2207                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2208                     << "\n"
2209                     << "      RHS:\t" << *ExitCnt << "\n"
2210                     << "ExitCount:\t" << *ExitCount << "\n"
2211                     << "  was: " << *BI->getCondition() << "\n");
2212 
2213   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2214   Value *OrigCond = BI->getCondition();
2215   // It's tempting to use replaceAllUsesWith here to fully replace the old
2216   // comparison, but that's not immediately safe, since users of the old
2217   // comparison may not be dominated by the new comparison. Instead, just
2218   // update the branch to use the new comparison; in the common case this
2219   // will make the old comparison dead.
2220   BI->setCondition(Cond);
2221   DeadInsts.push_back(OrigCond);
2222 
2223   ++NumLFTR;
2224   return true;
2225 }
2226 
2227 //===----------------------------------------------------------------------===//
2228 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2229 //===----------------------------------------------------------------------===//
2230 
2231 /// If there's a single exit block, sink any loop-invariant values that
2232 /// were defined in the preheader but not used inside the loop into the
2233 /// exit block to reduce register pressure in the loop.
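///
/// For instance (a hedged sketch): a value such as '%p = mul i32 %a, %b'
/// computed in the preheader but referenced only after the loop can be moved
/// into the sole exit block, so it no longer ties up a register across the
/// loop body.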
2234 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2235   BasicBlock *ExitBlock = L->getExitBlock();
2236   if (!ExitBlock) return false;
2237 
2238   BasicBlock *Preheader = L->getLoopPreheader();
2239   if (!Preheader) return false;
2240 
2241   bool MadeAnyChanges = false;
2242   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2243   BasicBlock::iterator I(Preheader->getTerminator());
2244   while (I != Preheader->begin()) {
2245     --I;
2246     // New instructions were inserted at the end of the preheader.
2247     if (isa<PHINode>(I))
2248       break;
2249 
2250     // Don't move instructions which might have side effects, since the side
2251     // effects need to complete before instructions inside the loop.  Also don't
2252     // move instructions which might read memory, since the loop may modify
2253     // memory. Note that it's okay if the instruction might have undefined
2254     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2255     // block.
2256     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2257       continue;
2258 
2259     // Skip debug info intrinsics.
2260     if (isa<DbgInfoIntrinsic>(I))
2261       continue;
2262 
2263     // Skip eh pad instructions.
2264     if (I->isEHPad())
2265       continue;
2266 
2267     // Don't sink alloca: we never want to sink static allocas out of the
2268     // entry block, and correctly sinking dynamic allocas requires
2269     // checks for stacksave/stackrestore intrinsics.
2270     // FIXME: Refactor this check somehow?
2271     if (isa<AllocaInst>(I))
2272       continue;
2273 
2274     // Determine if there is a use in or before the loop (direct or
2275     // otherwise).
2276     bool UsedInLoop = false;
2277     for (Use &U : I->uses()) {
2278       Instruction *User = cast<Instruction>(U.getUser());
2279       BasicBlock *UseBB = User->getParent();
2280       if (PHINode *P = dyn_cast<PHINode>(User)) {
2281         unsigned i =
2282           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2283         UseBB = P->getIncomingBlock(i);
2284       }
2285       if (UseBB == Preheader || L->contains(UseBB)) {
2286         UsedInLoop = true;
2287         break;
2288       }
2289     }
2290 
2291     // If there is, the def must remain in the preheader.
2292     if (UsedInLoop)
2293       continue;
2294 
2295     // Otherwise, sink it to the exit block.
2296     Instruction *ToMove = &*I;
2297     bool Done = false;
2298 
2299     if (I != Preheader->begin()) {
2300       // Skip debug info intrinsics.
2301       do {
2302         --I;
2303       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2304 
2305       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2306         Done = true;
2307     } else {
2308       Done = true;
2309     }
2310 
2311     MadeAnyChanges = true;
2312     ToMove->moveBefore(*ExitBlock, InsertPt);
2313     if (Done) break;
2314     InsertPt = ToMove->getIterator();
2315   }
2316 
2317   return MadeAnyChanges;
2318 }
2319 
2320 /// Return a symbolic upper bound for the backedge taken count of the loop.
2321 /// This is more general than getConstantMaxBackedgeTakenCount as it returns
2322 /// an arbitrary expression as opposed to only constants.
2323 /// TODO: Move into the ScalarEvolution class.
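///
/// For example (informal): a loop with one exit taken after exactly %n
/// iterations and another taken after at most 100 iterations yields
/// umin(%n, 100) rather than giving up because %n is not a constant.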
2324 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
2325                                             DominatorTree &DT, Loop *L) {
2326   SmallVector<BasicBlock*, 16> ExitingBlocks;
2327   L->getExitingBlocks(ExitingBlocks);
2328 
2329   // Form an expression for the maximum exit count possible for this loop. We
2330   // merge the max and exact information to approximate a version of
2331   // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2332   SmallVector<const SCEV*, 4> ExitCounts;
2333   for (BasicBlock *ExitingBB : ExitingBlocks) {
2334     const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
2335     if (isa<SCEVCouldNotCompute>(ExitCount))
2336       ExitCount = SE.getExitCount(L, ExitingBB,
2337                                   ScalarEvolution::ConstantMaximum);
2338     if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2339       assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
2340              "We should only have known counts for exiting blocks that "
2341              "dominate latch!");
2342       ExitCounts.push_back(ExitCount);
2343     }
2344   }
2345   if (ExitCounts.empty())
2346     return SE.getCouldNotCompute();
2347   return SE.getUMinFromMismatchedTypes(ExitCounts);
2348 }
2349 
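/// Fold away exit tests whose outcome can be proven from the exit counts: an
/// exit with a zero exit count is rewritten to be taken unconditionally, and
/// an exit which some other exit is provably taken strictly before, or which
/// duplicates the exit count of a dominating exit, is rewritten to never be
/// taken.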
2350 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2351   SmallVector<BasicBlock*, 16> ExitingBlocks;
2352   L->getExitingBlocks(ExitingBlocks);
2353 
2354   // Remove all exits which aren't both rewritable and analyzable.
2355   auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2356     // If our exiting block exits multiple loops, we can only rewrite the
2357     // innermost one.  Otherwise, we're changing how many times the innermost
2358     // loop runs before it exits.
2359     if (LI->getLoopFor(ExitingBB) != L)
2360       return true;
2361 
2362     // Can't rewrite non-branch yet.
2363     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2364     if (!BI)
2365       return true;
2366 
2367     // If already constant, nothing to do.
2368     if (isa<Constant>(BI->getCondition()))
2369       return true;
2370 
2371     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2372     if (isa<SCEVCouldNotCompute>(ExitCount))
2373       return true;
2374     return false;
2375   });
2376   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2377 
2378   if (ExitingBlocks.empty())
2379     return false;
2380 
2381   // Get a symbolic upper bound on the loop backedge taken count.
2382   const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2383   if (isa<SCEVCouldNotCompute>(MaxExitCount))
2384     return false;
2385 
2386   // Visit our exit blocks in order of dominance.  We know from the fact that
2387   // all exits (left) are analyzable that there must be a total dominance order
2388   // between them, as each must dominate the latch.  The visit order only
2389   // matters for the provably equal case.
2390   llvm::sort(ExitingBlocks,
2391              [&](BasicBlock *A, BasicBlock *B) {
2392                // std::sort sorts in ascending order, so we want the inverse of
2393                // the normal dominance relation.
2394                if (DT->properlyDominates(A, B)) return true;
2395                if (DT->properlyDominates(B, A)) return false;
2396                llvm_unreachable("expected total dominance order!");
2397              });
2398 #ifndef NDEBUG
2399   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2400     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2401   }
2402 #endif
2403 
2404   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2405     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2406     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2407     auto *OldCond = BI->getCondition();
2408     auto *NewCond = ConstantInt::get(OldCond->getType(),
2409                                      IsTaken ? ExitIfTrue : !ExitIfTrue);
2410     BI->setCondition(NewCond);
2411     if (OldCond->use_empty())
2412       DeadInsts.push_back(OldCond);
2413   };
2414 
2415   bool Changed = false;
2416   SmallSet<const SCEV*, 8> DominatingExitCounts;
2417   for (BasicBlock *ExitingBB : ExitingBlocks) {
2418     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2419     assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2420 
2421     // If we know we'd exit on the first iteration, rewrite the exit to
2422     // reflect this.  This does not imply the loop must exit through this
2423     // exit; there may be an earlier one taken on the first iteration.
2424     // TODO: Given we know the backedge can't be taken, we should go ahead
2425     // and break it.  Or at least, kill all the header phis and simplify.
2426     if (ExitCount->isZero()) {
2427       FoldExit(ExitingBB, true);
2428       Changed = true;
2429       continue;
2430     }
2431 
2432     // If we end up with a pointer exit count, bail.  Note that we can end up
2433     // with a pointer exit count for one exiting block, and not for another in
2434     // the same loop.
2435     if (!ExitCount->getType()->isIntegerTy() ||
2436         !MaxExitCount->getType()->isIntegerTy())
2437       continue;
2438 
2439     Type *WiderType =
2440       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2441     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2442     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2443     assert(MaxExitCount->getType() == ExitCount->getType());
2444 
2445     // Can we prove that some other exit must be taken strictly before this
2446     // one?
2447     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2448                                      MaxExitCount, ExitCount)) {
2449       FoldExit(ExitingBB, false);
2450       Changed = true;
2451       continue;
2452     }
2453 
2454     // As we run, keep track of which exit counts we've encountered.  If we
2455     // find a duplicate, we've found an exit which would have exited on the
2456     // exiting iteration, but (from the visit order) strictly follows another
2457     // which does the same and is thus dead.
2458     if (!DominatingExitCounts.insert(ExitCount).second) {
2459       FoldExit(ExitingBB, false);
2460       Changed = true;
2461       continue;
2462     }
2463 
2464     // TODO: There might be another opportunity to leverage SCEV's reasoning
2465     // here.  If we kept track of the min of dominating exits so far, we could
2466     // discharge exits with EC >= MDEC. This is less powerful than the existing
2467     // transform (since later exits aren't considered), but potentially more
2468     // powerful for any case where SCEV can prove a >=u b, but neither a == b
2469     // nor a >u b.  Such a case is not currently known.
2470   }
2471   return Changed;
2472 }
2473 
2474 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2475   SmallVector<BasicBlock*, 16> ExitingBlocks;
2476   L->getExitingBlocks(ExitingBlocks);
2477 
2478   // Finally, see if we can rewrite our exit conditions into a loop invariant
2479   // form. If we have a read-only loop, and we can tell that we must exit down
2480   // a path which does not need any of the values computed within the loop, we
2481   // can rewrite the loop to exit on the first iteration.  Note that this
2482   // doesn't either a) tell us the loop exits on the first iteration (unless
2483   // *all* exits are predicatable) or b) tell us *which* exit might be taken.
2484   // This transformation looks a lot like a form of dead loop
2485   // elimination, but restricted to read-only loops and without necessarily
2486   // needing to kill the loop entirely.
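  //
  // Rough illustration (hypothetical C source): in a read-only search loop
  //   for (i = 0; i < n; i++) if (a[i] == key) break;
  // each predicatable exit test can be replaced by a loop-invariant condition
  // derived from that exit's exit count and the exact backedge-taken count,
  // so whichever exit would terminate the loop does so on the first iteration.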
2487   if (!LoopPredication)
2488     return false;
2489 
2490   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2491     return false;
2492 
2493   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2494   // through *explicit* control flow.  We have to eliminate the possibility of
2495   // implicit exits (see below) before we know it's truly exact.
2496   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2497   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2498       !SE->isLoopInvariant(ExactBTC, L) ||
2499       !isSafeToExpand(ExactBTC, *SE))
2500     return false;
2501 
2502   // If we end up with a pointer exit count, bail.  It may be unsized.
2503   if (!ExactBTC->getType()->isIntegerTy())
2504     return false;
2505 
2506   auto BadExit = [&](BasicBlock *ExitingBB) {
2507     // If our exiting block exits multiple loops, we can only rewrite the
2508     // innermost one.  Otherwise, we're changing how many times the innermost
2509     // loop runs before it exits.
2510     if (LI->getLoopFor(ExitingBB) != L)
2511       return true;
2512 
2513     // Can't rewrite non-branch yet.
2514     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2515     if (!BI)
2516       return true;
2517 
2518     // If already constant, nothing to do.
2519     if (isa<Constant>(BI->getCondition()))
2520       return true;
2521 
2522     // If the exit block has phis, we need to be able to compute the values
2523     // within the loop which contains them.  This assumes trivially lcssa phis
2524     // have already been removed; TODO: generalize
2525     BasicBlock *ExitBlock =
2526         BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2527     if (!ExitBlock->phis().empty())
2528       return true;
2529 
2530     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2531     assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2532     if (!SE->isLoopInvariant(ExitCount, L) ||
2533         !isSafeToExpand(ExitCount, *SE))
2534       return true;
2535 
2536     // If we end up with a pointer exit count, bail.  It may be unsized.
2537     if (!ExitCount->getType()->isIntegerTy())
2538       return true;
2539 
2540     return false;
2541   };
2542 
2543   // If we have any exits which can't be predicated themselves, then we can't
2544   // predicate any exit which isn't guaranteed to execute before it.  Consider
2545   // two exits (a) and (b) which would both exit on the same iteration.  If we
2546   // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2547   // we could convert a loop from exiting through (a) to one exiting through
2548   // (b).  Note that this problem exists only for exits with the same exit
2549   // count, and we could be more aggressive when exit counts are known unequal.
2550   llvm::sort(ExitingBlocks,
2551             [&](BasicBlock *A, BasicBlock *B) {
2552               // std::sort sorts in ascending order, so we want the inverse of
2553               // the normal dominance relation, plus a tie breaker for blocks
2554               // unordered by dominance.
2555               if (DT->properlyDominates(A, B)) return true;
2556               if (DT->properlyDominates(B, A)) return false;
2557               return A->getName() < B->getName();
2558             });
2559   // Check to see if our exit blocks form a total order (i.e. a linear chain of
2560   // exits before the backedge).  If they aren't, reasoning about reachability
2561   // is complicated and we choose not to for now.
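  // For example (an illustrative sketch): two exiting blocks sitting on the
  // two arms of a diamond,
  //   header -> { exiting.a, exiting.b } -> latch
  // are unordered by dominance (neither dominates the other), so the check
  // below fails and we conservatively give up.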
2562   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2563     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2564       return false;
2565 
2566   // Given our sorted total order, we know that exit[j] must be evaluated
2567   // after all exit[i] such that j > i.
2568   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2569     if (BadExit(ExitingBlocks[i])) {
2570       ExitingBlocks.resize(i);
2571       break;
2572     }
2573 
2574   if (ExitingBlocks.empty())
2575     return false;
2576 
2577   // We rely on not being able to reach an exiting block on a later iteration
2578   // than its statically computed exit count.  The implementation of
2579   // getExitCount currently has this invariant, but assert it here so that
2580   // breakage is obvious if this ever changes.
2581   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2582         return DT->dominates(ExitingBB, L->getLoopLatch());
2583       }));
2584 
2585   // At this point, ExitingBlocks consists of only those blocks which are
2586   // predicatable.  Given that, we know we have at least one exit we can
2587   // predicate if the loop doesn't have side effects and doesn't have any
2588   // implicit exits (because then our exact BTC isn't actually exact).
2589   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
2590   // suggestions on how to improve this?  I can obviously bail out for outer
2591   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
2592   // is that enough for *all* side effects?
2593   for (BasicBlock *BB : L->blocks())
2594     for (auto &I : *BB)
2595       // TODO:isGuaranteedToTransfer
2596       if (I.mayHaveSideEffects() || I.mayThrow())
2597         return false;
2598 
2599   bool Changed = false;
2600   // Finally, do the actual predication for all predicatable blocks.  A couple
2601   // of notes here:
2602   // 1) We don't bother to constant fold dominated exits with identical exit
2603   //    counts; that's simply a form of CSE/equality propagation and we leave
2604   //    it for dedicated passes.
2605   // 2) We insert the comparison at the branch.  Hoisting introduces additional
2606   //    legality constraints and we leave that to dedicated logic.  We want to
2607   //    predicate even if we can't insert a loop invariant expression as
2608   //    peeling or unrolling will likely reduce the cost of the otherwise loop
2609   //    varying check.
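  // Illustrative shape of the rewrite (a sketch; names are hypothetical):
  //   preheader:
  //     %ec   = ...expansion of this exit's exit count...
  //     %btc  = ...expansion of the exact backedge-taken count...
  //     %cond = icmp ne iN %ec, %btc
  //   exiting:
  //     br i1 %cond, label %in.loop, label %exit
  // i.e. the exit is taken only when this exit's count equals the loop's
  // exact backedge-taken count; the predicate becomes eq when successor 0 of
  // the branch is the exit edge, and both operands are zero-extended to a
  // common width when their types differ.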
2610   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2611   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2612   Value *ExactBTCV = nullptr; // Lazily generated if needed.
2613   for (BasicBlock *ExitingBB : ExitingBlocks) {
2614     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2615 
2616     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2617     Value *NewCond;
2618     if (ExitCount == ExactBTC) {
2619       NewCond = L->contains(BI->getSuccessor(0)) ?
2620         B.getFalse() : B.getTrue();
2621     } else {
2622       Value *ECV = Rewriter.expandCodeFor(ExitCount);
2623       if (!ExactBTCV)
2624         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2625       Value *RHS = ExactBTCV;
2626       if (ECV->getType() != RHS->getType()) {
2627         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2628         ECV = B.CreateZExt(ECV, WiderTy);
2629         RHS = B.CreateZExt(RHS, WiderTy);
2630       }
2631       auto Pred = L->contains(BI->getSuccessor(0)) ?
2632         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2633       NewCond = B.CreateICmp(Pred, ECV, RHS);
2634     }
2635     Value *OldCond = BI->getCondition();
2636     BI->setCondition(NewCond);
2637     if (OldCond->use_empty())
2638       DeadInsts.push_back(OldCond);
2639     Changed = true;
2640   }
2641 
2642   return Changed;
2643 }
2644 
2645 //===----------------------------------------------------------------------===//
2646 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2647 //===----------------------------------------------------------------------===//
2648 
2649 bool IndVarSimplify::run(Loop *L) {
2650   // We need (and expect!) the incoming loop to be in LCSSA.
2651   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2652          "LCSSA required to run indvars!");
2653 
2654   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2655   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2656   //    canonicalization can be a pessimization without LSR to "clean up"
2657   //    afterwards.
2658   //  - We depend on having a preheader; in particular,
2659   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2660   //    and we're in trouble if we can't find the induction variable even when
2661   //    we've manually inserted one.
2662   //  - LFTR relies on having a single backedge.
2663   if (!L->isLoopSimplifyForm())
2664     return false;
2665 
2666 #ifndef NDEBUG
2667   // Used below for a consistency check only
2668   // Note: Since the result returned by ScalarEvolution may depend on the order
2669   // in which previous results are added to its cache, the call to
2670   // getBackedgeTakenCount() may change following SCEV queries.
2671   const SCEV *BackedgeTakenCount;
2672   if (VerifyIndvars)
2673     BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2674 #endif
2675 
2676   bool Changed = false;
2677   // If there are any floating-point recurrences, attempt to
2678   // transform them to use integer recurrences.
2679   Changed |= rewriteNonIntegerIVs(L);
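  // For instance (an illustrative sketch), a floating-point counter such as
  //   for (float f = 0.0f; f != 16.0f; f += 1.0f) ...
  // can be rewritten to advance an integer IV (converting back to float only
  // where the floating-point value is actually used), making the recurrence
  // analyzable by SCEV.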
2680 
2681   // Create a rewriter object which we'll use to transform the code with.
2682   SCEVExpander Rewriter(*SE, DL, "indvars");
2683 #ifndef NDEBUG
2684   Rewriter.setDebugType(DEBUG_TYPE);
2685 #endif
2686 
2687   // Eliminate redundant IV users.
2688   //
2689   // Simplification works best when run before other consumers of SCEV. We
2690   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2691   // other expressions involving loop IVs have been evaluated. This helps SCEV
2692   // set no-wrap flags before normalizing sign/zero extension.
2693   Rewriter.disableCanonicalMode();
2694   Changed |= simplifyAndExtend(L, Rewriter, LI);
2695 
2696   // Check to see if we can compute the final value of any expressions
2697   // that are recurrent in the loop, and substitute the exit values from the
2698   // loop into any instructions outside of the loop that use the final values
2699   // of the current expressions.
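  // For example (an illustrative sketch): if a value live out of the loop is
  // an LCSSA phi such as
  //   %sum.lcssa = phi i32 [ %sum.next, %loop ]
  // and SCEV can evaluate %sum.next at loop exit, the phi's uses can be
  // rewritten to an expansion of that closed-form value outside the loop.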
2700   if (ReplaceExitValue != NeverRepl) {
2701     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
2702                                              ReplaceExitValue, DeadInsts)) {
2703       NumReplaced += Rewrites;
2704       Changed = true;
2705     }
2706   }
2707 
2708   // Eliminate redundant IV cycles.
2709   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
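  // For instance (an illustrative sketch): two header phis that are congruent
  // under SCEV, e.g.
  //   %i = phi i64 [ 0, %preheader ], [ %i.next, %latch ]
  //   %j = phi i64 [ 0, %preheader ], [ %j.next, %latch ]
  // with identical strides, can be merged so that users of %j are rewritten
  // in terms of %i and the redundant recurrence is queued in DeadInsts.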
2710 
2711   // Try to eliminate loop exits based on analyzable exit counts.
2712   if (optimizeLoopExits(L, Rewriter))  {
2713     Changed = true;
2714     // Given we've changed exit counts, notify SCEV
2715     SE->forgetLoop(L);
2716   }
2717 
2718   // Try to form loop invariant tests for loop exits by changing how many
2719   // iterations of the loop run when that is unobservable.
2720   if (predicateLoopExits(L, Rewriter)) {
2721     Changed = true;
2722     // Given we've changed exit counts, notify SCEV
2723     SE->forgetLoop(L);
2724   }
2725 
2726   // If we have a trip count expression, rewrite the loop's exit condition
2727   // using it.
2728   if (!DisableLFTR) {
2729     BasicBlock *PreHeader = L->getLoopPreheader();
2730     BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
2731 
2732     SmallVector<BasicBlock*, 16> ExitingBlocks;
2733     L->getExitingBlocks(ExitingBlocks);
2734     for (BasicBlock *ExitingBB : ExitingBlocks) {
2735       // Can't rewrite non-branch yet.
2736       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2737         continue;
2738 
2739       // If our exiting block exits multiple loops, we can only rewrite the
2740       // innermost one.  Otherwise, we're changing how many times the innermost
2741       // loop runs before it exits.
2742       if (LI->getLoopFor(ExitingBB) != L)
2743         continue;
2744 
2745       if (!needsLFTR(L, ExitingBB))
2746         continue;
2747 
2748       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2749       if (isa<SCEVCouldNotCompute>(ExitCount))
2750         continue;
2751 
2752       // This was handled above, but as we form SCEVs, we can sometimes refine
2753       // existing ones; this allows exit counts to be folded to zero which
2754       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
2755       // until stable to handle cases like this better.
2756       if (ExitCount->isZero())
2757         continue;
2758 
2759       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2760       if (!IndVar)
2761         continue;
2762 
2763       // Avoid high cost expansions.  Note: This heuristic is questionable in
2764       // that our definition of "high cost" is not exactly principled.
2765       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2766                                        TTI, PreHeaderBR))
2767         continue;
2768 
2769       // Check preconditions for proper SCEVExpander operation. SCEV does not
2770       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2771       // any pass that uses the SCEVExpander must do it. This does not work
2772       // well for loop passes because SCEVExpander makes assumptions about
2773       // all loops, while LoopPassManager only forces the current loop to be
2774       // simplified.
2775       //
2776       // FIXME: SCEV expansion has no way to bail out, so the caller must
2777       // explicitly check any assumptions made by SCEV. Brittle.
2778       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2779       if (!AR || AR->getLoop()->getLoopPreheader())
2780         Changed |= linearFunctionTestReplace(L, ExitingBB,
2781                                              ExitCount, IndVar,
2782                                              Rewriter);
2783     }
2784   }
2785   // Clear the rewriter cache, because values that are in the rewriter's cache
2786   // can be deleted in the loop below, causing the AssertingVH in the cache to
2787   // trigger.
2788   Rewriter.clear();
2789 
2790   // Now that we're done iterating through lists, clean up any instructions
2791   // which are now dead.
2792   while (!DeadInsts.empty())
2793     if (Instruction *Inst =
2794             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2795       Changed |=
2796           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2797 
2798   // The Rewriter may not be used from this point on.
2799 
2800   // Loop-invariant instructions in the preheader that aren't used in the
2801   // loop may be sunk below the loop to reduce register pressure.
2802   Changed |= sinkUnusedInvariants(L);
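  // For instance (an illustrative sketch): an instruction defined in the
  // preheader but only used after the loop,
  //   preheader:
  //     %x = add i64 %a, %b     ; no uses inside the loop
  //   ...
  //   exit:
  //     %y = mul i64 %x, 3
  // can be moved down into the exit block, shortening %x's live range across
  // the loop body.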
2803 
2804   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2805   // trip count and therefore can further simplify exit values in addition to
2806   // rewriteLoopExitValues.
2807   Changed |= rewriteFirstIterationLoopExitValues(L);
2808 
2809   // Clean up dead instructions.
2810   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2811 
2812   // Check a post-condition.
2813   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2814          "Indvars did not preserve LCSSA!");
2815 
2816   // Verify that LFTR, and any other change, have not interfered with SCEV's
2817   // ability to compute trip count.  We may have *changed* the exit count, but
2818   // only by reducing it.
2819 #ifndef NDEBUG
2820   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2821     SE->forgetLoop(L);
2822     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2823     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2824         SE->getTypeSizeInBits(NewBECount->getType()))
2825       NewBECount = SE->getTruncateOrNoop(NewBECount,
2826                                          BackedgeTakenCount->getType());
2827     else
2828       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2829                                                  NewBECount->getType());
2830     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2831                                  NewBECount) && "indvars must preserve SCEV");
2832   }
2833   if (VerifyMemorySSA && MSSAU)
2834     MSSAU->getMemorySSA()->verifyMemorySSA();
2835 #endif
2836 
2837   return Changed;
2838 }
2839 
2840 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2841                                           LoopStandardAnalysisResults &AR,
2842                                           LPMUpdater &) {
2843   Function *F = L.getHeader()->getParent();
2844   const DataLayout &DL = F->getParent()->getDataLayout();
2845 
2846   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA);
2847   if (!IVS.run(&L))
2848     return PreservedAnalyses::all();
2849 
2850   auto PA = getLoopPassPreservedAnalyses();
2851   PA.preserveSet<CFGAnalyses>();
2852   if (AR.MSSA)
2853     PA.preserve<MemorySSAAnalysis>();
2854   return PA;
2855 }
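// Illustrative usage (a sketch, not part of this pass): with the new pass
// manager the pass is normally reached through the loop adaptor, e.g.
//   opt -passes='loop(indvars)' -S in.ll
// or, programmatically, something like
//   LoopPassManager LPM;
//   LPM.addPass(IndVarSimplifyPass());
//   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));
// where FPM is an existing FunctionPassManager.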
2856 
2857 namespace {
2858 
2859 struct IndVarSimplifyLegacyPass : public LoopPass {
2860   static char ID; // Pass identification, replacement for typeid
2861 
2862   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2863     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2864   }
2865 
2866   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2867     if (skipLoop(L))
2868       return false;
2869 
2870     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2871     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2872     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2873     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2874     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2875     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2876     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2877     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2878     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2879     MemorySSA *MSSA = nullptr;
2880     if (MSSAAnalysis)
2881       MSSA = &MSSAAnalysis->getMSSA();
2882 
2883     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA);
2884     return IVS.run(L);
2885   }
2886 
2887   void getAnalysisUsage(AnalysisUsage &AU) const override {
2888     AU.setPreservesCFG();
2889     AU.addPreserved<MemorySSAWrapperPass>();
2890     getLoopAnalysisUsage(AU);
2891   }
2892 };
2893 
2894 } // end anonymous namespace
2895 
2896 char IndVarSimplifyLegacyPass::ID = 0;
2897 
2898 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2899                       "Induction Variable Simplification", false, false)
2900 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2901 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2902                     "Induction Variable Simplification", false, false)
2903 
2904 Pass *llvm::createIndVarSimplifyPass() {
2905   return new IndVarSimplifyLegacyPass();
2906 }
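// Illustrative usage with the legacy pass manager (a sketch, not part of this
// pass): clients typically schedule it via the factory above, e.g.
//   legacy::PassManager PM;
//   PM.add(createIndVarSimplifyPass());
//   PM.run(M);   // M is an llvm::Module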
2907