1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
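//      For example, 'for (i = 0; i != n; ++i);  use(i);' can have the
//      post-loop use of 'i' rewritten to use 'n' directly, leaving the loop
//      with no remaining uses.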
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/MemorySSA.h"
42 #include "llvm/Analysis/MemorySSAUpdater.h"
43 #include "llvm/Analysis/ScalarEvolution.h"
44 #include "llvm/Analysis/ScalarEvolutionExpander.h"
45 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
46 #include "llvm/Analysis/TargetLibraryInfo.h"
47 #include "llvm/Analysis/TargetTransformInfo.h"
48 #include "llvm/Analysis/ValueTracking.h"
49 #include "llvm/IR/BasicBlock.h"
50 #include "llvm/IR/Constant.h"
51 #include "llvm/IR/ConstantRange.h"
52 #include "llvm/IR/Constants.h"
53 #include "llvm/IR/DataLayout.h"
54 #include "llvm/IR/DerivedTypes.h"
55 #include "llvm/IR/Dominators.h"
56 #include "llvm/IR/Function.h"
57 #include "llvm/IR/IRBuilder.h"
58 #include "llvm/IR/InstrTypes.h"
59 #include "llvm/IR/Instruction.h"
60 #include "llvm/IR/Instructions.h"
61 #include "llvm/IR/IntrinsicInst.h"
62 #include "llvm/IR/Intrinsics.h"
63 #include "llvm/IR/Module.h"
64 #include "llvm/IR/Operator.h"
65 #include "llvm/IR/PassManager.h"
66 #include "llvm/IR/PatternMatch.h"
67 #include "llvm/IR/Type.h"
68 #include "llvm/IR/Use.h"
69 #include "llvm/IR/User.h"
70 #include "llvm/IR/Value.h"
71 #include "llvm/IR/ValueHandle.h"
72 #include "llvm/InitializePasses.h"
73 #include "llvm/Pass.h"
74 #include "llvm/Support/Casting.h"
75 #include "llvm/Support/CommandLine.h"
76 #include "llvm/Support/Compiler.h"
77 #include "llvm/Support/Debug.h"
78 #include "llvm/Support/ErrorHandling.h"
79 #include "llvm/Support/MathExtras.h"
80 #include "llvm/Support/raw_ostream.h"
81 #include "llvm/Transforms/Scalar.h"
82 #include "llvm/Transforms/Scalar/LoopPassManager.h"
83 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
84 #include "llvm/Transforms/Utils/Local.h"
85 #include "llvm/Transforms/Utils/LoopUtils.h"
86 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
87 #include <cassert>
88 #include <cstdint>
89 #include <utility>
90 
91 using namespace llvm;
92 
93 #define DEBUG_TYPE "indvars"
94 
95 STATISTIC(NumWidened     , "Number of indvars widened");
96 STATISTIC(NumReplaced    , "Number of exit values replaced");
97 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
98 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
99 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
100 
101 // Trip count verification can be enabled by default under NDEBUG if we
102 // implement a strong expression equivalence checker in SCEV. Until then, we
103 // use the verify-indvars flag, which may assert in some cases.
104 static cl::opt<bool> VerifyIndvars(
105   "verify-indvars", cl::Hidden,
106   cl::desc("Verify the ScalarEvolution result after running indvars"));
107 
108 static cl::opt<ReplaceExitVal> ReplaceExitValue(
109     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
110     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
111     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
112                clEnumValN(OnlyCheapRepl, "cheap",
113                           "only replace exit value when the cost is cheap"),
114                clEnumValN(NoHardUse, "noharduse",
115                           "only replace exit values when loop def likely dead"),
116                clEnumValN(AlwaysRepl, "always",
117                           "always replace exit value whenever possible")));
118 
119 static cl::opt<bool> UsePostIncrementRanges(
120   "indvars-post-increment-ranges", cl::Hidden,
121   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
122   cl::init(true));
123 
124 static cl::opt<bool>
125 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
126             cl::desc("Disable Linear Function Test Replace optimization"));
127 
128 static cl::opt<bool>
129 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
130                 cl::desc("Predicate conditions in read only loops"));
131 
132 namespace {
133 
134 struct RewritePhi;
135 
136 class IndVarSimplify {
137   LoopInfo *LI;
138   ScalarEvolution *SE;
139   DominatorTree *DT;
140   const DataLayout &DL;
141   TargetLibraryInfo *TLI;
142   const TargetTransformInfo *TTI;
143   std::unique_ptr<MemorySSAUpdater> MSSAU;
144 
145   SmallVector<WeakTrackingVH, 16> DeadInsts;
146 
147   bool handleFloatingPointIV(Loop *L, PHINode *PH);
148   bool rewriteNonIntegerIVs(Loop *L);
149 
150   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
151   /// Try to eliminate loop exits based on analyzable exit counts.
152   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
153   /// Try to form loop invariant tests for loop exits by changing how many
154   /// iterations of the loop run when that is unobservable.
155   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
156 
157   bool rewriteFirstIterationLoopExitValues(Loop *L);
158 
159   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
160                                  const SCEV *ExitCount,
161                                  PHINode *IndVar, SCEVExpander &Rewriter);
162 
163   bool sinkUnusedInvariants(Loop *L);
164 
165 public:
166   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
167                  const DataLayout &DL, TargetLibraryInfo *TLI,
168                  TargetTransformInfo *TTI, MemorySSA *MSSA)
169       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
170     if (MSSA)
171       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
172   }
173 
174   bool run(Loop *L);
175 };
176 
177 } // end anonymous namespace
178 
179 /// Determine the insertion point for this user. By default, insert immediately
180 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
181 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
182 /// common dominator for the incoming blocks. A nullptr can be returned if no
183 /// viable location is found: it may happen if User is a PHI and Def only comes
184 /// to this PHI from unreachable blocks.
185 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
186                                           DominatorTree *DT, LoopInfo *LI) {
187   PHINode *PHI = dyn_cast<PHINode>(User);
188   if (!PHI)
189     return User;
190 
191   Instruction *InsertPt = nullptr;
192   for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
193     if (PHI->getIncomingValue(i) != Def)
194       continue;
195 
196     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
197 
198     if (!DT->isReachableFromEntry(InsertBB))
199       continue;
200 
201     if (!InsertPt) {
202       InsertPt = InsertBB->getTerminator();
203       continue;
204     }
205     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
206     InsertPt = InsertBB->getTerminator();
207   }
208 
209   // If we have skipped all inputs, it means that Def only comes to Phi from
210   // unreachable blocks.
211   if (!InsertPt)
212     return nullptr;
213 
214   auto *DefI = dyn_cast<Instruction>(Def);
215   if (!DefI)
216     return InsertPt;
217 
218   assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
219 
220   auto *L = LI->getLoopFor(DefI->getParent());
221   assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
222 
223   for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
224     if (LI->getLoopFor(DTN->getBlock()) == L)
225       return DTN->getBlock()->getTerminator();
226 
227   llvm_unreachable("DefI dominates InsertPt!");
228 }
229 
230 //===----------------------------------------------------------------------===//
231 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
232 //===----------------------------------------------------------------------===//
233 
234 /// Convert APF to an integer, if possible.
235 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
236   bool isExact = false;
237   // See if we can convert this to an int64_t
238   uint64_t UIntVal;
239   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
240                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
241       !isExact)
242     return false;
243   IntVal = UIntVal;
244   return true;
245 }
246 
247 /// If the loop has a floating-point induction variable, then insert a
248 /// corresponding integer induction variable if possible.
249 /// For example,
250 /// for(double i = 0; i < 10000; ++i)
251 ///   bar(i)
252 /// is converted into
253 /// for(int i = 0; i < 10000; ++i)
254 ///   bar((double)i);
255 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
256   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
257   unsigned BackEdge     = IncomingEdge^1;
258 
259   // Check incoming value.
260   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
261 
262   int64_t InitValue;
263   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
264     return false;
265 
266   // Check IV increment. Reject this PN if the increment operation is not
267   // an add or if the increment value cannot be represented by an integer.
268   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
269   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
270 
271   // If this is not an add of the PHI with a constantfp, or if the constant fp
272   // is not an integer, bail out.
273   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
274   int64_t IncValue;
275   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
276       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
277     return false;
278 
279   // Check Incr uses. One user is PN and the other user is an exit condition
280   // used by the conditional terminator.
281   Value::user_iterator IncrUse = Incr->user_begin();
282   Instruction *U1 = cast<Instruction>(*IncrUse++);
283   if (IncrUse == Incr->user_end()) return false;
284   Instruction *U2 = cast<Instruction>(*IncrUse++);
285   if (IncrUse != Incr->user_end()) return false;
286 
287   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
288   // only used by a branch, we can't transform it.
289   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
290   if (!Compare)
291     Compare = dyn_cast<FCmpInst>(U2);
292   if (!Compare || !Compare->hasOneUse() ||
293       !isa<BranchInst>(Compare->user_back()))
294     return false;
295 
296   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
297 
298   // We need to verify that the branch actually controls the iteration count
299   // of the loop.  If not, the new IV can overflow and no one will notice.
300   // The branch block must be in the loop and one of the successors must be out
301   // of the loop.
302   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
303   if (!L->contains(TheBr->getParent()) ||
304       (L->contains(TheBr->getSuccessor(0)) &&
305        L->contains(TheBr->getSuccessor(1))))
306     return false;
307 
308   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
309   // transform it.
310   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
311   int64_t ExitValue;
312   if (ExitValueVal == nullptr ||
313       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
314     return false;
315 
316   // Find new predicate for integer comparison.
317   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
318   switch (Compare->getPredicate()) {
319   default: return false;  // Unknown comparison.
320   case CmpInst::FCMP_OEQ:
321   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
322   case CmpInst::FCMP_ONE:
323   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
324   case CmpInst::FCMP_OGT:
325   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
326   case CmpInst::FCMP_OGE:
327   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
328   case CmpInst::FCMP_OLT:
329   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
330   case CmpInst::FCMP_OLE:
331   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
332   }
333 
334   // We convert the floating point induction variable to a signed i32 value if
335   // we can.  This is only safe if the comparison will not overflow in a way
336   // that won't be trapped by the integer equivalent operations.  Check for this
337   // now.
338   // TODO: We could use i64 if it is native and the range requires it.
339 
340   // The start/stride/exit values must all fit in signed i32.
341   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
342     return false;
343 
344   // If not actually striding (add x, 0.0), avoid touching the code.
345   if (IncValue == 0)
346     return false;
347 
348   // Positive and negative strides have different safety conditions.
349   if (IncValue > 0) {
350     // If we have a positive stride, we require the init to be less than the
351     // exit value.
352     if (InitValue >= ExitValue)
353       return false;
354 
355     uint32_t Range = uint32_t(ExitValue-InitValue);
356     // Check for infinite loop, either:
357     // while (i <= Exit) or until (i > Exit)
358     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
359       if (++Range == 0) return false;  // Range overflows.
360     }
361 
362     unsigned Leftover = Range % uint32_t(IncValue);
363 
364     // If this is an equality comparison, we require that the strided value
365     // exactly land on the exit value, otherwise the IV condition will wrap
366     // around and do things the fp IV wouldn't.
367     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
368         Leftover != 0)
369       return false;
370 
371     // If the stride would wrap around the i32 before exiting, we can't
372     // transform the IV.
373     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
374       return false;
375   } else {
376     // If we have a negative stride, we require the init to be greater than the
377     // exit value.
378     if (InitValue <= ExitValue)
379       return false;
380 
381     uint32_t Range = uint32_t(InitValue-ExitValue);
382     // Check for infinite loop, either:
383     // while (i >= Exit) or until (i < Exit)
384     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
385       if (++Range == 0) return false;  // Range overflows.
386     }
387 
388     unsigned Leftover = Range % uint32_t(-IncValue);
389 
390     // If this is an equality comparison, we require that the strided value
391     // exactly land on the exit value, otherwise the IV condition will wrap
392     // around and do things the fp IV wouldn't.
393     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
394         Leftover != 0)
395       return false;
396 
397     // If the stride would wrap around the i32 before exiting, we can't
398     // transform the IV.
399     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
400       return false;
401   }
402 
403   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
404 
405   // Insert new integer induction variable.
406   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
407   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
408                       PN->getIncomingBlock(IncomingEdge));
409 
410   Value *NewAdd =
411     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
412                               Incr->getName()+".int", Incr);
413   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
414 
415   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
416                                       ConstantInt::get(Int32Ty, ExitValue),
417                                       Compare->getName());
418 
419   // In the following deletions, PN may become dead and may be deleted.
420   // Use a WeakTrackingVH to observe whether this happens.
421   WeakTrackingVH WeakPH = PN;
422 
423   // Delete the old floating point exit comparison.  The branch starts using the
424   // new comparison.
425   NewCompare->takeName(Compare);
426   Compare->replaceAllUsesWith(NewCompare);
427   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
428 
429   // Delete the old floating point increment.
430   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
431   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
432 
433   // If the FP induction variable still has uses, this is because something else
434   // in the loop uses its value.  In order to canonicalize the induction
435   // variable, we chose to eliminate the IV and rewrite it in terms of an
436   // int->fp cast.
437   //
438   // We give preference to sitofp over uitofp because it is faster on most
439   // platforms.
440   if (WeakPH) {
441     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
442                                  &*PN->getParent()->getFirstInsertionPt());
443     PN->replaceAllUsesWith(Conv);
444     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
445   }
446   return true;
447 }
448 
449 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
450   // First step.  Check to see if there are any floating-point recurrences.
451   // If there are, change them into integer recurrences, permitting analysis by
452   // the SCEV routines.
453   BasicBlock *Header = L->getHeader();
454 
455   SmallVector<WeakTrackingVH, 8> PHIs;
456   for (PHINode &PN : Header->phis())
457     PHIs.push_back(&PN);
458 
459   bool Changed = false;
460   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
461     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
462       Changed |= handleFloatingPointIV(L, PN);
463 
464   // If the loop previously had a floating-point IV, ScalarEvolution
465   // may not have been able to compute a trip count. Now that we've done some
466   // re-writing, the trip count may be computable.
467   if (Changed)
468     SE->forgetLoop(L);
469   return Changed;
470 }
471 
472 //===---------------------------------------------------------------------===//
473 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
474 // they will exit at the first iteration.
475 //===---------------------------------------------------------------------===//
476 
477 /// Check to see if this loop has loop invariant conditions which lead to loop
478 /// exits. If so, we know that if the exit path is taken, it is at the first
479 /// loop iteration. This lets us predict exit values of PHI nodes that live in
480 /// the loop header.
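/// For example, if an exit is guarded by a loop-invariant condition, a header
/// PHI used along that exit edge must still hold the value it receives from
/// the preheader, so the exit PHI can take the preheader value directly.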
481 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
482   // Verify the input to the pass is already in LCSSA form.
483   assert(L->isLCSSAForm(*DT));
484 
485   SmallVector<BasicBlock *, 8> ExitBlocks;
486   L->getUniqueExitBlocks(ExitBlocks);
487 
488   bool MadeAnyChanges = false;
489   for (auto *ExitBB : ExitBlocks) {
490     // If there are no more PHI nodes in this exit block, then no more
491     // values defined inside the loop are used on this path.
492     for (PHINode &PN : ExitBB->phis()) {
493       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
494            IncomingValIdx != E; ++IncomingValIdx) {
495         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
496 
497         // Can we prove that the exit must run on the first iteration if it
498         // runs at all?  (i.e. early exits are fine for our purposes, but
499         // traces which lead to this exit being taken on the 2nd iteration
500         // aren't.)  Note that this is about whether the exit branch is
501         // executed, not about whether it is taken.
502         if (!L->getLoopLatch() ||
503             !DT->dominates(IncomingBB, L->getLoopLatch()))
504           continue;
505 
506         // Get condition that leads to the exit path.
507         auto *TermInst = IncomingBB->getTerminator();
508 
509         Value *Cond = nullptr;
510         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
511           // Must be a conditional branch, otherwise the block
512           // should not be in the loop.
513           Cond = BI->getCondition();
514         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
515           Cond = SI->getCondition();
516         else
517           continue;
518 
519         if (!L->isLoopInvariant(Cond))
520           continue;
521 
522         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
523 
524         // Only deal with PHIs in the loop header.
525         if (!ExitVal || ExitVal->getParent() != L->getHeader())
526           continue;
527 
528         // If ExitVal is a PHI on the loop header, then we know its
529         // value along this exit because the exit can only be taken
530         // on the first iteration.
531         auto *LoopPreheader = L->getLoopPreheader();
532         assert(LoopPreheader && "Invalid loop");
533         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
534         if (PreheaderIdx != -1) {
535           assert(ExitVal->getParent() == L->getHeader() &&
536                  "ExitVal must be in loop header");
537           MadeAnyChanges = true;
538           PN.setIncomingValue(IncomingValIdx,
539                               ExitVal->getIncomingValue(PreheaderIdx));
540         }
541       }
542     }
543   }
544   return MadeAnyChanges;
545 }
546 
547 //===----------------------------------------------------------------------===//
548 //  IV Widening - Extend the width of an IV to cover its widest uses.
549 //===----------------------------------------------------------------------===//
550 
551 namespace {
552 
553 // Collect information about induction variables that are used by sign/zero
554 // extend operations. This information is recorded by visitIVCast and provides
555 // the input to WidenIV.
556 struct WideIVInfo {
557   PHINode *NarrowIV = nullptr;
558 
559   // Widest integer type created by a [sz]ext.
560   Type *WidestNativeType = nullptr;
561 
562   // Was a sext user seen before a zext?
563   bool IsSigned = false;
564 };
565 
566 } // end anonymous namespace
567 
568 /// Update information about the induction variable that is extended by this
569 /// sign or zero extend operation. This is used to determine the final width of
570 /// the IV before actually widening it.
571 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
572                         const TargetTransformInfo *TTI) {
573   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
574   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
575     return;
576 
577   Type *Ty = Cast->getType();
578   uint64_t Width = SE->getTypeSizeInBits(Ty);
579   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
580     return;
581 
582   // Check that `Cast` actually extends the induction variable (we rely on this
583   // later).  This takes care of cases where `Cast` is extending a truncation of
584   // the narrow induction variable, and thus can end up being narrower than the
585   // "narrow" induction variable.
586   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
587   if (NarrowIVWidth >= Width)
588     return;
589 
590   // Cast is either an sext or zext up to this point.
591   // We should not widen an indvar if arithmetic on the wider indvar is more
592   // expensive than on the narrower indvar. We check only the cost of ADD
593   // because at least an ADD is required to increment the induction variable. We
594   // could compute more comprehensively the cost of all instructions on the
595   // induction variable when necessary.
596   if (TTI &&
597       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
598           TTI->getArithmeticInstrCost(Instruction::Add,
599                                       Cast->getOperand(0)->getType())) {
600     return;
601   }
602 
603   if (!WI.WidestNativeType) {
604     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
605     WI.IsSigned = IsSigned;
606     return;
607   }
608 
609   // We extend the IV to satisfy the sign of its first user, arbitrarily.
610   if (WI.IsSigned != IsSigned)
611     return;
612 
613   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
614     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
615 }
616 
617 namespace {
618 
619 /// Record a link in the Narrow IV def-use chain along with the WideIV that
620 /// computes the same value as the Narrow IV def.  This avoids caching Use*
621 /// pointers.
622 struct NarrowIVDefUse {
623   Instruction *NarrowDef = nullptr;
624   Instruction *NarrowUse = nullptr;
625   Instruction *WideDef = nullptr;
626 
627   // True if the narrow def is never negative.  Tracking this information lets
628   // us use a sign extension instead of a zero extension or vice versa, when
629   // profitable and legal.
630   bool NeverNegative = false;
631 
632   NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
633                  bool NeverNegative)
634       : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
635         NeverNegative(NeverNegative) {}
636 };
637 
638 /// The goal of this transform is to remove sign and zero extends without
639 /// creating any new induction variables. To do this, it creates a new phi of
640 /// the wider type and redirects all users, either removing extends or inserting
641 /// truncs whenever we stop propagating the type.
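/// For example, an i32 IV whose value is consumed through 'zext i32 ... to i64'
/// inside the loop can be replaced by an i64 IV, removing the zext; remaining
/// i32 uses receive a trunc of the wide IV instead.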
642 class WidenIV {
643   // Parameters
644   PHINode *OrigPhi;
645   Type *WideType;
646 
647   // Context
648   LoopInfo        *LI;
649   Loop            *L;
650   ScalarEvolution *SE;
651   DominatorTree   *DT;
652 
653   // Does the module have any calls to the llvm.experimental.guard intrinsic
654   // at all? If not we can avoid scanning instructions looking for guards.
655   bool HasGuards;
656 
657   // Result
658   PHINode *WidePhi = nullptr;
659   Instruction *WideInc = nullptr;
660   const SCEV *WideIncExpr = nullptr;
661   SmallVectorImpl<WeakTrackingVH> &DeadInsts;
662 
663   SmallPtrSet<Instruction *,16> Widened;
664   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
665 
666   enum ExtendKind { ZeroExtended, SignExtended, Unknown };
667 
668   // A map tracking the kind of extension used to widen each narrow IV
669   // and narrow IV user.
670   // Key: pointer to a narrow IV or IV user.
671   // Value: the kind of extension used to widen this Instruction.
672   DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
673 
674   using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
675 
676   // A map with control-dependent ranges for post increment IV uses. The key is
677   // a pair of IV def and a use of this def denoting the context. The value is
678   // a ConstantRange representing possible values of the def at the given
679   // context.
680   DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
681 
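  // Return the recorded control-dependent range of Def at the context UseI,
  // if one has been computed.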
682   Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
683                                               Instruction *UseI) {
684     DefUserPair Key(Def, UseI);
685     auto It = PostIncRangeInfos.find(Key);
686     return It == PostIncRangeInfos.end()
687                ? Optional<ConstantRange>(None)
688                : Optional<ConstantRange>(It->second);
689   }
690 
691   void calculatePostIncRanges(PHINode *OrigPhi);
692   void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
693 
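  // Record a control-dependent range of Def at the context UseI, intersecting
  // it with any previously recorded range.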
694   void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
695     DefUserPair Key(Def, UseI);
696     auto It = PostIncRangeInfos.find(Key);
697     if (It == PostIncRangeInfos.end())
698       PostIncRangeInfos.insert({Key, R});
699     else
700       It->second = R.intersectWith(It->second);
701   }
702 
703 public:
704   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
705           DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
706           bool HasGuards)
707       : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
708         L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
709         HasGuards(HasGuards), DeadInsts(DI) {
710     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
711     ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
712   }
713 
714   PHINode *createWideIV(SCEVExpander &Rewriter);
715 
716 protected:
717   Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
718                           Instruction *Use);
719 
720   Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
721   Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
722                                      const SCEVAddRecExpr *WideAR);
723   Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
724 
725   ExtendKind getExtendKind(Instruction *I);
726 
727   using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
728 
729   WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
730 
731   WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
732 
733   const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
734                               unsigned OpCode) const;
735 
736   Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
737 
738   bool widenLoopCompare(NarrowIVDefUse DU);
739   bool widenWithVariantLoadUse(NarrowIVDefUse DU);
740   void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
741 
742   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
743 };
744 
745 } // end anonymous namespace
746 
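/// Create a sign or zero extend of NarrowOper to WideType. The extend is
/// inserted before Use but hoisted into the preheader of the outermost loop in
/// which NarrowOper is loop invariant.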
747 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
748                                  bool IsSigned, Instruction *Use) {
749   // Set the debug location and conservative insertion point.
750   IRBuilder<> Builder(Use);
751   // Hoist the insertion point into loop preheaders as far as possible.
752   for (const Loop *L = LI->getLoopFor(Use->getParent());
753        L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
754        L = L->getParentLoop())
755     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
756 
757   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
758                     Builder.CreateZExt(NarrowOper, WideType);
759 }
760 
761 /// Instantiate a wide operation to replace a narrow operation. This only needs
762 /// to handle operations that can evaluate to SCEVAddRec. It can safely return
763 /// 0 for any operation we decide not to clone.
764 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
765                                   const SCEVAddRecExpr *WideAR) {
766   unsigned Opcode = DU.NarrowUse->getOpcode();
767   switch (Opcode) {
768   default:
769     return nullptr;
770   case Instruction::Add:
771   case Instruction::Mul:
772   case Instruction::UDiv:
773   case Instruction::Sub:
774     return cloneArithmeticIVUser(DU, WideAR);
775 
776   case Instruction::And:
777   case Instruction::Or:
778   case Instruction::Xor:
779   case Instruction::Shl:
780   case Instruction::LShr:
781   case Instruction::AShr:
782     return cloneBitwiseIVUser(DU);
783   }
784 }
785 
786 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
787   Instruction *NarrowUse = DU.NarrowUse;
788   Instruction *NarrowDef = DU.NarrowDef;
789   Instruction *WideDef = DU.WideDef;
790 
791   LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
792 
793   // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
794   // about the narrow operand yet so must insert a [sz]ext. It is probably loop
795   // invariant and will be folded or hoisted. If it actually comes from a
796   // widened IV, it should be removed during a future call to widenIVUse.
797   bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
798   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
799                    ? WideDef
800                    : createExtendInst(NarrowUse->getOperand(0), WideType,
801                                       IsSigned, NarrowUse);
802   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
803                    ? WideDef
804                    : createExtendInst(NarrowUse->getOperand(1), WideType,
805                                       IsSigned, NarrowUse);
806 
807   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
808   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
809                                         NarrowBO->getName());
810   IRBuilder<> Builder(NarrowUse);
811   Builder.Insert(WideBO);
812   WideBO->copyIRFlags(NarrowBO);
813   return WideBO;
814 }
815 
816 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
817                                             const SCEVAddRecExpr *WideAR) {
818   Instruction *NarrowUse = DU.NarrowUse;
819   Instruction *NarrowDef = DU.NarrowDef;
820   Instruction *WideDef = DU.WideDef;
821 
822   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
823 
824   unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
825 
826   // We're trying to find X such that
827   //
828   //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
829   //
830   // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
831   // and check using SCEV if any of them are correct.
832 
833   // Returns true if extending NonIVNarrowDef according to `SignExt` is a
834   // correct solution to X.
835   auto GuessNonIVOperand = [&](bool SignExt) {
836     const SCEV *WideLHS;
837     const SCEV *WideRHS;
838 
839     auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
840       if (SignExt)
841         return SE->getSignExtendExpr(S, Ty);
842       return SE->getZeroExtendExpr(S, Ty);
843     };
844 
845     if (IVOpIdx == 0) {
846       WideLHS = SE->getSCEV(WideDef);
847       const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
848       WideRHS = GetExtend(NarrowRHS, WideType);
849     } else {
850       const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
851       WideLHS = GetExtend(NarrowLHS, WideType);
852       WideRHS = SE->getSCEV(WideDef);
853     }
854 
855     // WideUse is "WideDef `op.wide` X" as described in the comment.
856     const SCEV *WideUse = nullptr;
857 
858     switch (NarrowUse->getOpcode()) {
859     default:
860       llvm_unreachable("No other possibility!");
861 
862     case Instruction::Add:
863       WideUse = SE->getAddExpr(WideLHS, WideRHS);
864       break;
865 
866     case Instruction::Mul:
867       WideUse = SE->getMulExpr(WideLHS, WideRHS);
868       break;
869 
870     case Instruction::UDiv:
871       WideUse = SE->getUDivExpr(WideLHS, WideRHS);
872       break;
873 
874     case Instruction::Sub:
875       WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
876       break;
877     }
878 
879     return WideUse == WideAR;
880   };
881 
882   bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
883   if (!GuessNonIVOperand(SignExtend)) {
884     SignExtend = !SignExtend;
885     if (!GuessNonIVOperand(SignExtend))
886       return nullptr;
887   }
888 
889   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
890                    ? WideDef
891                    : createExtendInst(NarrowUse->getOperand(0), WideType,
892                                       SignExtend, NarrowUse);
893   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
894                    ? WideDef
895                    : createExtendInst(NarrowUse->getOperand(1), WideType,
896                                       SignExtend, NarrowUse);
897 
898   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
899   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
900                                         NarrowBO->getName());
901 
902   IRBuilder<> Builder(NarrowUse);
903   Builder.Insert(WideBO);
904   WideBO->copyIRFlags(NarrowBO);
905   return WideBO;
906 }
907 
908 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
909   auto It = ExtendKindMap.find(I);
910   assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
911   return It->second;
912 }
913 
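/// Combine LHS and RHS into a single SCEV using the given IR opcode. Only the
/// opcodes needed when widening IV users (Add, Sub, Mul) are supported.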
914 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
915                                      unsigned OpCode) const {
916   if (OpCode == Instruction::Add)
917     return SE->getAddExpr(LHS, RHS);
918   if (OpCode == Instruction::Sub)
919     return SE->getMinusSCEV(LHS, RHS);
920   if (OpCode == Instruction::Mul)
921     return SE->getMulExpr(LHS, RHS);
922 
923   llvm_unreachable("Unsupported opcode.");
924 }
925 
926 /// No-wrap operations can transfer sign extension of their result to their
927 /// operands. Generate the SCEV value for the widened operation without
928 /// actually modifying the IR yet. If the expression after extending the
929 /// operands is an AddRec for this loop, return the AddRec and the kind of
930 /// extension used.
931 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
932   // Handle the common case of add<nsw/nuw>
933   const unsigned OpCode = DU.NarrowUse->getOpcode();
934   // Only Add/Sub/Mul instructions supported yet.
935   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
936       OpCode != Instruction::Mul)
937     return {nullptr, Unknown};
938 
939   // One operand (NarrowDef) has already been extended to WideDef. Now determine
940   // if extending the other will lead to a recurrence.
941   const unsigned ExtendOperIdx =
942       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
943   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
944 
945   const SCEV *ExtendOperExpr = nullptr;
946   const OverflowingBinaryOperator *OBO =
947     cast<OverflowingBinaryOperator>(DU.NarrowUse);
948   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
949   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
950     ExtendOperExpr = SE->getSignExtendExpr(
951       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
952   else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
953     ExtendOperExpr = SE->getZeroExtendExpr(
954       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
955   else
956     return {nullptr, Unknown};
957 
958   // When creating this SCEV expr, don't apply the current operation's NSW or NUW
959   // flags. This instruction may be guarded by control flow that the no-wrap
960   // behavior depends on. Non-control-equivalent instructions can be mapped to
961   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
962   // semantics to those operations.
963   const SCEV *lhs = SE->getSCEV(DU.WideDef);
964   const SCEV *rhs = ExtendOperExpr;
965 
966   // Let's swap operands to the initial order for the case of non-commutative
967   // operations, like SUB. See PR21014.
968   if (ExtendOperIdx == 0)
969     std::swap(lhs, rhs);
970   const SCEVAddRecExpr *AddRec =
971       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
972 
973   if (!AddRec || AddRec->getLoop() != L)
974     return {nullptr, Unknown};
975 
976   return {AddRec, ExtKind};
977 }
978 
979 /// Is this instruction potentially interesting for further simplification after
980 /// widening its type? In other words, can the extend be safely hoisted out of
981 /// the loop, with SCEV reducing the value to a recurrence on the same loop? If
982 /// so, return the extended recurrence and the kind of extension used. Otherwise
983 /// return {nullptr, Unknown}.
984 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
985   if (!SE->isSCEVable(DU.NarrowUse->getType()))
986     return {nullptr, Unknown};
987 
988   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
989   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
990       SE->getTypeSizeInBits(WideType)) {
991     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
992     // index. So don't follow this use.
993     return {nullptr, Unknown};
994   }
995 
996   const SCEV *WideExpr;
997   ExtendKind ExtKind;
998   if (DU.NeverNegative) {
999     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1000     if (isa<SCEVAddRecExpr>(WideExpr))
1001       ExtKind = SignExtended;
1002     else {
1003       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1004       ExtKind = ZeroExtended;
1005     }
1006   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1007     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1008     ExtKind = SignExtended;
1009   } else {
1010     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1011     ExtKind = ZeroExtended;
1012   }
1013   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1014   if (!AddRec || AddRec->getLoop() != L)
1015     return {nullptr, Unknown};
1016   return {AddRec, ExtKind};
1017 }
1018 
1019 /// This IV user cannot be widened. Replace this use of the original narrow IV
1020 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1021 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1022   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1023   if (!InsertPt)
1024     return;
1025   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1026                     << *DU.NarrowUse << "\n");
1027   IRBuilder<> Builder(InsertPt);
1028   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1029   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1030 }
1031 
1032 /// If the narrow use is a compare instruction, then widen the compare
1033 /// (and possibly the other operand). The extend operation is hoisted into the
1034 /// loop preheader as far as possible.
1035 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1036   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1037   if (!Cmp)
1038     return false;
1039 
1040   // We can legally widen the comparison in the following two cases:
1041   //
1042   //  - The signedness of the IV extension and comparison match
1043   //
1044   //  - The narrow IV is always positive (and thus its sign extension is equal
1045   //    to its zero extension).  For instance, let's say we're zero extending
1046   //    %narrow for the following use
1047   //
1048   //      icmp slt i32 %narrow, %val   ... (A)
1049   //
1050   //    and %narrow is always positive.  Then
1051   //
1052   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1053   //          == icmp slt i32 zext(%narrow), sext(%val)
1054   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1055   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1056     return false;
1057 
1058   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1059   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1060   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1061   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1062 
1063   // Widen the compare instruction.
1064   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1065   if (!InsertPt)
1066     return false;
1067   IRBuilder<> Builder(InsertPt);
1068   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1069 
1070   // Widen the other operand of the compare, if necessary.
1071   if (CastWidth < IVWidth) {
1072     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1073     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1074   }
1075   return true;
1076 }
1077 
1078 /// If the narrow use is an instruction whose two operands are the defining
1079 /// instruction of DU and a load instruction, then we have the following:
1080 /// if the load is hoisted outside the loop, then we do not reach this function
1081 /// as scalar evolution analysis works fine in widenIVUse with variables
1082 /// hoisted outside the loop and efficient code is subsequently generated by
1083 /// not emitting truncate instructions. But when the load is not hoisted
1084 /// (whether due to a limitation in alias analysis or to a true legality
1085 /// constraint), then scalar evolution cannot proceed with loop-variant values
1086 /// and inefficient code is generated. This function handles the non-hoisted
1087 /// load special case by making the optimization generate the same type of code
1088 /// for hoisted and non-hoisted loads (widen the use and eliminate the sign
1089 /// extend instruction). This special case matters especially when the
1090 /// induction variables affect the addressing mode in code generation.
1091 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1092   Instruction *NarrowUse = DU.NarrowUse;
1093   Instruction *NarrowDef = DU.NarrowDef;
1094   Instruction *WideDef = DU.WideDef;
1095 
1096   // Handle the common case of add<nsw/nuw>
1097   const unsigned OpCode = NarrowUse->getOpcode();
1098   // Only Add/Sub/Mul instructions are supported.
1099   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1100       OpCode != Instruction::Mul)
1101     return false;
1102 
1103   // The operand that is not defined by NarrowDef of DU. Let's call it the
1104   // other operand.
1105   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1106   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1107          "bad DU");
1108 
1109   const SCEV *ExtendOperExpr = nullptr;
1110   const OverflowingBinaryOperator *OBO =
1111     cast<OverflowingBinaryOperator>(NarrowUse);
1112   ExtendKind ExtKind = getExtendKind(NarrowDef);
1113   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1114     ExtendOperExpr = SE->getSignExtendExpr(
1115       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1116   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1117     ExtendOperExpr = SE->getZeroExtendExpr(
1118       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1119   else
1120     return false;
1121 
1122   // We are interested in the other operand being a load instruction.
1123   // But, we should look into relaxing this restriction later on.
1124   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1125   if (I && I->getOpcode() != Instruction::Load)
1126     return false;
1127 
1128   // Verify that the defining operand is an AddRec.
1129   const SCEV *Op1 = SE->getSCEV(WideDef);
1130   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1131   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1132     return false;
1133   // Verify that the other operand is an extend expression.
1134   if (ExtKind == SignExtended) {
1135     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1136       return false;
1137   } else {
1138     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1139       return false;
1140   }
1141 
1142   if (ExtKind == SignExtended) {
1143     for (Use &U : NarrowUse->uses()) {
1144       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1145       if (!User || User->getType() != WideType)
1146         return false;
1147     }
1148   } else { // ExtKind == ZeroExtended
1149     for (Use &U : NarrowUse->uses()) {
1150       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1151       if (!User || User->getType() != WideType)
1152         return false;
1153     }
1154   }
1155 
1156   return true;
1157 }
1158 
1159 /// Special case for widening with variant loads (see
1160 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1161 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1162   Instruction *NarrowUse = DU.NarrowUse;
1163   Instruction *NarrowDef = DU.NarrowDef;
1164   Instruction *WideDef = DU.WideDef;
1165 
1166   ExtendKind ExtKind = getExtendKind(NarrowDef);
1167 
1168   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1169 
1170   // Generating a widening use instruction.
1171   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1172                    ? WideDef
1173                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1174                                       ExtKind, NarrowUse);
1175   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1176                    ? WideDef
1177                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1178                                       ExtKind, NarrowUse);
1179 
1180   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1181   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1182                                         NarrowBO->getName());
1183   IRBuilder<> Builder(NarrowUse);
1184   Builder.Insert(WideBO);
1185   WideBO->copyIRFlags(NarrowBO);
1186 
1187   assert(ExtKind != Unknown && "Unknown ExtKind not handled");
1188 
1189   ExtendKindMap[NarrowUse] = ExtKind;
1190 
1191   for (Use &U : NarrowUse->uses()) {
1192     Instruction *User = nullptr;
1193     if (ExtKind == SignExtended)
1194       User = dyn_cast<SExtInst>(U.getUser());
1195     else
1196       User = dyn_cast<ZExtInst>(U.getUser());
1197     if (User && User->getType() == WideType) {
1198       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1199                         << *WideBO << "\n");
1200       ++NumElimExt;
1201       User->replaceAllUsesWith(WideBO);
1202       DeadInsts.emplace_back(User);
1203     }
1204   }
1205 }
1206 
1207 /// Determine whether an individual user of the narrow IV can be widened. If so,
1208 /// return the wide clone of the user.
1209 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1210   assert(ExtendKindMap.count(DU.NarrowDef) &&
1211          "Should already know the kind of extension used to widen NarrowDef");
1212 
1213   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1214   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1215     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1216       // For LCSSA phis, sink the truncate outside the loop.
1217       // After SimplifyCFG most loop exit targets have a single predecessor.
1218       // Otherwise fall back to a truncate within the loop.
1219       if (UsePhi->getNumOperands() != 1)
1220         truncateIVUse(DU, DT, LI);
1221       else {
1222         // Widening the PHI requires us to insert a trunc.  The logical place
1223         // for this trunc is in the same BB as the PHI.  This is not possible if
1224         // the BB is terminated by a catchswitch.
1225         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1226           return nullptr;
1227 
1228         PHINode *WidePhi =
1229           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1230                           UsePhi);
1231         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1232         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1233         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1234         UsePhi->replaceAllUsesWith(Trunc);
1235         DeadInsts.emplace_back(UsePhi);
1236         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1237                           << *WidePhi << "\n");
1238       }
1239       return nullptr;
1240     }
1241   }
1242 
1243   // This narrow use can be widened by a sext if it's non-negative or its narrow
1244   // def was widened by a sext. Same for zext.
1245   auto canWidenBySExt = [&]() {
1246     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1247   };
1248   auto canWidenByZExt = [&]() {
1249     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1250   };
1251 
1252   // Our raison d'etre! Eliminate sign and zero extension.
1253   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1254       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1255     Value *NewDef = DU.WideDef;
1256     if (DU.NarrowUse->getType() != WideType) {
1257       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1258       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1259       if (CastWidth < IVWidth) {
1260         // The cast isn't as wide as the IV, so insert a Trunc.
1261         IRBuilder<> Builder(DU.NarrowUse);
1262         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1263       }
1264       else {
1265         // A wider extend was hidden behind a narrower one. This may induce
1266         // another round of IV widening in which the intermediate IV becomes
1267         // dead. It should be very rare.
1268         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1269                           << " not wide enough to subsume " << *DU.NarrowUse
1270                           << "\n");
1271         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1272         NewDef = DU.NarrowUse;
1273       }
1274     }
1275     if (NewDef != DU.NarrowUse) {
1276       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1277                         << " replaced by " << *DU.WideDef << "\n");
1278       ++NumElimExt;
1279       DU.NarrowUse->replaceAllUsesWith(NewDef);
1280       DeadInsts.emplace_back(DU.NarrowUse);
1281     }
1282     // Now that the extend is gone, we want to expose its uses for potential
1283     // further simplification. We don't need to directly inform SimplifyIVUsers
1284     // of the new users, because their parent IV will be processed later as a
1285     // new loop phi. If we preserved IVUsers analysis, we would also want to
1286     // push the uses of WideDef here.
1287 
1288     // No further widening is needed. The deceased [sz]ext had done it for us.
1289     return nullptr;
1290   }
1291 
1292   // Does this user itself evaluate to a recurrence after widening?
1293   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1294   if (!WideAddRec.first)
1295     WideAddRec = getWideRecurrence(DU);
1296 
1297   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1298   if (!WideAddRec.first) {
1299     // If use is a loop condition, try to promote the condition instead of
1300     // truncating the IV first.
1301     if (widenLoopCompare(DU))
1302       return nullptr;
1303 
1304     // We are about to generate a truncate instruction that may hurt
1305     // performance because the scalar evolution expression computed earlier
1306     // in WideAddRec.first does not indicate a polynomial induction expression.
1307     // In that case, look at the operands of the use instruction to determine
1308     // if we can still widen the use instead of truncating its operand.
1309     if (widenWithVariantLoadUse(DU)) {
1310       widenWithVariantLoadUseCodegen(DU);
1311       return nullptr;
1312     }
1313 
1314     // This user does not evaluate to a recurrence after widening, so don't
1315     // follow it. Instead insert a Trunc to kill off the original use,
1316     // eventually isolating the original narrow IV so it can be removed.
1317     truncateIVUse(DU, DT, LI);
1318     return nullptr;
1319   }
1320   // Assume block terminators cannot evaluate to a recurrence. We can't
1321   // insert a Trunc after a terminator if there happens to be a critical edge.
1322   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1323          "SCEV is not expected to evaluate a block terminator");
1324 
1325   // Reuse the IV increment that SCEVExpander created as long as it dominates
1326   // NarrowUse.
1327   Instruction *WideUse = nullptr;
1328   if (WideAddRec.first == WideIncExpr &&
1329       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1330     WideUse = WideInc;
1331   else {
1332     WideUse = cloneIVUser(DU, WideAddRec.first);
1333     if (!WideUse)
1334       return nullptr;
1335   }
1336   // Evaluation of WideAddRec ensured that the narrow expression could be
1337   // extended outside the loop without overflow. This suggests that the wide use
1338   // evaluates to the same expression as the extended narrow use, but doesn't
1339   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1340   // where it fails, we simply throw away the newly created wide use.
1341   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1342     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1343                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1344                       << "\n");
1345     DeadInsts.emplace_back(WideUse);
1346     return nullptr;
1347   }
1348 
1349   // If we reached this point, then we are going to replace
1350   // DU.NarrowUse with WideUse. Reattach DbgValue then.
1351   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1352 
1353   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1354   // Returning WideUse pushes it on the worklist.
1355   return WideUse;
1356 }
1357 
1358 /// Add eligible users of NarrowDef to NarrowIVUsers.
1359 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
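       // If SCEV can prove the narrow def is non-negative, then sign and zero
       // extension agree on it, so a use can be widened by either kind of
       // extend; this feeds the NeverNegative flag of each pushed def-use pair.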
1360   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1361   bool NonNegativeDef =
1362       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1363                            SE->getConstant(NarrowSCEV->getType(), 0));
1364   for (User *U : NarrowDef->users()) {
1365     Instruction *NarrowUser = cast<Instruction>(U);
1366 
1367     // Handle data flow merges and bizarre phi cycles.
1368     if (!Widened.insert(NarrowUser).second)
1369       continue;
1370 
1371     bool NonNegativeUse = false;
1372     if (!NonNegativeDef) {
1373       // We might have control-dependent range information for this context.
1374       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1375         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1376     }
1377 
1378     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1379                                NonNegativeDef || NonNegativeUse);
1380   }
1381 }
1382 
1383 /// Process a single induction variable. First use the SCEVExpander to create a
1384 /// wide induction variable that evaluates to the same recurrence as the
1385 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1386 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1387 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1388 ///
1389 /// It would be simpler to delete uses as they are processed, but we must avoid
1390 /// invalidating SCEV expressions.
1391 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1392   // Is this phi an induction variable?
1393   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1394   if (!AddRec)
1395     return nullptr;
1396 
1397   // Widen the induction variable expression.
1398   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1399                                ? SE->getSignExtendExpr(AddRec, WideType)
1400                                : SE->getZeroExtendExpr(AddRec, WideType);
1401 
1402   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1403          "Expect the new IV expression to preserve its type");
1404 
1405   // Can the IV be extended outside the loop without overflow?
1406   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1407   if (!AddRec || AddRec->getLoop() != L)
1408     return nullptr;
1409 
1410   // An AddRec must have loop-invariant operands. Since this AddRec is
1411   // materialized by a loop header phi, the expression cannot have any post-loop
1412   // operands, so they must dominate the loop header.
1413   assert(
1414       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1415       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1416       "Loop header phi recurrence inputs do not dominate the loop");
1417 
1418   // Iterate over IV uses (including transitive ones) looking for IV increments
1419   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1420   // the increment, calculate control-dependent range information based on
1421   // dominating conditions inside of the loop (e.g. a range check inside of the
1422   // loop). Calculated ranges are stored in the PostIncRangeInfos map.
1423   //
1424   // Control-dependent range information is later used to prove that a narrow
1425   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1426   // this on demand because when pushNarrowIVUsers needs this information some
1427   // of the dominating conditions might be already widened.
1428   if (UsePostIncrementRanges)
1429     calculatePostIncRanges(OrigPhi);
1430 
1431   // The rewriter provides a value for the desired IV expression. This may
1432   // either find an existing phi or materialize a new one. Either way, we
1433   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1434   // of the phi-SCC dominates the loop entry.
1435   Instruction *InsertPt = &L->getHeader()->front();
1436   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1437 
1438   // Remembering the WideIV increment generated by SCEVExpander allows
1439   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1440   // employ a general reuse mechanism because the call above is the only call to
1441   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1442   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1443     WideInc =
1444       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1445     WideIncExpr = SE->getSCEV(WideInc);
1446     // Propagate the debug location associated with the original loop increment
1447     // to the new (widened) increment.
1448     auto *OrigInc =
1449       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1450     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1451   }
1452 
1453   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1454   ++NumWidened;
1455 
1456   // Traverse the def-use chain using a worklist starting at the original IV.
1457   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1458 
1459   Widened.insert(OrigPhi);
1460   pushNarrowIVUsers(OrigPhi, WidePhi);
1461 
1462   while (!NarrowIVUsers.empty()) {
1463     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1464 
1465     // Process a def-use edge. This may replace the use, so don't hold a
1466     // use_iterator across it.
1467     Instruction *WideUse = widenIVUse(DU, Rewriter);
1468 
1469     // Follow all def-use edges from the previous narrow use.
1470     if (WideUse)
1471       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1472 
1473     // widenIVUse may have removed the def-use edge.
1474     if (DU.NarrowDef->use_empty())
1475       DeadInsts.emplace_back(DU.NarrowDef);
1476   }
1477 
1478   // Attach any debug information to the new PHI.
1479   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1480 
1481   return WidePhi;
1482 }
1483 
1484 /// Calculates the control-dependent range for the given def at the given
1485 /// context by looking at dominating conditions inside of the loop.
1486 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1487                                     Instruction *NarrowUser) {
1488   using namespace llvm::PatternMatch;
1489 
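       // Only post-increment defs of the form 'add nsw %x, C' with a
       // non-negative constant C are analyzed; anything else is skipped.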
1490   Value *NarrowDefLHS;
1491   const APInt *NarrowDefRHS;
1492   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1493                                  m_APInt(NarrowDefRHS))) ||
1494       !NarrowDefRHS->isNonNegative())
1495     return;
1496 
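       // Given a condition on NarrowDefLHS that is known to hold on the path to
       // NarrowUser, constrain NarrowDefLHS to the allowed icmp region and shift
       // it by the nsw increment to obtain a range for NarrowDef itself.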
1497   auto UpdateRangeFromCondition = [&] (Value *Condition,
1498                                        bool TrueDest) {
1499     CmpInst::Predicate Pred;
1500     Value *CmpRHS;
1501     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1502                                  m_Value(CmpRHS))))
1503       return;
1504 
1505     CmpInst::Predicate P =
1506             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1507 
1508     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1509     auto CmpConstrainedLHSRange =
1510             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1511     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1512         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1513 
1514     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1515   };
1516 
1517   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1518     if (!HasGuards)
1519       return;
1520 
1521     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1522                                      Ctx->getParent()->rend())) {
1523       Value *C = nullptr;
1524       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1525         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1526     }
1527   };
1528 
1529   UpdateRangeFromGuards(NarrowUser);
1530 
1531   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1532   // If NarrowUserBB is statically unreachable, asking dominator queries may
1533   // yield surprising results (e.g. the block may not have a dom tree node).
1534   if (!DT->isReachableFromEntry(NarrowUserBB))
1535     return;
1536 
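       // Walk up the dominator tree from NarrowUser's block, staying within the
       // loop, and harvest range facts from guards and from conditional branch
       // edges that dominate NarrowUser.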
1537   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1538        L->contains(DTB->getBlock());
1539        DTB = DTB->getIDom()) {
1540     auto *BB = DTB->getBlock();
1541     auto *TI = BB->getTerminator();
1542     UpdateRangeFromGuards(TI);
1543 
1544     auto *BI = dyn_cast<BranchInst>(TI);
1545     if (!BI || !BI->isConditional())
1546       continue;
1547 
1548     auto *TrueSuccessor = BI->getSuccessor(0);
1549     auto *FalseSuccessor = BI->getSuccessor(1);
1550 
1551     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1552       return BBE.isSingleEdge() &&
1553              DT->dominates(BBE, NarrowUser->getParent());
1554     };
1555 
1556     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1557       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1558 
1559     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1560       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1561   }
1562 }
1563 
1564 /// Calculates the PostIncRangeInfos map for the given IV.
1565 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1566   SmallPtrSet<Instruction *, 16> Visited;
1567   SmallVector<Instruction *, 6> Worklist;
1568   Worklist.push_back(OrigPhi);
1569   Visited.insert(OrigPhi);
1570 
1571   while (!Worklist.empty()) {
1572     Instruction *NarrowDef = Worklist.pop_back_val();
1573 
1574     for (Use &U : NarrowDef->uses()) {
1575       auto *NarrowUser = cast<Instruction>(U.getUser());
1576 
1577       // Don't go looking outside the current loop.
1578       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1579       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1580         continue;
1581 
1582       if (!Visited.insert(NarrowUser).second)
1583         continue;
1584 
1585       Worklist.push_back(NarrowUser);
1586 
1587       calculatePostIncRange(NarrowDef, NarrowUser);
1588     }
1589   }
1590 }
1591 
1592 //===----------------------------------------------------------------------===//
1593 //  Live IV Reduction - Minimize IVs live across the loop.
1594 //===----------------------------------------------------------------------===//
1595 
1596 //===----------------------------------------------------------------------===//
1597 //  Simplification of IV users based on SCEV evaluation.
1598 //===----------------------------------------------------------------------===//
1599 
1600 namespace {
1601 
1602 class IndVarSimplifyVisitor : public IVVisitor {
1603   ScalarEvolution *SE;
1604   const TargetTransformInfo *TTI;
1605   PHINode *IVPhi;
1606 
1607 public:
1608   WideIVInfo WI;
1609 
1610   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1611                         const TargetTransformInfo *TTI,
1612                         const DominatorTree *DTree)
1613     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1614     DT = DTree;
1615     WI.NarrowIV = IVPhi;
1616   }
1617 
1618   // Implement the interface used by simplifyUsersOfIV.
1619   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1620 };
1621 
1622 } // end anonymous namespace
1623 
1624 /// Iteratively perform simplification on a worklist of IV users. Each
1625 /// successive simplification may push more users which may themselves be
1626 /// candidates for simplification.
1627 ///
1628 /// Sign/Zero extend elimination is interleaved with IV simplification.
1629 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1630                                        SCEVExpander &Rewriter,
1631                                        LoopInfo *LI) {
1632   SmallVector<WideIVInfo, 8> WideIVs;
1633 
1634   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1635           Intrinsic::getName(Intrinsic::experimental_guard));
1636   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1637 
1638   SmallVector<PHINode*, 8> LoopPhis;
1639   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1640     LoopPhis.push_back(cast<PHINode>(I));
1641   }
1642   // Each round of simplification iterates through the SimplifyIVUsers worklist
1643   // for all current phis, then determines whether any IVs can be
1644   // widened. Widening adds new phis to LoopPhis, inducing another round of
1645   // simplification on the wide IVs.
1646   bool Changed = false;
1647   while (!LoopPhis.empty()) {
1648     // Evaluate as many IV expressions as possible before widening any IVs. This
1649     // forces SCEV to set no-wrap flags before evaluating sign/zero
1650     // extension. The first time SCEV attempts to normalize sign/zero extension,
1651     // the result becomes final. So for the most predictable results, we delay
1652     // sign/zero extend evaluation until needed, and avoid running
1653     // other SCEV based analysis prior to simplifyAndExtend.
1654     do {
1655       PHINode *CurrIV = LoopPhis.pop_back_val();
1656 
1657       // Information about sign/zero extensions of CurrIV.
1658       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1659 
1660       Changed |=
1661           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1662 
1663       if (Visitor.WI.WidestNativeType) {
1664         WideIVs.push_back(Visitor.WI);
1665       }
1666     } while(!LoopPhis.empty());
1667 
1668     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1669       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1670       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1671         Changed = true;
1672         LoopPhis.push_back(WidePhi);
1673       }
1674     }
1675   }
1676   return Changed;
1677 }
1678 
1679 //===----------------------------------------------------------------------===//
1680 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1681 //===----------------------------------------------------------------------===//
1682 
1683 /// Given a Value which is hoped to be part of an add recurrence in the given
1684 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1685 /// that this is less general than SCEV's AddRec checking.
1686 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1687   Instruction *IncI = dyn_cast<Instruction>(IncV);
1688   if (!IncI)
1689     return nullptr;
1690 
1691   switch (IncI->getOpcode()) {
1692   case Instruction::Add:
1693   case Instruction::Sub:
1694     break;
1695   case Instruction::GetElementPtr:
1696     // An IV counter must preserve its type.
1697     if (IncI->getNumOperands() == 2)
1698       break;
1699     LLVM_FALLTHROUGH;
1700   default:
1701     return nullptr;
1702   }
1703 
1704   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1705   if (Phi && Phi->getParent() == L->getHeader()) {
1706     if (L->isLoopInvariant(IncI->getOperand(1)))
1707       return Phi;
1708     return nullptr;
1709   }
1710   if (IncI->getOpcode() == Instruction::GetElementPtr)
1711     return nullptr;
1712 
1713   // Allow add/sub to be commuted.
1714   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1715   if (Phi && Phi->getParent() == L->getHeader()) {
1716     if (L->isLoopInvariant(IncI->getOperand(0)))
1717       return Phi;
1718   }
1719   return nullptr;
1720 }
1721 
1722 /// Whether the current loop exit test is based on this value.  Currently this
1723 /// is limited to a direct use in the loop condition.
1724 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
1725   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1726   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1727   // TODO: Allow non-icmp loop test.
1728   if (!ICmp)
1729     return false;
1730 
1731   // TODO: Allow indirect use.
1732   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
1733 }
1734 
1735 /// linearFunctionTestReplace policy. Return true unless we can show that the
1736 /// current exit test is already sufficiently canonical.
1737 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
1738   assert(L->getLoopLatch() && "Must be in simplified form");
1739 
1740   // Avoid converting a constant or loop invariant test back to a runtime
1741   // test.  This is critical for when SCEV's cached ExitCount is less precise
1742   // than the current IR (such as after we've proven a particular exit is
1743   // actually dead and thus the BE count never reaches our ExitCount.)
1744   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1745   if (L->isLoopInvariant(BI->getCondition()))
1746     return false;
1747 
1748   // Do LFTR to simplify the exit condition to an ICMP.
1749   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1750   if (!Cond)
1751     return true;
1752 
1753   // Do LFTR to simplify the exit ICMP to EQ/NE
1754   ICmpInst::Predicate Pred = Cond->getPredicate();
1755   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1756     return true;
1757 
1758   // Look for a loop invariant RHS
1759   Value *LHS = Cond->getOperand(0);
1760   Value *RHS = Cond->getOperand(1);
1761   if (!L->isLoopInvariant(RHS)) {
1762     if (!L->isLoopInvariant(LHS))
1763       return true;
1764     std::swap(LHS, RHS);
1765   }
1766   // Look for a simple IV counter LHS
1767   PHINode *Phi = dyn_cast<PHINode>(LHS);
1768   if (!Phi)
1769     Phi = getLoopPhiForCounter(LHS, L);
1770 
1771   if (!Phi)
1772     return true;
1773 
1774   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1775   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1776   if (Idx < 0)
1777     return true;
1778 
1779   // Do LFTR if the exit condition's IV is *not* a simple counter.
1780   Value *IncV = Phi->getIncomingValue(Idx);
1781   return Phi != getLoopPhiForCounter(IncV, L);
1782 }
1783 
1784 /// Return true if undefined behavior would provably be executed on the path to
1785 /// OnPathTo if Root produced a poison result.  Note that this doesn't say
1786 /// anything about whether OnPathTo is actually executed or whether Root is
1787 /// actually poison.  This can be used to assess whether a new use of Root can
1788 /// be added at a location which is control equivalent with OnPathTo (such as
1789 /// immediately before it) without introducing UB which didn't previously
1790 /// exist.  Note that a false result conveys no information.
1791 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
1792                                           Instruction *OnPathTo,
1793                                           DominatorTree *DT) {
1794   // Basic approach is to assume Root is poison, propagate poison forward
1795   // through all users we can easily track, and then check whether any of those
1796   // users are provably UB and must execute before our exiting block might
1797   // exit.
1798 
1799   // The set of all recursive users we've visited (which are assumed to all be
1800   // poison because of said visit)
1801   SmallSet<const Value *, 16> KnownPoison;
1802   SmallVector<const Instruction*, 16> Worklist;
1803   Worklist.push_back(Root);
1804   while (!Worklist.empty()) {
1805     const Instruction *I = Worklist.pop_back_val();
1806 
1807     // If we know this must trigger UB on a path leading to our target.
1808     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
1809       return true;
1810 
1811     // If we can't analyze propagation through this instruction, just skip it
1812     // and transitive users.  Safe as false is a conservative result.
1813     if (!propagatesFullPoison(I) && I != Root)
1814       continue;
1815 
1816     if (KnownPoison.insert(I).second)
1817       for (const User *User : I->users())
1818         Worklist.push_back(cast<Instruction>(User));
1819   }
1820 
1821   // Might be non-UB, or might have a path we couldn't prove must execute on
1822   // the way to the exiting bb.
1823   return false;
1824 }
1825 
1826 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1827 /// down to checking that all operands are constant and listing instructions
1828 /// that may hide undef.
1829 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1830                                unsigned Depth) {
1831   if (isa<Constant>(V))
1832     return !isa<UndefValue>(V);
1833 
1834   if (Depth >= 6)
1835     return false;
1836 
1837   // Conservatively handle non-constant non-instructions. For example, Arguments
1838   // may be undef.
1839   Instruction *I = dyn_cast<Instruction>(V);
1840   if (!I)
1841     return false;
1842 
1843   // Load and return values may be undef.
1844   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1845     return false;
1846 
1847   // Optimistically handle other instructions.
1848   for (Value *Op : I->operands()) {
1849     if (!Visited.insert(Op).second)
1850       continue;
1851     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1852       return false;
1853   }
1854   return true;
1855 }
1856 
1857 /// Return true if the given value is concrete. We must prove that undef can
1858 /// never reach it.
1859 ///
1860 /// TODO: If we decide that this is a good approach to checking for undef, we
1861 /// may factor it into a common location.
1862 static bool hasConcreteDef(Value *V) {
1863   SmallPtrSet<Value*, 8> Visited;
1864   Visited.insert(V);
1865   return hasConcreteDefImpl(V, Visited, 0);
1866 }
1867 
1868 /// Return true if this IV has any uses other than the (soon to be rewritten)
1869 /// loop exit test.
1870 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1871   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1872   Value *IncV = Phi->getIncomingValue(LatchIdx);
1873 
1874   for (User *U : Phi->users())
1875     if (U != Cond && U != IncV) return false;
1876 
1877   for (User *U : IncV->users())
1878     if (U != Cond && U != Phi) return false;
1879   return true;
1880 }
1881 
1882 /// Return true if the given phi is a "counter" in L.  A counter is an
1883 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
1884 /// step of 1.  Note that L must have exactly one latch.
1885 static bool isLoopCounter(PHINode* Phi, Loop *L,
1886                           ScalarEvolution *SE) {
1887   assert(Phi->getParent() == L->getHeader());
1888   assert(L->getLoopLatch());
1889 
1890   if (!SE->isSCEVable(Phi->getType()))
1891     return false;
1892 
1893   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1894   if (!AR || AR->getLoop() != L || !AR->isAffine())
1895     return false;
1896 
1897   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1898   if (!Step || !Step->isOne())
1899     return false;
1900 
1901   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
1902   Value *IncV = Phi->getIncomingValue(LatchIdx);
1903   return (getLoopPhiForCounter(IncV, L) == Phi);
1904 }
1905 
1906 /// Search the loop header for a loop counter (an add rec w/ step of one)
1907 /// suitable for use by LFTR.  If multiple counters are available, select the
1908 /// "best" one based on profitability heuristics.
1909 ///
1910 /// BECount may be an i8* pointer type. The pointer difference is already a
1911 /// valid count without scaling the address stride, so it remains a pointer
1912 /// expression as far as SCEV is concerned.
1913 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
1914                                 const SCEV *BECount,
1915                                 ScalarEvolution *SE, DominatorTree *DT) {
1916   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1917 
1918   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
1919 
1920   // Loop over all of the PHI nodes, looking for a simple counter.
1921   PHINode *BestPhi = nullptr;
1922   const SCEV *BestInit = nullptr;
1923   BasicBlock *LatchBlock = L->getLoopLatch();
1924   assert(LatchBlock && "Must be in simplified form");
1925   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1926 
1927   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1928     PHINode *Phi = cast<PHINode>(I);
1929     if (!isLoopCounter(Phi, L, SE))
1930       continue;
1931 
1932     // Avoid comparing an integer IV against a pointer Limit.
1933     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1934       continue;
1935 
1936     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1937 
1938     // AR may be a pointer type, while BECount is an integer type.
1939     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1940     // AR may not be a narrower type, or we may never exit.
1941     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1942     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
1943       continue;
1944 
1945     // Avoid reusing a potentially undef value to compute other values that may
1946     // have originally had a concrete definition.
1947     if (!hasConcreteDef(Phi)) {
1948       // We explicitly allow unknown phis as long as they are already used by
1949       // the loop exit test.  This is legal since performing LFTR could not
1950       // increase the number of undef users.
1951       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
1952       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
1953           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
1954         continue;
1955     }
1956 
1957     // Avoid introducing undefined behavior due to poison which didn't exist in
1958     // the original program.  (Annoyingly, the rules for poison and undef
1959     // propagation are distinct, so this does NOT cover the undef case above.)
1960     // We have to ensure that we don't introduce UB by introducing a use on an
1961     // iteration where said IV produces poison.  Our strategy here differs for
1962     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
1963     // see code in linearFunctionTestReplace.  For pointers, we restrict
1964     // transforms as there is no good way to reinfer inbounds once lost.
1965     if (!Phi->getType()->isIntegerTy() &&
1966         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
1967       continue;
1968 
1969     const SCEV *Init = AR->getStart();
1970 
1971     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
1972       // Don't force a live loop counter if another IV can be used.
1973       if (AlmostDeadIV(Phi, LatchBlock, Cond))
1974         continue;
1975 
1976       // Prefer to count-from-zero. This is a more "canonical" counter form. It
1977       // also prefers integer to pointer IVs.
1978       if (BestInit->isZero() != Init->isZero()) {
1979         if (BestInit->isZero())
1980           continue;
1981       }
1982       // If two IVs both count from zero or both count from nonzero then the
1983       // narrower is likely a dead phi that has been widened. Use the wider phi
1984       // to allow the other to be eliminated.
1985       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
1986         continue;
1987     }
1988     BestPhi = Phi;
1989     BestInit = Init;
1990   }
1991   return BestPhi;
1992 }
1993 
1994 /// Insert an IR expression which computes the value held by the IV IndVar
1995 /// (which must be a loop counter w/unit stride) after the backedge of loop L
1996 /// is taken ExitCount times.
1997 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
1998                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
1999                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2000   assert(isLoopCounter(IndVar, L, SE));
2001   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2002   const SCEV *IVInit = AR->getStart();
2003 
2004   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2005   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2006   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2007   // the existing GEPs whenever possible.
2008   if (IndVar->getType()->isPointerTy() &&
2009       !ExitCount->getType()->isPointerTy()) {
2010     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2011     // signed value. ExitCount on the other hand represents the loop trip count,
2012     // which is an unsigned value. FindLoopCounter only allows induction
2013     // variables that have a positive unit stride of one. This means we don't
2014     // have to handle the case of negative offsets (yet) and just need to zero
2015     // extend ExitCount.
2016     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2017     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2018     if (UsePostInc)
2019       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2020 
2021     // Expand the code for the iteration count.
2022     assert(SE->isLoopInvariant(IVOffset, L) &&
2023            "Computed iteration count is not loop invariant!");
2024 
2025     // We could handle pointer IVs other than i8*, but we need to compensate for
2026     // gep index scaling.
2027     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2028                              cast<PointerType>(IndVar->getType())
2029                                  ->getElementType())->isOne() &&
2030            "unit stride pointer IV must be i8*");
2031 
2032     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2033     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2034     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2035   } else {
2036     // In any other case, convert both IVInit and ExitCount to integers before
2037     // comparing. This may result in SCEV expansion of pointers, but in practice
2038     // SCEV will fold the pointer arithmetic away as such:
2039     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2040     //
2041     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2042     // for simple memset-style loops.
2043     //
2044     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2045     // were generated on top of case #2, which is not expected.
2046 
2047     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2048     // For unit stride, IVCount = Start + ExitCount with 2's complement
2049     // overflow.
2050 
2051     // For integer IVs, truncate the IV before computing IVInit + BECount,
2052     // unless we know a priori that the limit must be a constant when evaluated
2053     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2054     // of the IV in the loop over a (potentially) expensive expansion of the
2055     // widened exit count add(zext(add)) expression.
2056     if (SE->getTypeSizeInBits(IVInit->getType())
2057         > SE->getTypeSizeInBits(ExitCount->getType())) {
2058       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2059         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2060       else
2061         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2062     }
2063 
2064     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2065 
2066     if (UsePostInc)
2067       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2068 
2069     // Expand the code for the iteration count.
2070     assert(SE->isLoopInvariant(IVLimit, L) &&
2071            "Computed iteration count is not loop invariant!");
2072     // Ensure that we generate the same type as IndVar, or a smaller integer
2073     // type. In the presence of null pointer values, we have an integer type
2074     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2075     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2076       IndVar->getType() : ExitCount->getType();
2077     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2078     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2079   }
2080 }
2081 
2082 /// This method rewrites the exit condition of the loop to be a canonical !=
2083 /// comparison against the incremented loop induction variable.  This pass is
2084 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2085 /// determine a loop-invariant trip count of the loop, which is actually a much
2086 /// broader range than just linear tests.
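     ///
     /// For example (sketch only), a loop exiting on a test like 'i < n' with a
     /// unit-stride counter can be rewritten to exit on 'i.next != limit', where
     /// the limit is the expanded, loop-invariant trip count expression.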
2087 bool IndVarSimplify::
2088 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2089                           const SCEV *ExitCount,
2090                           PHINode *IndVar, SCEVExpander &Rewriter) {
2091   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2092   assert(isLoopCounter(IndVar, L, SE));
2093   Instruction * const IncVar =
2094     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2095 
2096   // Initialize CmpIndVar to the preincremented IV.
2097   Value *CmpIndVar = IndVar;
2098   bool UsePostInc = false;
2099 
2100   // If the exiting block is the same as the backedge block, we prefer to
2101   // compare against the post-incremented value, otherwise we must compare
2102   // against the preincremented value.
2103   if (ExitingBB == L->getLoopLatch()) {
2104     // For pointer IVs, we chose to not strip inbounds which requires us not
2105     // to add a potentially UB introducing use.  We need to either a) show
2106     // the loop test we're modifying is already in post-inc form, or b) show
2107     // that adding a use must not introduce UB.
2108     bool SafeToPostInc =
2109         IndVar->getType()->isIntegerTy() ||
2110         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2111         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2112     if (SafeToPostInc) {
2113       UsePostInc = true;
2114       CmpIndVar = IncVar;
2115     }
2116   }
2117 
2118   // It may be necessary to drop nowrap flags on the incrementing instruction
2119   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2120   // case the increment might have previously been poison on the last iteration
2121   // only) or if LFTR switches to a different IV that was previously dynamically
2122   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2123   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2124   // check), because the pre-inc addrec flags may be adopted from the original
2125   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2126   // TODO: This handling is inaccurate for one case: If we switch to a
2127   // dynamically dead IV that wraps on the first loop iteration only, which is
2128   // not covered by the post-inc addrec. (If the new IV was not dynamically
2129   // dead, it could not be poison on the first iteration in the first place.)
2130   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2131     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2132     if (BO->hasNoUnsignedWrap())
2133       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2134     if (BO->hasNoSignedWrap())
2135       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2136   }
2137 
2138   Value *ExitCnt = genLoopLimit(
2139       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2140   assert(ExitCnt->getType()->isPointerTy() ==
2141              IndVar->getType()->isPointerTy() &&
2142          "genLoopLimit missed a cast");
2143 
2144   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2145   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2146   ICmpInst::Predicate P;
2147   if (L->contains(BI->getSuccessor(0)))
2148     P = ICmpInst::ICMP_NE;
2149   else
2150     P = ICmpInst::ICMP_EQ;
2151 
2152   IRBuilder<> Builder(BI);
2153 
2154   // The new loop exit condition should reuse the debug location of the
2155   // original loop exit condition.
2156   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2157     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2158 
2159   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2160   // avoid the expensive expansion of the limit expression in the wider type,
2161   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2162   // since we know (from the exit count bitwidth), that we can't self-wrap in
2163   // the narrower type.
2164   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2165   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2166   if (CmpIndVarSize > ExitCntSize) {
2167     assert(!CmpIndVar->getType()->isPointerTy() &&
2168            !ExitCnt->getType()->isPointerTy());
2169 
2170     // Before resorting to actually inserting the truncate, use the same
2171     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2172     // the other side of the comparison instead.  We still evaluate the limit
2173     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2174     // a truncate within it.
2175     bool Extended = false;
2176     const SCEV *IV = SE->getSCEV(CmpIndVar);
2177     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2178                                                   ExitCnt->getType());
2179     const SCEV *ZExtTrunc =
2180       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2181 
2182     if (ZExtTrunc == IV) {
2183       Extended = true;
2184       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2185                                    "wide.trip.count");
2186     } else {
2187       const SCEV *SExtTrunc =
2188         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2189       if (SExtTrunc == IV) {
2190         Extended = true;
2191         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2192                                      "wide.trip.count");
2193       }
2194     }
2195 
2196     if (Extended) {
2197       bool Discard;
2198       L->makeLoopInvariant(ExitCnt, Discard);
2199     } else
2200       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2201                                       "lftr.wideiv");
2202   }
2203   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2204                     << "      LHS:" << *CmpIndVar << '\n'
2205                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2206                     << "\n"
2207                     << "      RHS:\t" << *ExitCnt << "\n"
2208                     << "ExitCount:\t" << *ExitCount << "\n"
2209                     << "  was: " << *BI->getCondition() << "\n");
2210 
2211   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2212   Value *OrigCond = BI->getCondition();
2213   // It's tempting to use replaceAllUsesWith here to fully replace the old
2214   // comparison, but that's not immediately safe, since users of the old
2215   // comparison may not be dominated by the new comparison. Instead, just
2216   // update the branch to use the new comparison; in the common case this
2217   // will make the old comparison dead.
2218   BI->setCondition(Cond);
2219   DeadInsts.push_back(OrigCond);
2220 
2221   ++NumLFTR;
2222   return true;
2223 }
2224 
2225 //===----------------------------------------------------------------------===//
2226 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2227 //===----------------------------------------------------------------------===//
2228 
2229 /// If there's a single exit block, sink any loop-invariant values that
2230 /// were defined in the preheader but not used inside the loop into the
2231 /// exit block to reduce register pressure in the loop.
2232 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2233   BasicBlock *ExitBlock = L->getExitBlock();
2234   if (!ExitBlock) return false;
2235 
2236   BasicBlock *Preheader = L->getLoopPreheader();
2237   if (!Preheader) return false;
2238 
2239   bool MadeAnyChanges = false;
2240   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2241   BasicBlock::iterator I(Preheader->getTerminator());
2242   while (I != Preheader->begin()) {
2243     --I;
2244     // New instructions were inserted at the end of the preheader.
2245     if (isa<PHINode>(I))
2246       break;
2247 
2248     // Don't move instructions which might have side effects, since the side
2249     // effects need to complete before instructions inside the loop run.  Also don't
2250     // move instructions which might read memory, since the loop may modify
2251     // memory. Note that it's okay if the instruction might have undefined
2252     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2253     // block.
2254     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2255       continue;
2256 
2257     // Skip debug info intrinsics.
2258     if (isa<DbgInfoIntrinsic>(I))
2259       continue;
2260 
2261     // Skip eh pad instructions.
2262     if (I->isEHPad())
2263       continue;
2264 
2265     // Don't sink alloca: we never want to sink static alloca's out of the
2266     // entry block, and correctly sinking dynamic alloca's requires
2267     // checks for stacksave/stackrestore intrinsics.
2268     // FIXME: Refactor this check somehow?
2269     if (isa<AllocaInst>(I))
2270       continue;
2271 
2272     // Determine if there is a use in or before the loop (direct or
2273     // otherwise).
2274     bool UsedInLoop = false;
2275     for (Use &U : I->uses()) {
2276       Instruction *User = cast<Instruction>(U.getUser());
2277       BasicBlock *UseBB = User->getParent();
2278       if (PHINode *P = dyn_cast<PHINode>(User)) {
2279         unsigned i =
2280           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2281         UseBB = P->getIncomingBlock(i);
2282       }
2283       if (UseBB == Preheader || L->contains(UseBB)) {
2284         UsedInLoop = true;
2285         break;
2286       }
2287     }
2288 
2289     // If there is, the def must remain in the preheader.
2290     if (UsedInLoop)
2291       continue;
2292 
2293     // Otherwise, sink it to the exit block.
2294     Instruction *ToMove = &*I;
2295     bool Done = false;
2296 
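         // Step the iterator back past any debug intrinsics so the next round
         // resumes at the previous real instruction; if ToMove is the first
         // non-debug instruction in the preheader, finish after this move.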
2297     if (I != Preheader->begin()) {
2298       // Skip debug info intrinsics.
2299       do {
2300         --I;
2301       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2302 
2303       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2304         Done = true;
2305     } else {
2306       Done = true;
2307     }
2308 
2309     MadeAnyChanges = true;
2310     ToMove->moveBefore(*ExitBlock, InsertPt);
2311     if (Done) break;
2312     InsertPt = ToMove->getIterator();
2313   }
2314 
2315   return MadeAnyChanges;
2316 }
2317 
2318 /// Return a symbolic upper bound for the backedge taken count of the loop.
2319 /// This is more general than getConstantMaxBackedgeTakenCount as it returns
2320 /// an arbitrary expression as opposed to only constants.
2321 /// TODO: Move into the ScalarEvolution class.
2322 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
2323                                             DominatorTree &DT, Loop *L) {
2324   SmallVector<BasicBlock*, 16> ExitingBlocks;
2325   L->getExitingBlocks(ExitingBlocks);
2326 
2327   // Form an expression for the maximum exit count possible for this loop. We
2328   // merge the max and exact information to approximate a version of
2329   // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2330   SmallVector<const SCEV*, 4> ExitCounts;
2331   for (BasicBlock *ExitingBB : ExitingBlocks) {
2332     const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
2333     if (isa<SCEVCouldNotCompute>(ExitCount))
2334       ExitCount = SE.getExitCount(L, ExitingBB,
2335                                   ScalarEvolution::ConstantMaximum);
2336     if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2337       assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
2338              "We should only have known counts for exiting blocks that "
2339              "dominate latch!");
2340       ExitCounts.push_back(ExitCount);
2341     }
2342   }
2343   if (ExitCounts.empty())
2344     return SE.getCouldNotCompute();
2345   return SE.getUMinFromMismatchedTypes(ExitCounts);
2346 }
2347 
2348 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2349   SmallVector<BasicBlock*, 16> ExitingBlocks;
2350   L->getExitingBlocks(ExitingBlocks);
2351 
2352   // Remove all exits which aren't both rewritable and analyzable.
2353   auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2354     // If our exiting block exits multiple loops, we can only rewrite the
2355     // innermost one.  Otherwise, we're changing how many times the innermost
2356     // loop runs before it exits.
2357     if (LI->getLoopFor(ExitingBB) != L)
2358       return true;
2359 
2360     // Can't rewrite non-branch yet.
2361     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2362     if (!BI)
2363       return true;
2364 
2365     // If already constant, nothing to do.
2366     if (isa<Constant>(BI->getCondition()))
2367       return true;
2368 
2369     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2370     if (isa<SCEVCouldNotCompute>(ExitCount))
2371       return true;
2372     return false;
2373   });
2374   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2375 
2376   if (ExitingBlocks.empty())
2377     return false;
2378 
2379   // Get a symbolic upper bound on the loop backedge taken count.
2380   const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2381   if (isa<SCEVCouldNotCompute>(MaxExitCount))
2382     return false;
2383 
2384   // Visit our exit blocks in order of dominance.  We know from the fact that
2385   // all exits (left) are analyzable that there must be a total dominance order
2386   // between them as each must dominate the latch.  The visit order only
2387   // matters for the provably equal case.
2388   llvm::sort(ExitingBlocks,
2389              [&](BasicBlock *A, BasicBlock *B) {
2390                // std::sort sorts in ascending order, so we want the inverse of
2391                // the normal dominance relation.
2392                if (DT->properlyDominates(A, B)) return true;
2393                if (DT->properlyDominates(B, A)) return false;
2394                llvm_unreachable("expected total dominance order!");
2395              });
2396 #ifndef NDEBUG
2397   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2398     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2399   }
2400 #endif
2401 
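       // Fold the exit branch to a constant condition: if IsTaken, force the
       // branch down the exit edge; otherwise force it to stay in the loop.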
2402   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2403     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2404     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2405     auto *OldCond = BI->getCondition();
2406     auto *NewCond = ConstantInt::get(OldCond->getType(),
2407                                      IsTaken ? ExitIfTrue : !ExitIfTrue);
2408     BI->setCondition(NewCond);
2409     if (OldCond->use_empty())
2410       DeadInsts.push_back(OldCond);
2411   };
2412 
2413   bool Changed = false;
2414   SmallSet<const SCEV*, 8> DominatingExitCounts;
2415   for (BasicBlock *ExitingBB : ExitingBlocks) {
2416     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2417     assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2418 
2419     // If we know we'd exit on the first iteration, rewrite the exit to
2420     // reflect this.  This does not imply the loop must exit through this
2421     // exit; there may be an earlier one taken on the first iteration.
2422     // TODO: Given we know the backedge can't be taken, we should go ahead
2423     // and break it.  Or at least, kill all the header phis and simplify.
2424     if (ExitCount->isZero()) {
2425       FoldExit(ExitingBB, true);
2426       Changed = true;
2427       continue;
2428     }
2429 
2430     // If we end up with a pointer exit count, bail.  Note that we can end up
2431     // with a pointer exit count for one exiting block, and not for another in
2432     // the same loop.
2433     if (!ExitCount->getType()->isIntegerTy() ||
2434         !MaxExitCount->getType()->isIntegerTy())
2435       continue;
2436 
2437     Type *WiderType =
2438       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2439     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2440     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2441     assert(MaxExitCount->getType() == ExitCount->getType());
2442 
2443     // Can we prove that some other exit must be taken strictly before this
2444     // one?
2445     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2446                                      MaxExitCount, ExitCount)) {
2447       FoldExit(ExitingBB, false);
2448       Changed = true;
2449       continue;
2450     }
2451 
2452     // As we run, keep track of which exit counts we've encountered.  If we
2453     // find a duplicate, we've found an exit which would have exited on the
2454     // exiting iteration, but (from the visit order) strictly follows another
2455     // which does the same and is thus dead.
2456     if (!DominatingExitCounts.insert(ExitCount).second) {
2457       FoldExit(ExitingBB, false);
2458       Changed = true;
2459       continue;
2460     }
2461 
2462     // TODO: There might be another opportunity to leverage SCEV's reasoning
2463     // here.  If we kept track of the min of dominating exits so far, we could
2464     // discharge exits with EC >= MDEC. This is less powerful than the existing
2465     // transform (since later exits aren't considered), but potentially more
2466     // powerful for any case where SCEV can prove a >=u b, but neither a == b
2467     // or a >u b.  Such a case is not currently known.
2468   }
2469   return Changed;
2470 }
2471 
2472 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2473   SmallVector<BasicBlock*, 16> ExitingBlocks;
2474   L->getExitingBlocks(ExitingBlocks);
2475 
2476   // Finally, see if we can rewrite our exit conditions into a loop invariant
2477   // form. If we have a read-only loop, and we can tell that we must exit down
2478   // a path which does not need any of the values computed within the loop, we
2479   // can rewrite the loop to exit on the first iteration.  Note that this
2480   // doesn't either a) tell us the loop exits on the first iteration (unless
2481   // *all* exits are predicatable) or b) tell us *which* exit might be taken.
2482   // This transformation looks a lot like a restricted form of dead loop
2483   // elimination, but restricted to read-only loops and without necessarily
2484   // needing to kill the loop entirely.
2485   if (!LoopPredication)
2486     return false;
2487 
2488   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2489     return false;
2490 
2491   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2492   // through *explicit* control flow.  We have to eliminate the possibility of
2493   // implicit exits (see below) before we know it's truly exact.
2494   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2495   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2496       !SE->isLoopInvariant(ExactBTC, L) ||
2497       !isSafeToExpand(ExactBTC, *SE))
2498     return false;
2499 
2500   // If we end up with a pointer exit count, bail.  It may be unsized.
2501   if (!ExactBTC->getType()->isIntegerTy())
2502     return false;
2503 
2504   auto BadExit = [&](BasicBlock *ExitingBB) {
2505     // If our exiting block exits multiple loops, we can only rewrite the
2506     // innermost one.  Otherwise, we're changing how many times the innermost
2507     // loop runs before it exits.
2508     if (LI->getLoopFor(ExitingBB) != L)
2509       return true;
2510 
2511     // Can't rewrite non-branch yet.
2512     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2513     if (!BI)
2514       return true;
2515 
2516     // If already constant, nothing to do.
2517     if (isa<Constant>(BI->getCondition()))
2518       return true;
2519 
2520     // If the exit block has phis, we need to be able to compute the values
2521     // within the loop which contains them.  This assumes trivially lcssa phis
2522     // have already been removed; TODO: generalize
2523     BasicBlock *ExitBlock =
2524     BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2525     if (!ExitBlock->phis().empty())
2526       return true;
2527 
2528     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2529     assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2530     if (!SE->isLoopInvariant(ExitCount, L) ||
2531         !isSafeToExpand(ExitCount, *SE))
2532       return true;
2533 
2534     // If we end up with a pointer exit count, bail.  It may be unsized.
2535     if (!ExitCount->getType()->isIntegerTy())
2536       return true;
2537 
2538     return false;
2539   };
2540 
2541   // If we have any exits which can't be predicated themselves, then we can't
2542   // predicate any exit which isn't guaranteed to execute before it.  Consider
2543   // two exits (a) and (b) which would both exit on the same iteration.  If we
2544   // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2545   // we could convert a loop from exiting through (a) to one exiting through
2546   // (b).  Note that this problem exists only for exits with the same exit
2547   // count, and we could be more aggressive when exit counts are known inequal.
2548   llvm::sort(ExitingBlocks,
2549             [&](BasicBlock *A, BasicBlock *B) {
2550               // std::sort sorts in ascending order, so we want the inverse of
2551               // the normal dominance relation, plus a tie breaker for blocks
2552               // unordered by dominance.
2553               if (DT->properlyDominates(A, B)) return true;
2554               if (DT->properlyDominates(B, A)) return false;
2555               return A->getName() < B->getName();
2556             });
2557   // Check to see if our exiting blocks form a total order (i.e. a linear chain
2558   // of exits before the backedge).  If they don't, reasoning about reachability
2559   // is complicated and we choose not to for now.
2560   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2561     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2562       return false;
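  // For illustration (hypothetical blocks): with exiting blocks E1 and E2
  // where E1 dominates E2 and E2 dominates the latch, the sort yields
  // [E1, E2] and the chain check above passes; if neither exiting block
  // dominates the other (say both are reached through a diamond), the check
  // fails and we conservatively give up on the whole loop.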
2563 
2564   // Given our sorted total order, we know that exit[j] must be evaluated
2565   // after all exit[i] such that i < j.
2566   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2567     if (BadExit(ExitingBlocks[i])) {
2568       ExitingBlocks.resize(i);
2569       break;
2570     }
2571 
2572   if (ExitingBlocks.empty())
2573     return false;
2574 
2575   // We rely on not being able to reach an exiting block on a later iteration
2576   // than its statically computed exit count.  The implementation of
2577   // getExitCount currently has this invariant, but assert it here so that
2578   // breakage is obvious if this ever changes.
2579   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2580         return DT->dominates(ExitingBB, L->getLoopLatch());
2581       }));
2582 
2583   // At this point, ExitingBlocks consists of only those blocks which are
2584   // predicatable.  Given that, we know we have at least one exit we can
2585   // predicate if the loop doesn't have side effects and doesn't have any
2586   // implicit exits (because then our exact BTC isn't actually exact).
2587   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
2588   // suggestions on how to improve this?  I can obviously bail out for outer
2589   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
2590   // is that enough for *all* side effects?
2591   for (BasicBlock *BB : L->blocks())
2592     for (auto &I : *BB)
2593       // TODO:isGuaranteedToTransfer
2594       if (I.mayHaveSideEffects() || I.mayThrow())
2595         return false;
2596 
2597   bool Changed = false;
2598   // Finally, do the actual predication for all predicatable blocks.  A couple
2599   // of notes here:
2600   // 1) We don't bother to constant fold dominated exits with identical exit
2601   //    counts; that's simply a form of CSE/equality propagation and we leave
2602   //    it for dedicated passes.
2603   // 2) We insert the comparison at the branch.  Hoisting introduces additional
2604   //    legality constraints and we leave that to dedicated logic.  We want to
2605   //    predicate even if we can't insert a loop invariant expression as
2606   //    peeling or unrolling will likely reduce the cost of the otherwise loop
2607   //    varying check.
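  //
  // Roughly, the rewrite below works as follows (informal sketch): an exit
  // whose count is the same SCEV as ExactBTC gets a constant condition that
  // always takes the exit edge (it is the exit that actually bounds the
  // loop), while every other exit gets an 'icmp eq/ne' of its expanded
  // ExitCount against ExactBTC, zero-extended to a common width if the types
  // differ, so it only fires if it turns out to be the bounding exit at
  // runtime.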
2608   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2609   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2610   Value *ExactBTCV = nullptr; // Lazily generated if needed.
2611   for (BasicBlock *ExitingBB : ExitingBlocks) {
2612     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2613 
2614     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2615     Value *NewCond;
2616     if (ExitCount == ExactBTC) {
2617       NewCond = L->contains(BI->getSuccessor(0)) ?
2618         B.getFalse() : B.getTrue();
2619     } else {
2620       Value *ECV = Rewriter.expandCodeFor(ExitCount);
2621       if (!ExactBTCV)
2622         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2623       Value *RHS = ExactBTCV;
2624       if (ECV->getType() != RHS->getType()) {
2625         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2626         ECV = B.CreateZExt(ECV, WiderTy);
2627         RHS = B.CreateZExt(RHS, WiderTy);
2628       }
2629       auto Pred = L->contains(BI->getSuccessor(0)) ?
2630         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2631       NewCond = B.CreateICmp(Pred, ECV, RHS);
2632     }
2633     Value *OldCond = BI->getCondition();
2634     BI->setCondition(NewCond);
2635     if (OldCond->use_empty())
2636       DeadInsts.push_back(OldCond);
2637     Changed = true;
2638   }
2639 
2640   return Changed;
2641 }
2642 
2643 //===----------------------------------------------------------------------===//
2644 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2645 //===----------------------------------------------------------------------===//
2646 
2647 bool IndVarSimplify::run(Loop *L) {
2648   // We need (and expect!) the incoming loop to be in LCSSA.
2649   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2650          "LCSSA required to run indvars!");
2651 
2652   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2653   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2654   //    canonicalization can be a pessimization without LSR to "clean up"
2655   //    afterwards.
2656   //  - We depend on having a preheader; in particular,
2657   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2658   //    and we're in trouble if we can't find the induction variable even when
2659   //    we've manually inserted one.
2660   //  - LFTR relies on having a single backedge.
2661   if (!L->isLoopSimplifyForm())
2662     return false;
2663 
2664 #ifndef NDEBUG
2665   // Used below for a consistency check only
2666   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2667 #endif
2668 
2669   bool Changed = false;
2670   // If there are any floating-point recurrences, attempt to
2671   // transform them to use integer recurrences.
2672   Changed |= rewriteNonIntegerIVs(L);
2673 
2674   // Create a rewriter object which we'll use to transform the code with.
2675   SCEVExpander Rewriter(*SE, DL, "indvars");
2676 #ifndef NDEBUG
2677   Rewriter.setDebugType(DEBUG_TYPE);
2678 #endif
2679 
2680   // Eliminate redundant IV users.
2681   //
2682   // Simplification works best when run before other consumers of SCEV. We
2683   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2684   // other expressions involving loop IVs have been evaluated. This helps SCEV
2685   // set no-wrap flags before normalizing sign/zero extension.
2686   Rewriter.disableCanonicalMode();
2687   Changed |= simplifyAndExtend(L, Rewriter, LI);
2688 
2689   // Check to see if we can compute the final value of any expressions
2690   // that are recurrent in the loop, and substitute the exit values from the
2691   // loop into any instructions outside of the loop that use the final values
2692   // of the current expressions.
2693   if (ReplaceExitValue != NeverRepl) {
2694     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, Rewriter, DT,
2695                                              ReplaceExitValue, DeadInsts)) {
2696       NumReplaced += Rewrites;
2697       Changed = true;
2698     }
2699   }
2700 
2701   // Eliminate redundant IV cycles.
2702   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2703 
2704   // Try to eliminate loop exits based on analyzeable exit counts
2705   if (optimizeLoopExits(L, Rewriter))  {
2706     Changed = true;
2707     // Given we've changed exit counts, notify SCEV
2708     SE->forgetLoop(L);
2709   }
2710 
2711   // Try to form loop invariant tests for loop exits by changing how many
2712   // iterations of the loop run when that is unobservable.
2713   if (predicateLoopExits(L, Rewriter)) {
2714     Changed = true;
2715     // Given we've changed exit counts, notify SCEV
2716     SE->forgetLoop(L);
2717   }
2718 
2719   // If we have a trip count expression, rewrite the loop's exit condition
2720   // using it.
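  //
  // (Hedged illustration: for each suitable exiting block,
  // linearFunctionTestReplace picks a counter PHI via FindLoopCounter and
  // rewrites the exit branch to compare that counter against a value expanded
  // from ExitCount, replacing the previous loop-varying test.)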
2721   if (!DisableLFTR) {
2722     SmallVector<BasicBlock*, 16> ExitingBlocks;
2723     L->getExitingBlocks(ExitingBlocks);
2724     for (BasicBlock *ExitingBB : ExitingBlocks) {
2725       // Can't rewrite non-branch yet.
2726       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2727         continue;
2728 
2729       // If our exiting block exits multiple loops, we can only rewrite the
2730       // innermost one.  Otherwise, we're changing how many times the innermost
2731       // loop runs before it exits.
2732       if (LI->getLoopFor(ExitingBB) != L)
2733         continue;
2734 
2735       if (!needsLFTR(L, ExitingBB))
2736         continue;
2737 
2738       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2739       if (isa<SCEVCouldNotCompute>(ExitCount))
2740         continue;
2741 
2742       // This was handled above, but as we form SCEVs, we can sometimes refine
2743       // existing ones; this allows exit counts to be folded to zero which
2744       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
2745       // until stable to handle cases like this better.
2746       if (ExitCount->isZero())
2747         continue;
2748 
2749       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2750       if (!IndVar)
2751         continue;
2752 
2753       // Avoid high cost expansions.  Note: This heuristic is questionable in
2754       // that our definition of "high cost" is not exactly principled.
2755       if (Rewriter.isHighCostExpansion(ExitCount, L))
2756         continue;
2757 
2758       // Check preconditions for proper SCEVExpander operation. SCEV does not
2759       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2760       // any pass that uses the SCEVExpander must do it. This does not work
2761       // well for loop passes because SCEVExpander makes assumptions about
2762       // all loops, while LoopPassManager only forces the current loop to be
2763       // simplified.
2764       //
2765       // FIXME: SCEV expansion has no way to bail out, so the caller must
2766       // explicitly check any assumptions made by SCEV. Brittle.
2767       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2768       if (!AR || AR->getLoop()->getLoopPreheader())
2769         Changed |= linearFunctionTestReplace(L, ExitingBB,
2770                                              ExitCount, IndVar,
2771                                              Rewriter);
2772     }
2773   }
2774   // Clear the rewriter cache, because values that are in the rewriter's cache
2775   // can be deleted in the loop below, causing the AssertingVH in the cache to
2776   // trigger.
2777   Rewriter.clear();
2778 
2779   // Now that we're done iterating through lists, clean up any instructions
2780   // which are now dead.
2781   while (!DeadInsts.empty())
2782     if (Instruction *Inst =
2783             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2784       Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
2785 
2786   // The Rewriter may not be used from this point on.
2787 
2788   // Loop-invariant instructions in the preheader that aren't used in the
2789   // loop may be sunk below the loop to reduce register pressure.
2790   Changed |= sinkUnusedInvariants(L);
2791 
2792   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2793   // trip count and therefore can further simplify exit values in addition to
2794   // rewriteLoopExitValues.
2795   Changed |= rewriteFirstIterationLoopExitValues(L);
2796 
2797   // Clean up dead instructions.
2798   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2799 
2800   // Check a post-condition.
2801   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2802          "Indvars did not preserve LCSSA!");
2803 
2804   // Verify that LFTR and any other changes have not interfered with SCEV's
2805   // ability to compute the trip count.  We may have *changed* the exit count, but
2806   // only by reducing it.
2807 #ifndef NDEBUG
2808   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2809     SE->forgetLoop(L);
2810     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2811     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2812         SE->getTypeSizeInBits(NewBECount->getType()))
2813       NewBECount = SE->getTruncateOrNoop(NewBECount,
2814                                          BackedgeTakenCount->getType());
2815     else
2816       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2817                                                  NewBECount->getType());
2818     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2819                                  NewBECount) && "indvars must preserve SCEV");
2820   }
2821 #endif
2822 
2823   return Changed;
2824 }
2825 
2826 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2827                                           LoopStandardAnalysisResults &AR,
2828                                           LPMUpdater &) {
2829   Function *F = L.getHeader()->getParent();
2830   const DataLayout &DL = F->getParent()->getDataLayout();
2831 
2832   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA);
2833   if (!IVS.run(&L))
2834     return PreservedAnalyses::all();
2835 
2836   auto PA = getLoopPassPreservedAnalyses();
2837   PA.preserveSet<CFGAnalyses>();
2838   if (AR.MSSA)
2839     PA.preserve<MemorySSAAnalysis>();
2840   return PA;
2841 }
2842 
2843 namespace {
2844 
2845 struct IndVarSimplifyLegacyPass : public LoopPass {
2846   static char ID; // Pass identification, replacement for typeid
2847 
2848   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2849     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2850   }
2851 
2852   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2853     if (skipLoop(L))
2854       return false;
2855 
2856     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2857     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2858     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2859     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2860     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2861     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2862     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2863     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2864     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2865     MemorySSA *MSSA = nullptr;
2866     if (MSSAAnalysis)
2867       MSSA = &MSSAAnalysis->getMSSA();
2868 
2869     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA);
2870     return IVS.run(L);
2871   }
2872 
2873   void getAnalysisUsage(AnalysisUsage &AU) const override {
2874     AU.setPreservesCFG();
2875     AU.addPreserved<MemorySSAWrapperPass>();
2876     getLoopAnalysisUsage(AU);
2877   }
2878 };
2879 
2880 } // end anonymous namespace
2881 
2882 char IndVarSimplifyLegacyPass::ID = 0;
2883 
2884 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2885                       "Induction Variable Simplification", false, false)
2886 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2887 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2888                     "Induction Variable Simplification", false, false)
2889 
2890 Pass *llvm::createIndVarSimplifyPass() {
2891   return new IndVarSimplifyLegacyPass();
2892 }
2893 }
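
// Minimal usage sketch (illustrative only; not part of this pass): running
// indvars over a module through the legacy pass manager, assuming a valid
// llvm::Module M and the usual headers/libraries are available:
//
//   #include "llvm/IR/LegacyPassManager.h"
//
//   llvm::legacy::PassManager PM;
//   PM.add(llvm::createIndVarSimplifyPass());
//   PM.run(M);
//
// Under the new pass manager, the equivalent is 'opt -passes=indvars', which
// runs the IndVarSimplifyPass wrapper defined above.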