xref: /llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision adc4faf532bfefeddb4c789f1562394ae2a1ba0d)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
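//
//      For example (an informal sketch, not actual pass output):
//        for (i = 0; i != n; ++i) {}
//        use(i);
//      can have 'use(i)' rewritten as 'use(n)'; the now-unused loop can then
//      be deleted by later passes.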
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/MemorySSA.h"
42 #include "llvm/Analysis/MemorySSAUpdater.h"
43 #include "llvm/Analysis/ScalarEvolution.h"
44 #include "llvm/Analysis/ScalarEvolutionExpander.h"
45 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
46 #include "llvm/Analysis/TargetLibraryInfo.h"
47 #include "llvm/Analysis/TargetTransformInfo.h"
48 #include "llvm/Analysis/ValueTracking.h"
49 #include "llvm/IR/BasicBlock.h"
50 #include "llvm/IR/Constant.h"
51 #include "llvm/IR/ConstantRange.h"
52 #include "llvm/IR/Constants.h"
53 #include "llvm/IR/DataLayout.h"
54 #include "llvm/IR/DerivedTypes.h"
55 #include "llvm/IR/Dominators.h"
56 #include "llvm/IR/Function.h"
57 #include "llvm/IR/IRBuilder.h"
58 #include "llvm/IR/InstrTypes.h"
59 #include "llvm/IR/Instruction.h"
60 #include "llvm/IR/Instructions.h"
61 #include "llvm/IR/IntrinsicInst.h"
62 #include "llvm/IR/Intrinsics.h"
63 #include "llvm/IR/Module.h"
64 #include "llvm/IR/Operator.h"
65 #include "llvm/IR/PassManager.h"
66 #include "llvm/IR/PatternMatch.h"
67 #include "llvm/IR/Type.h"
68 #include "llvm/IR/Use.h"
69 #include "llvm/IR/User.h"
70 #include "llvm/IR/Value.h"
71 #include "llvm/IR/ValueHandle.h"
72 #include "llvm/InitializePasses.h"
73 #include "llvm/Pass.h"
74 #include "llvm/Support/Casting.h"
75 #include "llvm/Support/CommandLine.h"
76 #include "llvm/Support/Compiler.h"
77 #include "llvm/Support/Debug.h"
78 #include "llvm/Support/ErrorHandling.h"
79 #include "llvm/Support/MathExtras.h"
80 #include "llvm/Support/raw_ostream.h"
81 #include "llvm/Transforms/Scalar.h"
82 #include "llvm/Transforms/Scalar/LoopPassManager.h"
83 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
84 #include "llvm/Transforms/Utils/Local.h"
85 #include "llvm/Transforms/Utils/LoopUtils.h"
86 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
87 #include <cassert>
88 #include <cstdint>
89 #include <utility>
90 
91 using namespace llvm;
92 
93 #define DEBUG_TYPE "indvars"
94 
95 STATISTIC(NumWidened     , "Number of indvars widened");
96 STATISTIC(NumReplaced    , "Number of exit values replaced");
97 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
98 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
99 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
100 
101 // Trip count verification can be enabled by default under NDEBUG if we
102 // implement a strong expression equivalence checker in SCEV. Until then, we
103 // use the verify-indvars flag, which may assert in some cases.
104 static cl::opt<bool> VerifyIndvars(
105   "verify-indvars", cl::Hidden,
106   cl::desc("Verify the ScalarEvolution result after running indvars"));
107 
108 static cl::opt<ReplaceExitVal> ReplaceExitValue(
109     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
110     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
111     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
112                clEnumValN(OnlyCheapRepl, "cheap",
113                           "only replace exit value when the cost is cheap"),
114                clEnumValN(NoHardUse, "noharduse",
115                           "only replace exit values when loop def likely dead"),
116                clEnumValN(AlwaysRepl, "always",
117                           "always replace exit value whenever possible")));
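// Usage sketch (illustrative only, assuming the standard 'opt' driver):
//   opt -passes=indvars -replexitval=never input.ll -S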
118 
119 static cl::opt<bool> UsePostIncrementRanges(
120   "indvars-post-increment-ranges", cl::Hidden,
121   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
122   cl::init(true));
123 
124 static cl::opt<bool>
125 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
126             cl::desc("Disable Linear Function Test Replace optimization"));
127 
128 static cl::opt<bool>
129 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
130                 cl::desc("Predicate conditions in read only loops"));
131 
132 namespace {
133 
134 struct RewritePhi;
135 
136 class IndVarSimplify {
137   LoopInfo *LI;
138   ScalarEvolution *SE;
139   DominatorTree *DT;
140   const DataLayout &DL;
141   TargetLibraryInfo *TLI;
142   const TargetTransformInfo *TTI;
143   MemorySSA *MSSA;
144   std::unique_ptr<MemorySSAUpdater> MSSAU;
145 
146   SmallVector<WeakTrackingVH, 16> DeadInsts;
147 
148   bool handleFloatingPointIV(Loop *L, PHINode *PH);
149   bool rewriteNonIntegerIVs(Loop *L);
150 
151   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
152   /// Try to eliminate loop exits based on analyzable exit counts
153   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
154   /// Try to form loop invariant tests for loop exits by changing how many
155   /// iterations of the loop run when that is unobservable.
156   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
157 
158   bool rewriteFirstIterationLoopExitValues(Loop *L);
159 
160   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
161                                  const SCEV *ExitCount,
162                                  PHINode *IndVar, SCEVExpander &Rewriter);
163 
164   bool sinkUnusedInvariants(Loop *L);
165 
166 public:
167   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
168                  const DataLayout &DL, TargetLibraryInfo *TLI,
169                  TargetTransformInfo *TTI, MemorySSA *MSSA)
170       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI), MSSA(MSSA) {
171     if (MSSA)
172       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
173   }
174 
175   bool run(Loop *L);
176 };
177 
178 } // end anonymous namespace
179 
180 /// Determine the insertion point for this user. By default, insert immediately
181 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
182 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
183 /// common dominator for the incoming blocks. A nullptr can be returned if no
184 /// viable location is found: it may happen if User is a PHI and Def only comes
185 /// to this PHI from unreachable blocks.
186 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
187                                           DominatorTree *DT, LoopInfo *LI) {
188   PHINode *PHI = dyn_cast<PHINode>(User);
189   if (!PHI)
190     return User;
191 
192   Instruction *InsertPt = nullptr;
193   for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
194     if (PHI->getIncomingValue(i) != Def)
195       continue;
196 
197     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
198 
199     if (!DT->isReachableFromEntry(InsertBB))
200       continue;
201 
202     if (!InsertPt) {
203       InsertPt = InsertBB->getTerminator();
204       continue;
205     }
206     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
207     InsertPt = InsertBB->getTerminator();
208   }
209 
210   // If we have skipped all inputs, it means that Def only comes to Phi from
211   // unreachable blocks.
212   if (!InsertPt)
213     return nullptr;
214 
215   auto *DefI = dyn_cast<Instruction>(Def);
216   if (!DefI)
217     return InsertPt;
218 
219   assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
220 
221   auto *L = LI->getLoopFor(DefI->getParent());
222   assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
223 
224   for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
225     if (LI->getLoopFor(DTN->getBlock()) == L)
226       return DTN->getBlock()->getTerminator();
227 
228   llvm_unreachable("DefI dominates InsertPt!");
229 }
230 
231 //===----------------------------------------------------------------------===//
232 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
233 //===----------------------------------------------------------------------===//
234 
235 /// Convert APF to an integer, if possible.
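/// For example (informal): 3.0 converts exactly to 3 and succeeds, while 3.5
/// is inexact and 1e300 does not fit in 64 bits, so both of those fail.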
236 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
237   bool isExact = false;
238   // See if we can convert this to an int64_t
239   uint64_t UIntVal;
240   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
241                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
242       !isExact)
243     return false;
244   IntVal = UIntVal;
245   return true;
246 }
247 
248 /// If the loop has a floating-point induction variable, then insert a
249 /// corresponding integer induction variable if possible.
250 /// For example,
251 /// for(double i = 0; i < 10000; ++i)
252 ///   bar(i)
253 /// is converted into
254 /// for(int i = 0; i < 10000; ++i)
255 ///   bar((double)i);
256 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
257   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
258   unsigned BackEdge     = IncomingEdge^1;
259 
260   // Check incoming value.
261   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
262 
263   int64_t InitValue;
264   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
265     return false;
266 
267   // Check the IV increment. Reject this PN if the increment operation is not
268   // an add or the increment value cannot be represented by an integer.
269   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
270   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
271 
272   // If this is not an add of the PHI with a constantfp, or if the constant fp
273   // is not an integer, bail out.
274   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
275   int64_t IncValue;
276   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
277       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
278     return false;
279 
280   // Check Incr uses. One user is PN and the other user is an exit condition
281   // used by the conditional terminator.
282   Value::user_iterator IncrUse = Incr->user_begin();
283   Instruction *U1 = cast<Instruction>(*IncrUse++);
284   if (IncrUse == Incr->user_end()) return false;
285   Instruction *U2 = cast<Instruction>(*IncrUse++);
286   if (IncrUse != Incr->user_end()) return false;
287 
288   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
289   // only used by a branch, we can't transform it.
290   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
291   if (!Compare)
292     Compare = dyn_cast<FCmpInst>(U2);
293   if (!Compare || !Compare->hasOneUse() ||
294       !isa<BranchInst>(Compare->user_back()))
295     return false;
296 
297   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
298 
299   // We need to verify that the branch actually controls the iteration count
300   // of the loop.  If not, the new IV can overflow and no one will notice.
301   // The branch block must be in the loop and one of the successors must be out
302   // of the loop.
303   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
304   if (!L->contains(TheBr->getParent()) ||
305       (L->contains(TheBr->getSuccessor(0)) &&
306        L->contains(TheBr->getSuccessor(1))))
307     return false;
308 
309   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
310   // transform it.
311   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
312   int64_t ExitValue;
313   if (ExitValueVal == nullptr ||
314       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
315     return false;
316 
317   // Find new predicate for integer comparison.
318   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
319   switch (Compare->getPredicate()) {
320   default: return false;  // Unknown comparison.
321   case CmpInst::FCMP_OEQ:
322   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
323   case CmpInst::FCMP_ONE:
324   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
325   case CmpInst::FCMP_OGT:
326   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
327   case CmpInst::FCMP_OGE:
328   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
329   case CmpInst::FCMP_OLT:
330   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
331   case CmpInst::FCMP_OLE:
332   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
333   }
334 
335   // We convert the floating point induction variable to a signed i32 value if
336   // we can.  This is only safe if the new integer comparison cannot overflow or
337   // wrap in a way that the original floating-point comparison would not have.
338   // Check for this now.
339   // TODO: We could use i64 if it is native and the range requires it.
340 
341   // The start/stride/exit values must all fit in signed i32.
342   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
343     return false;
344 
345   // If not actually striding (add x, 0.0), avoid touching the code.
346   if (IncValue == 0)
347     return false;
348 
349   // Positive and negative strides have different safety conditions.
350   if (IncValue > 0) {
351     // If we have a positive stride, we require the init to be less than the
352     // exit value.
353     if (InitValue >= ExitValue)
354       return false;
355 
356     uint32_t Range = uint32_t(ExitValue-InitValue);
357     // Check for infinite loop, either:
358     // while (i <= Exit) or until (i > Exit)
359     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
360       if (++Range == 0) return false;  // Range overflows.
361     }
362 
363     unsigned Leftover = Range % uint32_t(IncValue);
364 
365     // If this is an equality comparison, we require that the strided value
366     // exactly land on the exit value, otherwise the IV condition will wrap
367     // around and do things the fp IV wouldn't.
368     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
369         Leftover != 0)
370       return false;
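    // Worked example (informal): Init = 0, Exit = 100, Inc = 7 with an
    // equality exit test gives Leftover = 100 % 7 = 2, so we bail out: the
    // integer IV would step from 98 to 105, never satisfy the exit test, and
    // eventually wrap around i32, which the FP IV would not do.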
371 
372     // If the stride would wrap around the i32 before exiting, we can't
373     // transform the IV.
374     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
375       return false;
376   } else {
377     // If we have a negative stride, we require the init to be greater than the
378     // exit value.
379     if (InitValue <= ExitValue)
380       return false;
381 
382     uint32_t Range = uint32_t(InitValue-ExitValue);
383     // Check for infinite loop, either:
384     // while (i >= Exit) or until (i < Exit)
385     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
386       if (++Range == 0) return false;  // Range overflows.
387     }
388 
389     unsigned Leftover = Range % uint32_t(-IncValue);
390 
391     // If this is an equality comparison, we require that the strided value
392     // exactly land on the exit value, otherwise the IV condition will wrap
393     // around and do things the fp IV wouldn't.
394     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
395         Leftover != 0)
396       return false;
397 
398     // If the stride would wrap around the i32 before exiting, we can't
399     // transform the IV.
400     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
401       return false;
402   }
403 
404   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
405 
406   // Insert new integer induction variable.
407   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
408   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
409                       PN->getIncomingBlock(IncomingEdge));
410 
411   Value *NewAdd =
412     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
413                               Incr->getName()+".int", Incr);
414   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
415 
416   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
417                                       ConstantInt::get(Int32Ty, ExitValue),
418                                       Compare->getName());
419 
420   // In the following deletions, PN may become dead and may be deleted.
421   // Use a WeakTrackingVH to observe whether this happens.
422   WeakTrackingVH WeakPH = PN;
423 
424   // Delete the old floating point exit comparison.  The branch starts using the
425   // new comparison.
426   NewCompare->takeName(Compare);
427   Compare->replaceAllUsesWith(NewCompare);
428   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
429 
430   // Delete the old floating point increment.
431   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
432   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
433 
434   // If the FP induction variable still has uses, this is because something else
435   // in the loop uses its value.  In order to canonicalize the induction
436   // variable, we chose to eliminate the IV and rewrite it in terms of an
437   // int->fp cast.
438   //
439   // We give preference to sitofp over uitofp because it is faster on most
440   // platforms.
441   if (WeakPH) {
442     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
443                                  &*PN->getParent()->getFirstInsertionPt());
444     PN->replaceAllUsesWith(Conv);
445     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
446   }
447   return true;
448 }
449 
450 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
451   // First step.  Check to see if there are any floating-point recurrences.
452   // If there are, change them into integer recurrences, permitting analysis by
453   // the SCEV routines.
454   BasicBlock *Header = L->getHeader();
455 
456   SmallVector<WeakTrackingVH, 8> PHIs;
457   for (PHINode &PN : Header->phis())
458     PHIs.push_back(&PN);
459 
460   bool Changed = false;
461   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
462     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
463       Changed |= handleFloatingPointIV(L, PN);
464 
465   // If the loop previously had a floating-point IV, ScalarEvolution
466   // may not have been able to compute a trip count. Now that we've done some
467   // re-writing, the trip count may be computable.
468   if (Changed)
469     SE->forgetLoop(L);
470   return Changed;
471 }
472 
473 //===---------------------------------------------------------------------===//
474 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
475 // they will exit at the first iteration.
476 //===---------------------------------------------------------------------===//
477 
478 /// Check to see if this loop has loop invariant conditions which lead to loop
479 /// exits. If so, we know that if the exit path is taken, it is at the first
480 /// loop iteration. This lets us predict exit values of PHI nodes that live in
481 /// loop header.
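///
/// For example (an informal sketch): given
///   for (i = start; ...; ++i) { if (inv_cond) break; ... }
///   use(i);   // via an LCSSA phi in the exit block
/// if 'inv_cond' is loop invariant and its block dominates the latch, the
/// break can only ever be taken on the first iteration, so the exited value
/// of the header phi 'i' on that path is simply 'start'.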
482 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
483   // Verify the input to the pass is already in LCSSA form.
484   assert(L->isLCSSAForm(*DT));
485 
486   SmallVector<BasicBlock *, 8> ExitBlocks;
487   L->getUniqueExitBlocks(ExitBlocks);
488 
489   bool MadeAnyChanges = false;
490   for (auto *ExitBB : ExitBlocks) {
491     // If there are no more PHI nodes in this exit block, then no more
492     // values defined inside the loop are used on this path.
493     for (PHINode &PN : ExitBB->phis()) {
494       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
495            IncomingValIdx != E; ++IncomingValIdx) {
496         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
497 
498         // Can we prove that the exit must run on the first iteration if it
499         // runs at all?  (i.e. early exits are fine for our purposes, but
500         // traces which lead to this exit being taken on the 2nd iteration
501         // aren't.)  Note that this is about whether the exit branch is
502         // executed, not about whether it is taken.
503         if (!L->getLoopLatch() ||
504             !DT->dominates(IncomingBB, L->getLoopLatch()))
505           continue;
506 
507         // Get condition that leads to the exit path.
508         auto *TermInst = IncomingBB->getTerminator();
509 
510         Value *Cond = nullptr;
511         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
512           // Must be a conditional branch, otherwise the block
513           // should not be in the loop.
514           Cond = BI->getCondition();
515         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
516           Cond = SI->getCondition();
517         else
518           continue;
519 
520         if (!L->isLoopInvariant(Cond))
521           continue;
522 
523         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
524 
525         // Only deal with PHIs in the loop header.
526         if (!ExitVal || ExitVal->getParent() != L->getHeader())
527           continue;
528 
529         // If ExitVal is a PHI on the loop header, then we know its
530         // value along this exit because the exit can only be taken
531         // on the first iteration.
532         auto *LoopPreheader = L->getLoopPreheader();
533         assert(LoopPreheader && "Invalid loop");
534         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
535         if (PreheaderIdx != -1) {
536           assert(ExitVal->getParent() == L->getHeader() &&
537                  "ExitVal must be in loop header");
538           MadeAnyChanges = true;
539           PN.setIncomingValue(IncomingValIdx,
540                               ExitVal->getIncomingValue(PreheaderIdx));
541         }
542       }
543     }
544   }
545   return MadeAnyChanges;
546 }
547 
548 //===----------------------------------------------------------------------===//
549 //  IV Widening - Extend the width of an IV to cover its widest uses.
550 //===----------------------------------------------------------------------===//
551 
552 namespace {
553 
554 // Collect information about induction variables that are used by sign/zero
555 // extend operations. This information is recorded by CollectExtend and provides
556 // the input to WidenIV.
557 struct WideIVInfo {
558   PHINode *NarrowIV = nullptr;
559 
560   // Widest integer type created by a [sz]ext
561   Type *WidestNativeType = nullptr;
562 
563   // Was a sext user seen before a zext?
564   bool IsSigned = false;
565 };
566 
567 } // end anonymous namespace
568 
569 /// Update information about the induction variable that is extended by this
570 /// sign or zero extend operation. This is used to determine the final width of
571 /// the IV before actually widening it.
572 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
573                         const TargetTransformInfo *TTI) {
574   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
575   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
576     return;
577 
578   Type *Ty = Cast->getType();
579   uint64_t Width = SE->getTypeSizeInBits(Ty);
580   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
581     return;
582 
583   // Check that `Cast` actually extends the induction variable (we rely on this
584   // later).  This takes care of cases where `Cast` is extending a truncation of
585   // the narrow induction variable, and thus can end up being narrower than the
586   // "narrow" induction variable.
587   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
588   if (NarrowIVWidth >= Width)
589     return;
590 
591   // Cast is either an sext or zext up to this point.
592   // We should not widen an indvar if arithmetic on the wider indvar is more
593   // expensive than on the narrower indvar. We check only the cost of ADD
594   // because at least an ADD is required to increment the induction variable. We
595   // could compute more comprehensively the cost of all instructions on the
596   // induction variable when necessary.
597   if (TTI &&
598       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
599           TTI->getArithmeticInstrCost(Instruction::Add,
600                                       Cast->getOperand(0)->getType())) {
601     return;
602   }
603 
604   if (!WI.WidestNativeType) {
605     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
606     WI.IsSigned = IsSigned;
607     return;
608   }
609 
610   // We extend the IV to satisfy the sign of its first user, arbitrarily.
611   if (WI.IsSigned != IsSigned)
612     return;
613 
614   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
615     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
616 }
617 
618 namespace {
619 
620 /// Record a link in the Narrow IV def-use chain along with the WideIV that
621 /// computes the same value as the Narrow IV def.  This avoids caching Use*
622 /// pointers.
623 struct NarrowIVDefUse {
624   Instruction *NarrowDef = nullptr;
625   Instruction *NarrowUse = nullptr;
626   Instruction *WideDef = nullptr;
627 
628   // True if the narrow def is never negative.  Tracking this information lets
629   // us use a sign extension instead of a zero extension or vice versa, when
630   // profitable and legal.
631   bool NeverNegative = false;
632 
633   NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
634                  bool NeverNegative)
635       : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
636         NeverNegative(NeverNegative) {}
637 };
638 
639 /// The goal of this transform is to remove sign and zero extends without
640 /// creating any new induction variables. To do this, it creates a new phi of
641 /// the wider type and redirects all users, either removing extends or inserting
642 /// truncs whenever we stop propagating the type.
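///
/// For illustration (a hand-written sketch, not actual pass output):
///   %iv     = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
///   %iv.ext = sext i32 %iv to i64
///   ...                       ; users of %iv.ext
/// becomes, after widening,
///   %iv.wide = phi i64 [ 0, %preheader ], [ %iv.wide.next, %loop ]
///   ...                       ; former users of %iv.ext now use %iv.wide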
643 class WidenIV {
644   // Parameters
645   PHINode *OrigPhi;
646   Type *WideType;
647 
648   // Context
649   LoopInfo        *LI;
650   Loop            *L;
651   ScalarEvolution *SE;
652   DominatorTree   *DT;
653 
654   // Does the module have any calls to the llvm.experimental.guard intrinsic
655   // at all? If not, we can avoid scanning instructions looking for guards.
656   bool HasGuards;
657 
658   // Result
659   PHINode *WidePhi = nullptr;
660   Instruction *WideInc = nullptr;
661   const SCEV *WideIncExpr = nullptr;
662   SmallVectorImpl<WeakTrackingVH> &DeadInsts;
663 
664   SmallPtrSet<Instruction *,16> Widened;
665   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
666 
667   enum ExtendKind { ZeroExtended, SignExtended, Unknown };
668 
669   // A map tracking the kind of extension used to widen each narrow IV
670   // and narrow IV user.
671   // Key: pointer to a narrow IV or IV user.
672   // Value: the kind of extension used to widen this Instruction.
673   DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
674 
675   using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
676 
677   // A map with control-dependent ranges for post increment IV uses. The key is
678   // a pair of IV def and a use of this def denoting the context. The value is
679   // a ConstantRange representing possible values of the def at the given
680   // context.
681   DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
682 
683   Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
684                                               Instruction *UseI) {
685     DefUserPair Key(Def, UseI);
686     auto It = PostIncRangeInfos.find(Key);
687     return It == PostIncRangeInfos.end()
688                ? Optional<ConstantRange>(None)
689                : Optional<ConstantRange>(It->second);
690   }
691 
692   void calculatePostIncRanges(PHINode *OrigPhi);
693   void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
694 
695   void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
696     DefUserPair Key(Def, UseI);
697     auto It = PostIncRangeInfos.find(Key);
698     if (It == PostIncRangeInfos.end())
699       PostIncRangeInfos.insert({Key, R});
700     else
701       It->second = R.intersectWith(It->second);
702   }
703 
704 public:
705   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
706           DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
707           bool HasGuards)
708       : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
709         L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
710         HasGuards(HasGuards), DeadInsts(DI) {
711     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
712     ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
713   }
714 
715   PHINode *createWideIV(SCEVExpander &Rewriter);
716 
717 protected:
718   Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
719                           Instruction *Use);
720 
721   Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
722   Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
723                                      const SCEVAddRecExpr *WideAR);
724   Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
725 
726   ExtendKind getExtendKind(Instruction *I);
727 
728   using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
729 
730   WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
731 
732   WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
733 
734   const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
735                               unsigned OpCode) const;
736 
737   Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
738 
739   bool widenLoopCompare(NarrowIVDefUse DU);
740   bool widenWithVariantLoadUse(NarrowIVDefUse DU);
741   void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
742 
743   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
744 };
745 
746 } // end anonymous namespace
747 
748 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
749                                  bool IsSigned, Instruction *Use) {
750   // Set the debug location and conservative insertion point.
751   IRBuilder<> Builder(Use);
752   // Hoist the insertion point into loop preheaders as far as possible.
753   for (const Loop *L = LI->getLoopFor(Use->getParent());
754        L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
755        L = L->getParentLoop())
756     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
757 
758   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
759                     Builder.CreateZExt(NarrowOper, WideType);
760 }
761 
762 /// Instantiate a wide operation to replace a narrow operation. This only needs
763 /// to handle operations that can evaluate to SCEVAddRec. It can safely return
764 /// 0 for any operation we decide not to clone.
765 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
766                                   const SCEVAddRecExpr *WideAR) {
767   unsigned Opcode = DU.NarrowUse->getOpcode();
768   switch (Opcode) {
769   default:
770     return nullptr;
771   case Instruction::Add:
772   case Instruction::Mul:
773   case Instruction::UDiv:
774   case Instruction::Sub:
775     return cloneArithmeticIVUser(DU, WideAR);
776 
777   case Instruction::And:
778   case Instruction::Or:
779   case Instruction::Xor:
780   case Instruction::Shl:
781   case Instruction::LShr:
782   case Instruction::AShr:
783     return cloneBitwiseIVUser(DU);
784   }
785 }
786 
787 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
788   Instruction *NarrowUse = DU.NarrowUse;
789   Instruction *NarrowDef = DU.NarrowDef;
790   Instruction *WideDef = DU.WideDef;
791 
792   LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
793 
794   // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
795   // about the narrow operand yet so must insert a [sz]ext. It is probably loop
796   // invariant and will be folded or hoisted. If it actually comes from a
797   // widened IV, it should be removed during a future call to widenIVUse.
798   bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
799   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
800                    ? WideDef
801                    : createExtendInst(NarrowUse->getOperand(0), WideType,
802                                       IsSigned, NarrowUse);
803   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
804                    ? WideDef
805                    : createExtendInst(NarrowUse->getOperand(1), WideType,
806                                       IsSigned, NarrowUse);
807 
808   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
809   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
810                                         NarrowBO->getName());
811   IRBuilder<> Builder(NarrowUse);
812   Builder.Insert(WideBO);
813   WideBO->copyIRFlags(NarrowBO);
814   return WideBO;
815 }
816 
817 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
818                                             const SCEVAddRecExpr *WideAR) {
819   Instruction *NarrowUse = DU.NarrowUse;
820   Instruction *NarrowDef = DU.NarrowDef;
821   Instruction *WideDef = DU.WideDef;
822 
823   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
824 
825   unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
826 
827   // We're trying to find X such that
828   //
829   //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
830   //
831   // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
832   // and check using SCEV if any of them are correct.
833 
834   // Returns true if extending NonIVNarrowDef according to `SignExt` is a
835   // correct solution to X.
836   auto GuessNonIVOperand = [&](bool SignExt) {
837     const SCEV *WideLHS;
838     const SCEV *WideRHS;
839 
840     auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
841       if (SignExt)
842         return SE->getSignExtendExpr(S, Ty);
843       return SE->getZeroExtendExpr(S, Ty);
844     };
845 
846     if (IVOpIdx == 0) {
847       WideLHS = SE->getSCEV(WideDef);
848       const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
849       WideRHS = GetExtend(NarrowRHS, WideType);
850     } else {
851       const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
852       WideLHS = GetExtend(NarrowLHS, WideType);
853       WideRHS = SE->getSCEV(WideDef);
854     }
855 
856     // WideUse is "WideDef `op.wide` X" as described in the comment.
857     const SCEV *WideUse = nullptr;
858 
859     switch (NarrowUse->getOpcode()) {
860     default:
861       llvm_unreachable("No other possibility!");
862 
863     case Instruction::Add:
864       WideUse = SE->getAddExpr(WideLHS, WideRHS);
865       break;
866 
867     case Instruction::Mul:
868       WideUse = SE->getMulExpr(WideLHS, WideRHS);
869       break;
870 
871     case Instruction::UDiv:
872       WideUse = SE->getUDivExpr(WideLHS, WideRHS);
873       break;
874 
875     case Instruction::Sub:
876       WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
877       break;
878     }
879 
880     return WideUse == WideAR;
881   };
882 
883   bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
884   if (!GuessNonIVOperand(SignExtend)) {
885     SignExtend = !SignExtend;
886     if (!GuessNonIVOperand(SignExtend))
887       return nullptr;
888   }
889 
890   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
891                    ? WideDef
892                    : createExtendInst(NarrowUse->getOperand(0), WideType,
893                                       SignExtend, NarrowUse);
894   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
895                    ? WideDef
896                    : createExtendInst(NarrowUse->getOperand(1), WideType,
897                                       SignExtend, NarrowUse);
898 
899   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
900   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
901                                         NarrowBO->getName());
902 
903   IRBuilder<> Builder(NarrowUse);
904   Builder.Insert(WideBO);
905   WideBO->copyIRFlags(NarrowBO);
906   return WideBO;
907 }
908 
909 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
910   auto It = ExtendKindMap.find(I);
911   assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
912   return It->second;
913 }
914 
915 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
916                                      unsigned OpCode) const {
917   if (OpCode == Instruction::Add)
918     return SE->getAddExpr(LHS, RHS);
919   if (OpCode == Instruction::Sub)
920     return SE->getMinusSCEV(LHS, RHS);
921   if (OpCode == Instruction::Mul)
922     return SE->getMulExpr(LHS, RHS);
923 
924   llvm_unreachable("Unsupported opcode.");
925 }
926 
927 /// No-wrap operations can transfer sign extension of their result to their
928 /// operands. Generate the SCEV value for the widened operation without
929 /// actually modifying the IR yet. If the expression after extending the
930 /// operands is an AddRec for this loop, return the AddRec and the kind of
931 /// extension used.
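///
/// For example (informal): for 'add nsw i32 %narrow.iv, %step' the nsw flag
/// rules out signed overflow, so sext(%narrow.iv + %step) equals
/// sext(%narrow.iv) + sext(%step), and the widened expression can still be
/// recognized by SCEV as an AddRec on this loop.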
932 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
933   // Handle the common case of add<nsw/nuw>
934   const unsigned OpCode = DU.NarrowUse->getOpcode();
935   // Only Add/Sub/Mul instructions supported yet.
936   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
937       OpCode != Instruction::Mul)
938     return {nullptr, Unknown};
939 
940   // One operand (NarrowDef) has already been extended to WideDef. Now determine
941   // if extending the other will lead to a recurrence.
942   const unsigned ExtendOperIdx =
943       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
944   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
945 
946   const SCEV *ExtendOperExpr = nullptr;
947   const OverflowingBinaryOperator *OBO =
948     cast<OverflowingBinaryOperator>(DU.NarrowUse);
949   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
950   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
951     ExtendOperExpr = SE->getSignExtendExpr(
952       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
953   else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
954     ExtendOperExpr = SE->getZeroExtendExpr(
955       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
956   else
957     return {nullptr, Unknown};
958 
959   // When creating this SCEV expr, don't apply the current operation's NSW or NUW
960   // flags. This instruction may be guarded by control flow that the no-wrap
961   // behavior depends on. Non-control-equivalent instructions can be mapped to
962   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
963   // semantics to those operations.
964   const SCEV *lhs = SE->getSCEV(DU.WideDef);
965   const SCEV *rhs = ExtendOperExpr;
966 
967   // Let's swap operands to the initial order for the case of non-commutative
968   // operations, like SUB. See PR21014.
969   if (ExtendOperIdx == 0)
970     std::swap(lhs, rhs);
971   const SCEVAddRecExpr *AddRec =
972       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
973 
974   if (!AddRec || AddRec->getLoop() != L)
975     return {nullptr, Unknown};
976 
977   return {AddRec, ExtKind};
978 }
979 
980 /// Is this instruction potentially interesting for further simplification after
981 /// widening its type? In other words, can the extend be safely hoisted out of
982 /// the loop with SCEV reducing the value to a recurrence on the same loop? If
983 /// so, return the extended recurrence and the kind of extension used. Otherwise
984 /// return {nullptr, Unknown}.
985 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
986   if (!SE->isSCEVable(DU.NarrowUse->getType()))
987     return {nullptr, Unknown};
988 
989   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
990   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
991       SE->getTypeSizeInBits(WideType)) {
992     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
993     // index. So don't follow this use.
994     return {nullptr, Unknown};
995   }
996 
997   const SCEV *WideExpr;
998   ExtendKind ExtKind;
999   if (DU.NeverNegative) {
1000     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1001     if (isa<SCEVAddRecExpr>(WideExpr))
1002       ExtKind = SignExtended;
1003     else {
1004       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1005       ExtKind = ZeroExtended;
1006     }
1007   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1008     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1009     ExtKind = SignExtended;
1010   } else {
1011     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1012     ExtKind = ZeroExtended;
1013   }
1014   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1015   if (!AddRec || AddRec->getLoop() != L)
1016     return {nullptr, Unknown};
1017   return {AddRec, ExtKind};
1018 }
1019 
1020 /// This IV user cannot be widened. Replace this use of the original narrow IV
1021 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1022 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1023   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1024   if (!InsertPt)
1025     return;
1026   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1027                     << *DU.NarrowUse << "\n");
1028   IRBuilder<> Builder(InsertPt);
1029   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1030   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1031 }
1032 
1033 /// If the narrow use is a compare instruction, then widen the compare
1034 /// (and possibly the other operand).  The extend operation is hoisted into the
1035 /// loop preheader as far as possible.
1036 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1037   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1038   if (!Cmp)
1039     return false;
1040 
1041   // We can legally widen the comparison in the following two cases:
1042   //
1043   //  - The signedness of the IV extension and comparison match
1044   //
1045   //  - The narrow IV is always positive (and thus its sign extension is equal
1046   //    to its zero extension).  For instance, let's say we're zero extending
1047   //    %narrow for the following use
1048   //
1049   //      icmp slt i32 %narrow, %val   ... (A)
1050   //
1051   //    and %narrow is always positive.  Then
1052   //
1053   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1054   //          == icmp slt i32 zext(%narrow), sext(%val)
1055   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1056   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1057     return false;
1058 
1059   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1060   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1061   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1062   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1063 
1064   // Widen the compare instruction.
1065   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1066   if (!InsertPt)
1067     return false;
1068   IRBuilder<> Builder(InsertPt);
1069   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1070 
1071   // Widen the other operand of the compare, if necessary.
1072   if (CastWidth < IVWidth) {
1073     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1074     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1075   }
1076   return true;
1077 }
1078 
1079 /// If the narrow use is an instruction whose two operands are the defining
1080 /// instruction of DU and a load instruction, then we have the following:
1081 /// when the load is hoisted outside the loop, we never reach this function,
1082 /// because scalar evolution analysis works fine in widenIVUse with variables
1083 /// hoisted outside the loop, and efficient code is subsequently generated
1084 /// without emitting truncate instructions. But when the load is not hoisted
1085 /// (whether due to a limitation in alias analysis or due to a true legality
1086 /// constraint), scalar evolution cannot proceed with loop-variant values and
1087 /// inefficient code is generated. This function handles the non-hoisted load
1088 /// as a special case by making the optimization generate the same kind of
1089 /// code for hoisted and non-hoisted loads (widen the use and eliminate the
1090 /// sign extend instruction). This special case matters especially when the
1091 /// induction variable affects the addressing mode in code generation.
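///
/// For example (an informal sketch):
///   %add = add nsw i32 %narrow.iv, %val   ; %val loaded inside the loop
///   %ext = sext i32 %add to i64           ; only user of %add
/// can be rewritten to compute the addition directly in i64 from the widened
/// IV and a sign-extended %val, so the sext can be eliminated.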
1092 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1093   Instruction *NarrowUse = DU.NarrowUse;
1094   Instruction *NarrowDef = DU.NarrowDef;
1095   Instruction *WideDef = DU.WideDef;
1096 
1097   // Handle the common case of add<nsw/nuw>
1098   const unsigned OpCode = NarrowUse->getOpcode();
1099   // Only Add/Sub/Mul instructions are supported.
1100   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1101       OpCode != Instruction::Mul)
1102     return false;
1103 
1104   // The operand that is not defined by NarrowDef of DU. Let's call it the
1105   // other operand.
1106   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1107   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1108          "bad DU");
1109 
1110   const SCEV *ExtendOperExpr = nullptr;
1111   const OverflowingBinaryOperator *OBO =
1112     cast<OverflowingBinaryOperator>(NarrowUse);
1113   ExtendKind ExtKind = getExtendKind(NarrowDef);
1114   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1115     ExtendOperExpr = SE->getSignExtendExpr(
1116       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1117   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1118     ExtendOperExpr = SE->getZeroExtendExpr(
1119       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1120   else
1121     return false;
1122 
1123   // We are interested in the other operand being a load instruction.
1124   // But, we should look into relaxing this restriction later on.
1125   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1126   if (I && I->getOpcode() != Instruction::Load)
1127     return false;
1128 
1129   // Verify that the defining operand is an AddRec.
1130   const SCEV *Op1 = SE->getSCEV(WideDef);
1131   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1132   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1133     return false;
1134   // Verify that the other operand is an extend.
1135   if (ExtKind == SignExtended) {
1136     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1137       return false;
1138   } else {
1139     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1140       return false;
1141   }
1142 
1143   if (ExtKind == SignExtended) {
1144     for (Use &U : NarrowUse->uses()) {
1145       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1146       if (!User || User->getType() != WideType)
1147         return false;
1148     }
1149   } else { // ExtKind == ZeroExtended
1150     for (Use &U : NarrowUse->uses()) {
1151       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1152       if (!User || User->getType() != WideType)
1153         return false;
1154     }
1155   }
1156 
1157   return true;
1158 }
1159 
1160 /// Special Case for widening with variant Loads (see
1161 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1162 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1163   Instruction *NarrowUse = DU.NarrowUse;
1164   Instruction *NarrowDef = DU.NarrowDef;
1165   Instruction *WideDef = DU.WideDef;
1166 
1167   ExtendKind ExtKind = getExtendKind(NarrowDef);
1168 
1169   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1170 
1171   // Generating a widening use instruction.
1172   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1173                    ? WideDef
1174                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1175                                       ExtKind, NarrowUse);
1176   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1177                    ? WideDef
1178                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1179                                       ExtKind, NarrowUse);
1180 
1181   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1182   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1183                                         NarrowBO->getName());
1184   IRBuilder<> Builder(NarrowUse);
1185   Builder.Insert(WideBO);
1186   WideBO->copyIRFlags(NarrowBO);
1187 
1188   assert(ExtKind != Unknown && "Unknown ExtKind not handled");
1189 
1190   ExtendKindMap[NarrowUse] = ExtKind;
1191 
1192   for (Use &U : NarrowUse->uses()) {
1193     Instruction *User = nullptr;
1194     if (ExtKind == SignExtended)
1195       User = dyn_cast<SExtInst>(U.getUser());
1196     else
1197       User = dyn_cast<ZExtInst>(U.getUser());
1198     if (User && User->getType() == WideType) {
1199       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1200                         << *WideBO << "\n");
1201       ++NumElimExt;
1202       User->replaceAllUsesWith(WideBO);
1203       DeadInsts.emplace_back(User);
1204     }
1205   }
1206 }
1207 
1208 /// Determine whether an individual user of the narrow IV can be widened. If so,
1209 /// return the wide clone of the user.
1210 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1211   assert(ExtendKindMap.count(DU.NarrowDef) &&
1212          "Should already know the kind of extension used to widen NarrowDef");
1213 
1214   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1215   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1216     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1217       // For LCSSA phis, sink the truncate outside the loop.
1218       // After SimplifyCFG most loop exit targets have a single predecessor.
1219       // Otherwise fall back to a truncate within the loop.
1220       if (UsePhi->getNumOperands() != 1)
1221         truncateIVUse(DU, DT, LI);
1222       else {
1223         // Widening the PHI requires us to insert a trunc.  The logical place
1224         // for this trunc is in the same BB as the PHI.  This is not possible if
1225         // the BB is terminated by a catchswitch.
1226         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1227           return nullptr;
1228 
1229         PHINode *WidePhi =
1230           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1231                           UsePhi);
1232         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1233         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1234         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1235         UsePhi->replaceAllUsesWith(Trunc);
1236         DeadInsts.emplace_back(UsePhi);
1237         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1238                           << *WidePhi << "\n");
1239       }
1240       return nullptr;
1241     }
1242   }
1243 
1244   // This narrow use can be widened by a sext if it's non-negative or its narrow
1245   // def was widened by a sext. Same for zext.
1246   auto canWidenBySExt = [&]() {
1247     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1248   };
1249   auto canWidenByZExt = [&]() {
1250     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1251   };
1252 
1253   // Our raison d'etre! Eliminate sign and zero extension.
1254   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1255       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1256     Value *NewDef = DU.WideDef;
1257     if (DU.NarrowUse->getType() != WideType) {
1258       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1259       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1260       if (CastWidth < IVWidth) {
1261         // The cast isn't as wide as the IV, so insert a Trunc.
1262         IRBuilder<> Builder(DU.NarrowUse);
1263         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1264       }
1265       else {
1266         // A wider extend was hidden behind a narrower one. This may induce
1267         // another round of IV widening in which the intermediate IV becomes
1268         // dead. It should be very rare.
1269         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1270                           << " not wide enough to subsume " << *DU.NarrowUse
1271                           << "\n");
1272         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1273         NewDef = DU.NarrowUse;
1274       }
1275     }
1276     if (NewDef != DU.NarrowUse) {
1277       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1278                         << " replaced by " << *DU.WideDef << "\n");
1279       ++NumElimExt;
1280       DU.NarrowUse->replaceAllUsesWith(NewDef);
1281       DeadInsts.emplace_back(DU.NarrowUse);
1282     }
1283     // Now that the extend is gone, we want to expose its uses for potential
1284     // further simplification. We don't need to directly inform SimplifyIVUsers
1285     // of the new users, because their parent IV will be processed later as a
1286     // new loop phi. If we preserved IVUsers analysis, we would also want to
1287     // push the uses of WideDef here.
1288 
1289     // No further widening is needed. The deceased [sz]ext had done it for us.
1290     return nullptr;
1291   }
1292 
1293   // Does this user itself evaluate to a recurrence after widening?
1294   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1295   if (!WideAddRec.first)
1296     WideAddRec = getWideRecurrence(DU);
1297 
1298   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1299   if (!WideAddRec.first) {
1300     // If use is a loop condition, try to promote the condition instead of
1301     // truncating the IV first.
1302     if (widenLoopCompare(DU))
1303       return nullptr;
1304 
1305     // We are about to generate a truncate instruction here that may hurt
1306     // performance because the scalar evolution expression computed earlier
1307     // in WideAddRec.first does not indicate a polynomial induction expression.
1308     // In that case, look at the operands of the use instruction to determine
1309     // if we can still widen the use instead of truncating its operand.
1310     if (widenWithVariantLoadUse(DU)) {
1311       widenWithVariantLoadUseCodegen(DU);
1312       return nullptr;
1313     }
1314 
1315     // This user does not evaluate to a recurrence after widening, so don't
1316     // follow it. Instead insert a Trunc to kill off the original use,
1317     // eventually isolating the original narrow IV so it can be removed.
1318     truncateIVUse(DU, DT, LI);
1319     return nullptr;
1320   }
1321   // Assume block terminators cannot evaluate to a recurrence. We can't
1322   // insert a Trunc after a terminator if there happens to be a critical edge.
1323   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1324          "SCEV is not expected to evaluate a block terminator");
1325 
1326   // Reuse the IV increment that SCEVExpander created as long as it dominates
1327   // NarrowUse.
1328   Instruction *WideUse = nullptr;
1329   if (WideAddRec.first == WideIncExpr &&
1330       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1331     WideUse = WideInc;
1332   else {
1333     WideUse = cloneIVUser(DU, WideAddRec.first);
1334     if (!WideUse)
1335       return nullptr;
1336   }
1337   // Evaluation of WideAddRec ensured that the narrow expression could be
1338   // extended outside the loop without overflow. This suggests that the wide use
1339   // evaluates to the same expression as the extended narrow use, but doesn't
1340   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1341   // where it fails, we simply throw away the newly created wide use.
1342   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1343     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1344                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1345                       << "\n");
1346     DeadInsts.emplace_back(WideUse);
1347     return nullptr;
1348   }
1349 
1350   // If we reached this point then we are going to replace
1351   // DU.NarrowUse with WideUse, so reattach any DbgValues to WideUse as well.
1352   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1353 
1354   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1355   // Returning WideUse pushes it on the worklist.
1356   return WideUse;
1357 }
1358 
1359 /// Add eligible users of NarrowDef to NarrowIVUsers.
1360 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1361   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1362   bool NonNegativeDef =
1363       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1364                            SE->getConstant(NarrowSCEV->getType(), 0));
1365   for (User *U : NarrowDef->users()) {
1366     Instruction *NarrowUser = cast<Instruction>(U);
1367 
1368     // Handle data flow merges and bizarre phi cycles.
1369     if (!Widened.insert(NarrowUser).second)
1370       continue;
1371 
1372     bool NonNegativeUse = false;
1373     if (!NonNegativeDef) {
1374       // We might have control-dependent range information for this context.
1375       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1376         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1377     }
1378 
1379     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1380                                NonNegativeDef || NonNegativeUse);
1381   }
1382 }
1383 
1384 /// Process a single induction variable. First use the SCEVExpander to create a
1385 /// wide induction variable that evaluates to the same recurrence as the
1386 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1387 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1388 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1389 ///
1390 /// It would be simpler to delete uses as they are processed, but we must avoid
1391 /// invalidating SCEV expressions.
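///
/// As a rough illustration (hypothetical IR, not taken from this file),
/// widening an i32 IV to i64 turns:
///   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add nsw i32 %iv, 1
///   %ext = sext i32 %iv.next to i64
/// into a parallel wide recurrence:
///   %iv.wide = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]
///   %iv.wide.next = add nsw i64 %iv.wide, 1
/// after which %ext is replaced by %iv.wide.next and the narrow IV is
/// typically left dead for DeleteDeadPHIs to remove.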
1392 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1393   // Is this phi an induction variable?
1394   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1395   if (!AddRec)
1396     return nullptr;
1397 
1398   // Widen the induction variable expression.
1399   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1400                                ? SE->getSignExtendExpr(AddRec, WideType)
1401                                : SE->getZeroExtendExpr(AddRec, WideType);
1402 
1403   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1404          "Expect the new IV expression to preserve its type");
1405 
1406   // Can the IV be extended outside the loop without overflow?
1407   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1408   if (!AddRec || AddRec->getLoop() != L)
1409     return nullptr;
1410 
1411   // An AddRec must have loop-invariant operands. Since this AddRec is
1412   // materialized by a loop header phi, the expression cannot have any post-loop
1413   // operands, so they must dominate the loop header.
1414   assert(
1415       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1416       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1417       "Loop header phi recurrence inputs do not dominate the loop");
1418 
1419   // Iterate over IV uses (including transitive ones) looking for IV increments
1420   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1421   // the increment, calculate control-dependent range information based on
1422   // dominating conditions inside of the loop (e.g. a range check inside of the
1423   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1424   //
1425   // Control-dependent range information is later used to prove that a narrow
1426   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1427   // this on demand because when pushNarrowIVUsers needs this information some
1428   // of the dominating conditions might be already widened.
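  //
  // As a hypothetical sketch: for an increment '%iv.next = add nsw i32 %iv, 1'
  // whose use is dominated by a check 'if (%iv s>= 0)', the recorded range
  // lets pushNarrowIVUsers treat %iv.next as non-negative at that use even
  // when SCEV cannot prove it for every context.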
1429   if (UsePostIncrementRanges)
1430     calculatePostIncRanges(OrigPhi);
1431 
1432   // The rewriter provides a value for the desired IV expression. This may
1433   // either find an existing phi or materialize a new one. Either way, we
1434   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1435   // of the phi-SCC dominates the loop entry.
1436   Instruction *InsertPt = &L->getHeader()->front();
1437   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1438 
1439   // Remembering the WideIV increment generated by SCEVExpander allows
1440   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1441   // employ a general reuse mechanism because the call above is the only call to
1442   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1443   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1444     WideInc =
1445       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1446     WideIncExpr = SE->getSCEV(WideInc);
1447     // Propagate the debug location associated with the original loop increment
1448     // to the new (widened) increment.
1449     auto *OrigInc =
1450       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1451     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1452   }
1453 
1454   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1455   ++NumWidened;
1456 
1457   // Traverse the def-use chain using a worklist starting at the original IV.
1458   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1459 
1460   Widened.insert(OrigPhi);
1461   pushNarrowIVUsers(OrigPhi, WidePhi);
1462 
1463   while (!NarrowIVUsers.empty()) {
1464     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1465 
1466     // Process a def-use edge. This may replace the use, so don't hold a
1467     // use_iterator across it.
1468     Instruction *WideUse = widenIVUse(DU, Rewriter);
1469 
1470     // Follow all def-use edges from the previous narrow use.
1471     if (WideUse)
1472       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1473 
1474     // widenIVUse may have removed the def-use edge.
1475     if (DU.NarrowDef->use_empty())
1476       DeadInsts.emplace_back(DU.NarrowDef);
1477   }
1478 
1479   // Attach any debug information to the new PHI.
1480   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1481 
1482   return WidePhi;
1483 }
1484 
1485 /// Calculates control-dependent range for the given def at the given context
1486 /// by looking at dominating conditions inside of the loop.
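///
/// For instance (a hypothetical example), with NarrowDef '%x = add nsw i32 %i, 4'
/// and a dominating branch on '%i s< 100' leading to NarrowUser, the range
/// recorded for %x at that user is roughly [INT_MIN+4, 104), derived from the
/// branch condition plus the nsw offset.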
1487 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1488                                     Instruction *NarrowUser) {
1489   using namespace llvm::PatternMatch;
1490 
1491   Value *NarrowDefLHS;
1492   const APInt *NarrowDefRHS;
1493   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1494                                  m_APInt(NarrowDefRHS))) ||
1495       !NarrowDefRHS->isNonNegative())
1496     return;
1497 
1498   auto UpdateRangeFromCondition = [&] (Value *Condition,
1499                                        bool TrueDest) {
1500     CmpInst::Predicate Pred;
1501     Value *CmpRHS;
1502     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1503                                  m_Value(CmpRHS))))
1504       return;
1505 
1506     CmpInst::Predicate P =
1507             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1508 
1509     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1510     auto CmpConstrainedLHSRange =
1511             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1512     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1513         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1514 
1515     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1516   };
1517 
1518   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1519     if (!HasGuards)
1520       return;
1521 
1522     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1523                                      Ctx->getParent()->rend())) {
1524       Value *C = nullptr;
1525       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1526         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1527     }
1528   };
1529 
1530   UpdateRangeFromGuards(NarrowUser);
1531 
1532   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1533   // If NarrowUserBB is statically unreachable, asking dominator queries may
1534   // yield surprising results. (e.g. the block may not have a dom tree node)
1535   if (!DT->isReachableFromEntry(NarrowUserBB))
1536     return;
1537 
1538   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1539        L->contains(DTB->getBlock());
1540        DTB = DTB->getIDom()) {
1541     auto *BB = DTB->getBlock();
1542     auto *TI = BB->getTerminator();
1543     UpdateRangeFromGuards(TI);
1544 
1545     auto *BI = dyn_cast<BranchInst>(TI);
1546     if (!BI || !BI->isConditional())
1547       continue;
1548 
1549     auto *TrueSuccessor = BI->getSuccessor(0);
1550     auto *FalseSuccessor = BI->getSuccessor(1);
1551 
1552     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1553       return BBE.isSingleEdge() &&
1554              DT->dominates(BBE, NarrowUser->getParent());
1555     };
1556 
1557     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1558       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1559 
1560     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1561       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1562   }
1563 }
1564 
1565 /// Calculates PostIncRangeInfos map for the given IV
1566 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1567   SmallPtrSet<Instruction *, 16> Visited;
1568   SmallVector<Instruction *, 6> Worklist;
1569   Worklist.push_back(OrigPhi);
1570   Visited.insert(OrigPhi);
1571 
1572   while (!Worklist.empty()) {
1573     Instruction *NarrowDef = Worklist.pop_back_val();
1574 
1575     for (Use &U : NarrowDef->uses()) {
1576       auto *NarrowUser = cast<Instruction>(U.getUser());
1577 
1578       // Don't go looking outside the current loop.
1579       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1580       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1581         continue;
1582 
1583       if (!Visited.insert(NarrowUser).second)
1584         continue;
1585 
1586       Worklist.push_back(NarrowUser);
1587 
1588       calculatePostIncRange(NarrowDef, NarrowUser);
1589     }
1590   }
1591 }
1592 
1593 //===----------------------------------------------------------------------===//
1594 //  Live IV Reduction - Minimize IVs live across the loop.
1595 //===----------------------------------------------------------------------===//
1596 
1597 //===----------------------------------------------------------------------===//
1598 //  Simplification of IV users based on SCEV evaluation.
1599 //===----------------------------------------------------------------------===//
1600 
1601 namespace {
1602 
1603 class IndVarSimplifyVisitor : public IVVisitor {
1604   ScalarEvolution *SE;
1605   const TargetTransformInfo *TTI;
1606   PHINode *IVPhi;
1607 
1608 public:
1609   WideIVInfo WI;
1610 
1611   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1612                         const TargetTransformInfo *TTI,
1613                         const DominatorTree *DTree)
1614     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1615     DT = DTree;
1616     WI.NarrowIV = IVPhi;
1617   }
1618 
1619   // Implement the interface used by simplifyUsersOfIV.
1620   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1621 };
1622 
1623 } // end anonymous namespace
1624 
1625 /// Iteratively perform simplification on a worklist of IV users. Each
1626 /// successive simplification may push more users which may themselves be
1627 /// candidates for simplification.
1628 ///
1629 /// Sign/Zero extend elimination is interleaved with IV simplification.
1630 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1631                                        SCEVExpander &Rewriter,
1632                                        LoopInfo *LI) {
1633   SmallVector<WideIVInfo, 8> WideIVs;
1634 
1635   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1636           Intrinsic::getName(Intrinsic::experimental_guard));
1637   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1638 
1639   SmallVector<PHINode*, 8> LoopPhis;
1640   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1641     LoopPhis.push_back(cast<PHINode>(I));
1642   }
1643   // Each round of simplification iterates through the SimplifyIVUsers worklist
1644   // for all current phis, then determines whether any IVs can be
1645   // widened. Widening adds new phis to LoopPhis, inducing another round of
1646   // simplification on the wide IVs.
1647   bool Changed = false;
1648   while (!LoopPhis.empty()) {
1649     // Evaluate as many IV expressions as possible before widening any IVs. This
1650     // forces SCEV to set no-wrap flags before evaluating sign/zero
1651     // extension. The first time SCEV attempts to normalize sign/zero extension,
1652     // the result becomes final. So for the most predictable results, we delay
1653     // evaluation of sign/zero extension until needed, and avoid running
1654     // other SCEV based analysis prior to simplifyAndExtend.
1655     do {
1656       PHINode *CurrIV = LoopPhis.pop_back_val();
1657 
1658       // Information about sign/zero extensions of CurrIV.
1659       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1660 
1661       Changed |=
1662           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1663 
1664       if (Visitor.WI.WidestNativeType) {
1665         WideIVs.push_back(Visitor.WI);
1666       }
1667     } while (!LoopPhis.empty());
1668 
1669     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1670       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1671       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1672         Changed = true;
1673         LoopPhis.push_back(WidePhi);
1674       }
1675     }
1676   }
1677   return Changed;
1678 }
1679 
1680 //===----------------------------------------------------------------------===//
1681 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1682 //===----------------------------------------------------------------------===//
1683 
1684 /// Given a Value which is hoped to be part of an add recurrence in the given
1685 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1686 /// that this is less general than SCEV's AddRec checking.
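///
/// For example (hypothetical IR), given '%i.next = add i32 %i, %step' where
/// '%i' is a header phi and '%step' is loop-invariant, this returns '%i'.  A
/// GEP increment is only accepted in the single-index form
/// '%p.next = getelementptr i8, i8* %p, i64 %n' with the phi as the pointer
/// operand and a loop-invariant index.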
1687 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1688   Instruction *IncI = dyn_cast<Instruction>(IncV);
1689   if (!IncI)
1690     return nullptr;
1691 
1692   switch (IncI->getOpcode()) {
1693   case Instruction::Add:
1694   case Instruction::Sub:
1695     break;
1696   case Instruction::GetElementPtr:
1697     // An IV counter must preserve its type.
1698     if (IncI->getNumOperands() == 2)
1699       break;
1700     LLVM_FALLTHROUGH;
1701   default:
1702     return nullptr;
1703   }
1704 
1705   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1706   if (Phi && Phi->getParent() == L->getHeader()) {
1707     if (L->isLoopInvariant(IncI->getOperand(1)))
1708       return Phi;
1709     return nullptr;
1710   }
1711   if (IncI->getOpcode() == Instruction::GetElementPtr)
1712     return nullptr;
1713 
1714   // Allow add/sub to be commuted.
1715   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1716   if (Phi && Phi->getParent() == L->getHeader()) {
1717     if (L->isLoopInvariant(IncI->getOperand(0)))
1718       return Phi;
1719   }
1720   return nullptr;
1721 }
1722 
1723 /// Whether the current loop exit test is based on this value.  Currently this
1724 /// is limited to a direct use in the loop condition.
1725 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
1726   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1727   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1728   // TODO: Allow non-icmp loop test.
1729   if (!ICmp)
1730     return false;
1731 
1732   // TODO: Allow indirect use.
1733   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
1734 }
1735 
1736 /// linearFunctionTestReplace policy. Return true unless we can show that the
1737 /// current exit test is already sufficiently canonical.
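///
/// For example (hypothetical), a latch test 'icmp slt i32 %i, %n' is not yet
/// canonical and returns true, while 'icmp ne i32 %i.next, %limit' against a
/// loop-invariant %limit, where %i is a simple counter phi, is already the
/// form LFTR would produce and returns false.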
1738 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
1739   assert(L->getLoopLatch() && "Must be in simplified form");
1740 
1741   // Avoid converting a constant or loop invariant test back to a runtime
1742   // test.  This is critical for when SCEV's cached ExitCount is less precise
1743   // than the current IR (such as after we've proven a particular exit is
1744   // actually dead and thus the BE count never reaches our ExitCount.)
1745   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1746   if (L->isLoopInvariant(BI->getCondition()))
1747     return false;
1748 
1749   // Do LFTR to simplify the exit condition to an ICMP.
1750   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1751   if (!Cond)
1752     return true;
1753 
1754   // Do LFTR to simplify the exit ICMP to EQ/NE
1755   ICmpInst::Predicate Pred = Cond->getPredicate();
1756   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1757     return true;
1758 
1759   // Look for a loop invariant RHS
1760   Value *LHS = Cond->getOperand(0);
1761   Value *RHS = Cond->getOperand(1);
1762   if (!L->isLoopInvariant(RHS)) {
1763     if (!L->isLoopInvariant(LHS))
1764       return true;
1765     std::swap(LHS, RHS);
1766   }
1767   // Look for a simple IV counter LHS
1768   PHINode *Phi = dyn_cast<PHINode>(LHS);
1769   if (!Phi)
1770     Phi = getLoopPhiForCounter(LHS, L);
1771 
1772   if (!Phi)
1773     return true;
1774 
1775   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1776   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1777   if (Idx < 0)
1778     return true;
1779 
1780   // Do LFTR if the exit condition's IV is *not* a simple counter.
1781   Value *IncV = Phi->getIncomingValue(Idx);
1782   return Phi != getLoopPhiForCounter(IncV, L);
1783 }
1784 
1785 /// Return true if undefined behavior would provably be executed on the path to
1786 /// OnPathTo if Root produced a poison result.  Note that this doesn't say
1787 /// anything about whether OnPathTo is actually executed or whether Root is
1788 /// actually poison.  This can be used to assess whether a new use of Root can
1789 /// be added at a location which is control equivalent with OnPathTo (such as
1790 /// immediately before it) without introducing UB which didn't previously
1791 /// exist.  Note that a false result conveys no information.
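///
/// For example (hypothetical), if Root is the pointer operand of a load that
/// dominates OnPathTo, a poison Root would make that load immediate UB, so
/// this returns true.  If poison would only flow into instructions we cannot
/// reason about (e.g. a call), we conservatively return false.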
1792 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
1793                                           Instruction *OnPathTo,
1794                                           DominatorTree *DT) {
1795   // Basic approach is to assume Root is poison, propagate poison forward
1796   // through all users we can easily track, and then check whether any of those
1797   // users provably trigger UB and must execute before our exiting block might
1798   // exit.
1799 
1800   // The set of all recursive users we've visited (which are assumed to all be
1801   // poison because of said visit)
1802   SmallSet<const Value *, 16> KnownPoison;
1803   SmallVector<const Instruction*, 16> Worklist;
1804   Worklist.push_back(Root);
1805   while (!Worklist.empty()) {
1806     const Instruction *I = Worklist.pop_back_val();
1807 
1808     // If we know this must trigger UB on a path leading to our target.
1809     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
1810       return true;
1811 
1812     // If we can't analyze propagation through this instruction, just skip it
1813     // and transitive users.  Safe as false is a conservative result.
1814     if (!propagatesFullPoison(I) && I != Root)
1815       continue;
1816 
1817     if (KnownPoison.insert(I).second)
1818       for (const User *User : I->users())
1819         Worklist.push_back(cast<Instruction>(User));
1820   }
1821 
1822   // Might be non-UB, or might have a path we couldn't prove must execute on
1823   // way to exiting bb.
1824   return false;
1825 }
1826 
1827 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1828 /// down to checking that all operands are constant and listing instructions
1829 /// that may hide undef.
1830 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1831                                unsigned Depth) {
1832   if (isa<Constant>(V))
1833     return !isa<UndefValue>(V);
1834 
1835   if (Depth >= 6)
1836     return false;
1837 
1838   // Conservatively handle non-constant non-instructions. For example, Arguments
1839   // may be undef.
1840   Instruction *I = dyn_cast<Instruction>(V);
1841   if (!I)
1842     return false;
1843 
1844   // Load and return values may be undef.
1845   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1846     return false;
1847 
1848   // Optimistically handle other instructions.
1849   for (Value *Op : I->operands()) {
1850     if (!Visited.insert(Op).second)
1851       continue;
1852     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1853       return false;
1854   }
1855   return true;
1856 }
1857 
1858 /// Return true if the given value is concrete. We must prove that undef can
1859 /// never reach it.
1860 ///
1861 /// TODO: If we decide that this is a good approach to checking for undef, we
1862 /// may factor it into a common location.
1863 static bool hasConcreteDef(Value *V) {
1864   SmallPtrSet<Value*, 8> Visited;
1865   Visited.insert(V);
1866   return hasConcreteDefImpl(V, Visited, 0);
1867 }
1868 
1869 /// Return true if this IV has any uses other than the (soon to be rewritten)
1870 /// loop exit test.
1871 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1872   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1873   Value *IncV = Phi->getIncomingValue(LatchIdx);
1874 
1875   for (User *U : Phi->users())
1876     if (U != Cond && U != IncV) return false;
1877 
1878   for (User *U : IncV->users())
1879     if (U != Cond && U != Phi) return false;
1880   return true;
1881 }
1882 
1883 /// Return true if the given phi is a "counter" in L.  A counter is an
1884 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
1885 /// step of 1.  Note that L must have exactly one latch.
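///
/// A matching counter looks roughly like (hypothetical IR):
///   %i = phi i64 [ %start, %preheader ], [ %i.next, %latch ]
///   %i.next = add i64 %i, 1
/// whereas a phi stepping by some other constant, or one whose SCEV is not an
/// affine add recurrence in L, is rejected.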
1886 static bool isLoopCounter(PHINode* Phi, Loop *L,
1887                           ScalarEvolution *SE) {
1888   assert(Phi->getParent() == L->getHeader());
1889   assert(L->getLoopLatch());
1890 
1891   if (!SE->isSCEVable(Phi->getType()))
1892     return false;
1893 
1894   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1895   if (!AR || AR->getLoop() != L || !AR->isAffine())
1896     return false;
1897 
1898   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1899   if (!Step || !Step->isOne())
1900     return false;
1901 
1902   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
1903   Value *IncV = Phi->getIncomingValue(LatchIdx);
1904   return (getLoopPhiForCounter(IncV, L) == Phi);
1905 }
1906 
1907 /// Search the loop header for a loop counter (an add rec w/step of one)
1908 /// suitable for use by LFTR.  If multiple counters are available, select the
1909 /// "best" one based on profitability heuristics.
1910 ///
1911 /// BECount may be an i8* pointer type. The pointer difference is already
1912 /// valid count without scaling the address stride, so it remains a pointer
1913 /// expression as far as SCEV is concerned.
1914 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
1915                                 const SCEV *BECount,
1916                                 ScalarEvolution *SE, DominatorTree *DT) {
1917   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1918 
1919   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
1920 
1921   // Loop over all of the PHI nodes, looking for a simple counter.
1922   PHINode *BestPhi = nullptr;
1923   const SCEV *BestInit = nullptr;
1924   BasicBlock *LatchBlock = L->getLoopLatch();
1925   assert(LatchBlock && "Must be in simplified form");
1926   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1927 
1928   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1929     PHINode *Phi = cast<PHINode>(I);
1930     if (!isLoopCounter(Phi, L, SE))
1931       continue;
1932 
1933     // Avoid comparing an integer IV against a pointer Limit.
1934     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1935       continue;
1936 
1937     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1938 
1939     // AR may be a pointer type, while BECount is an integer type.
1940     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1941     // AR may not be a narrower type, or we may never exit.
1942     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1943     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
1944       continue;
1945 
1946     // Avoid reusing a potentially undef value to compute other values that may
1947     // have originally had a concrete definition.
1948     if (!hasConcreteDef(Phi)) {
1949       // We explicitly allow unknown phis as long as they are already used by
1950       // the loop exit test.  This is legal since performing LFTR could not
1951       // increase the number of undef users.
1952       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
1953       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
1954           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
1955         continue;
1956     }
1957 
1958     // Avoid introducing undefined behavior due to poison which didn't exist in
1959     // the original program.  (Annoyingly, the rules for poison and undef
1960     // propagation are distinct, so this does NOT cover the undef case above.)
1961     // We have to ensure that we don't introduce UB by introducing a use on an
1962     // iteration where said IV produces poison.  Our strategy here differs for
1963     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
1964     // see code in linearFunctionTestReplace.  For pointers, we restrict
1965     // transforms as there is no good way to reinfer inbounds once lost.
1966     if (!Phi->getType()->isIntegerTy() &&
1967         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
1968       continue;
1969 
1970     const SCEV *Init = AR->getStart();
1971 
1972     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
1973       // Don't force a live loop counter if another IV can be used.
1974       if (AlmostDeadIV(Phi, LatchBlock, Cond))
1975         continue;
1976 
1977       // Prefer to count-from-zero. This is a more "canonical" counter form. It
1978       // also prefers integer to pointer IVs.
1979       if (BestInit->isZero() != Init->isZero()) {
1980         if (BestInit->isZero())
1981           continue;
1982       }
1983       // If two IVs both count from zero or both count from nonzero then the
1984       // narrower is likely a dead phi that has been widened. Use the wider phi
1985       // to allow the other to be eliminated.
1986       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
1987         continue;
1988     }
1989     BestPhi = Phi;
1990     BestInit = Init;
1991   }
1992   return BestPhi;
1993 }
1994 
1995 /// Insert an IR expression which computes the value held by the IV IndVar
1996 /// (which must be a loop counter w/unit stride) after the backedge of loop L
1997 /// is taken ExitCount times.
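///
/// In the common integer case this amounts to (hypothetical sketch):
///   IVLimit = Start + ExitCount        ; pre-increment comparison
///   IVLimit = Start + ExitCount + 1    ; post-increment comparison
/// expanded by SCEVExpander just before the exiting branch; the pointer-IV
/// path instead forms a GEP of the zero-extended count from the start value.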
1998 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
1999                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
2000                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2001   assert(isLoopCounter(IndVar, L, SE));
2002   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2003   const SCEV *IVInit = AR->getStart();
2004 
2005   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2006   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2007   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2008   // the existing GEPs whenever possible.
2009   if (IndVar->getType()->isPointerTy() &&
2010       !ExitCount->getType()->isPointerTy()) {
2011     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2012     // signed value. ExitCount on the other hand represents the loop trip count,
2013     // which is an unsigned value. FindLoopCounter only allows induction
2014     // variables that have a positive unit stride of one. This means we don't
2015     // have to handle the case of negative offsets (yet) and just need to zero
2016     // extend ExitCount.
2017     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2018     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2019     if (UsePostInc)
2020       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2021 
2022     // Expand the code for the iteration count.
2023     assert(SE->isLoopInvariant(IVOffset, L) &&
2024            "Computed iteration count is not loop invariant!");
2025 
2026     // We could handle pointer IVs other than i8*, but we need to compensate for
2027     // gep index scaling.
2028     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2029                              cast<PointerType>(IndVar->getType())
2030                                  ->getElementType())->isOne() &&
2031            "unit stride pointer IV must be i8*");
2032 
2033     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2034     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2035     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2036   } else {
2037     // In any other case, convert both IVInit and ExitCount to integers before
2038     // comparing. This may result in SCEV expansion of pointers, but in practice
2039     // SCEV will fold the pointer arithmetic away as such:
2040     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2041     //
2042     // Valid Cases: (1) both integers, which is most common; (2) both may be pointers
2043     // for simple memset-style loops.
2044     //
2045     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2046     // were generated on top of case #2, which is not expected.
2047 
2048     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2049     // For unit stride, IVCount = Start + ExitCount with 2's complement
2050     // overflow.
2051 
2052     // For integer IVs, truncate the IV before computing IVInit + BECount,
2053     // unless we know a priori that the limit must be a constant when evaluated
2054     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2055     // of the IV in the loop over a (potentially) expensive expansion of the
2056     // widened exit count add(zext(add)) expression.
2057     if (SE->getTypeSizeInBits(IVInit->getType())
2058         > SE->getTypeSizeInBits(ExitCount->getType())) {
2059       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2060         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2061       else
2062         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2063     }
2064 
2065     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2066 
2067     if (UsePostInc)
2068       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2069 
2070     // Expand the code for the iteration count.
2071     assert(SE->isLoopInvariant(IVLimit, L) &&
2072            "Computed iteration count is not loop invariant!");
2073     // Ensure that we generate the same type as IndVar, or a smaller integer
2074     // type. In the presence of null pointer values, we have an integer type
2075     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2076     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2077       IndVar->getType() : ExitCount->getType();
2078     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2079     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2080   }
2081 }
2082 
2083 /// This method rewrites the exit condition of the loop to be a canonical !=
2084 /// comparison against the incremented loop induction variable.  This pass is
2085 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2086 /// determine a loop-invariant trip count of the loop, which is actually a much
2087 /// broader range than just linear tests.
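///
/// For example (hypothetical IR), a latch branch on 'icmp slt i32 %i, %n' can
/// be rewritten to branch on 'icmp ne i32 %i.next, %lftr.limit', where
/// %lftr.limit is the loop-invariant value the counter reaches after ExitCount
/// backedges, as computed by genLoopLimit.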
2088 bool IndVarSimplify::
2089 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2090                           const SCEV *ExitCount,
2091                           PHINode *IndVar, SCEVExpander &Rewriter) {
2092   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2093   assert(isLoopCounter(IndVar, L, SE));
2094   Instruction * const IncVar =
2095     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2096 
2097   // Initialize CmpIndVar to the preincremented IV.
2098   Value *CmpIndVar = IndVar;
2099   bool UsePostInc = false;
2100 
2101   // If the exiting block is the same as the backedge block, we prefer to
2102   // compare against the post-incremented value, otherwise we must compare
2103   // against the preincremented value.
2104   if (ExitingBB == L->getLoopLatch()) {
2105     // For pointer IVs, we chose not to strip inbounds, which requires us not
2106     // to add a potentially UB-introducing use.  We need to either a) show
2107     // the loop test we're modifying is already in post-inc form, or b) show
2108     // that adding a use must not introduce UB.
2109     bool SafeToPostInc =
2110         IndVar->getType()->isIntegerTy() ||
2111         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2112         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2113     if (SafeToPostInc) {
2114       UsePostInc = true;
2115       CmpIndVar = IncVar;
2116     }
2117   }
2118 
2119   // It may be necessary to drop nowrap flags on the incrementing instruction
2120   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2121   // case the increment might have previously been poison on the last iteration
2122   // only) or if LFTR switches to a different IV that was previously dynamically
2123   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2124   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2125   // check), because the pre-inc addrec flags may be adopted from the original
2126   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2127   // TODO: This handling is inaccurate for one case: If we switch to a
2128   // dynamically dead IV that wraps on the first loop iteration only, which is
2129   // not covered by the post-inc addrec. (If the new IV was not dynamically
2130   // dead, it could not be poison on the first iteration in the first place.)
2131   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2132     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2133     if (BO->hasNoUnsignedWrap())
2134       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2135     if (BO->hasNoSignedWrap())
2136       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2137   }
2138 
2139   Value *ExitCnt = genLoopLimit(
2140       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2141   assert(ExitCnt->getType()->isPointerTy() ==
2142              IndVar->getType()->isPointerTy() &&
2143          "genLoopLimit missed a cast");
2144 
2145   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2146   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2147   ICmpInst::Predicate P;
2148   if (L->contains(BI->getSuccessor(0)))
2149     P = ICmpInst::ICMP_NE;
2150   else
2151     P = ICmpInst::ICMP_EQ;
2152 
2153   IRBuilder<> Builder(BI);
2154 
2155   // The new loop exit condition should reuse the debug location of the
2156   // original loop exit condition.
2157   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2158     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2159 
2160   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2161   // avoid the expensive expansion of the limit expression in the wider type,
2162   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2163   // since we know (from the exit count bitwidth), that we can't self-wrap in
2164   // the narrower type.
2165   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2166   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2167   if (CmpIndVarSize > ExitCntSize) {
2168     assert(!CmpIndVar->getType()->isPointerTy() &&
2169            !ExitCnt->getType()->isPointerTy());
2170 
2171     // Before resorting to actually inserting the truncate, use the same
2172     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2173     // the other side of the comparison instead.  We still evaluate the limit
2174     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2175     // a truncate within it.
2176     bool Extended = false;
2177     const SCEV *IV = SE->getSCEV(CmpIndVar);
2178     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2179                                                   ExitCnt->getType());
2180     const SCEV *ZExtTrunc =
2181       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2182 
2183     if (ZExtTrunc == IV) {
2184       Extended = true;
2185       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2186                                    "wide.trip.count");
2187     } else {
2188       const SCEV *SExtTrunc =
2189         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2190       if (SExtTrunc == IV) {
2191         Extended = true;
2192         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2193                                      "wide.trip.count");
2194       }
2195     }
2196 
2197     if (Extended) {
2198       bool Discard;
2199       L->makeLoopInvariant(ExitCnt, Discard);
2200     } else
2201       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2202                                       "lftr.wideiv");
2203   }
2204   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2205                     << "      LHS:" << *CmpIndVar << '\n'
2206                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2207                     << "\n"
2208                     << "      RHS:\t" << *ExitCnt << "\n"
2209                     << "ExitCount:\t" << *ExitCount << "\n"
2210                     << "  was: " << *BI->getCondition() << "\n");
2211 
2212   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2213   Value *OrigCond = BI->getCondition();
2214   // It's tempting to use replaceAllUsesWith here to fully replace the old
2215   // comparison, but that's not immediately safe, since users of the old
2216   // comparison may not be dominated by the new comparison. Instead, just
2217   // update the branch to use the new comparison; in the common case this
2218   // will make the old comparison dead.
2219   BI->setCondition(Cond);
2220   DeadInsts.push_back(OrigCond);
2221 
2222   ++NumLFTR;
2223   return true;
2224 }
2225 
2226 //===----------------------------------------------------------------------===//
2227 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2228 //===----------------------------------------------------------------------===//
2229 
2230 /// If there's a single exit block, sink any loop-invariant values that
2231 /// were defined in the preheader but not used inside the loop into the
2232 /// exit block to reduce register pressure in the loop.
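///
/// For example (hypothetical), a getelementptr computed in the preheader but
/// used only after the loop can be moved into the exit block, so its value
/// does not occupy a register for the duration of the loop.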
2233 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2234   BasicBlock *ExitBlock = L->getExitBlock();
2235   if (!ExitBlock) return false;
2236 
2237   BasicBlock *Preheader = L->getLoopPreheader();
2238   if (!Preheader) return false;
2239 
2240   bool MadeAnyChanges = false;
2241   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2242   BasicBlock::iterator I(Preheader->getTerminator());
2243   while (I != Preheader->begin()) {
2244     --I;
2245     // New instructions were inserted at the end of the preheader.
2246     if (isa<PHINode>(I))
2247       break;
2248 
2249     // Don't move instructions which might have side effects, since the side
2250     // effects need to complete before instructions inside the loop.  Also don't
2251     // move instructions which might read memory, since the loop may modify
2252     // memory. Note that it's okay if the instruction might have undefined
2253     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2254     // block.
2255     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2256       continue;
2257 
2258     // Skip debug info intrinsics.
2259     if (isa<DbgInfoIntrinsic>(I))
2260       continue;
2261 
2262     // Skip eh pad instructions.
2263     if (I->isEHPad())
2264       continue;
2265 
2266     // Don't sink alloca: we never want to sink static alloca's out of the
2267     // entry block, and correctly sinking dynamic alloca's requires
2268     // checks for stacksave/stackrestore intrinsics.
2269     // FIXME: Refactor this check somehow?
2270     if (isa<AllocaInst>(I))
2271       continue;
2272 
2273     // Determine if there is a use in or before the loop (direct or
2274     // otherwise).
2275     bool UsedInLoop = false;
2276     for (Use &U : I->uses()) {
2277       Instruction *User = cast<Instruction>(U.getUser());
2278       BasicBlock *UseBB = User->getParent();
2279       if (PHINode *P = dyn_cast<PHINode>(User)) {
2280         unsigned i =
2281           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2282         UseBB = P->getIncomingBlock(i);
2283       }
2284       if (UseBB == Preheader || L->contains(UseBB)) {
2285         UsedInLoop = true;
2286         break;
2287       }
2288     }
2289 
2290     // If there is, the def must remain in the preheader.
2291     if (UsedInLoop)
2292       continue;
2293 
2294     // Otherwise, sink it to the exit block.
2295     Instruction *ToMove = &*I;
2296     bool Done = false;
2297 
2298     if (I != Preheader->begin()) {
2299       // Skip debug info intrinsics.
2300       do {
2301         --I;
2302       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2303 
2304       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2305         Done = true;
2306     } else {
2307       Done = true;
2308     }
2309 
2310     MadeAnyChanges = true;
2311     ToMove->moveBefore(*ExitBlock, InsertPt);
2312     if (Done) break;
2313     InsertPt = ToMove->getIterator();
2314   }
2315 
2316   return MadeAnyChanges;
2317 }
2318 
2319 /// Return a symbolic upper bound for the backedge taken count of the loop.
2320 /// This is more general than getConstantMaxBackedgeTakenCount as it returns
2321 /// an arbitrary expression as opposed to only constants.
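///
/// For example (hypothetical), a loop with two analyzable exits taken after
/// %n and %m backedges respectively yields umin(%n, %m) here, even when
/// neither count is a constant.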
2322 /// TODO: Move into the ScalarEvolution class.
2323 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
2324                                             DominatorTree &DT, Loop *L) {
2325   SmallVector<BasicBlock*, 16> ExitingBlocks;
2326   L->getExitingBlocks(ExitingBlocks);
2327 
2328   // Form an expression for the maximum exit count possible for this loop. We
2329   // merge the max and exact information to approximate a version of
2330   // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2331   SmallVector<const SCEV*, 4> ExitCounts;
2332   for (BasicBlock *ExitingBB : ExitingBlocks) {
2333     const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
2334     if (isa<SCEVCouldNotCompute>(ExitCount))
2335       ExitCount = SE.getExitCount(L, ExitingBB,
2336                                   ScalarEvolution::ConstantMaximum);
2337     if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2338       assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
2339              "We should only have known counts for exiting blocks that "
2340              "dominate latch!");
2341       ExitCounts.push_back(ExitCount);
2342     }
2343   }
2344   if (ExitCounts.empty())
2345     return SE.getCouldNotCompute();
2346   return SE.getUMinFromMismatchedTypes(ExitCounts);
2347 }
2348 
2349 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2350   SmallVector<BasicBlock*, 16> ExitingBlocks;
2351   L->getExitingBlocks(ExitingBlocks);
2352 
2353   // Remove all exits which aren't both rewritable and analyzable.
2354   auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2355     // If our exiting block exits multiple loops, we can only rewrite the
2356     // innermost one.  Otherwise, we're changing how many times the innermost
2357     // loop runs before it exits.
2358     if (LI->getLoopFor(ExitingBB) != L)
2359       return true;
2360 
2361     // Can't rewrite non-branch yet.
2362     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2363     if (!BI)
2364       return true;
2365 
2366     // If already constant, nothing to do.
2367     if (isa<Constant>(BI->getCondition()))
2368       return true;
2369 
2370     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2371     if (isa<SCEVCouldNotCompute>(ExitCount))
2372       return true;
2373     return false;
2374   });
2375   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2376 
2377   if (ExitingBlocks.empty())
2378     return false;
2379 
2380   // Get a symbolic upper bound on the loop backedge taken count.
2381   const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2382   if (isa<SCEVCouldNotCompute>(MaxExitCount))
2383     return false;
2384 
2385   // Visit our exit blocks in order of dominance.  We know from the fact that
2386   // all remaining exits are analyzable that there must be a total dominance order
2387   // between them as each must dominate the latch.  The visit order only
2388   // matters for the provably equal case.
2389   llvm::sort(ExitingBlocks,
2390              [&](BasicBlock *A, BasicBlock *B) {
2391                // std::sort sorts in ascending order, so we want the inverse of
2392                // the normal dominance relation.
2393                if (DT->properlyDominates(A, B)) return true;
2394                if (DT->properlyDominates(B, A)) return false;
2395                llvm_unreachable("expected total dominance order!");
2396              });
2397 #ifndef NDEBUG
2398   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2399     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2400   }
2401 #endif
2402 
2403   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2404     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2405     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2406     auto *OldCond = BI->getCondition();
2407     auto *NewCond = ConstantInt::get(OldCond->getType(),
2408                                      IsTaken ? ExitIfTrue : !ExitIfTrue);
2409     BI->setCondition(NewCond);
2410     if (OldCond->use_empty())
2411       DeadInsts.push_back(OldCond);
2412   };
2413 
2414   bool Changed = false;
2415   SmallSet<const SCEV*, 8> DominatingExitCounts;
2416   for (BasicBlock *ExitingBB : ExitingBlocks) {
2417     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2418     assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2419 
2420     // If we know we'd exit on the first iteration, rewrite the exit to
2421     // reflect this.  This does not imply the loop must exit through this
2422     // exit; there may be an earlier one taken on the first iteration.
2423     // TODO: Given we know the backedge can't be taken, we should go ahead
2424     // and break it.  Or at least, kill all the header phis and simplify.
2425     if (ExitCount->isZero()) {
2426       FoldExit(ExitingBB, true);
2427       Changed = true;
2428       continue;
2429     }
2430 
2431     // If we end up with a pointer exit count, bail.  Note that we can end up
2432     // with a pointer exit count for one exiting block, and not for another in
2433     // the same loop.
2434     if (!ExitCount->getType()->isIntegerTy() ||
2435         !MaxExitCount->getType()->isIntegerTy())
2436       continue;
2437 
2438     Type *WiderType =
2439       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2440     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2441     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2442     assert(MaxExitCount->getType() == ExitCount->getType());
2443 
2444     // Can we prove that some other exit must be taken strictly before this
2445     // one?
2446     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2447                                      MaxExitCount, ExitCount)) {
2448       FoldExit(ExitingBB, false);
2449       Changed = true;
2450       continue;
2451     }
2452 
2453     // As we run, keep track of which exit counts we've encountered.  If we
2454     // find a duplicate, we've found an exit which would have exited on the
2455     // exiting iteration, but (from the visit order) strictly follows another
2456     // which does the same and is thus dead.
2457     if (!DominatingExitCounts.insert(ExitCount).second) {
2458       FoldExit(ExitingBB, false);
2459       Changed = true;
2460       continue;
2461     }
2462 
2463     // TODO: There might be another opportunity to leverage SCEV's reasoning
2464     // here.  If we kept track of the min of dominating exits so far, we could
2465     // discharge exits with EC >= MDEC. This is less powerful than the existing
2466     // transform (since later exits aren't considered), but potentially more
2467     // powerful for any case where SCEV can prove a >=u b, but neither a == b
2468     // nor a >u b.  Such a case is not currently known.
2469   }
2470   return Changed;
2471 }
2472 
2473 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2474   SmallVector<BasicBlock*, 16> ExitingBlocks;
2475   L->getExitingBlocks(ExitingBlocks);
2476 
2477   // Finally, see if we can rewrite our exit conditions into a loop invariant
2478   // form. If we have a read-only loop, and we can tell that we must exit down
2479   // a path which does not need any of the values computed within the loop, we
2480   // can rewrite the loop to exit on the first iteration.  Note that this
2481   // doesn't either a) tell us the loop exits on the first iteration (unless
2482   // *all* exits are predicatable) or b) tell us *which* exit might be taken.
2483   // This transformation looks a lot like a restricted form of dead loop
2484   // elimination, but restricted to read-only loops and without necessarily
2485   // needing to kill the loop entirely.
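  //
  // As a sketch (hypothetical): in a read-only loop with two predicatable
  // exits whose exit counts are %ec.a and %ec.b and whose exact backedge-taken
  // count is %btc, each exit branch can be rewritten to the loop-invariant
  // test '%ec.x == %btc', so the loop leaves down the correct path on its
  // first iteration rather than iterating to discover it.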
2486   if (!LoopPredication)
2487     return false;
2488 
2489   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2490     return false;
2491 
2492   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2493   // through *explicit* control flow.  We have to eliminate the possibility of
2494   // implicit exits (see below) before we know it's truly exact.
2495   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2496   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2497       !SE->isLoopInvariant(ExactBTC, L) ||
2498       !isSafeToExpand(ExactBTC, *SE))
2499     return false;
2500 
2501   // If we end up with a pointer exit count, bail.  It may be unsized.
2502   if (!ExactBTC->getType()->isIntegerTy())
2503     return false;
2504 
2505   auto BadExit = [&](BasicBlock *ExitingBB) {
2506     // If our exiting block exits multiple loops, we can only rewrite the
2507     // innermost one.  Otherwise, we're changing how many times the innermost
2508     // loop runs before it exits.
2509     if (LI->getLoopFor(ExitingBB) != L)
2510       return true;
2511 
2512     // Can't rewrite non-branch yet.
2513     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2514     if (!BI)
2515       return true;
2516 
2517     // If already constant, nothing to do.
2518     if (isa<Constant>(BI->getCondition()))
2519       return true;
2520 
2521     // If the exit block has phis, we need to be able to compute the values
2522     // within the loop which contains them.  This assumes trivially lcssa phis
2523     // have already been removed; TODO: generalize
2524     BasicBlock *ExitBlock =
2525         BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2526     if (!ExitBlock->phis().empty())
2527       return true;
2528 
2529     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2530     assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2531     if (!SE->isLoopInvariant(ExitCount, L) ||
2532         !isSafeToExpand(ExitCount, *SE))
2533       return true;
2534 
2535     // If we end up with a pointer exit count, bail.  It may be unsized.
2536     if (!ExitCount->getType()->isIntegerTy())
2537       return true;
2538 
2539     return false;
2540   };
2541 
2542   // If we have any exits which can't be predicated themselves, then we can't
2543   // predicate any exit which isn't guaranteed to execute before it.  Consider
2544   // two exits (a) and (b) which would both exit on the same iteration.  If we
2545   // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2546   // we could convert a loop from exiting through (a) to one exiting through
2547   // (b).  Note that this problem exists only for exits with the same exit
2548   // count, and we could be more aggressive when exit counts are known inequal.
2549   llvm::sort(ExitingBlocks,
2550             [&](BasicBlock *A, BasicBlock *B) {
2551               // std::sort sorts in ascending order, so we want the inverse of
2552               // the normal dominance relation, plus a tie breaker for blocks
2553               // unordered by dominance.
2554               if (DT->properlyDominates(A, B)) return true;
2555               if (DT->properlyDominates(B, A)) return false;
2556               return A->getName() < B->getName();
2557             });
2558   // Check to see if our exiting blocks form a total order (i.e. a linear
2559   // chain of exits before the backedge).  If they don't, reasoning about
2560   // reachability is complicated and we choose not to do so for now.
2561   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2562     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2563       return false;
2564 
2565   // Given our sorted total order, we know that exit[j] must be evaluated
2566   // after all exit[i] such that i < j.
2567   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2568     if (BadExit(ExitingBlocks[i])) {
2569       ExitingBlocks.resize(i);
2570       break;
2571     }
2572 
2573   if (ExitingBlocks.empty())
2574     return false;
2575 
2576   // We rely on not being able to reach an exiting block on a later iteration
2577   // than its statically computed exit count.  The implementation of
2578   // getExitCount currently has this invariant, but we assert it here so that
2579   // breakage is obvious if this ever changes.
2580   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2581         return DT->dominates(ExitingBB, L->getLoopLatch());
2582       }));
2583 
2584   // At this point, ExitingBlocks consists of only those blocks which are
2585   // predicatable.  Given that, we know we have at least one exit we can
2586   // predicate if the loop doesn't have side effects and doesn't have any
2587   // implicit exits (because then our exact BTC isn't actually exact).
2588   // TODO: As structured, this scan is O(I^2) for loop nests.  We could
2589   // obviously bail out for outer loops, but that seems less than ideal.
2590   // MemorySSA can find memory writes; is that enough to cover *all* side
2591   // effects?
2592   for (BasicBlock *BB : L->blocks())
2593     for (auto &I : *BB)
2594       // TODO: isGuaranteedToTransferExecutionToSuccessor
2595       if (I.mayHaveSideEffects() || I.mayThrow())
2596         return false;
2597 
2598   bool Changed = false;
2599   // Finally, do the actual predication for all predicatable blocks.  A couple
2600   // of notes here:
2601   // 1) We don't bother to constant fold dominated exits with identical exit
2602   //    counts; that's simply a form of CSE/equality propagation and we leave
2603   //    it for dedicated passes.
2604   // 2) We insert the comparison at the branch.  Hoisting introduces additional
2605   //    legality constraints and we leave that to dedicated logic.  We want to
2606   //    predicate even if we can't insert a loop-invariant expression, because
2607   //    peeling or unrolling will likely reduce the cost of the otherwise
2608   //    loop-varying check.
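       //
       // Illustrative sketch of the emitted condition (hypothetical value
       // names): for an exit branch
       //   br i1 %old.cond, label %in.loop, label %exit
       // whose per-exit count %exit.count is not symbolically equal to the
       // exact backedge-taken count %exact.btc, the new condition is
       //   %new.cond = icmp ne iN %exit.count, %exact.btc
       // ('eq' when the exit is taken on the true edge).  When the two counts
       // are the same SCEV, the condition folds directly to a constant
       // true/false instead.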
2609   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2610   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2611   Value *ExactBTCV = nullptr; // Lazily generated if needed.
2612   for (BasicBlock *ExitingBB : ExitingBlocks) {
2613     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2614 
2615     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2616     Value *NewCond;
2617     if (ExitCount == ExactBTC) {
2618       NewCond = L->contains(BI->getSuccessor(0)) ?
2619         B.getFalse() : B.getTrue();
2620     } else {
2621       Value *ECV = Rewriter.expandCodeFor(ExitCount);
2622       if (!ExactBTCV)
2623         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2624       Value *RHS = ExactBTCV;
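           // The two counts may have been expanded in different integer
           // widths; if so, compare them in the wider type, zero-extending
           // both operands (the counts are unsigned quantities).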
2625       if (ECV->getType() != RHS->getType()) {
2626         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2627         ECV = B.CreateZExt(ECV, WiderTy);
2628         RHS = B.CreateZExt(RHS, WiderTy);
2629       }
2630       auto Pred = L->contains(BI->getSuccessor(0)) ?
2631         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2632       NewCond = B.CreateICmp(Pred, ECV, RHS);
2633     }
2634     Value *OldCond = BI->getCondition();
2635     BI->setCondition(NewCond);
2636     if (OldCond->use_empty())
2637       DeadInsts.push_back(OldCond);
2638     Changed = true;
2639   }
2640 
2641   return Changed;
2642 }
2643 
2644 //===----------------------------------------------------------------------===//
2645 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2646 //===----------------------------------------------------------------------===//
2647 
2648 bool IndVarSimplify::run(Loop *L) {
2649   // We need (and expect!) the incoming loop to be in LCSSA.
2650   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2651          "LCSSA required to run indvars!");
2652 
2653   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2654   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2655   //    canonicalization can be a pessimization without LSR to "clean up"
2656   //    afterwards.
2657   //  - We depend on having a preheader; in particular,
2658   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2659   //    and we're in trouble if we can't find the induction variable even when
2660   //    we've manually inserted one.
2661   //  - LFTR relies on having a single backedge.
2662   if (!L->isLoopSimplifyForm())
2663     return false;
2664 
2665 #ifndef NDEBUG
2666   // Used below for a consistency check only
2667   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2668 #endif
2669 
2670   bool Changed = false;
2671   // If there are any floating-point recurrences, attempt to
2672   // transform them to use integer recurrences.
2673   Changed |= rewriteNonIntegerIVs(L);
2674 
2675   // Create a rewriter object which we'll use to transform the code with.
2676   SCEVExpander Rewriter(*SE, DL, "indvars");
2677 #ifndef NDEBUG
2678   Rewriter.setDebugType(DEBUG_TYPE);
2679 #endif
2680 
2681   // Eliminate redundant IV users.
2682   //
2683   // Simplification works best when run before other consumers of SCEV. We
2684   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2685   // other expressions involving loop IVs have been evaluated. This helps SCEV
2686   // set no-wrap flags before normalizing sign/zero extension.
2687   Rewriter.disableCanonicalMode();
2688   Changed |= simplifyAndExtend(L, Rewriter, LI);
2689 
2690   // Check to see if we can compute the final value of any expressions
2691   // that are recurrent in the loop, and substitute the exit values from the
2692   // loop into any instructions outside of the loop that use the final values
2693   // of the current expressions.
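       // For example (illustrative only): given
       //   for (i = 0; i != n; ++i) {}
       //   use(i);
       // the use of 'i' after the loop can be rewritten to use 'n' directly,
       // so the value no longer needs to be carried out of the loop.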
2694   if (ReplaceExitValue != NeverRepl) {
2695     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, Rewriter, DT,
2696                                              ReplaceExitValue, DeadInsts)) {
2697       NumReplaced += Rewrites;
2698       Changed = true;
2699     }
2700   }
2701 
2702   // Eliminate redundant IV cycles.
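       // For example (illustrative only): two IV phis which both start at 0
       // and step by 1 each iteration compute the same value; one is kept and
       // users of the other are rewritten to use it.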
2703   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2704 
2705   // Try to eliminate loop exits based on analyzable exit counts.
2706   if (optimizeLoopExits(L, Rewriter))  {
2707     Changed = true;
2708     // Given we've changed exit counts, notify SCEV
2709     SE->forgetLoop(L);
2710   }
2711 
2712   // Try to form loop invariant tests for loop exits by changing how many
2713   // iterations of the loop run when that is unobservable.
2714   if (predicateLoopExits(L, Rewriter)) {
2715     Changed = true;
2716     // Given we've changed exit counts, notify SCEV
2717     SE->forgetLoop(L);
2718   }
2719 
2720   // If we have a trip count expression, rewrite the loop's exit condition
2721   // using it.
2722   if (!DisableLFTR) {
2723     SmallVector<BasicBlock*, 16> ExitingBlocks;
2724     L->getExitingBlocks(ExitingBlocks);
2725     for (BasicBlock *ExitingBB : ExitingBlocks) {
2726       // Can't rewrite non-branch yet.
2727       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2728         continue;
2729 
2730       // If our exiting block exits multiple loops, we can only rewrite the
2731       // innermost one.  Otherwise, we're changing how many times the innermost
2732       // loop runs before it exits.
2733       if (LI->getLoopFor(ExitingBB) != L)
2734         continue;
2735 
2736       if (!needsLFTR(L, ExitingBB))
2737         continue;
2738 
2739       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2740       if (isa<SCEVCouldNotCompute>(ExitCount))
2741         continue;
2742 
2743       // This was handled above, but as we form SCEVs, we can sometimes refine
2744       // existing ones; this allows an exit count to fold to zero even though it
2745       // didn't when optimizeLoopExits saw it.  Arguably, we should iterate
2746       // until stable to handle cases like this better.
2747       if (ExitCount->isZero())
2748         continue;
2749 
2750       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2751       if (!IndVar)
2752         continue;
2753 
2754       // Avoid high cost expansions.  Note: This heuristic is questionable in
2755       // that our definition of "high cost" is not exactly principled.
2756       if (Rewriter.isHighCostExpansion(ExitCount, L))
2757         continue;
2758 
2759       // Check preconditions for proper SCEVExpander operation. SCEV does not
2760       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2761       // any pass that uses the SCEVExpander must do it. This does not work
2762       // well for loop passes because SCEVExpander makes assumptions about
2763       // all loops, while LoopPassManager only forces the current loop to be
2764       // simplified.
2765       //
2766       // FIXME: SCEV expansion has no way to bail out, so the caller must
2767       // explicitly check any assumptions made by SCEV. Brittle.
2768       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2769       if (!AR || AR->getLoop()->getLoopPreheader())
2770         Changed |= linearFunctionTestReplace(L, ExitingBB,
2771                                              ExitCount, IndVar,
2772                                              Rewriter);
2773     }
2774   }
2775   // Clear the rewriter cache, because values that are in the rewriter's cache
2776   // can be deleted in the loop below, causing the AssertingVH in the cache to
2777   // trigger.
2778   Rewriter.clear();
2779 
2780   // Now that we're done iterating through lists, clean up any instructions
2781   // which are now dead.
2782   while (!DeadInsts.empty())
2783     if (Instruction *Inst =
2784             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2785       Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
2786 
2787   // The Rewriter may not be used from this point on.
2788 
2789   // Loop-invariant instructions in the preheader that aren't used in the
2790   // loop may be sunk below the loop to reduce register pressure.
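       // For example (illustrative only): an address computation created in the
       // preheader but used only after the loop can be moved into the exit
       // block, so it does not occupy a register across the loop body.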
2791   Changed |= sinkUnusedInvariants(L);
2792 
2793   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2794   // trip count and therefore can further simplify exit values in addition to
2795   // rewriteLoopExitValues.
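       // For example (illustrative only, assuming the usual shape handled by
       // that helper): in
       //   for (i = 0; i != n; ++i)
       //     if (flag) break;          // 'flag' is loop invariant
       // an exit phi merging 'i' along the break edge can be rewritten to 0,
       // since that exit can only be taken on the first iteration.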
2796   Changed |= rewriteFirstIterationLoopExitValues(L);
2797 
2798   // Clean up dead instructions.
2799   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2800 
2801   // Check a post-condition.
2802   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2803          "Indvars did not preserve LCSSA!");
2804 
2805   // Verify that LFTR, and any other changes, have not interfered with SCEV's
2806   // ability to compute trip count.  We may have *changed* the exit count, but
2807   // only by reducing it.
2808 #ifndef NDEBUG
2809   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2810     SE->forgetLoop(L);
2811     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2812     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2813         SE->getTypeSizeInBits(NewBECount->getType()))
2814       NewBECount = SE->getTruncateOrNoop(NewBECount,
2815                                          BackedgeTakenCount->getType());
2816     else
2817       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2818                                                  NewBECount->getType());
2819     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2820                                  NewBECount) && "indvars must preserve SCEV");
2821   }
2822 #endif
2823 
2824   return Changed;
2825 }
2826 
2827 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2828                                           LoopStandardAnalysisResults &AR,
2829                                           LPMUpdater &) {
2830   Function *F = L.getHeader()->getParent();
2831   const DataLayout &DL = F->getParent()->getDataLayout();
2832 
2833   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA);
2834   if (!IVS.run(&L))
2835     return PreservedAnalyses::all();
2836 
2837   auto PA = getLoopPassPreservedAnalyses();
2838   PA.preserveSet<CFGAnalyses>();
2839   if (AR.MSSA)
2840     PA.preserve<MemorySSAAnalysis>();
2841   return PA;
2842 }
2843 
2844 namespace {
2845 
2846 struct IndVarSimplifyLegacyPass : public LoopPass {
2847   static char ID; // Pass identification, replacement for typeid
2848 
2849   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2850     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2851   }
2852 
2853   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2854     if (skipLoop(L))
2855       return false;
2856 
2857     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2858     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2859     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2860     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2861     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2862     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2863     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2864     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2865     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2866     MemorySSA *MSSA = nullptr;
2867     if (MSSAAnalysis)
2868       MSSA = &MSSAAnalysis->getMSSA();
2869 
2870     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA);
2871     return IVS.run(L);
2872   }
2873 
2874   void getAnalysisUsage(AnalysisUsage &AU) const override {
2875     AU.setPreservesCFG();
2876     AU.addPreserved<MemorySSAWrapperPass>();
2877     getLoopAnalysisUsage(AU);
2878   }
2879 };
2880 
2881 } // end anonymous namespace
2882 
2883 char IndVarSimplifyLegacyPass::ID = 0;
2884 
2885 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2886                       "Induction Variable Simplification", false, false)
2887 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2888 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2889                     "Induction Variable Simplification", false, false)
2890 
2891 Pass *llvm::createIndVarSimplifyPass() {
2892   return new IndVarSimplifyLegacyPass();
2893 }
2894