xref: /llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision a44b7322a27a636da6a6e1a28091dc7a6d59a4be)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
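// An illustrative sketch of change 2 (hypothetical code, not taken from a
// test): given
//   int i = 0;
//   do { ++i; } while (i != n);
//   use(i);
// the use of 'i' after the loop is rewritten to use the exit value 'n'
// directly. If computing 'i' was the loop's only purpose, the loop becomes
// dead and can be removed by later passes.
//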
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/MemorySSA.h"
42 #include "llvm/Analysis/MemorySSAUpdater.h"
43 #include "llvm/Analysis/ScalarEvolution.h"
44 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
45 #include "llvm/Analysis/TargetLibraryInfo.h"
46 #include "llvm/Analysis/TargetTransformInfo.h"
47 #include "llvm/Analysis/ValueTracking.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/ConstantRange.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Operator.h"
64 #include "llvm/IR/PassManager.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/Use.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/IR/ValueHandle.h"
71 #include "llvm/InitializePasses.h"
72 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/CommandLine.h"
75 #include "llvm/Support/Compiler.h"
76 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/ErrorHandling.h"
78 #include "llvm/Support/MathExtras.h"
79 #include "llvm/Support/raw_ostream.h"
80 #include "llvm/Transforms/Scalar.h"
81 #include "llvm/Transforms/Scalar/LoopPassManager.h"
82 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
83 #include "llvm/Transforms/Utils/Local.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
86 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
87 #include <cassert>
88 #include <cstdint>
89 #include <utility>
90 
91 using namespace llvm;
92 
93 #define DEBUG_TYPE "indvars"
94 
95 STATISTIC(NumWidened     , "Number of indvars widened");
96 STATISTIC(NumReplaced    , "Number of exit values replaced");
97 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
98 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
99 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
100 
101 // Trip count verification can be enabled by default under NDEBUG if we
102 // implement a strong expression equivalence checker in SCEV. Until then, we
103 // use the verify-indvars flag, which may assert in some cases.
104 static cl::opt<bool> VerifyIndvars(
105     "verify-indvars", cl::Hidden,
106     cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
107              "effect in release builds. (Note: this adds additional SCEV "
108              "queries potentially changing the analysis result)"));
109 
110 static cl::opt<ReplaceExitVal> ReplaceExitValue(
111     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
112     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
113     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
114                clEnumValN(OnlyCheapRepl, "cheap",
115                           "only replace exit value when the cost is cheap"),
116                clEnumValN(NoHardUse, "noharduse",
117                           "only replace exit values when loop def likely dead"),
118                clEnumValN(AlwaysRepl, "always",
119                           "always replace exit value whenever possible")));
120 
121 static cl::opt<bool> UsePostIncrementRanges(
122   "indvars-post-increment-ranges", cl::Hidden,
123   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
124   cl::init(true));
125 
126 static cl::opt<bool>
127 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
128             cl::desc("Disable Linear Function Test Replace optimization"));
129 
130 static cl::opt<bool>
131 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
132                 cl::desc("Predicate conditions in read only loops"));
133 
134 static cl::opt<bool>
135 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
136                 cl::desc("Allow widening of indvars to eliminate s/zext"));
137 
138 namespace {
139 
140 struct RewritePhi;
141 
142 class IndVarSimplify {
143   LoopInfo *LI;
144   ScalarEvolution *SE;
145   DominatorTree *DT;
146   const DataLayout &DL;
147   TargetLibraryInfo *TLI;
148   const TargetTransformInfo *TTI;
149   std::unique_ptr<MemorySSAUpdater> MSSAU;
150 
151   SmallVector<WeakTrackingVH, 16> DeadInsts;
152 
153   bool handleFloatingPointIV(Loop *L, PHINode *PH);
154   bool rewriteNonIntegerIVs(Loop *L);
155 
156   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
157   /// Try to eliminate loop exits based on analyzable exit counts
158   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
159   /// Try to form loop invariant tests for loop exits by changing how many
160   /// iterations of the loop run when that is unobservable.
161   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
162 
163   bool rewriteFirstIterationLoopExitValues(Loop *L);
164 
165   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
166                                  const SCEV *ExitCount,
167                                  PHINode *IndVar, SCEVExpander &Rewriter);
168 
169   bool sinkUnusedInvariants(Loop *L);
170 
171 public:
172   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
173                  const DataLayout &DL, TargetLibraryInfo *TLI,
174                  TargetTransformInfo *TTI, MemorySSA *MSSA)
175       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
176     if (MSSA)
177       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
178   }
179 
180   bool run(Loop *L);
181 };
182 
183 } // end anonymous namespace
184 
185 /// Determine the insertion point for this user. By default, insert immediately
186 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
187 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
188 /// common dominator for the incoming blocks. A nullptr can be returned if no
189 /// viable location is found: it may happen if User is a PHI and Def only comes
190 /// to this PHI from unreachable blocks.
191 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
192                                           DominatorTree *DT, LoopInfo *LI) {
193   PHINode *PHI = dyn_cast<PHINode>(User);
194   if (!PHI)
195     return User;
196 
197   Instruction *InsertPt = nullptr;
198   for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
199     if (PHI->getIncomingValue(i) != Def)
200       continue;
201 
202     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
203 
204     if (!DT->isReachableFromEntry(InsertBB))
205       continue;
206 
207     if (!InsertPt) {
208       InsertPt = InsertBB->getTerminator();
209       continue;
210     }
211     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
212     InsertPt = InsertBB->getTerminator();
213   }
214 
215   // If we have skipped all inputs, it means that Def only comes to Phi from
216   // unreachable blocks.
217   if (!InsertPt)
218     return nullptr;
219 
220   auto *DefI = dyn_cast<Instruction>(Def);
221   if (!DefI)
222     return InsertPt;
223 
224   assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
225 
226   auto *L = LI->getLoopFor(DefI->getParent());
227   assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
228 
229   for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
230     if (LI->getLoopFor(DTN->getBlock()) == L)
231       return DTN->getBlock()->getTerminator();
232 
233   llvm_unreachable("DefI dominates InsertPt!");
234 }
235 
236 //===----------------------------------------------------------------------===//
237 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
238 //===----------------------------------------------------------------------===//
239 
240 /// Convert APF to an integer, if possible.
241 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
242   bool isExact = false;
243   // See if we can convert this to an int64_t
244   uint64_t UIntVal;
245   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
246                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
247       !isExact)
248     return false;
249   IntVal = UIntVal;
250   return true;
251 }
252 
253 /// If the loop has a floating-point induction variable, then insert a
254 /// corresponding integer induction variable if possible.
255 /// For example,
256 /// for(double i = 0; i < 10000; ++i)
257 ///   bar(i)
258 /// is converted into
259 /// for(int i = 0; i < 10000; ++i)
260 ///   bar((double)i);
261 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
262   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
263   unsigned BackEdge     = IncomingEdge^1;
264 
265   // Check incoming value.
266   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
267 
268   int64_t InitValue;
269   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
270     return false;
271 
272   // Check IV increment. Reject this PN if the increment operation is not
273   // an add or the increment value cannot be represented by an integer.
274   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
275   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
276 
277   // If this is not an add of the PHI with a constantfp, or if the constant fp
278   // is not an integer, bail out.
279   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
280   int64_t IncValue;
281   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
282       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
283     return false;
284 
285   // Check Incr uses. One user is PN and the other user is an exit condition
286   // used by the conditional terminator.
287   Value::user_iterator IncrUse = Incr->user_begin();
288   Instruction *U1 = cast<Instruction>(*IncrUse++);
289   if (IncrUse == Incr->user_end()) return false;
290   Instruction *U2 = cast<Instruction>(*IncrUse++);
291   if (IncrUse != Incr->user_end()) return false;
292 
293   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
294   // only used by a branch, we can't transform it.
295   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
296   if (!Compare)
297     Compare = dyn_cast<FCmpInst>(U2);
298   if (!Compare || !Compare->hasOneUse() ||
299       !isa<BranchInst>(Compare->user_back()))
300     return false;
301 
302   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
303 
304   // We need to verify that the branch actually controls the iteration count
305   // of the loop.  If not, the new IV can overflow and no one will notice.
306   // The branch block must be in the loop and one of the successors must be out
307   // of the loop.
308   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
309   if (!L->contains(TheBr->getParent()) ||
310       (L->contains(TheBr->getSuccessor(0)) &&
311        L->contains(TheBr->getSuccessor(1))))
312     return false;
313 
314   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
315   // transform it.
316   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
317   int64_t ExitValue;
318   if (ExitValueVal == nullptr ||
319       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
320     return false;
321 
322   // Find new predicate for integer comparison.
323   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
324   switch (Compare->getPredicate()) {
325   default: return false;  // Unknown comparison.
326   case CmpInst::FCMP_OEQ:
327   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
328   case CmpInst::FCMP_ONE:
329   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
330   case CmpInst::FCMP_OGT:
331   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
332   case CmpInst::FCMP_OGE:
333   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
334   case CmpInst::FCMP_OLT:
335   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
336   case CmpInst::FCMP_OLE:
337   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
338   }
339 
340   // We convert the floating point induction variable to a signed i32 value if
341   // we can.  This is only safe if the comparison will not overflow in a way
342   // that won't be trapped by the integer equivalent operations.  Check for this
343   // now.
344   // TODO: We could use i64 if it is native and the range requires it.
345 
346   // The start/stride/exit values must all fit in signed i32.
347   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
348     return false;
349 
350   // If not actually striding (add x, 0.0), avoid touching the code.
351   if (IncValue == 0)
352     return false;
353 
354   // Positive and negative strides have different safety conditions.
355   if (IncValue > 0) {
356     // If we have a positive stride, we require the init to be less than the
357     // exit value.
358     if (InitValue >= ExitValue)
359       return false;
360 
361     uint32_t Range = uint32_t(ExitValue-InitValue);
362     // Check for infinite loop, either:
363     // while (i <= Exit) or until (i > Exit)
364     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
365       if (++Range == 0) return false;  // Range overflows.
366     }
367 
368     unsigned Leftover = Range % uint32_t(IncValue);
369 
370     // If this is an equality comparison, we require that the strided value
371     // exactly land on the exit value, otherwise the IV condition will wrap
372     // around and do things the fp IV wouldn't.
373     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
374         Leftover != 0)
375       return false;
376 
377     // If the stride would wrap around the i32 before exiting, we can't
378     // transform the IV.
379     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
380       return false;
381   } else {
382     // If we have a negative stride, we require the init to be greater than the
383     // exit value.
384     if (InitValue <= ExitValue)
385       return false;
386 
387     uint32_t Range = uint32_t(InitValue-ExitValue);
388     // Check for infinite loop, either:
389     // while (i >= Exit) or until (i < Exit)
390     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
391       if (++Range == 0) return false;  // Range overflows.
392     }
393 
394     unsigned Leftover = Range % uint32_t(-IncValue);
395 
396     // If this is an equality comparison, we require that the strided value
397     // exactly land on the exit value, otherwise the IV condition will wrap
398     // around and do things the fp IV wouldn't.
399     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
400         Leftover != 0)
401       return false;
402 
403     // If the stride would wrap around the i32 before exiting, we can't
404     // transform the IV.
405     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
406       return false;
407   }
408 
409   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
410 
411   // Insert new integer induction variable.
412   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
413   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
414                       PN->getIncomingBlock(IncomingEdge));
415 
416   Value *NewAdd =
417     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
418                               Incr->getName()+".int", Incr);
419   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
420 
421   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
422                                       ConstantInt::get(Int32Ty, ExitValue),
423                                       Compare->getName());
424 
425   // In the following deletions, PN may become dead and may be deleted.
426   // Use a WeakTrackingVH to observe whether this happens.
427   WeakTrackingVH WeakPH = PN;
428 
429   // Delete the old floating point exit comparison.  The branch starts using the
430   // new comparison.
431   NewCompare->takeName(Compare);
432   Compare->replaceAllUsesWith(NewCompare);
433   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
434 
435   // Delete the old floating point increment.
436   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
437   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
438 
439   // If the FP induction variable still has uses, this is because something else
440   // in the loop uses its value.  In order to canonicalize the induction
441   // variable, we chose to eliminate the IV and rewrite it in terms of an
442   // int->fp cast.
443   //
444   // We give preference to sitofp over uitofp because it is faster on most
445   // platforms.
446   if (WeakPH) {
447     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
448                                  &*PN->getParent()->getFirstInsertionPt());
449     PN->replaceAllUsesWith(Conv);
450     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
451   }
452   return true;
453 }
454 
455 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
456   // First step.  Check to see if there are any floating-point recurrences.
457   // If there are, change them into integer recurrences, permitting analysis by
458   // the SCEV routines.
459   BasicBlock *Header = L->getHeader();
460 
461   SmallVector<WeakTrackingVH, 8> PHIs;
462   for (PHINode &PN : Header->phis())
463     PHIs.push_back(&PN);
464 
465   bool Changed = false;
466   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
467     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
468       Changed |= handleFloatingPointIV(L, PN);
469 
470   // If the loop previously had a floating-point IV, ScalarEvolution
471   // may not have been able to compute a trip count. Now that we've done some
472   // rewriting, the trip count may be computable.
473   if (Changed)
474     SE->forgetLoop(L);
475   return Changed;
476 }
477 
478 //===---------------------------------------------------------------------===//
479 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
480 // the loop will exit at the first iteration.
481 //===---------------------------------------------------------------------===//
482 
483 /// Check to see if this loop has loop invariant conditions which lead to loop
484 /// exits. If so, we know that if the exit path is taken, it is at the first
485 /// loop iteration. This lets us predict exit values of PHI nodes that live in
486 /// the loop header.
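///
/// Illustrative sketch (hypothetical IR, assumed block and value names):
///   header:
///     %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///     br i1 %inv.cond, label %exit, label %latch  ; %inv.cond is loop-invariant
///   exit:
///     %lcssa = phi i32 [ %iv, %header ]
/// Because %inv.cond cannot change between iterations and %header dominates the
/// latch, this exit can only be taken on the first iteration, so %lcssa can be
/// rewritten to %iv's preheader value, i.e. 0.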
487 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
488   // Verify the input to the pass is already in LCSSA form.
489   assert(L->isLCSSAForm(*DT));
490 
491   SmallVector<BasicBlock *, 8> ExitBlocks;
492   L->getUniqueExitBlocks(ExitBlocks);
493 
494   bool MadeAnyChanges = false;
495   for (auto *ExitBB : ExitBlocks) {
496     // If there are no more PHI nodes in this exit block, then no more
497     // values defined inside the loop are used on this path.
498     for (PHINode &PN : ExitBB->phis()) {
499       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
500            IncomingValIdx != E; ++IncomingValIdx) {
501         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
502 
503         // Can we prove that the exit must run on the first iteration if it
504         // runs at all?  (i.e. early exits are fine for our purposes, but
505         // traces which lead to this exit being taken on the 2nd iteration
506         // aren't.)  Note that this is about whether the exit branch is
507         // executed, not about whether it is taken.
508         if (!L->getLoopLatch() ||
509             !DT->dominates(IncomingBB, L->getLoopLatch()))
510           continue;
511 
512         // Get condition that leads to the exit path.
513         auto *TermInst = IncomingBB->getTerminator();
514 
515         Value *Cond = nullptr;
516         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
517           // Must be a conditional branch, otherwise the block
518           // should not be in the loop.
519           Cond = BI->getCondition();
520         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
521           Cond = SI->getCondition();
522         else
523           continue;
524 
525         if (!L->isLoopInvariant(Cond))
526           continue;
527 
528         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
529 
530         // Only deal with PHIs in the loop header.
531         if (!ExitVal || ExitVal->getParent() != L->getHeader())
532           continue;
533 
534         // If ExitVal is a PHI on the loop header, then we know its
535         // value along this exit because the exit can only be taken
536         // on the first iteration.
537         auto *LoopPreheader = L->getLoopPreheader();
538         assert(LoopPreheader && "Invalid loop");
539         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
540         if (PreheaderIdx != -1) {
541           assert(ExitVal->getParent() == L->getHeader() &&
542                  "ExitVal must be in loop header");
543           MadeAnyChanges = true;
544           PN.setIncomingValue(IncomingValIdx,
545                               ExitVal->getIncomingValue(PreheaderIdx));
546         }
547       }
548     }
549   }
550   return MadeAnyChanges;
551 }
552 
553 //===----------------------------------------------------------------------===//
554 //  IV Widening - Extend the width of an IV to cover its widest uses.
555 //===----------------------------------------------------------------------===//
556 
557 namespace {
558 
559 // Collect information about induction variables that are used by sign/zero
560 // extend operations. This information is recorded by CollectExtend and provides
561 // the input to WidenIV.
562 struct WideIVInfo {
563   PHINode *NarrowIV = nullptr;
564 
565   // Widest integer type created by [sz]ext
566   Type *WidestNativeType = nullptr;
567 
568   // Was a sext user seen before a zext?
569   bool IsSigned = false;
570 };
571 
572 } // end anonymous namespace
573 
574 /// Update information about the induction variable that is extended by this
575 /// sign or zero extend operation. This is used to determine the final width of
576 /// the IV before actually widening it.
577 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
578                         const TargetTransformInfo *TTI) {
579   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
580   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
581     return;
582 
583   Type *Ty = Cast->getType();
584   uint64_t Width = SE->getTypeSizeInBits(Ty);
585   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
586     return;
587 
588   // Check that `Cast` actually extends the induction variable (we rely on this
589   // later).  This takes care of cases where `Cast` is extending a truncation of
590   // the narrow induction variable, and thus can end up being narrower than the
591   // "narrow" induction variable.
592   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
593   if (NarrowIVWidth >= Width)
594     return;
595 
596   // Cast is either an sext or zext up to this point.
597   // We should not widen an indvar if arithmetic on the wider indvar is more
598   // expensive than those on the narrower indvar. We check only the cost of ADD
599   // because at least an ADD is required to increment the induction variable. We
600   // could compute more comprehensively the cost of all instructions on the
601   // induction variable when necessary.
602   if (TTI &&
603       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
604           TTI->getArithmeticInstrCost(Instruction::Add,
605                                       Cast->getOperand(0)->getType())) {
606     return;
607   }
608 
609   if (!WI.WidestNativeType) {
610     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
611     WI.IsSigned = IsSigned;
612     return;
613   }
614 
615   // We extend the IV to satisfy the sign of its first user, arbitrarily.
616   if (WI.IsSigned != IsSigned)
617     return;
618 
619   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
620     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
621 }
622 
623 namespace {
624 
625 /// Record a link in the Narrow IV def-use chain along with the WideIV that
626 /// computes the same value as the Narrow IV def.  This avoids caching Use*
627 /// pointers.
628 struct NarrowIVDefUse {
629   Instruction *NarrowDef = nullptr;
630   Instruction *NarrowUse = nullptr;
631   Instruction *WideDef = nullptr;
632 
633   // True if the narrow def is never negative.  Tracking this information lets
634   // us use a sign extension instead of a zero extension or vice versa, when
635   // profitable and legal.
636   bool NeverNegative = false;
637 
638   NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
639                  bool NeverNegative)
640       : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
641         NeverNegative(NeverNegative) {}
642 };
643 
644 /// The goal of this transform is to remove sign and zero extends without
645 /// creating any new induction variables. To do this, it creates a new phi of
646 /// the wider type and redirects all users, either removing extends or inserting
647 /// truncs whenever we stop propagating the type.
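///
/// For instance (an illustrative sketch, hypothetical IR):
///   %iv      = phi i32 [ 0, %ph ], [ %iv.next, %latch ]
///   %iv.sext = sext i32 %iv to i64
/// becomes a single wide recurrence
///   %iv.wide = phi i64 [ 0, %ph ], [ %iv.wide.next, %latch ]
/// with users of %iv.sext redirected to %iv.wide, and any remaining narrow-only
/// users fed by a trunc of %iv.wide.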
648 class WidenIV {
649   // Parameters
650   PHINode *OrigPhi;
651   Type *WideType;
652 
653   // Context
654   LoopInfo        *LI;
655   Loop            *L;
656   ScalarEvolution *SE;
657   DominatorTree   *DT;
658 
659   // Does the module have any calls to the llvm.experimental.guard intrinsic
660   // at all? If not we can avoid scanning instructions looking for guards.
661   bool HasGuards;
662 
663   // Result
664   PHINode *WidePhi = nullptr;
665   Instruction *WideInc = nullptr;
666   const SCEV *WideIncExpr = nullptr;
667   SmallVectorImpl<WeakTrackingVH> &DeadInsts;
668 
669   SmallPtrSet<Instruction *,16> Widened;
670   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
671 
672   enum ExtendKind { ZeroExtended, SignExtended, Unknown };
673 
674   // A map tracking the kind of extension used to widen each narrow IV
675   // and narrow IV user.
676   // Key: pointer to a narrow IV or IV user.
677   // Value: the kind of extension used to widen this Instruction.
678   DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
679 
680   using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
681 
682   // A map with control-dependent ranges for post increment IV uses. The key is
683   // a pair of IV def and a use of this def denoting the context. The value is
684   // a ConstantRange representing possible values of the def at the given
685   // context.
686   DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
687 
688   Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
689                                               Instruction *UseI) {
690     DefUserPair Key(Def, UseI);
691     auto It = PostIncRangeInfos.find(Key);
692     return It == PostIncRangeInfos.end()
693                ? Optional<ConstantRange>(None)
694                : Optional<ConstantRange>(It->second);
695   }
696 
697   void calculatePostIncRanges(PHINode *OrigPhi);
698   void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
699 
700   void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
701     DefUserPair Key(Def, UseI);
702     auto It = PostIncRangeInfos.find(Key);
703     if (It == PostIncRangeInfos.end())
704       PostIncRangeInfos.insert({Key, R});
705     else
706       It->second = R.intersectWith(It->second);
707   }
708 
709 public:
710   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
711           DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
712           bool HasGuards)
713       : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
714         L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
715         HasGuards(HasGuards), DeadInsts(DI) {
716     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
717     ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
718   }
719 
720   PHINode *createWideIV(SCEVExpander &Rewriter);
721 
722 protected:
723   Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
724                           Instruction *Use);
725 
726   Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
727   Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
728                                      const SCEVAddRecExpr *WideAR);
729   Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
730 
731   ExtendKind getExtendKind(Instruction *I);
732 
733   using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
734 
735   WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
736 
737   WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
738 
739   const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
740                               unsigned OpCode) const;
741 
742   Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
743 
744   bool widenLoopCompare(NarrowIVDefUse DU);
745   bool widenWithVariantUse(NarrowIVDefUse DU);
746 
747   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
748 };
749 
750 } // end anonymous namespace
751 
752 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
753                                  bool IsSigned, Instruction *Use) {
754   // Set the debug location and conservative insertion point.
755   IRBuilder<> Builder(Use);
756   // Hoist the insertion point into loop preheaders as far as possible.
757   for (const Loop *L = LI->getLoopFor(Use->getParent());
758        L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
759        L = L->getParentLoop())
760     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
761 
762   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
763                     Builder.CreateZExt(NarrowOper, WideType);
764 }
765 
766 /// Instantiate a wide operation to replace a narrow operation. This only needs
767 /// to handle operations that can evaluate to SCEVAddRec. It can safely return
768 /// nullptr for any operation we decide not to clone.
769 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
770                                   const SCEVAddRecExpr *WideAR) {
771   unsigned Opcode = DU.NarrowUse->getOpcode();
772   switch (Opcode) {
773   default:
774     return nullptr;
775   case Instruction::Add:
776   case Instruction::Mul:
777   case Instruction::UDiv:
778   case Instruction::Sub:
779     return cloneArithmeticIVUser(DU, WideAR);
780 
781   case Instruction::And:
782   case Instruction::Or:
783   case Instruction::Xor:
784   case Instruction::Shl:
785   case Instruction::LShr:
786   case Instruction::AShr:
787     return cloneBitwiseIVUser(DU);
788   }
789 }
790 
791 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
792   Instruction *NarrowUse = DU.NarrowUse;
793   Instruction *NarrowDef = DU.NarrowDef;
794   Instruction *WideDef = DU.WideDef;
795 
796   LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
797 
798   // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
799   // about the narrow operand yet so must insert a [sz]ext. It is probably loop
800   // invariant and will be folded or hoisted. If it actually comes from a
801   // widened IV, it should be removed during a future call to widenIVUse.
802   bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
803   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
804                    ? WideDef
805                    : createExtendInst(NarrowUse->getOperand(0), WideType,
806                                       IsSigned, NarrowUse);
807   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
808                    ? WideDef
809                    : createExtendInst(NarrowUse->getOperand(1), WideType,
810                                       IsSigned, NarrowUse);
811 
812   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
813   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
814                                         NarrowBO->getName());
815   IRBuilder<> Builder(NarrowUse);
816   Builder.Insert(WideBO);
817   WideBO->copyIRFlags(NarrowBO);
818   return WideBO;
819 }
820 
821 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
822                                             const SCEVAddRecExpr *WideAR) {
823   Instruction *NarrowUse = DU.NarrowUse;
824   Instruction *NarrowDef = DU.NarrowDef;
825   Instruction *WideDef = DU.WideDef;
826 
827   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
828 
829   unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
830 
831   // We're trying to find X such that
832   //
833   //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
834   //
835   // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
836   // and check using SCEV if any of them are correct.
837 
838   // Returns true if extending NonIVNarrowDef according to `SignExt` is a
839   // correct solution to X.
840   auto GuessNonIVOperand = [&](bool SignExt) {
841     const SCEV *WideLHS;
842     const SCEV *WideRHS;
843 
844     auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
845       if (SignExt)
846         return SE->getSignExtendExpr(S, Ty);
847       return SE->getZeroExtendExpr(S, Ty);
848     };
849 
850     if (IVOpIdx == 0) {
851       WideLHS = SE->getSCEV(WideDef);
852       const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
853       WideRHS = GetExtend(NarrowRHS, WideType);
854     } else {
855       const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
856       WideLHS = GetExtend(NarrowLHS, WideType);
857       WideRHS = SE->getSCEV(WideDef);
858     }
859 
860     // WideUse is "WideDef `op.wide` X" as described in the comment.
861     const SCEV *WideUse = nullptr;
862 
863     switch (NarrowUse->getOpcode()) {
864     default:
865       llvm_unreachable("No other possibility!");
866 
867     case Instruction::Add:
868       WideUse = SE->getAddExpr(WideLHS, WideRHS);
869       break;
870 
871     case Instruction::Mul:
872       WideUse = SE->getMulExpr(WideLHS, WideRHS);
873       break;
874 
875     case Instruction::UDiv:
876       WideUse = SE->getUDivExpr(WideLHS, WideRHS);
877       break;
878 
879     case Instruction::Sub:
880       WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
881       break;
882     }
883 
884     return WideUse == WideAR;
885   };
886 
887   bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
888   if (!GuessNonIVOperand(SignExtend)) {
889     SignExtend = !SignExtend;
890     if (!GuessNonIVOperand(SignExtend))
891       return nullptr;
892   }
893 
894   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
895                    ? WideDef
896                    : createExtendInst(NarrowUse->getOperand(0), WideType,
897                                       SignExtend, NarrowUse);
898   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
899                    ? WideDef
900                    : createExtendInst(NarrowUse->getOperand(1), WideType,
901                                       SignExtend, NarrowUse);
902 
903   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
904   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
905                                         NarrowBO->getName());
906 
907   IRBuilder<> Builder(NarrowUse);
908   Builder.Insert(WideBO);
909   WideBO->copyIRFlags(NarrowBO);
910   return WideBO;
911 }
912 
913 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
914   auto It = ExtendKindMap.find(I);
915   assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
916   return It->second;
917 }
918 
919 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
920                                      unsigned OpCode) const {
921   if (OpCode == Instruction::Add)
922     return SE->getAddExpr(LHS, RHS);
923   if (OpCode == Instruction::Sub)
924     return SE->getMinusSCEV(LHS, RHS);
925   if (OpCode == Instruction::Mul)
926     return SE->getMulExpr(LHS, RHS);
927 
928   llvm_unreachable("Unsupported opcode.");
929 }
930 
931 /// No-wrap operations can transfer sign extension of their result to their
932 /// operands. Generate the SCEV value for the widened operation without
933 /// actually modifying the IR yet. If the expression after extending the
934 /// operands is an AddRec for this loop, return the AddRec and the kind of
935 /// extension used.
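///
/// For example (a sketch, assuming the narrow add carries the nsw flag and the
/// IV was widened with sext):
///   sext(add nsw %narrow.iv, %step) == add (sext %narrow.iv), (sext %step)
/// so extending the other operand as well lets SCEV fold the result into an
/// add recurrence over the wide type.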
936 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
937   // Handle the common case of add<nsw/nuw>
938   const unsigned OpCode = DU.NarrowUse->getOpcode();
939   // Only Add/Sub/Mul instructions supported yet.
940   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
941       OpCode != Instruction::Mul)
942     return {nullptr, Unknown};
943 
944   // One operand (NarrowDef) has already been extended to WideDef. Now determine
945   // if extending the other will lead to a recurrence.
946   const unsigned ExtendOperIdx =
947       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
948   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
949 
950   const SCEV *ExtendOperExpr = nullptr;
951   const OverflowingBinaryOperator *OBO =
952     cast<OverflowingBinaryOperator>(DU.NarrowUse);
953   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
954   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
955     ExtendOperExpr = SE->getSignExtendExpr(
956       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
957   else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
958     ExtendOperExpr = SE->getZeroExtendExpr(
959       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
960   else
961     return {nullptr, Unknown};
962 
963   // When creating this SCEV expr, don't apply the current operation's NSW or NUW
964   // flags. This instruction may be guarded by control flow that the no-wrap
965   // behavior depends on. Non-control-equivalent instructions can be mapped to
966   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
967   // semantics to those operations.
968   const SCEV *lhs = SE->getSCEV(DU.WideDef);
969   const SCEV *rhs = ExtendOperExpr;
970 
971   // Let's swap operands to the initial order for the case of non-commutative
972   // operations, like SUB. See PR21014.
973   if (ExtendOperIdx == 0)
974     std::swap(lhs, rhs);
975   const SCEVAddRecExpr *AddRec =
976       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
977 
978   if (!AddRec || AddRec->getLoop() != L)
979     return {nullptr, Unknown};
980 
981   return {AddRec, ExtKind};
982 }
983 
984 /// Is this instruction potentially interesting for further simplification after
985 /// widening its type? In other words, can the extend be safely hoisted out of
986 /// the loop, with SCEV reducing the value to a recurrence on the same loop? If
987 /// so, return the extended recurrence and the kind of extension used. Otherwise
988 /// return {nullptr, Unknown}.
989 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
990   if (!SE->isSCEVable(DU.NarrowUse->getType()))
991     return {nullptr, Unknown};
992 
993   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
994   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
995       SE->getTypeSizeInBits(WideType)) {
996     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
997     // index. So don't follow this use.
998     return {nullptr, Unknown};
999   }
1000 
1001   const SCEV *WideExpr;
1002   ExtendKind ExtKind;
1003   if (DU.NeverNegative) {
1004     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1005     if (isa<SCEVAddRecExpr>(WideExpr))
1006       ExtKind = SignExtended;
1007     else {
1008       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1009       ExtKind = ZeroExtended;
1010     }
1011   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1012     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1013     ExtKind = SignExtended;
1014   } else {
1015     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1016     ExtKind = ZeroExtended;
1017   }
1018   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1019   if (!AddRec || AddRec->getLoop() != L)
1020     return {nullptr, Unknown};
1021   return {AddRec, ExtKind};
1022 }
1023 
1024 /// This IV user cannot be widened. Replace this use of the original narrow IV
1025 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1026 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1027   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1028   if (!InsertPt)
1029     return;
1030   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1031                     << *DU.NarrowUse << "\n");
1032   IRBuilder<> Builder(InsertPt);
1033   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1034   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1035 }
1036 
1037 /// If the narrow use is a compare instruction, then widen the compare
1038 /// (and possibly the other operand). The extend operation is hoisted into the
1039 /// loop preheader as far as possible.
1040 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1041   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1042   if (!Cmp)
1043     return false;
1044 
1045   // We can legally widen the comparison in the following two cases:
1046   //
1047   //  - The signedness of the IV extension and comparison match
1048   //
1049   //  - The narrow IV is always positive (and thus its sign extension is equal
1050   //    to its zero extension).  For instance, let's say we're zero extending
1051   //    %narrow for the following use
1052   //
1053   //      icmp slt i32 %narrow, %val   ... (A)
1054   //
1055   //    and %narrow is always positive.  Then
1056   //
1057   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1058   //          == icmp slt i32 zext(%narrow), sext(%val)
1059   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1060   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1061     return false;
1062 
1063   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1064   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1065   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1066   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1067 
1068   // Widen the compare instruction.
1069   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1070   if (!InsertPt)
1071     return false;
1072   IRBuilder<> Builder(InsertPt);
1073   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1074 
1075   // Widen the other operand of the compare, if necessary.
1076   if (CastWidth < IVWidth) {
1077     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1078     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1079   }
1080   return true;
1081 }
1082 
1083 // widenIVUse avoids generating a trunc by evaluating the use as an AddRec; this
1084 // will not work when:
1085 //    1) SCEV traces back to an instruction inside the loop that SCEV cannot
1086 //       expand, e.g. add %indvar, (load %addr)
1087 //    2) SCEV finds a loop variant, e.g. add %indvar, %loopvariant
1088 // When SCEV fails to avoid the trunc, we can still try an instruction-combining
1089 // approach to prove that the trunc is not required. This can be further
1090 // extended with other instruction-combining checks, but for now we handle the
1091 // following case (sub can be "add" or "mul", and "nsw + sext" can be "nuw + zext")
1092 //
1093 // Src:
1094 //   %c = sub nsw %b, %indvar
1095 //   %d = sext %c to i64
1096 // Dst:
1097 //   %indvar.ext1 = sext %indvar to i64
1098 //   %m = sext %b to i64
1099 //   %d = sub nsw i64 %m, %indvar.ext1
1100 // Therefore, as long as the result of the add/sub/mul is extended to the wide
1101 // type, no trunc is required regardless of how %b is generated. This pattern is
1102 // common when calculating addresses on 64-bit architectures.
1103 bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) {
1104   Instruction *NarrowUse = DU.NarrowUse;
1105   Instruction *NarrowDef = DU.NarrowDef;
1106   Instruction *WideDef = DU.WideDef;
1107 
1108   // Handle the common case of add<nsw/nuw>
1109   const unsigned OpCode = NarrowUse->getOpcode();
1110   // Only Add/Sub/Mul instructions are supported.
1111   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1112       OpCode != Instruction::Mul)
1113     return false;
1114 
1115   // The operand that is not defined by NarrowDef of DU. Let's call it the
1116   // other operand.
1117   assert((NarrowUse->getOperand(0) == NarrowDef ||
1118           NarrowUse->getOperand(1) == NarrowDef) &&
1119          "bad DU");
1120 
1121   const OverflowingBinaryOperator *OBO =
1122     cast<OverflowingBinaryOperator>(NarrowUse);
1123   ExtendKind ExtKind = getExtendKind(NarrowDef);
1124   bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap();
1125   bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap();
1126   if (!CanSignExtend && !CanZeroExtend)
1127     return false;
1128 
1129   // Verify that the defining operand is an AddRec.
1130   const SCEV *Op1 = SE->getSCEV(WideDef);
1131   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1132   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1133     return false;
1134 
1135   for (Use &U : NarrowUse->uses()) {
1136     Instruction *User = nullptr;
1137     if (ExtKind == SignExtended)
1138       User = dyn_cast<SExtInst>(U.getUser());
1139     else
1140       User = dyn_cast<ZExtInst>(U.getUser());
1141     if (!User || User->getType() != WideType)
1142       return false;
1143   }
1144 
1145   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1146 
1147   // Generate the widened use instruction.
1148   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1149                    ? WideDef
1150                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1151                                       ExtKind, NarrowUse);
1152   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1153                    ? WideDef
1154                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1155                                       ExtKind, NarrowUse);
1156 
1157   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1158   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1159                                         NarrowBO->getName());
1160   IRBuilder<> Builder(NarrowUse);
1161   Builder.Insert(WideBO);
1162   WideBO->copyIRFlags(NarrowBO);
1163   ExtendKindMap[NarrowUse] = ExtKind;
1164 
1165   for (Use &U : NarrowUse->uses()) {
1166     Instruction *User = nullptr;
1167     if (ExtKind == SignExtended)
1168       User = cast<SExtInst>(U.getUser());
1169     else
1170       User = cast<ZExtInst>(U.getUser());
1171     assert(User->getType() == WideType && "Checked before!");
1172     LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1173                       << *WideBO << "\n");
1174     ++NumElimExt;
1175     User->replaceAllUsesWith(WideBO);
1176     DeadInsts.emplace_back(User);
1177   }
1178   return true;
1179 }
1180 
1181 /// Determine whether an individual user of the narrow IV can be widened. If so,
1182 /// return the wide clone of the user.
1183 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1184   assert(ExtendKindMap.count(DU.NarrowDef) &&
1185          "Should already know the kind of extension used to widen NarrowDef");
1186 
1187   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1188   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1189     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1190       // For LCSSA phis, sink the truncate outside the loop.
1191       // After SimplifyCFG most loop exit targets have a single predecessor.
1192       // Otherwise fall back to a truncate within the loop.
1193       if (UsePhi->getNumOperands() != 1)
1194         truncateIVUse(DU, DT, LI);
1195       else {
1196         // Widening the PHI requires us to insert a trunc.  The logical place
1197         // for this trunc is in the same BB as the PHI.  This is not possible if
1198         // the BB is terminated by a catchswitch.
1199         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1200           return nullptr;
1201 
1202         PHINode *WidePhi =
1203           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1204                           UsePhi);
1205         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1206         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1207         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1208         UsePhi->replaceAllUsesWith(Trunc);
1209         DeadInsts.emplace_back(UsePhi);
1210         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1211                           << *WidePhi << "\n");
1212       }
1213       return nullptr;
1214     }
1215   }
1216 
1217   // This narrow use can be widened by a sext if it's non-negative or its narrow
1218   // def was widened by a sext. Same for zext.
1219   auto canWidenBySExt = [&]() {
1220     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1221   };
1222   auto canWidenByZExt = [&]() {
1223     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1224   };
1225 
1226   // Our raison d'etre! Eliminate sign and zero extension.
1227   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1228       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1229     Value *NewDef = DU.WideDef;
1230     if (DU.NarrowUse->getType() != WideType) {
1231       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1232       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1233       if (CastWidth < IVWidth) {
1234         // The cast isn't as wide as the IV, so insert a Trunc.
1235         IRBuilder<> Builder(DU.NarrowUse);
1236         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1237       }
1238       else {
1239         // A wider extend was hidden behind a narrower one. This may induce
1240         // another round of IV widening in which the intermediate IV becomes
1241         // dead. It should be very rare.
1242         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1243                           << " not wide enough to subsume " << *DU.NarrowUse
1244                           << "\n");
1245         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1246         NewDef = DU.NarrowUse;
1247       }
1248     }
1249     if (NewDef != DU.NarrowUse) {
1250       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1251                         << " replaced by " << *DU.WideDef << "\n");
1252       ++NumElimExt;
1253       DU.NarrowUse->replaceAllUsesWith(NewDef);
1254       DeadInsts.emplace_back(DU.NarrowUse);
1255     }
1256     // Now that the extend is gone, we want to expose its uses for potential
1257     // further simplification. We don't need to directly inform SimplifyIVUsers
1258     // of the new users, because their parent IV will be processed later as a
1259     // new loop phi. If we preserved IVUsers analysis, we would also want to
1260     // push the uses of WideDef here.
1261 
1262     // No further widening is needed. The deceased [sz]ext had done it for us.
1263     return nullptr;
1264   }
1265 
1266   // Does this user itself evaluate to a recurrence after widening?
1267   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1268   if (!WideAddRec.first)
1269     WideAddRec = getWideRecurrence(DU);
1270 
1271   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1272   if (!WideAddRec.first) {
1273     // If use is a loop condition, try to promote the condition instead of
1274     // truncating the IV first.
1275     if (widenLoopCompare(DU))
1276       return nullptr;
1277 
1278     // We are about to generate a truncate instruction that may hurt
1279     // performance because the scalar evolution expression computed earlier
1280     // in WideAddRec.first does not indicate a polynomial induction expression.
1281     // In that case, look at the operands of the use instruction to determine
1282     // if we can still widen the use instead of truncating its operand.
1283     if (widenWithVariantUse(DU))
1284       return nullptr;
1285 
1286     // This user does not evaluate to a recurrence after widening, so don't
1287     // follow it. Instead insert a Trunc to kill off the original use,
1288     // eventually isolating the original narrow IV so it can be removed.
1289     truncateIVUse(DU, DT, LI);
1290     return nullptr;
1291   }
1292   // Assume block terminators cannot evaluate to a recurrence. We can't
1293   // insert a Trunc after a terminator if there happens to be a critical edge.
1294   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1295          "SCEV is not expected to evaluate a block terminator");
1296 
1297   // Reuse the IV increment that SCEVExpander created as long as it dominates
1298   // NarrowUse.
1299   Instruction *WideUse = nullptr;
1300   if (WideAddRec.first == WideIncExpr &&
1301       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1302     WideUse = WideInc;
1303   else {
1304     WideUse = cloneIVUser(DU, WideAddRec.first);
1305     if (!WideUse)
1306       return nullptr;
1307   }
1308   // Evaluation of WideAddRec ensured that the narrow expression could be
1309   // extended outside the loop without overflow. This suggests that the wide use
1310   // evaluates to the same expression as the extended narrow use, but doesn't
1311   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1312   // where it fails, we simply throw away the newly created wide use.
1313   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1314     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1315                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1316                       << "\n");
1317     DeadInsts.emplace_back(WideUse);
1318     return nullptr;
1319   }
1320 
1321   // If we've reached this point, we are going to replace DU.NarrowUse with
1322   // WideUse, so reattach any debug values to the wide use.
1323   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1324 
1325   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1326   // Returning WideUse pushes it on the worklist.
1327   return WideUse;
1328 }
1329 
1330 /// Add eligible users of NarrowDef to NarrowIVUsers.
1331 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1332   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1333   bool NonNegativeDef =
1334       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1335                            SE->getZero(NarrowSCEV->getType()));
1336   for (User *U : NarrowDef->users()) {
1337     Instruction *NarrowUser = cast<Instruction>(U);
1338 
1339     // Handle data flow merges and bizarre phi cycles.
1340     if (!Widened.insert(NarrowUser).second)
1341       continue;
1342 
1343     bool NonNegativeUse = false;
1344     if (!NonNegativeDef) {
1345       // We might have control-dependent range information for this context.
1346       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1347         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1348     }
1349 
1350     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1351                                NonNegativeDef || NonNegativeUse);
1352   }
1353 }
1354 
1355 /// Process a single induction variable. First use the SCEVExpander to create a
1356 /// wide induction variable that evaluates to the same recurrence as the
1357 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1358 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1359 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1360 ///
1361 /// It would be simpler to delete uses as they are processed, but we must avoid
1362 /// invalidating SCEV expressions.
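///
/// As an illustrative sketch (not taken from any specific test case), a loop
/// such as
///   for (int32_t i = 0; i != n; ++i) use((int64_t)i);
/// can be rewritten to increment a single 64-bit induction variable, so the
/// sign extension no longer has to be performed on every iteration.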
1363 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1364   // Bail if we disallowed widening.
1365   if (!AllowIVWidening)
1366     return nullptr;
1367 
1368   // Is this phi an induction variable?
1369   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1370   if (!AddRec)
1371     return nullptr;
1372 
1373   // Widen the induction variable expression.
1374   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1375                                ? SE->getSignExtendExpr(AddRec, WideType)
1376                                : SE->getZeroExtendExpr(AddRec, WideType);
1377 
1378   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1379          "Expect the new IV expression to preserve its type");
1380 
1381   // Can the IV be extended outside the loop without overflow?
1382   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1383   if (!AddRec || AddRec->getLoop() != L)
1384     return nullptr;
1385 
1386   // An AddRec must have loop-invariant operands. Since this AddRec is
1387   // materialized by a loop header phi, the expression cannot have any post-loop
1388   // operands, so they must dominate the loop header.
1389   assert(
1390       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1391       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1392       "Loop header phi recurrence inputs do not dominate the loop");
1393 
1394   // Iterate over IV uses (including transitive ones) looking for IV increments
1395   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1396   // the increment, calculate control-dependent range information based on
1397   // dominating conditions inside of the loop (e.g. a range check inside of the
1398   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1399   //
1400   // Control-dependent range information is later used to prove that a narrow
1401   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1402   // this on demand because when pushNarrowIVUsers needs this information some
1403   // of the dominating conditions might be already widened.
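  //
  // E.g. (hypothetical sketch): for an increment '%inc = add nsw i32 %iv, 1',
  // a dominating check such as 'icmp sgt i32 %iv, -1' constrains the range of
  // %iv on that path; adding the non-negative step to that range (with nsw)
  // then shows that %inc cannot be negative at the guarded use.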
1404   if (UsePostIncrementRanges)
1405     calculatePostIncRanges(OrigPhi);
1406 
1407   // The rewriter provides a value for the desired IV expression. This may
1408   // either find an existing phi or materialize a new one. Either way, we
1409   // expect a well-formed cyclic phi-with-increments, i.e. any operand not part
1410   // of the phi-SCC dominates the loop entry.
1411   Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
1412   Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
1413   // If the expanded value is not a phi node (for example a cast such as
1414   // bitcast, inttoptr or ptrtoint), just skip it for now.
1415   if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
1416     // If the cast is an inserted instruction without any users, remove it so
1417     // that the pass does not touch the function, since we cannot widen the
1418     // phi.
1419     if (ExpandInst->hasNUses(0) &&
1420         Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
1421       DeadInsts.emplace_back(ExpandInst);
1422     return nullptr;
1423   }
1424 
1425   // Remembering the WideIV increment generated by SCEVExpander allows
1426   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1427   // employ a general reuse mechanism because the call above is the only call to
1428   // SCEVExpander. From then on, we produce 1-to-1 narrow to wide IV uses.
1429   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1430     WideInc =
1431       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1432     WideIncExpr = SE->getSCEV(WideInc);
1433     // Propagate the debug location associated with the original loop increment
1434     // to the new (widened) increment.
1435     auto *OrigInc =
1436       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1437     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1438   }
1439 
1440   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1441   ++NumWidened;
1442 
1443   // Traverse the def-use chain using a worklist starting at the original IV.
1444   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1445 
1446   Widened.insert(OrigPhi);
1447   pushNarrowIVUsers(OrigPhi, WidePhi);
1448 
1449   while (!NarrowIVUsers.empty()) {
1450     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1451 
1452     // Process a def-use edge. This may replace the use, so don't hold a
1453     // use_iterator across it.
1454     Instruction *WideUse = widenIVUse(DU, Rewriter);
1455 
1456     // Follow all def-use edges from the previous narrow use.
1457     if (WideUse)
1458       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1459 
1460     // widenIVUse may have removed the def-use edge.
1461     if (DU.NarrowDef->use_empty())
1462       DeadInsts.emplace_back(DU.NarrowDef);
1463   }
1464 
1465   // Attach any debug information to the new PHI.
1466   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1467 
1468   return WidePhi;
1469 }
1470 
1471 /// Calculates the control-dependent range for the given def in the given
1472 /// context by looking at dominating conditions inside the loop.
1473 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1474                                     Instruction *NarrowUser) {
1475   using namespace llvm::PatternMatch;
1476 
1477   Value *NarrowDefLHS;
1478   const APInt *NarrowDefRHS;
1479   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1480                                  m_APInt(NarrowDefRHS))) ||
1481       !NarrowDefRHS->isNonNegative())
1482     return;
1483 
1484   auto UpdateRangeFromCondition = [&] (Value *Condition,
1485                                        bool TrueDest) {
1486     CmpInst::Predicate Pred;
1487     Value *CmpRHS;
1488     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1489                                  m_Value(CmpRHS))))
1490       return;
1491 
1492     CmpInst::Predicate P =
1493             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1494 
1495     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1496     auto CmpConstrainedLHSRange =
1497             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1498     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1499         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1500 
1501     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1502   };
1503 
1504   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1505     if (!HasGuards)
1506       return;
1507 
1508     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1509                                      Ctx->getParent()->rend())) {
1510       Value *C = nullptr;
1511       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1512         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1513     }
1514   };
1515 
1516   UpdateRangeFromGuards(NarrowUser);
1517 
1518   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1519   // If NarrowUserBB is statically unreachable, asking dominator queries may
1520   // yield surprising results (e.g. the block may not have a dom tree node).
1521   if (!DT->isReachableFromEntry(NarrowUserBB))
1522     return;
1523 
1524   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1525        L->contains(DTB->getBlock());
1526        DTB = DTB->getIDom()) {
1527     auto *BB = DTB->getBlock();
1528     auto *TI = BB->getTerminator();
1529     UpdateRangeFromGuards(TI);
1530 
1531     auto *BI = dyn_cast<BranchInst>(TI);
1532     if (!BI || !BI->isConditional())
1533       continue;
1534 
1535     auto *TrueSuccessor = BI->getSuccessor(0);
1536     auto *FalseSuccessor = BI->getSuccessor(1);
1537 
1538     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1539       return BBE.isSingleEdge() &&
1540              DT->dominates(BBE, NarrowUser->getParent());
1541     };
1542 
1543     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1544       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1545 
1546     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1547       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1548   }
1549 }
1550 
1551 /// Calculates PostIncRangeInfos map for the given IV
1552 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1553   SmallPtrSet<Instruction *, 16> Visited;
1554   SmallVector<Instruction *, 6> Worklist;
1555   Worklist.push_back(OrigPhi);
1556   Visited.insert(OrigPhi);
1557 
1558   while (!Worklist.empty()) {
1559     Instruction *NarrowDef = Worklist.pop_back_val();
1560 
1561     for (Use &U : NarrowDef->uses()) {
1562       auto *NarrowUser = cast<Instruction>(U.getUser());
1563 
1564       // Don't go looking outside the current loop.
1565       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1566       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1567         continue;
1568 
1569       if (!Visited.insert(NarrowUser).second)
1570         continue;
1571 
1572       Worklist.push_back(NarrowUser);
1573 
1574       calculatePostIncRange(NarrowDef, NarrowUser);
1575     }
1576   }
1577 }
1578 
1579 //===----------------------------------------------------------------------===//
1580 //  Live IV Reduction - Minimize IVs live across the loop.
1581 //===----------------------------------------------------------------------===//
1582 
1583 //===----------------------------------------------------------------------===//
1584 //  Simplification of IV users based on SCEV evaluation.
1585 //===----------------------------------------------------------------------===//
1586 
1587 namespace {
1588 
1589 class IndVarSimplifyVisitor : public IVVisitor {
1590   ScalarEvolution *SE;
1591   const TargetTransformInfo *TTI;
1592   PHINode *IVPhi;
1593 
1594 public:
1595   WideIVInfo WI;
1596 
1597   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1598                         const TargetTransformInfo *TTI,
1599                         const DominatorTree *DTree)
1600     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1601     DT = DTree;
1602     WI.NarrowIV = IVPhi;
1603   }
1604 
1605   // Implement the interface used by simplifyUsersOfIV.
1606   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1607 };
1608 
1609 } // end anonymous namespace
1610 
1611 /// Iteratively perform simplification on a worklist of IV users. Each
1612 /// successive simplification may push more users which may themselves be
1613 /// candidates for simplification.
1614 ///
1615 /// Sign/Zero extend elimination is interleaved with IV simplification.
1616 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1617                                        SCEVExpander &Rewriter,
1618                                        LoopInfo *LI) {
1619   SmallVector<WideIVInfo, 8> WideIVs;
1620 
1621   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1622           Intrinsic::getName(Intrinsic::experimental_guard));
1623   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1624 
1625   SmallVector<PHINode*, 8> LoopPhis;
1626   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1627     LoopPhis.push_back(cast<PHINode>(I));
1628   }
1629   // Each round of simplification iterates through the SimplifyIVUsers worklist
1630   // for all current phis, then determines whether any IVs can be
1631   // widened. Widening adds new phis to LoopPhis, inducing another round of
1632   // simplification on the wide IVs.
1633   bool Changed = false;
1634   while (!LoopPhis.empty()) {
1635     // Evaluate as many IV expressions as possible before widening any IVs. This
1636     // forces SCEV to set no-wrap flags before evaluating sign/zero
1637     // extension. The first time SCEV attempts to normalize sign/zero extension,
1638     // the result becomes final. So for the most predictable results, we delay
1639     // evaluation of sign/zero extension until it is needed, and avoid running
1640     // other SCEV based analysis prior to simplifyAndExtend.
1641     do {
1642       PHINode *CurrIV = LoopPhis.pop_back_val();
1643 
1644       // Information about sign/zero extensions of CurrIV.
1645       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1646 
1647       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
1648                                    &Visitor);
1649 
1650       if (Visitor.WI.WidestNativeType) {
1651         WideIVs.push_back(Visitor.WI);
1652       }
1653     } while (!LoopPhis.empty());
1654 
1655     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1656       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1657       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1658         Changed = true;
1659         LoopPhis.push_back(WidePhi);
1660       }
1661     }
1662   }
1663   return Changed;
1664 }
1665 
1666 //===----------------------------------------------------------------------===//
1667 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1668 //===----------------------------------------------------------------------===//
1669 
1670 /// Given a Value which is hoped to be part of an add recurrence in the given
1671 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1672 /// that this is less general than SCEV's AddRec checking.
1673 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1674   Instruction *IncI = dyn_cast<Instruction>(IncV);
1675   if (!IncI)
1676     return nullptr;
1677 
1678   switch (IncI->getOpcode()) {
1679   case Instruction::Add:
1680   case Instruction::Sub:
1681     break;
1682   case Instruction::GetElementPtr:
1683     // An IV counter must preserve its type.
1684     if (IncI->getNumOperands() == 2)
1685       break;
1686     LLVM_FALLTHROUGH;
1687   default:
1688     return nullptr;
1689   }
1690 
1691   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1692   if (Phi && Phi->getParent() == L->getHeader()) {
1693     if (L->isLoopInvariant(IncI->getOperand(1)))
1694       return Phi;
1695     return nullptr;
1696   }
1697   if (IncI->getOpcode() == Instruction::GetElementPtr)
1698     return nullptr;
1699 
1700   // Allow add/sub to be commuted.
1701   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1702   if (Phi && Phi->getParent() == L->getHeader()) {
1703     if (L->isLoopInvariant(IncI->getOperand(0)))
1704       return Phi;
1705   }
1706   return nullptr;
1707 }
1708 
1709 /// Whether the current loop exit test is based on this value.  Currently this
1710 /// is limited to a direct use in the loop condition.
1711 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
1712   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1713   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1714   // TODO: Allow non-icmp loop test.
1715   if (!ICmp)
1716     return false;
1717 
1718   // TODO: Allow indirect use.
1719   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
1720 }
1721 
1722 /// linearFunctionTestReplace policy. Return true unless we can show that the
1723 /// current exit test is already sufficiently canonical.
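///
/// E.g. (illustrative only): an exit test of the form 'icmp ne i32 %iv, %n'
/// against a unit-stride counter is already canonical, whereas a test such as
/// 'icmp slt i32 %iv, %n', or a test against a value that is not a simple
/// counter, still benefits from LFTR.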
1724 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
1725   assert(L->getLoopLatch() && "Must be in simplified form");
1726 
1727   // Avoid converting a constant or loop invariant test back to a runtime
1728   // test.  This is critical for when SCEV's cached ExitCount is less precise
1729   // than the current IR (such as after we've proven a particular exit is
1730   // actually dead and thus the BE count never reaches our ExitCount.)
1731   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1732   if (L->isLoopInvariant(BI->getCondition()))
1733     return false;
1734 
1735   // Do LFTR to simplify the exit condition to an ICMP.
1736   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1737   if (!Cond)
1738     return true;
1739 
1740   // Do LFTR to simplify the exit ICMP to EQ/NE
1741   ICmpInst::Predicate Pred = Cond->getPredicate();
1742   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1743     return true;
1744 
1745   // Look for a loop invariant RHS
1746   Value *LHS = Cond->getOperand(0);
1747   Value *RHS = Cond->getOperand(1);
1748   if (!L->isLoopInvariant(RHS)) {
1749     if (!L->isLoopInvariant(LHS))
1750       return true;
1751     std::swap(LHS, RHS);
1752   }
1753   // Look for a simple IV counter LHS
1754   PHINode *Phi = dyn_cast<PHINode>(LHS);
1755   if (!Phi)
1756     Phi = getLoopPhiForCounter(LHS, L);
1757 
1758   if (!Phi)
1759     return true;
1760 
1761   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1762   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1763   if (Idx < 0)
1764     return true;
1765 
1766   // Do LFTR if the exit condition's IV is *not* a simple counter.
1767   Value *IncV = Phi->getIncomingValue(Idx);
1768   return Phi != getLoopPhiForCounter(IncV, L);
1769 }
1770 
1771 /// Return true if undefined behavior would provably be executed on the path to
1772 /// OnPathTo if Root produced a poison result.  Note that this doesn't say
1773 /// anything about whether OnPathTo is actually executed or whether Root is
1774 /// actually poison.  This can be used to assess whether a new use of Root can
1775 /// be added at a location which is control equivalent with OnPathTo (such as
1776 /// immediately before it) without introducing UB which didn't previously
1777 /// exist.  Note that a false result conveys no information.
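///
/// E.g. (hypothetical sketch): if Root would be poison and it feeds the
/// pointer operand of a load or store that is guaranteed to execute before
/// OnPathTo, then poison provably triggers UB on that path, so adding another
/// use of Root there cannot introduce new UB.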
1778 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
1779                                           Instruction *OnPathTo,
1780                                           DominatorTree *DT) {
1781   // Basic approach is to assume Root is poison, propagate poison forward
1782   // through all users we can easily track, and then check whether any of those
1783   // users provably trigger UB and must execute before our exiting block might
1784   // exit.
1785 
1786   // The set of all recursive users we've visited (which are assumed to all be
1787   // poison because of said visit)
1788   SmallSet<const Value *, 16> KnownPoison;
1789   SmallVector<const Instruction*, 16> Worklist;
1790   Worklist.push_back(Root);
1791   while (!Worklist.empty()) {
1792     const Instruction *I = Worklist.pop_back_val();
1793 
1794     // If we know this must trigger UB on a path leading to our target.
1795     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
1796       return true;
1797 
1798     // If we can't analyze propagation through this instruction, just skip it
1799     // and transitive users.  Safe as false is a conservative result.
1800     if (!propagatesPoison(cast<Operator>(I)) && I != Root)
1801       continue;
1802 
1803     if (KnownPoison.insert(I).second)
1804       for (const User *User : I->users())
1805         Worklist.push_back(cast<Instruction>(User));
1806   }
1807 
1808   // Might be non-UB, or might have a path we couldn't prove must execute on
1809   // way to exiting bb.
1810   return false;
1811 }
1812 
1813 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1814 /// down to checking that all operands are constant and listing instructions
1815 /// that may hide undef.
1816 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1817                                unsigned Depth) {
1818   if (isa<Constant>(V))
1819     return !isa<UndefValue>(V);
1820 
1821   if (Depth >= 6)
1822     return false;
1823 
1824   // Conservatively handle non-constant non-instructions. For example, Arguments
1825   // may be undef.
1826   Instruction *I = dyn_cast<Instruction>(V);
1827   if (!I)
1828     return false;
1829 
1830   // Load and return values may be undef.
1831   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1832     return false;
1833 
1834   // Optimistically handle other instructions.
1835   for (Value *Op : I->operands()) {
1836     if (!Visited.insert(Op).second)
1837       continue;
1838     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1839       return false;
1840   }
1841   return true;
1842 }
1843 
1844 /// Return true if the given value is concrete. We must prove that undef can
1845 /// never reach it.
1846 ///
1847 /// TODO: If we decide that this is a good approach to checking for undef, we
1848 /// may factor it into a common location.
1849 static bool hasConcreteDef(Value *V) {
1850   SmallPtrSet<Value*, 8> Visited;
1851   Visited.insert(V);
1852   return hasConcreteDefImpl(V, Visited, 0);
1853 }
1854 
1855 /// Return true if this IV has any uses other than the (soon to be rewritten)
1856 /// loop exit test.
1857 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1858   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1859   Value *IncV = Phi->getIncomingValue(LatchIdx);
1860 
1861   for (User *U : Phi->users())
1862     if (U != Cond && U != IncV) return false;
1863 
1864   for (User *U : IncV->users())
1865     if (U != Cond && U != Phi) return false;
1866   return true;
1867 }
1868 
1869 /// Return true if the given phi is a "counter" in L.  A counter is an
1870 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
1871 /// step of 1.  Note that L must have exactly one latch.
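///
/// E.g. (sketch): a header phi of the form
///   %iv = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i64 %iv, 1
/// is a counter; an IV stepping by anything other than 1 is not.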
1872 static bool isLoopCounter(PHINode* Phi, Loop *L,
1873                           ScalarEvolution *SE) {
1874   assert(Phi->getParent() == L->getHeader());
1875   assert(L->getLoopLatch());
1876 
1877   if (!SE->isSCEVable(Phi->getType()))
1878     return false;
1879 
1880   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1881   if (!AR || AR->getLoop() != L || !AR->isAffine())
1882     return false;
1883 
1884   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1885   if (!Step || !Step->isOne())
1886     return false;
1887 
1888   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
1889   Value *IncV = Phi->getIncomingValue(LatchIdx);
1890   return (getLoopPhiForCounter(IncV, L) == Phi);
1891 }
1892 
1893 /// Search the loop header for a loop counter (an add rec w/step of one)
1894 /// suitable for use by LFTR.  If multiple counters are available, select the
1895 /// "best" one based on profitability heuristics.
1896 ///
1897 /// BECount may be an i8* pointer type. The pointer difference is already a
1898 /// valid count without scaling the address stride, so it remains a pointer
1899 /// expression as far as SCEV is concerned.
1900 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
1901                                 const SCEV *BECount,
1902                                 ScalarEvolution *SE, DominatorTree *DT) {
1903   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1904 
1905   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
1906 
1907   // Loop over all of the PHI nodes, looking for a simple counter.
1908   PHINode *BestPhi = nullptr;
1909   const SCEV *BestInit = nullptr;
1910   BasicBlock *LatchBlock = L->getLoopLatch();
1911   assert(LatchBlock && "Must be in simplified form");
1912   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1913 
1914   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1915     PHINode *Phi = cast<PHINode>(I);
1916     if (!isLoopCounter(Phi, L, SE))
1917       continue;
1918 
1919     // Avoid comparing an integer IV against a pointer Limit.
1920     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1921       continue;
1922 
1923     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1924 
1925     // AR may be a pointer type, while BECount is an integer type.
1926     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1927     // AR may not be a narrower type, or we may never exit.
1928     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1929     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
1930       continue;
1931 
1932     // Avoid reusing a potentially undef value to compute other values that may
1933     // have originally had a concrete definition.
1934     if (!hasConcreteDef(Phi)) {
1935       // We explicitly allow unknown phis as long as they are already used by
1936       // the loop exit test.  This is legal since performing LFTR could not
1937       // increase the number of undef users.
1938       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
1939       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
1940           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
1941         continue;
1942     }
1943 
1944     // Avoid introducing undefined behavior due to poison which didn't exist in
1945     // the original program.  (Annoyingly, the rules for poison and undef
1946     // propagation are distinct, so this does NOT cover the undef case above.)
1947     // We have to ensure that we don't introduce UB by introducing a use on an
1948     // iteration where said IV produces poison.  Our strategy here differs for
1949     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
1950     // see code in linearFunctionTestReplace.  For pointers, we restrict
1951     // transforms as there is no good way to reinfer inbounds once lost.
1952     if (!Phi->getType()->isIntegerTy() &&
1953         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
1954       continue;
1955 
1956     const SCEV *Init = AR->getStart();
1957 
1958     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
1959       // Don't force a live loop counter if another IV can be used.
1960       if (AlmostDeadIV(Phi, LatchBlock, Cond))
1961         continue;
1962 
1963       // Prefer to count-from-zero. This is a more "canonical" counter form. It
1964       // also prefers integer to pointer IVs.
1965       if (BestInit->isZero() != Init->isZero()) {
1966         if (BestInit->isZero())
1967           continue;
1968       }
1969       // If two IVs both count from zero or both count from nonzero then the
1970       // narrower is likely a dead phi that has been widened. Use the wider phi
1971       // to allow the other to be eliminated.
1972       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
1973         continue;
1974     }
1975     BestPhi = Phi;
1976     BestInit = Init;
1977   }
1978   return BestPhi;
1979 }
1980 
1981 /// Insert an IR expression which computes the value held by the IV IndVar
1982 /// (which must be a loop counter w/unit stride) after the backedge of loop L
1983 /// is taken ExitCount times.
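///
/// E.g. (illustrative only): for a counter that starts at %start with exit
/// count %ec, the pre-increment limit is %start + %ec and the post-increment
/// limit is %start + %ec + 1; the caller then rewrites the exit test to
/// compare the IV against that limit.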
1984 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
1985                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
1986                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
1987   assert(isLoopCounter(IndVar, L, SE));
1988   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
1989   const SCEV *IVInit = AR->getStart();
1990 
1991   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
1992   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
1993   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
1994   // the existing GEPs whenever possible.
1995   if (IndVar->getType()->isPointerTy() &&
1996       !ExitCount->getType()->isPointerTy()) {
1997     // IVOffset will be the new GEP offset that is interpreted by GEP as a
1998     // signed value. ExitCount on the other hand represents the loop trip count,
1999     // which is an unsigned value. FindLoopCounter only allows induction
2000     // variables that have a positive unit stride of one. This means we don't
2001     // have to handle the case of negative offsets (yet) and just need to zero
2002     // extend ExitCount.
2003     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2004     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2005     if (UsePostInc)
2006       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2007 
2008     // Expand the code for the iteration count.
2009     assert(SE->isLoopInvariant(IVOffset, L) &&
2010            "Computed iteration count is not loop invariant!");
2011 
2012     // We could handle pointer IVs other than i8*, but we need to compensate for
2013     // gep index scaling.
2014     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2015                              cast<PointerType>(IndVar->getType())
2016                                  ->getElementType())->isOne() &&
2017            "unit stride pointer IV must be i8*");
2018 
2019     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2020     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2021     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2022   } else {
2023     // In any other case, convert both IVInit and ExitCount to integers before
2024     // comparing. This may result in SCEV expansion of pointers, but in practice
2025     // SCEV will fold the pointer arithmetic away as such:
2026     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2027     //
2028     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2029     // for simple memset-style loops.
2030     //
2031     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2032     // were generated on top of case #2, which is not expected.
2033 
2034     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2035     // For unit stride, IVCount = Start + ExitCount with 2's complement
2036     // overflow.
2037 
2038     // For integer IVs, truncate the IV before computing IVInit + BECount,
2039     // unless we know a priori that the limit must be a constant when evaluated
2040     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2041     // of the IV in the loop over a (potentially) expensive expansion of the
2042     // widened exit count add(zext(add)) expression.
2043     if (SE->getTypeSizeInBits(IVInit->getType())
2044         > SE->getTypeSizeInBits(ExitCount->getType())) {
2045       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2046         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2047       else
2048         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2049     }
2050 
2051     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2052 
2053     if (UsePostInc)
2054       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2055 
2056     // Expand the code for the iteration count.
2057     assert(SE->isLoopInvariant(IVLimit, L) &&
2058            "Computed iteration count is not loop invariant!");
2059     // Ensure that we generate the same type as IndVar, or a smaller integer
2060     // type. In the presence of null pointer values, we have an integer type
2061     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2062     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2063       IndVar->getType() : ExitCount->getType();
2064     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2065     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2066   }
2067 }
2068 
2069 /// This method rewrites the exit condition of the loop to be a canonical !=
2070 /// comparison against the incremented loop induction variable.  This pass is
2071 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2072 /// determine a loop-invariant trip count of the loop, which is actually a much
2073 /// broader range than just linear tests.
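///
/// E.g. (hypothetical sketch): an exit branch on 'icmp slt i64 %iv, %n' can be
/// rewritten to branch on 'icmp ne i64 %iv.next, %limit', where %limit is the
/// value the incremented IV holds after ExitCount iterations.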
2074 bool IndVarSimplify::
2075 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2076                           const SCEV *ExitCount,
2077                           PHINode *IndVar, SCEVExpander &Rewriter) {
2078   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2079   assert(isLoopCounter(IndVar, L, SE));
2080   Instruction * const IncVar =
2081     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2082 
2083   // Initialize CmpIndVar to the preincremented IV.
2084   Value *CmpIndVar = IndVar;
2085   bool UsePostInc = false;
2086 
2087   // If the exiting block is the same as the backedge block, we prefer to
2088   // compare against the post-incremented value, otherwise we must compare
2089   // against the preincremented value.
2090   if (ExitingBB == L->getLoopLatch()) {
2091     // For pointer IVs, we chose to not strip inbounds which requires us not
2092     // to add a potentially UB introducing use.  We need to either a) show
2093     // the loop test we're modifying is already in post-inc form, or b) show
2094     // that adding a use must not introduce UB.
2095     bool SafeToPostInc =
2096         IndVar->getType()->isIntegerTy() ||
2097         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2098         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2099     if (SafeToPostInc) {
2100       UsePostInc = true;
2101       CmpIndVar = IncVar;
2102     }
2103   }
2104 
2105   // It may be necessary to drop nowrap flags on the incrementing instruction
2106   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2107   // case the increment might have previously been poison on the last iteration
2108   // only) or if LFTR switches to a different IV that was previously dynamically
2109   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2110   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2111   // check), because the pre-inc addrec flags may be adopted from the original
2112   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2113   // TODO: This handling is inaccurate for one case: If we switch to a
2114   // dynamically dead IV that wraps on the first loop iteration only, which is
2115   // not covered by the post-inc addrec. (If the new IV was not dynamically
2116   // dead, it could not be poison on the first iteration in the first place.)
2117   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2118     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2119     if (BO->hasNoUnsignedWrap())
2120       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2121     if (BO->hasNoSignedWrap())
2122       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2123   }
2124 
2125   Value *ExitCnt = genLoopLimit(
2126       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2127   assert(ExitCnt->getType()->isPointerTy() ==
2128              IndVar->getType()->isPointerTy() &&
2129          "genLoopLimit missed a cast");
2130 
2131   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2132   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2133   ICmpInst::Predicate P;
2134   if (L->contains(BI->getSuccessor(0)))
2135     P = ICmpInst::ICMP_NE;
2136   else
2137     P = ICmpInst::ICMP_EQ;
2138 
2139   IRBuilder<> Builder(BI);
2140 
2141   // The new loop exit condition should reuse the debug location of the
2142   // original loop exit condition.
2143   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2144     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2145 
2146   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2147   // avoid the expensive expansion of the limit expression in the wider type,
2148   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2149   // since we know (from the exit count bitwidth), that we can't self-wrap in
2150   // the narrower type.
2151   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2152   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2153   if (CmpIndVarSize > ExitCntSize) {
2154     assert(!CmpIndVar->getType()->isPointerTy() &&
2155            !ExitCnt->getType()->isPointerTy());
2156 
2157     // Before resorting to actually inserting the truncate, use the same
2158     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2159     // the other side of the comparison instead.  We still evaluate the limit
2160     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2161     // a truncate within it.
2162     bool Extended = false;
2163     const SCEV *IV = SE->getSCEV(CmpIndVar);
2164     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2165                                                   ExitCnt->getType());
2166     const SCEV *ZExtTrunc =
2167       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2168 
2169     if (ZExtTrunc == IV) {
2170       Extended = true;
2171       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2172                                    "wide.trip.count");
2173     } else {
2174       const SCEV *SExtTrunc =
2175         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2176       if (SExtTrunc == IV) {
2177         Extended = true;
2178         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2179                                      "wide.trip.count");
2180       }
2181     }
2182 
2183     if (Extended) {
2184       bool Discard;
2185       L->makeLoopInvariant(ExitCnt, Discard);
2186     } else
2187       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2188                                       "lftr.wideiv");
2189   }
2190   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2191                     << "      LHS:" << *CmpIndVar << '\n'
2192                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2193                     << "\n"
2194                     << "      RHS:\t" << *ExitCnt << "\n"
2195                     << "ExitCount:\t" << *ExitCount << "\n"
2196                     << "  was: " << *BI->getCondition() << "\n");
2197 
2198   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2199   Value *OrigCond = BI->getCondition();
2200   // It's tempting to use replaceAllUsesWith here to fully replace the old
2201   // comparison, but that's not immediately safe, since users of the old
2202   // comparison may not be dominated by the new comparison. Instead, just
2203   // update the branch to use the new comparison; in the common case this
2204   // will make old comparison dead.
2205   BI->setCondition(Cond);
2206   DeadInsts.emplace_back(OrigCond);
2207 
2208   ++NumLFTR;
2209   return true;
2210 }
2211 
2212 //===----------------------------------------------------------------------===//
2213 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2214 //===----------------------------------------------------------------------===//
2215 
2216 /// If there's a single exit block, sink any loop-invariant values that
2217 /// were defined in the preheader but not used inside the loop into the
2218 /// exit block to reduce register pressure in the loop.
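///
/// E.g. (sketch): if the preheader computes '%sum = add i64 %a, %b' and %sum
/// is only used after the loop, the add is moved into the exit block so its
/// value does not have to be kept live across the loop body.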
2219 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2220   BasicBlock *ExitBlock = L->getExitBlock();
2221   if (!ExitBlock) return false;
2222 
2223   BasicBlock *Preheader = L->getLoopPreheader();
2224   if (!Preheader) return false;
2225 
2226   bool MadeAnyChanges = false;
2227   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2228   BasicBlock::iterator I(Preheader->getTerminator());
2229   while (I != Preheader->begin()) {
2230     --I;
2231     // New instructions were inserted at the end of the preheader.
2232     if (isa<PHINode>(I))
2233       break;
2234 
2235     // Don't move instructions which might have side effects, since the side
2236     // effects need to complete before instructions inside the loop.  Also don't
2237     // move instructions which might read memory, since the loop may modify
2238     // memory. Note that it's okay if the instruction might have undefined
2239     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2240     // block.
2241     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2242       continue;
2243 
2244     // Skip debug info intrinsics.
2245     if (isa<DbgInfoIntrinsic>(I))
2246       continue;
2247 
2248     // Skip eh pad instructions.
2249     if (I->isEHPad())
2250       continue;
2251 
2252     // Don't sink alloca: we never want to sink static alloca's out of the
2253     // entry block, and correctly sinking dynamic alloca's requires
2254     // checks for stacksave/stackrestore intrinsics.
2255     // FIXME: Refactor this check somehow?
2256     if (isa<AllocaInst>(I))
2257       continue;
2258 
2259     // Determine if there is a use in or before the loop (direct or
2260     // otherwise).
2261     bool UsedInLoop = false;
2262     for (Use &U : I->uses()) {
2263       Instruction *User = cast<Instruction>(U.getUser());
2264       BasicBlock *UseBB = User->getParent();
2265       if (PHINode *P = dyn_cast<PHINode>(User)) {
2266         unsigned i =
2267           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2268         UseBB = P->getIncomingBlock(i);
2269       }
2270       if (UseBB == Preheader || L->contains(UseBB)) {
2271         UsedInLoop = true;
2272         break;
2273       }
2274     }
2275 
2276     // If there is, the def must remain in the preheader.
2277     if (UsedInLoop)
2278       continue;
2279 
2280     // Otherwise, sink it to the exit block.
2281     Instruction *ToMove = &*I;
2282     bool Done = false;
2283 
2284     if (I != Preheader->begin()) {
2285       // Skip debug info intrinsics.
2286       do {
2287         --I;
2288       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2289 
2290       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2291         Done = true;
2292     } else {
2293       Done = true;
2294     }
2295 
2296     MadeAnyChanges = true;
2297     ToMove->moveBefore(*ExitBlock, InsertPt);
2298     if (Done) break;
2299     InsertPt = ToMove->getIterator();
2300   }
2301 
2302   return MadeAnyChanges;
2303 }
2304 
2305 // Returns true if the condition checked by \p BI is invariant and can be
2306 // proved to be trivially true during at least the first \p MaxIter iterations.
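//
// E.g. (hypothetical sketch): for an exit branch on 'icmp ult i32 %i, %len'
// in a loop whose iteration count provably never reaches %len, the condition
// stays true for the whole iteration space, so the caller can fold the
// branch condition to a constant and the exit is never taken.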
2307 static bool isTrivialCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE,
2308                           bool ProvingLoopExit, const SCEV *MaxIter) {
2309   ICmpInst::Predicate Pred;
2310   Value *LHS, *RHS;
2311   using namespace PatternMatch;
2312   BasicBlock *TrueSucc, *FalseSucc;
2313   if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
2314                       m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
2315     return false;
2316 
2317   assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
2318          "Not a loop exit!");
2319 
2320   // 'LHS pred RHS' should now mean that we stay in loop.
2321   if (L->contains(FalseSucc))
2322     Pred = CmpInst::getInversePredicate(Pred);
2323 
2324   // If we are proving loop exit, invert the predicate.
2325   if (ProvingLoopExit)
2326     Pred = CmpInst::getInversePredicate(Pred);
2327 
2328   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
2329   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
2330   // Can we prove it to be trivially true?
2331   if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI))
2332     return true;
2333 
2334   if (ProvingLoopExit)
2335     return false;
2336 
2337   ICmpInst::Predicate InvariantPred;
2338   const SCEV *InvariantLHS, *InvariantRHS;
2339 
2340   // Check if there is a loop-invariant predicate equivalent to our check.
2341   if (!SE->isLoopInvariantExitCondDuringFirstIterations(
2342            Pred, LHSS, RHSS, L, BI, MaxIter, InvariantPred, InvariantLHS,
2343            InvariantRHS))
2344     return false;
2345 
2346   // Can we prove it to be trivially true?
2347   return SE->isKnownPredicateAt(InvariantPred, InvariantLHS, InvariantRHS, BI);
2348 }
2349 
2350 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2351   SmallVector<BasicBlock*, 16> ExitingBlocks;
2352   L->getExitingBlocks(ExitingBlocks);
2353 
2354   // Remove all exits which aren't both rewriteable and execute on every
2355   // iteration.
2356   auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2357     // If our exiting block exits multiple loops, we can only rewrite the
2358     // innermost one.  Otherwise, we're changing how many times the innermost
2359     // loop runs before it exits.
2360     if (LI->getLoopFor(ExitingBB) != L)
2361       return true;
2362 
2363     // Can't rewrite non-branch yet.
2364     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2365     if (!BI)
2366       return true;
2367 
2368     // If already constant, nothing to do.
2369     if (isa<Constant>(BI->getCondition()))
2370       return true;
2371 
2372     // Likewise, the loop latch must be dominated by the exiting BB.
2373     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
2374       return true;
2375 
2376     return false;
2377   });
2378   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2379 
2380   if (ExitingBlocks.empty())
2381     return false;
2382 
2383   // Get a symbolic upper bound on the loop backedge taken count.
2384   const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
2385   if (isa<SCEVCouldNotCompute>(MaxExitCount))
2386     return false;
2387 
2388   // Visit our exit blocks in order of dominance.  Since all of these exits
2389   // must dominate the latch, we know there is a total dominance order
2390   // between them.
2391   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
2392                // std::sort sorts in ascending order, so we want the inverse of
2393                // the normal dominance relation.
2394                if (A == B) return false;
2395                if (DT->properlyDominates(A, B))
2396                  return true;
2397                else {
2398                  assert(DT->properlyDominates(B, A) &&
2399                         "expected total dominance order!");
2400                  return false;
2401                }
2402   });
2403 #ifndef NDEBUG
2404   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2405     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2406   }
2407 #endif
2408 
2409   auto ReplaceExitCond = [&](BranchInst *BI, Value *NewCond) {
2410     auto *OldCond = BI->getCondition();
2411     BI->setCondition(NewCond);
2412     if (OldCond->use_empty())
2413       DeadInsts.emplace_back(OldCond);
2414   };
2415 
2416   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2417     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2418     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2419     auto *OldCond = BI->getCondition();
2420     auto *NewCond = ConstantInt::get(OldCond->getType(),
2421                                      IsTaken ? ExitIfTrue : !ExitIfTrue);
2422     ReplaceExitCond(BI, NewCond);
2423   };
2424 
2425   bool Changed = false;
2426   bool SkipLastIter = false;
2427   SmallSet<const SCEV*, 8> DominatingExitCounts;
2428   for (BasicBlock *ExitingBB : ExitingBlocks) {
2429     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2430     if (isa<SCEVCouldNotCompute>(ExitCount)) {
2431       // Okay, we do not know the exit count here. Can we at least prove that it
2432       // will remain the same within iteration space?
2433       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2434       auto OptimizeCond = [this, L, BI, ExitingBB, MaxExitCount, &FoldExit](
2435           bool Inverted, bool SkipLastIter) {
2436         const SCEV *MaxIter = MaxExitCount;
2437         if (SkipLastIter) {
2438           const SCEV *One = SE->getOne(MaxIter->getType());
2439           MaxIter = SE->getMinusSCEV(MaxIter, One);
2440         }
2441         if (isTrivialCond(L, BI, SE, Inverted, MaxIter)) {
2442           FoldExit(ExitingBB, Inverted);
2443           return true;
2444         }
2445         return false;
2446       };
2447 
2448       // TODO: We might have proved that we can skip the last iteration for
2449       // this check. In this case, we only want to check the condition on the
2450       // pre-last iteration (MaxExitCount - 1). However, there is a nasty
2451       // corner case:
2452       //
2453       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
2454       //
2455       // If we could not prove that len != 0, then we also could not prove that
2456       // (len - 1) is not UINT_MAX. If we simply query (len - 1), then
2457       // OptimizeCond will likely not prove anything for it, even if it could
2458       // prove the same fact for len.
2459       //
2460       // As a temporary solution, we query both last and pre-last iterations in
2461       // hope that we will be able to prove triviality for at least one of
2462       // them. We can stop querying MaxExitCount for this case once SCEV
2463       // understands that (MaxExitCount - 1) will not overflow here.
2464       if (OptimizeCond(false, false) || OptimizeCond(true, false))
2465         Changed = true;
2466       else if (SkipLastIter)
2467         if (OptimizeCond(false, true) || OptimizeCond(true, true))
2468           Changed = true;
2469       continue;
2470     }
2471 
2472     if (MaxExitCount == ExitCount)
2473       // If the loop has more than 1 iteration, all further checks will be
2474       // executed 1 iteration less.
2475       SkipLastIter = true;
2476 
2477     // If we know we'd exit on the first iteration, rewrite the exit to
2478     // reflect this.  This does not imply the loop must exit through this
2479     // exit; there may be an earlier one taken on the first iteration.
2480     // TODO: Given we know the backedge can't be taken, we should go ahead
2481     // and break it.  Or at least, kill all the header phis and simplify.
2482     if (ExitCount->isZero()) {
2483       FoldExit(ExitingBB, true);
2484       Changed = true;
2485       continue;
2486     }
2487 
2488     // If we end up with a pointer exit count, bail.  Note that we can end up
2489     // with a pointer exit count for one exiting block, and not for another in
2490     // the same loop.
2491     if (!ExitCount->getType()->isIntegerTy() ||
2492         !MaxExitCount->getType()->isIntegerTy())
2493       continue;
2494 
2495     Type *WiderType =
2496       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2497     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2498     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2499     assert(MaxExitCount->getType() == ExitCount->getType());
2500 
2501     // Can we prove that some other exit must be taken strictly before this
2502     // one?
2503     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2504                                      MaxExitCount, ExitCount)) {
2505       FoldExit(ExitingBB, false);
2506       Changed = true;
2507       continue;
2508     }
2509 
2510     // As we run, keep track of which exit counts we've encountered.  If we
2511     // find a duplicate, we've found an exit which would have exited on the
2512     // exiting iteration, but (from the visit order) strictly follows another
2513     // which does the same and is thus dead.
2514     if (!DominatingExitCounts.insert(ExitCount).second) {
2515       FoldExit(ExitingBB, false);
2516       Changed = true;
2517       continue;
2518     }
2519 
2520     // TODO: There might be another opportunity to leverage SCEV's reasoning
2521     // here.  If we kept track of the min of dominating exits so far, we could
2522     // discharge exits with EC >= MDEC. This is less powerful than the existing
2523     // transform (since later exits aren't considered), but potentially more
2524     // powerful for any case where SCEV can prove a >=u b, but neither a == b
2525     // nor a >u b.  Such a case is not currently known.
2526   }
2527   return Changed;
2528 }
2529 
2530 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2531   SmallVector<BasicBlock*, 16> ExitingBlocks;
2532   L->getExitingBlocks(ExitingBlocks);
2533 
2534   // Finally, see if we can rewrite our exit conditions into a loop invariant
2535   // form. If we have a read-only loop, and we can tell that we must exit down
2536   // a path which does not need any of the values computed within the loop, we
2537   // can rewrite the loop to exit on the first iteration.  Note that this
2538   // doesn't either a) tell us the loop exits on the first iteration (unless
2539   // *all* exits are predicateable) or b) tell us *which* exit might be taken.
2540   // This transformation looks a lot like a restricted form of dead loop
2541   // elimination, but restricted to read-only loops and without necessarily
2542   // needing to kill the loop entirely.
2543   if (!LoopPredication)
2544     return false;
2545 
2546   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2547     return false;
2548 
2549   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2550   // through *explicit* control flow.  We have to eliminate the possibility of
2551   // implicit exits (see below) before we know it's truly exact.
2552   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2553   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2554       !SE->isLoopInvariant(ExactBTC, L) ||
2555       !isSafeToExpand(ExactBTC, *SE))
2556     return false;
2557 
2558   // If we end up with a pointer exit count, bail.  It may be unsized.
2559   if (!ExactBTC->getType()->isIntegerTy())
2560     return false;
2561 
2562   auto BadExit = [&](BasicBlock *ExitingBB) {
2563     // If our exiting block exits multiple loops, we can only rewrite the
2564     // innermost one.  Otherwise, we're changing how many times the innermost
2565     // loop runs before it exits.
2566     if (LI->getLoopFor(ExitingBB) != L)
2567       return true;
2568 
2569     // Can't rewrite non-branch yet.
2570     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2571     if (!BI)
2572       return true;
2573 
2574     // If already constant, nothing to do.
2575     if (isa<Constant>(BI->getCondition()))
2576       return true;
2577 
2578     // If the exit block has phis, we need to be able to compute the values
2579     // within the loop which feed them.  This assumes trivially LCSSA phis
2580     // have already been removed; TODO: generalize.
2581     BasicBlock *ExitBlock =
2582         BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2583     if (!ExitBlock->phis().empty())
2584       return true;
2585 
2586     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2587     assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2588     if (!SE->isLoopInvariant(ExitCount, L) ||
2589         !isSafeToExpand(ExitCount, *SE))
2590       return true;
2591 
2592     // If we end up with a pointer exit count, bail.  It may be unsized.
2593     if (!ExitCount->getType()->isIntegerTy())
2594       return true;
2595 
2596     return false;
2597   };
2598 
2599   // If we have any exits which can't be predicated themselves, then we can't
2600   // predicate any exit which isn't guaranteed to execute before it.  Consider
2601   // two exits (a) and (b) which would both exit on the same iteration.  If we
2602   // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2603   // we could convert a loop from exiting through (a) to one exiting through
2604   // (b).  Note that this problem exists only for exits with the same exit
2605   // count, and we could be more aggressive when exit counts are known unequal.
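       // For example (illustrative only): suppose (a)'s exit path consumes a
       // value computed in the loop (so (a) is a BadExit below) while (b)'s
       // does not.  Predicating (b) alone could make the rewritten loop leave
       // through (b) on its first iteration even though the original program
       // left through (a), changing which values are observable after the loop.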
2606   llvm::sort(ExitingBlocks,
2607             [&](BasicBlock *A, BasicBlock *B) {
2608               // std::sort sorts in ascending order, so we want the inverse of
2609               // the normal dominance relation, plus a tie breaker for blocks
2610               // unordered by dominance.
2611               if (DT->properlyDominates(A, B)) return true;
2612               if (DT->properlyDominates(B, A)) return false;
2613               return A->getName() < B->getName();
2614             });
2615   // Check to see if our exiting blocks form a total order (i.e. a linear
2616   // chain of exits before the backedge).  If they don't, reasoning about
2617   // reachability is complicated and we choose not to do so for now.
2618   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2619     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2620       return false;
2621 
2622   // Given our sorted total order, we know that exit[j] must be evaluated
2623   // after all exit[i] such that j > i.
2624   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2625     if (BadExit(ExitingBlocks[i])) {
2626       ExitingBlocks.resize(i);
2627       break;
2628     }
2629 
2630   if (ExitingBlocks.empty())
2631     return false;
2632 
2633   // We rely on not being able to reach an exiting block on a later iteration
2634   // than its statically computed exit count.  The implementation of
2635   // getExitCount currently has this invariant, but assert it here so that
2636   // breakage is obvious if this ever changes.
2637   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2638         return DT->dominates(ExitingBB, L->getLoopLatch());
2639       }));
2640 
2641   // At this point, ExitingBlocks consists of only those blocks which are
2642   // predicatable.  Given that, we know we have at least one exit we can
2643   // predicate if the loop doesn't have side effects and doesn't have any
2644   // implicit exits (because then our exact BTC isn't actually exact).
2645   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
2646   // suggestions on how to improve this?  I can obviously bail out for outer
2647   // loops, but that seems less than ideal.  MemorySSA can find memory writes;
2648   // is that enough for *all* side effects?
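       // Note: any store or potentially throwing instruction in the loop would
       // make a change in the iteration count observable, hence the
       // conservative scan below.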
2649   for (BasicBlock *BB : L->blocks())
2650     for (auto &I : *BB)
2651       // TODO: isGuaranteedToTransfer
2652       if (I.mayHaveSideEffects() || I.mayThrow())
2653         return false;
2654 
2655   bool Changed = false;
2656   // Finally, do the actual predication for all predicatable blocks.  A couple
2657   // of notes here:
2658   // 1) We don't bother to constant fold dominated exits with identical exit
2659   //    counts; that's simply a form of CSE/equality propagation and we leave
2660   //    it for dedicated passes.
2661   // 2) We insert the comparison at the branch.  Hoisting introduces additional
2662   //    legality constraints and we leave that to dedicated logic.  We want to
2663   //    predicate even if we can't insert a loop invariant expression as
2664   //    peeling or unrolling will likely reduce the cost of the otherwise loop
2665   //    varying check.
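       // Illustrative sketch (IR names invented for exposition): for an exit
       // whose count is not literally ExactBTC, the new branch condition is
       // built roughly as
       //   %ec   = <expansion of this exit's count>
       //   %btc  = <expansion of ExactBTC>
       //   %cond = icmp ne/eq %ec, %btc  ; after zero-extending both to a
       //                                 ; common width; the sense depends on
       //                                 ; which successor leaves the loop
       // whereas an exit whose count is literally ExactBTC is folded directly
       // to a constant condition.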
2666   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2667   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2668   Value *ExactBTCV = nullptr; // Lazily generated if needed.
2669   for (BasicBlock *ExitingBB : ExitingBlocks) {
2670     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2671 
2672     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2673     Value *NewCond;
2674     if (ExitCount == ExactBTC) {
2675       NewCond = L->contains(BI->getSuccessor(0)) ?
2676         B.getFalse() : B.getTrue();
2677     } else {
2678       Value *ECV = Rewriter.expandCodeFor(ExitCount);
2679       if (!ExactBTCV)
2680         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2681       Value *RHS = ExactBTCV;
2682       if (ECV->getType() != RHS->getType()) {
2683         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2684         ECV = B.CreateZExt(ECV, WiderTy);
2685         RHS = B.CreateZExt(RHS, WiderTy);
2686       }
2687       auto Pred = L->contains(BI->getSuccessor(0)) ?
2688         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2689       NewCond = B.CreateICmp(Pred, ECV, RHS);
2690     }
2691     Value *OldCond = BI->getCondition();
2692     BI->setCondition(NewCond);
2693     if (OldCond->use_empty())
2694       DeadInsts.emplace_back(OldCond);
2695     Changed = true;
2696   }
2697 
2698   return Changed;
2699 }
2700 
2701 //===----------------------------------------------------------------------===//
2702 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2703 //===----------------------------------------------------------------------===//
2704 
2705 bool IndVarSimplify::run(Loop *L) {
2706   // We need (and expect!) the incoming loop to be in LCSSA.
2707   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2708          "LCSSA required to run indvars!");
2709 
2710   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2711   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2712   //    canonicalization can be a pessimization without LSR to "clean up"
2713   //    afterwards.
2714   //  - We depend on having a preheader; in particular,
2715   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2716   //    and we're in trouble if we can't find the induction variable even when
2717   //    we've manually inserted one.
2718   //  - LFTR relies on having a single backedge.
2719   if (!L->isLoopSimplifyForm())
2720     return false;
2721 
2722 #ifndef NDEBUG
2723   // Used below for a consistency check only
2724   // Note: Since the result returned by ScalarEvolution may depend on the order
2725   // in which previous results are added to its cache, the call to
2726   // getBackedgeTakenCount() may change following SCEV queries.
2727   const SCEV *BackedgeTakenCount;
2728   if (VerifyIndvars)
2729     BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2730 #endif
2731 
2732   bool Changed = false;
2733   // If there are any floating-point recurrences, attempt to
2734   // transform them to use integer recurrences.
2735   Changed |= rewriteNonIntegerIVs(L);
2736 
2737   // Create a rewriter object which we'll use to transform the code with.
2738   SCEVExpander Rewriter(*SE, DL, "indvars");
2739 #ifndef NDEBUG
2740   Rewriter.setDebugType(DEBUG_TYPE);
2741 #endif
2742 
2743   // Eliminate redundant IV users.
2744   //
2745   // Simplification works best when run before other consumers of SCEV. We
2746   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2747   // other expressions involving loop IVs have been evaluated. This helps SCEV
2748   // set no-wrap flags before normalizing sign/zero extension.
2749   Rewriter.disableCanonicalMode();
2750   Changed |= simplifyAndExtend(L, Rewriter, LI);
2751 
2752   // Check to see if we can compute the final value of any expressions
2753   // that are recurrent in the loop, and substitute the exit values from the
2754   // loop into any instructions outside of the loop that use the final values
2755   // of the current expressions.
2756   if (ReplaceExitValue != NeverRepl) {
2757     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
2758                                              ReplaceExitValue, DeadInsts)) {
2759       NumReplaced += Rewrites;
2760       Changed = true;
2761     }
2762   }
2763 
2764   // Eliminate redundant IV cycles.
2765   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2766 
2767   // Try to eliminate loop exits based on analyzable exit counts.
2768   if (optimizeLoopExits(L, Rewriter))  {
2769     Changed = true;
2770     // Given we've changed exit counts, notify SCEV
2771     SE->forgetLoop(L);
2772   }
2773 
2774   // Try to form loop invariant tests for loop exits by changing how many
2775   // iterations of the loop run when that is unobservable.
2776   if (predicateLoopExits(L, Rewriter)) {
2777     Changed = true;
2778     // Given we've changed exit counts, notify SCEV
2779     SE->forgetLoop(L);
2780   }
2781 
2782   // If we have a trip count expression, rewrite the loop's exit condition
2783   // using it.
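       // Roughly, the loop below requires that each exiting block ends in a
       // conditional branch belonging to this loop, that needsLFTR agrees the
       // rewrite is worthwhile, that SCEV supplies a usable non-zero exit
       // count, that FindLoopCounter finds a suitable IV, and that expanding
       // the count is not prohibitively expensive, before finally calling
       // linearFunctionTestReplace.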
2784   if (!DisableLFTR) {
2785     BasicBlock *PreHeader = L->getLoopPreheader();
2786     BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
2787 
2788     SmallVector<BasicBlock*, 16> ExitingBlocks;
2789     L->getExitingBlocks(ExitingBlocks);
2790     for (BasicBlock *ExitingBB : ExitingBlocks) {
2791       // Can't rewrite non-branch yet.
2792       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2793         continue;
2794 
2795       // If our exiting block exits multiple loops, we can only rewrite the
2796       // innermost one.  Otherwise, we're changing how many times the innermost
2797       // loop runs before it exits.
2798       if (LI->getLoopFor(ExitingBB) != L)
2799         continue;
2800 
2801       if (!needsLFTR(L, ExitingBB))
2802         continue;
2803 
2804       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2805       if (isa<SCEVCouldNotCompute>(ExitCount))
2806         continue;
2807 
2808       // This was handled above, but as we form SCEVs, we can sometimes refine
2809       // existing ones; this allows exit counts to be folded to zero which
2810       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
2811       // until stable to handle cases like this better.
2812       if (ExitCount->isZero())
2813         continue;
2814 
2815       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2816       if (!IndVar)
2817         continue;
2818 
2819       // Avoid high cost expansions.  Note: This heuristic is questionable in
2820       // that our definition of "high cost" is not exactly principled.
2821       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2822                                        TTI, PreHeaderBR))
2823         continue;
2824 
2825       // Check preconditions for proper SCEVExpander operation. SCEV does not
2826       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2827       // any pass that uses the SCEVExpander must do it. This does not work
2828       // well for loop passes because SCEVExpander makes assumptions about
2829       // all loops, while LoopPassManager only forces the current loop to be
2830       // simplified.
2831       //
2832       // FIXME: SCEV expansion has no way to bail out, so the caller must
2833       // explicitly check any assumptions made by SCEV. Brittle.
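           // Note (roughly): SCEVExpander needs an addrec's loop to have a
           // preheader in order to expand it safely, so we only proceed when
           // the exit count is not an addrec or its loop has a preheader.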
2834       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2835       if (!AR || AR->getLoop()->getLoopPreheader())
2836         Changed |= linearFunctionTestReplace(L, ExitingBB,
2837                                              ExitCount, IndVar,
2838                                              Rewriter);
2839     }
2840   }
2841   // Clear the rewriter cache, because values that are in the rewriter's cache
2842   // can be deleted in the loop below, causing the AssertingVH in the cache to
2843   // trigger.
2844   Rewriter.clear();
2845 
2846   // Now that we're done iterating through lists, clean up any instructions
2847   // which are now dead.
2848   while (!DeadInsts.empty())
2849     if (Instruction *Inst =
2850             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2851       Changed |=
2852           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2853 
2854   // The Rewriter may not be used from this point on.
2855 
2856   // Loop-invariant instructions in the preheader that aren't used in the
2857   // loop may be sunk below the loop to reduce register pressure.
2858   Changed |= sinkUnusedInvariants(L);
2859 
2860   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2861   // trip count and therefore can further simplify exit values in addition to
2862   // rewriteLoopExitValues.
2863   Changed |= rewriteFirstIterationLoopExitValues(L);
2864 
2865   // Clean up dead instructions.
2866   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2867 
2868   // Check a post-condition.
2869   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2870          "Indvars did not preserve LCSSA!");
2871 
2872   // Verify that LFTR and any other changes have not interfered with SCEV's
2873   // ability to compute the trip count.  We may have *changed* the exit count,
2874   // but only by reducing it.
2875 #ifndef NDEBUG
2876   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2877     SE->forgetLoop(L);
2878     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2879     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2880         SE->getTypeSizeInBits(NewBECount->getType()))
2881       NewBECount = SE->getTruncateOrNoop(NewBECount,
2882                                          BackedgeTakenCount->getType());
2883     else
2884       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2885                                                  NewBECount->getType());
2886     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2887                                  NewBECount) && "indvars must preserve SCEV");
2888   }
2889   if (VerifyMemorySSA && MSSAU)
2890     MSSAU->getMemorySSA()->verifyMemorySSA();
2891 #endif
2892 
2893   return Changed;
2894 }
2895 
2896 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2897                                           LoopStandardAnalysisResults &AR,
2898                                           LPMUpdater &) {
2899   Function *F = L.getHeader()->getParent();
2900   const DataLayout &DL = F->getParent()->getDataLayout();
2901 
2902   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA);
2903   if (!IVS.run(&L))
2904     return PreservedAnalyses::all();
2905 
2906   auto PA = getLoopPassPreservedAnalyses();
2907   PA.preserveSet<CFGAnalyses>();
2908   if (AR.MSSA)
2909     PA.preserve<MemorySSAAnalysis>();
2910   return PA;
2911 }
2912 
2913 namespace {
2914 
2915 struct IndVarSimplifyLegacyPass : public LoopPass {
2916   static char ID; // Pass identification, replacement for typeid
2917 
2918   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2919     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2920   }
2921 
2922   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2923     if (skipLoop(L))
2924       return false;
2925 
2926     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2927     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2928     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2929     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2930     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2931     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2932     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2933     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2934     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2935     MemorySSA *MSSA = nullptr;
2936     if (MSSAAnalysis)
2937       MSSA = &MSSAAnalysis->getMSSA();
2938 
2939     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA);
2940     return IVS.run(L);
2941   }
2942 
2943   void getAnalysisUsage(AnalysisUsage &AU) const override {
2944     AU.setPreservesCFG();
2945     AU.addPreserved<MemorySSAWrapperPass>();
2946     getLoopAnalysisUsage(AU);
2947   }
2948 };
2949 
2950 } // end anonymous namespace
2951 
2952 char IndVarSimplifyLegacyPass::ID = 0;
2953 
2954 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2955                       "Induction Variable Simplification", false, false)
2956 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2957 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2958                     "Induction Variable Simplification", false, false)
2959 
2960 Pass *llvm::createIndVarSimplifyPass() {
2961   return new IndVarSimplifyLegacyPass();
2962 }
2963