xref: /llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision bd341bafbf60be1b9b9cc12ec2c5c37c2ea0e46b)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
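//      For example (an illustrative sketch, not from the test suite), given
//        'for (i = 0; i != 25; ++i) v = i + 1;  use(v);'
//      the use of 'v' after the loop is rewritten to compute 25 outside the
//      loop, leaving the loop dead if it has no other observable effects.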
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/MemorySSA.h"
42 #include "llvm/Analysis/MemorySSAUpdater.h"
43 #include "llvm/Analysis/ScalarEvolution.h"
44 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
45 #include "llvm/Analysis/TargetLibraryInfo.h"
46 #include "llvm/Analysis/TargetTransformInfo.h"
47 #include "llvm/Analysis/ValueTracking.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/ConstantRange.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Operator.h"
64 #include "llvm/IR/PassManager.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/Use.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/IR/ValueHandle.h"
71 #include "llvm/InitializePasses.h"
72 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/CommandLine.h"
75 #include "llvm/Support/Compiler.h"
76 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/ErrorHandling.h"
78 #include "llvm/Support/MathExtras.h"
79 #include "llvm/Support/raw_ostream.h"
80 #include "llvm/Transforms/Scalar.h"
81 #include "llvm/Transforms/Scalar/LoopPassManager.h"
82 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
83 #include "llvm/Transforms/Utils/Local.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
86 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
87 #include <cassert>
88 #include <cstdint>
89 #include <utility>
90 
91 using namespace llvm;
92 
93 #define DEBUG_TYPE "indvars"
94 
95 STATISTIC(NumWidened     , "Number of indvars widened");
96 STATISTIC(NumReplaced    , "Number of exit values replaced");
97 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
98 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
99 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
100 
101 // Trip count verification can be enabled by default under NDEBUG if we
102 // implement a strong expression equivalence checker in SCEV. Until then, we
103 // use the verify-indvars flag, which may assert in some cases.
104 static cl::opt<bool> VerifyIndvars(
105     "verify-indvars", cl::Hidden,
106     cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
107              "effect in release builds. (Note: this adds additional SCEV "
108              "queries potentially changing the analysis result)"));
109 
110 static cl::opt<ReplaceExitVal> ReplaceExitValue(
111     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
112     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
113     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
114                clEnumValN(OnlyCheapRepl, "cheap",
115                           "only replace exit value when the cost is cheap"),
116                clEnumValN(NoHardUse, "noharduse",
117                           "only replace exit values when loop def likely dead"),
118                clEnumValN(AlwaysRepl, "always",
119                           "always replace exit value whenever possible")));
120 
121 static cl::opt<bool> UsePostIncrementRanges(
122   "indvars-post-increment-ranges", cl::Hidden,
123   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
124   cl::init(true));
125 
126 static cl::opt<bool>
127 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
128             cl::desc("Disable Linear Function Test Replace optimization"));
129 
130 static cl::opt<bool>
131 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
132                 cl::desc("Predicate conditions in read only loops"));
133 
134 static cl::opt<bool>
135 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
136                 cl::desc("Allow widening of indvars to eliminate s/zext"));
137 
138 namespace {
139 
140 struct RewritePhi;
141 
142 class IndVarSimplify {
143   LoopInfo *LI;
144   ScalarEvolution *SE;
145   DominatorTree *DT;
146   const DataLayout &DL;
147   TargetLibraryInfo *TLI;
148   const TargetTransformInfo *TTI;
149   std::unique_ptr<MemorySSAUpdater> MSSAU;
150 
151   SmallVector<WeakTrackingVH, 16> DeadInsts;
152 
153   bool handleFloatingPointIV(Loop *L, PHINode *PH);
154   bool rewriteNonIntegerIVs(Loop *L);
155 
156   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
157   /// Try to eliminate loop exits based on analyzable exit counts.
158   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
159   /// Try to form loop invariant tests for loop exits by changing how many
160   /// iterations of the loop run when that is unobservable.
161   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
162 
163   bool rewriteFirstIterationLoopExitValues(Loop *L);
164 
165   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
166                                  const SCEV *ExitCount,
167                                  PHINode *IndVar, SCEVExpander &Rewriter);
168 
169   bool sinkUnusedInvariants(Loop *L);
170 
171 public:
172   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
173                  const DataLayout &DL, TargetLibraryInfo *TLI,
174                  TargetTransformInfo *TTI, MemorySSA *MSSA)
175       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
176     if (MSSA)
177       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
178   }
179 
180   bool run(Loop *L);
181 };
182 
183 } // end anonymous namespace
184 
185 /// Determine the insertion point for this user. By default, insert immediately
186 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
187 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
188 /// common dominator for the incoming blocks. A nullptr can be returned if no
189 /// viable location is found: it may happen if User is a PHI and Def only comes
190 /// to this PHI from unreachable blocks.
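/// For example (illustrative): if Def reaches a PHI through incoming blocks B1
/// and B2, the candidate insertion point is the terminator of the nearest
/// common dominator of B1 and B2; it is then hoisted up the dominator tree
/// until it lands in the same loop as Def.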
191 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
192                                           DominatorTree *DT, LoopInfo *LI) {
193   PHINode *PHI = dyn_cast<PHINode>(User);
194   if (!PHI)
195     return User;
196 
197   Instruction *InsertPt = nullptr;
198   for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
199     if (PHI->getIncomingValue(i) != Def)
200       continue;
201 
202     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
203 
204     if (!DT->isReachableFromEntry(InsertBB))
205       continue;
206 
207     if (!InsertPt) {
208       InsertPt = InsertBB->getTerminator();
209       continue;
210     }
211     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
212     InsertPt = InsertBB->getTerminator();
213   }
214 
215   // If we have skipped all inputs, it means that Def only comes to Phi from
216   // unreachable blocks.
217   if (!InsertPt)
218     return nullptr;
219 
220   auto *DefI = dyn_cast<Instruction>(Def);
221   if (!DefI)
222     return InsertPt;
223 
224   assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
225 
226   auto *L = LI->getLoopFor(DefI->getParent());
227   assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
228 
229   for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
230     if (LI->getLoopFor(DTN->getBlock()) == L)
231       return DTN->getBlock()->getTerminator();
232 
233   llvm_unreachable("DefI dominates InsertPt!");
234 }
235 
236 //===----------------------------------------------------------------------===//
237 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
238 //===----------------------------------------------------------------------===//
239 
240 /// Convert APF to an integer, if possible.
241 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
242   bool isExact = false;
243   // See if we can convert this to an int64_t
244   uint64_t UIntVal;
245   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
246                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
247       !isExact)
248     return false;
249   IntVal = UIntVal;
250   return true;
251 }
252 
253 /// If the loop has a floating-point induction variable, then insert a
254 /// corresponding integer induction variable if possible.
255 /// For example,
256 /// for(double i = 0; i < 10000; ++i)
257 ///   bar(i)
258 /// is converted into
259 /// for(int i = 0; i < 10000; ++i)
260 ///   bar((double)i);
261 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
262   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
263   unsigned BackEdge     = IncomingEdge^1;
264 
265   // Check incoming value.
266   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
267 
268   int64_t InitValue;
269   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
270     return false;
271 
272   // Check the IV increment. Reject this PN if the increment operation is not
273   // an add or if the increment value cannot be represented by an integer.
274   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
275   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
276 
277   // If this is not an add of the PHI with a constantfp, or if the constant fp
278   // is not an integer, bail out.
279   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
280   int64_t IncValue;
281   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
282       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
283     return false;
284 
285   // Check Incr uses. One user is PN and the other user is an exit condition
286   // used by the conditional terminator.
287   Value::user_iterator IncrUse = Incr->user_begin();
288   Instruction *U1 = cast<Instruction>(*IncrUse++);
289   if (IncrUse == Incr->user_end()) return false;
290   Instruction *U2 = cast<Instruction>(*IncrUse++);
291   if (IncrUse != Incr->user_end()) return false;
292 
293   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
294   // only used by a branch, we can't transform it.
295   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
296   if (!Compare)
297     Compare = dyn_cast<FCmpInst>(U2);
298   if (!Compare || !Compare->hasOneUse() ||
299       !isa<BranchInst>(Compare->user_back()))
300     return false;
301 
302   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
303 
304   // We need to verify that the branch actually controls the iteration count
305   // of the loop.  If not, the new IV can overflow and no one will notice.
306   // The branch block must be in the loop and one of the successors must be out
307   // of the loop.
308   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
309   if (!L->contains(TheBr->getParent()) ||
310       (L->contains(TheBr->getSuccessor(0)) &&
311        L->contains(TheBr->getSuccessor(1))))
312     return false;
313 
314   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
315   // transform it.
316   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
317   int64_t ExitValue;
318   if (ExitValueVal == nullptr ||
319       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
320     return false;
321 
322   // Find new predicate for integer comparison.
323   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
324   switch (Compare->getPredicate()) {
325   default: return false;  // Unknown comparison.
326   case CmpInst::FCMP_OEQ:
327   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
328   case CmpInst::FCMP_ONE:
329   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
330   case CmpInst::FCMP_OGT:
331   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
332   case CmpInst::FCMP_OGE:
333   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
334   case CmpInst::FCMP_OLT:
335   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
336   case CmpInst::FCMP_OLE:
337   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
338   }
339 
340   // We convert the floating point induction variable to a signed i32 value if
341   // we can.  This is only safe if the comparison will not overflow in a way
342   // that won't be trapped by the integer equivalent operations.  Check for this
343   // now.
344   // TODO: We could use i64 if it is native and the range requires it.
345 
346   // The start/stride/exit values must all fit in signed i32.
347   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
348     return false;
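  // For example (illustrative), a loop such as 'for (double d = 0; d < 3e9;
  // ++d)' is rejected here because its exit value does not fit in a signed
  // i32.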
349 
350   // If not actually striding (add x, 0.0), avoid touching the code.
351   if (IncValue == 0)
352     return false;
353 
354   // Positive and negative strides have different safety conditions.
355   if (IncValue > 0) {
356     // If we have a positive stride, we require the init to be less than the
357     // exit value.
358     if (InitValue >= ExitValue)
359       return false;
360 
361     uint32_t Range = uint32_t(ExitValue-InitValue);
362     // Check for infinite loop, either:
363     // while (i <= Exit) or until (i > Exit)
364     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
365       if (++Range == 0) return false;  // Range overflows.
366     }
367 
368     unsigned Leftover = Range % uint32_t(IncValue);
369 
370     // If this is an equality comparison, we require that the strided value
371     // exactly land on the exit value, otherwise the IV condition will wrap
372     // around and do things the fp IV wouldn't.
373     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
374         Leftover != 0)
375       return false;
376 
377     // If the stride would wrap around the i32 before exiting, we can't
378     // transform the IV.
379     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
380       return false;
381   } else {
382     // If we have a negative stride, we require the init to be greater than the
383     // exit value.
384     if (InitValue <= ExitValue)
385       return false;
386 
387     uint32_t Range = uint32_t(InitValue-ExitValue);
388     // Check for infinite loop, either:
389     // while (i >= Exit) or until (i < Exit)
390     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
391       if (++Range == 0) return false;  // Range overflows.
392     }
393 
394     unsigned Leftover = Range % uint32_t(-IncValue);
395 
396     // If this is an equality comparison, we require that the strided value
397     // exactly land on the exit value, otherwise the IV condition will wrap
398     // around and do things the fp IV wouldn't.
399     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
400         Leftover != 0)
401       return false;
402 
403     // If the stride would wrap around the i32 before exiting, we can't
404     // transform the IV.
405     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
406       return false;
407   }
408 
409   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
410 
411   // Insert new integer induction variable.
412   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
413   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
414                       PN->getIncomingBlock(IncomingEdge));
415 
416   Value *NewAdd =
417     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
418                               Incr->getName()+".int", Incr);
419   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
420 
421   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
422                                       ConstantInt::get(Int32Ty, ExitValue),
423                                       Compare->getName());
424 
425   // In the following deletions, PN may become dead and may be deleted.
426   // Use a WeakTrackingVH to observe whether this happens.
427   WeakTrackingVH WeakPH = PN;
428 
429   // Delete the old floating point exit comparison.  The branch starts using the
430   // new comparison.
431   NewCompare->takeName(Compare);
432   Compare->replaceAllUsesWith(NewCompare);
433   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
434 
435   // Delete the old floating point increment.
436   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
437   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
438 
439   // If the FP induction variable still has uses, this is because something else
440   // in the loop uses its value.  In order to canonicalize the induction
441   // variable, we chose to eliminate the IV and rewrite it in terms of an
442   // int->fp cast.
443   //
444   // We give preference to sitofp over uitofp because it is faster on most
445   // platforms.
446   if (WeakPH) {
447     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
448                                  &*PN->getParent()->getFirstInsertionPt());
449     PN->replaceAllUsesWith(Conv);
450     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
451   }
452   return true;
453 }
454 
455 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
456   // First step.  Check to see if there are any floating-point recurrences.
457   // If there are, change them into integer recurrences, permitting analysis by
458   // the SCEV routines.
459   BasicBlock *Header = L->getHeader();
460 
461   SmallVector<WeakTrackingVH, 8> PHIs;
462   for (PHINode &PN : Header->phis())
463     PHIs.push_back(&PN);
464 
465   bool Changed = false;
466   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
467     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
468       Changed |= handleFloatingPointIV(L, PN);
469 
470   // If the loop previously had a floating-point IV, ScalarEvolution
471   // may not have been able to compute a trip count. Now that we've done some
472   // re-writing, the trip count may be computable.
473   if (Changed)
474     SE->forgetLoop(L);
475   return Changed;
476 }
477 
478 //===---------------------------------------------------------------------===//
479 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
480 // they will exit at the first iteration.
481 //===---------------------------------------------------------------------===//
482 
483 /// Check to see if this loop has loop invariant conditions which lead to loop
484 /// exits. If so, we know that if the exit path is taken, it is at the first
485 /// loop iteration. This lets us predict exit values of PHI nodes that live in
486 /// the loop header.
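/// For example (an illustrative sketch):
///   for (i = 0; i != n; ++i) {
///     if (flag)    // loop-invariant condition in a block dominating the latch
///       goto out;  // if this exit is ever taken, it is taken when i == 0
///     ...
///   }
///   out: use(i);
/// The exit PHI for 'i' at 'out' can take the header PHI's start value (0)
/// directly.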
487 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
488   // Verify the input to the pass is already in LCSSA form.
489   assert(L->isLCSSAForm(*DT));
490 
491   SmallVector<BasicBlock *, 8> ExitBlocks;
492   L->getUniqueExitBlocks(ExitBlocks);
493 
494   bool MadeAnyChanges = false;
495   for (auto *ExitBB : ExitBlocks) {
496     // If there are no more PHI nodes in this exit block, then no more
497     // values defined inside the loop are used on this path.
498     for (PHINode &PN : ExitBB->phis()) {
499       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
500            IncomingValIdx != E; ++IncomingValIdx) {
501         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
502 
503         // Can we prove that the exit must run on the first iteration if it
504         // runs at all?  (i.e. early exits are fine for our purposes, but
505         // traces which lead to this exit being taken on the 2nd iteration
506         // aren't.)  Note that this is about whether the exit branch is
507         // executed, not about whether it is taken.
508         if (!L->getLoopLatch() ||
509             !DT->dominates(IncomingBB, L->getLoopLatch()))
510           continue;
511 
512         // Get condition that leads to the exit path.
513         auto *TermInst = IncomingBB->getTerminator();
514 
515         Value *Cond = nullptr;
516         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
517           // Must be a conditional branch, otherwise the block
518           // should not be in the loop.
519           Cond = BI->getCondition();
520         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
521           Cond = SI->getCondition();
522         else
523           continue;
524 
525         if (!L->isLoopInvariant(Cond))
526           continue;
527 
528         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
529 
530         // Only deal with PHIs in the loop header.
531         if (!ExitVal || ExitVal->getParent() != L->getHeader())
532           continue;
533 
534         // If ExitVal is a PHI on the loop header, then we know its
535         // value along this exit because the exit can only be taken
536         // on the first iteration.
537         auto *LoopPreheader = L->getLoopPreheader();
538         assert(LoopPreheader && "Invalid loop");
539         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
540         if (PreheaderIdx != -1) {
541           assert(ExitVal->getParent() == L->getHeader() &&
542                  "ExitVal must be in loop header");
543           MadeAnyChanges = true;
544           PN.setIncomingValue(IncomingValIdx,
545                               ExitVal->getIncomingValue(PreheaderIdx));
546         }
547       }
548     }
549   }
550   return MadeAnyChanges;
551 }
552 
553 //===----------------------------------------------------------------------===//
554 //  IV Widening - Extend the width of an IV to cover its widest uses.
555 //===----------------------------------------------------------------------===//
556 
557 namespace {
558 
559 // Collect information about induction variables that are used by sign/zero
560 // extend operations. This information is recorded by visitIVCast and provides
561 // the input to WidenIV.
562 struct WideIVInfo {
563   PHINode *NarrowIV = nullptr;
564 
565   // Widest integer type created by [sz]ext.
566   Type *WidestNativeType = nullptr;
567 
568   // Was a sext user seen before a zext?
569   bool IsSigned = false;
570 };
571 
572 } // end anonymous namespace
573 
574 /// Update information about the induction variable that is extended by this
575 /// sign or zero extend operation. This is used to determine the final width of
576 /// the IV before actually widening it.
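/// For example (illustrative): if the first cast of an i32 IV that we visit is
/// a 'sext ... to i64', it sets IsSigned and WidestNativeType = i64 (assuming
/// i64 is a legal integer type on the target); a later mismatched zext of the
/// same IV is simply ignored.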
577 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
578                         const TargetTransformInfo *TTI) {
579   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
580   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
581     return;
582 
583   Type *Ty = Cast->getType();
584   uint64_t Width = SE->getTypeSizeInBits(Ty);
585   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
586     return;
587 
588   // Check that `Cast` actually extends the induction variable (we rely on this
589   // later).  This takes care of cases where `Cast` is extending a truncation of
590   // the narrow induction variable, and thus can end up being narrower than the
591   // "narrow" induction variable.
592   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
593   if (NarrowIVWidth >= Width)
594     return;
595 
596   // Cast is either an sext or zext up to this point.
597   // We should not widen an indvar if arithmetic on the wider indvar is more
598   // expensive than arithmetic on the narrower indvar. We check only the cost of
599   // ADD because at least an ADD is required to increment the induction variable.
600   // We could compute the cost of all instructions on the induction variable more
601   // comprehensively when necessary.
602   if (TTI &&
603       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
604           TTI->getArithmeticInstrCost(Instruction::Add,
605                                       Cast->getOperand(0)->getType())) {
606     return;
607   }
608 
609   if (!WI.WidestNativeType) {
610     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
611     WI.IsSigned = IsSigned;
612     return;
613   }
614 
615   // We extend the IV to satisfy the sign of its first user, arbitrarily.
616   if (WI.IsSigned != IsSigned)
617     return;
618 
619   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
620     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
621 }
622 
623 namespace {
624 
625 /// Record a link in the Narrow IV def-use chain along with the WideIV that
626 /// computes the same value as the Narrow IV def.  This avoids caching Use*
627 /// pointers.
628 struct NarrowIVDefUse {
629   Instruction *NarrowDef = nullptr;
630   Instruction *NarrowUse = nullptr;
631   Instruction *WideDef = nullptr;
632 
633   // True if the narrow def is never negative.  Tracking this information lets
634   // us use a sign extension instead of a zero extension or vice versa, when
635   // profitable and legal.
636   bool NeverNegative = false;
637 
638   NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
639                  bool NeverNegative)
640       : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
641         NeverNegative(NeverNegative) {}
642 };
643 
644 /// The goal of this transform is to remove sign and zero extends without
645 /// creating any new induction variables. To do this, it creates a new phi of
646 /// the wider type and redirects all users, either removing extends or inserting
647 /// truncs whenever we stop propagating the type.
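/// For example (an illustrative IR sketch), on a target with native i64 a
/// narrow IV such as
///   %iv     = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///   %iv.ext = sext i32 %iv to i64
/// is complemented by a wide IV
///   %iv.wide = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]
/// whose users replace those of %iv.ext; any remaining narrow users are fed a
/// trunc of the wide value instead.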
648 class WidenIV {
649   // Parameters
650   PHINode *OrigPhi;
651   Type *WideType;
652 
653   // Context
654   LoopInfo        *LI;
655   Loop            *L;
656   ScalarEvolution *SE;
657   DominatorTree   *DT;
658 
659   // Does the module have any calls to the llvm.experimental.guard intrinsic
660   // at all? If not, we can avoid scanning instructions looking for guards.
661   bool HasGuards;
662 
663   // Result
664   PHINode *WidePhi = nullptr;
665   Instruction *WideInc = nullptr;
666   const SCEV *WideIncExpr = nullptr;
667   SmallVectorImpl<WeakTrackingVH> &DeadInsts;
668 
669   SmallPtrSet<Instruction *,16> Widened;
670   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
671 
672   enum ExtendKind { ZeroExtended, SignExtended, Unknown };
673 
674   // A map tracking the kind of extension used to widen each narrow IV
675   // and narrow IV user.
676   // Key: pointer to a narrow IV or IV user.
677   // Value: the kind of extension used to widen this Instruction.
678   DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
679 
680   using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
681 
682   // A map with control-dependent ranges for post increment IV uses. The key is
683   // a pair of IV def and a use of this def denoting the context. The value is
684   // a ConstantRange representing possible values of the def at the given
685   // context.
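  // For instance (illustrative), if a use of %iv.next is guarded by
  // 'if (%iv.next > 0)', then in that context %iv.next is known to lie in
  // [1, INT_MAX], which lets pushNarrowIVUsers treat the use as never
  // negative even when the def itself is not known to be non-negative.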
686   DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
687 
688   Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
689                                               Instruction *UseI) {
690     DefUserPair Key(Def, UseI);
691     auto It = PostIncRangeInfos.find(Key);
692     return It == PostIncRangeInfos.end()
693                ? Optional<ConstantRange>(None)
694                : Optional<ConstantRange>(It->second);
695   }
696 
697   void calculatePostIncRanges(PHINode *OrigPhi);
698   void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
699 
700   void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
701     DefUserPair Key(Def, UseI);
702     auto It = PostIncRangeInfos.find(Key);
703     if (It == PostIncRangeInfos.end())
704       PostIncRangeInfos.insert({Key, R});
705     else
706       It->second = R.intersectWith(It->second);
707   }
708 
709 public:
710   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
711           DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
712           bool HasGuards)
713       : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
714         L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
715         HasGuards(HasGuards), DeadInsts(DI) {
716     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
717     ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
718   }
719 
720   PHINode *createWideIV(SCEVExpander &Rewriter);
721 
722 protected:
723   Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
724                           Instruction *Use);
725 
726   Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
727   Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
728                                      const SCEVAddRecExpr *WideAR);
729   Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
730 
731   ExtendKind getExtendKind(Instruction *I);
732 
733   using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
734 
735   WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
736 
737   WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
738 
739   const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
740                               unsigned OpCode) const;
741 
742   Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
743 
744   bool widenLoopCompare(NarrowIVDefUse DU);
745   bool widenWithVariantUse(NarrowIVDefUse DU);
746 
747   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
748 };
749 
750 } // end anonymous namespace
751 
752 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
753                                  bool IsSigned, Instruction *Use) {
754   // Set the debug location and conservative insertion point.
755   IRBuilder<> Builder(Use);
756   // Hoist the insertion point into loop preheaders as far as possible.
757   for (const Loop *L = LI->getLoopFor(Use->getParent());
758        L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
759        L = L->getParentLoop())
760     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
761 
762   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
763                     Builder.CreateZExt(NarrowOper, WideType);
764 }
765 
766 /// Instantiate a wide operation to replace a narrow operation. This only needs
767 /// to handle operations that can evaluate to a SCEVAddRec. It can safely return
768 /// nullptr for any operation we decide not to clone.
769 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
770                                   const SCEVAddRecExpr *WideAR) {
771   unsigned Opcode = DU.NarrowUse->getOpcode();
772   switch (Opcode) {
773   default:
774     return nullptr;
775   case Instruction::Add:
776   case Instruction::Mul:
777   case Instruction::UDiv:
778   case Instruction::Sub:
779     return cloneArithmeticIVUser(DU, WideAR);
780 
781   case Instruction::And:
782   case Instruction::Or:
783   case Instruction::Xor:
784   case Instruction::Shl:
785   case Instruction::LShr:
786   case Instruction::AShr:
787     return cloneBitwiseIVUser(DU);
788   }
789 }
790 
791 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
792   Instruction *NarrowUse = DU.NarrowUse;
793   Instruction *NarrowDef = DU.NarrowDef;
794   Instruction *WideDef = DU.WideDef;
795 
796   LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
797 
798   // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
799   // about the narrow operand yet so must insert a [sz]ext. It is probably loop
800   // invariant and will be folded or hoisted. If it actually comes from a
801   // widened IV, it should be removed during a future call to widenIVUse.
802   bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
803   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
804                    ? WideDef
805                    : createExtendInst(NarrowUse->getOperand(0), WideType,
806                                       IsSigned, NarrowUse);
807   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
808                    ? WideDef
809                    : createExtendInst(NarrowUse->getOperand(1), WideType,
810                                       IsSigned, NarrowUse);
811 
812   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
813   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
814                                         NarrowBO->getName());
815   IRBuilder<> Builder(NarrowUse);
816   Builder.Insert(WideBO);
817   WideBO->copyIRFlags(NarrowBO);
818   return WideBO;
819 }
820 
821 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
822                                             const SCEVAddRecExpr *WideAR) {
823   Instruction *NarrowUse = DU.NarrowUse;
824   Instruction *NarrowDef = DU.NarrowDef;
825   Instruction *WideDef = DU.WideDef;
826 
827   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
828 
829   unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
830 
831   // We're trying to find X such that
832   //
833   //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
834   //
835   // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
836   // and check using SCEV if any of them are correct.
837 
838   // Returns true if extending NonIVNarrowDef according to `SignExt` is a
839   // correct solution to X.
840   auto GuessNonIVOperand = [&](bool SignExt) {
841     const SCEV *WideLHS;
842     const SCEV *WideRHS;
843 
844     auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
845       if (SignExt)
846         return SE->getSignExtendExpr(S, Ty);
847       return SE->getZeroExtendExpr(S, Ty);
848     };
849 
850     if (IVOpIdx == 0) {
851       WideLHS = SE->getSCEV(WideDef);
852       const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
853       WideRHS = GetExtend(NarrowRHS, WideType);
854     } else {
855       const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
856       WideLHS = GetExtend(NarrowLHS, WideType);
857       WideRHS = SE->getSCEV(WideDef);
858     }
859 
860     // WideUse is "WideDef `op.wide` X" as described in the comment.
861     const SCEV *WideUse = nullptr;
862 
863     switch (NarrowUse->getOpcode()) {
864     default:
865       llvm_unreachable("No other possibility!");
866 
867     case Instruction::Add:
868       WideUse = SE->getAddExpr(WideLHS, WideRHS);
869       break;
870 
871     case Instruction::Mul:
872       WideUse = SE->getMulExpr(WideLHS, WideRHS);
873       break;
874 
875     case Instruction::UDiv:
876       WideUse = SE->getUDivExpr(WideLHS, WideRHS);
877       break;
878 
879     case Instruction::Sub:
880       WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
881       break;
882     }
883 
884     return WideUse == WideAR;
885   };
886 
887   bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
888   if (!GuessNonIVOperand(SignExtend)) {
889     SignExtend = !SignExtend;
890     if (!GuessNonIVOperand(SignExtend))
891       return nullptr;
892   }
893 
894   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
895                    ? WideDef
896                    : createExtendInst(NarrowUse->getOperand(0), WideType,
897                                       SignExtend, NarrowUse);
898   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
899                    ? WideDef
900                    : createExtendInst(NarrowUse->getOperand(1), WideType,
901                                       SignExtend, NarrowUse);
902 
903   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
904   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
905                                         NarrowBO->getName());
906 
907   IRBuilder<> Builder(NarrowUse);
908   Builder.Insert(WideBO);
909   WideBO->copyIRFlags(NarrowBO);
910   return WideBO;
911 }
912 
913 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
914   auto It = ExtendKindMap.find(I);
915   assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
916   return It->second;
917 }
918 
919 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
920                                      unsigned OpCode) const {
921   if (OpCode == Instruction::Add)
922     return SE->getAddExpr(LHS, RHS);
923   if (OpCode == Instruction::Sub)
924     return SE->getMinusSCEV(LHS, RHS);
925   if (OpCode == Instruction::Mul)
926     return SE->getMulExpr(LHS, RHS);
927 
928   llvm_unreachable("Unsupported opcode.");
929 }
930 
931 /// No-wrap operations can transfer sign extension of their result to their
932 /// operands. Generate the SCEV value for the widened operation without
933 /// actually modifying the IR yet. If the expression after extending the
934 /// operands is an AddRec for this loop, return the AddRec and the kind of
935 /// extension used.
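/// For example (illustrative): for 'add nsw i32 %iv, %inv' where %iv was
/// widened with a sign extend, nsw guarantees that sext(%iv + %inv) equals
/// sext(%iv) + sext(%inv); if that sum is an AddRec of this loop, the use is
/// widened without truncating the wide IV.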
936 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
937   // Handle the common case of add<nsw/nuw>
938   const unsigned OpCode = DU.NarrowUse->getOpcode();
939   // Only Add/Sub/Mul instructions supported yet.
940   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
941       OpCode != Instruction::Mul)
942     return {nullptr, Unknown};
943 
944   // One operand (NarrowDef) has already been extended to WideDef. Now determine
945   // if extending the other will lead to a recurrence.
946   const unsigned ExtendOperIdx =
947       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
948   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
949 
950   const SCEV *ExtendOperExpr = nullptr;
951   const OverflowingBinaryOperator *OBO =
952     cast<OverflowingBinaryOperator>(DU.NarrowUse);
953   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
954   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
955     ExtendOperExpr = SE->getSignExtendExpr(
956       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
957   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
958     ExtendOperExpr = SE->getZeroExtendExpr(
959       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
960   else
961     return {nullptr, Unknown};
962 
963   // When creating this SCEV expr, don't apply the current operation's NSW or NUW
964   // flags. This instruction may be guarded by control flow that the no-wrap
965   // behavior depends on. Non-control-equivalent instructions can be mapped to
966   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
967   // semantics to those operations.
968   const SCEV *lhs = SE->getSCEV(DU.WideDef);
969   const SCEV *rhs = ExtendOperExpr;
970 
971   // Let's swap operands to the initial order for the case of non-commutative
972   // operations, like SUB. See PR21014.
973   if (ExtendOperIdx == 0)
974     std::swap(lhs, rhs);
975   const SCEVAddRecExpr *AddRec =
976       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
977 
978   if (!AddRec || AddRec->getLoop() != L)
979     return {nullptr, Unknown};
980 
981   return {AddRec, ExtKind};
982 }
983 
984 /// Is this instruction potentially interesting for further simplification after
985 /// widening its type? In other words, can the extend be safely hoisted out of
986 /// the loop, with SCEV reducing the value to a recurrence on the same loop? If
987 /// so, return the extended recurrence and the kind of extension used. Otherwise
988 /// return {nullptr, Unknown}.
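/// For example (illustrative): a use '%u = shl i32 %iv, 1' has SCEV {0,+,2}
/// when %iv is {0,+,1}; if SCEV can prove the doubled recurrence does not
/// wrap, its zero- or sign-extension folds to an AddRec of this loop and the
/// shl can be widened instead of truncating the wide IV to feed it.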
989 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
990   if (!SE->isSCEVable(DU.NarrowUse->getType()))
991     return {nullptr, Unknown};
992 
993   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
994   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
995       SE->getTypeSizeInBits(WideType)) {
996     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
997     // index. So don't follow this use.
998     return {nullptr, Unknown};
999   }
1000 
1001   const SCEV *WideExpr;
1002   ExtendKind ExtKind;
1003   if (DU.NeverNegative) {
1004     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1005     if (isa<SCEVAddRecExpr>(WideExpr))
1006       ExtKind = SignExtended;
1007     else {
1008       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1009       ExtKind = ZeroExtended;
1010     }
1011   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1012     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1013     ExtKind = SignExtended;
1014   } else {
1015     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1016     ExtKind = ZeroExtended;
1017   }
1018   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1019   if (!AddRec || AddRec->getLoop() != L)
1020     return {nullptr, Unknown};
1021   return {AddRec, ExtKind};
1022 }
1023 
1024 /// This IV user cannot be widened. Replace this use of the original narrow IV
1025 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
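/// For example (illustrative), a call that consumes the narrow i32 IV keeps
/// its type and is fed a 'trunc i64 %iv.wide to i32' inserted at the use's
/// insertion point, isolating the original narrow def.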
1026 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1027   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1028   if (!InsertPt)
1029     return;
1030   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1031                     << *DU.NarrowUse << "\n");
1032   IRBuilder<> Builder(InsertPt);
1033   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1034   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1035 }
1036 
1037 /// If the narrow use is a compare instruction, then widen the compare
1038 /// (and possibly the other operand). The extend operation is hoisted into the
1039 /// loop preheader as far as possible.
1040 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1041   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1042   if (!Cmp)
1043     return false;
1044 
1045   // We can legally widen the comparison in the following two cases:
1046   //
1047   //  - The signedness of the IV extension and comparison match
1048   //
1049   //  - The narrow IV is always positive (and thus its sign extension is equal
1050   //    to its zero extension).  For instance, let's say we're zero extending
1051   //    %narrow for the following use
1052   //
1053   //      icmp slt i32 %narrow, %val   ... (A)
1054   //
1055   //    and %narrow is always positive.  Then
1056   //
1057   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1058   //          == icmp slt i32 zext(%narrow), sext(%val)
1059   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1060   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1061     return false;
1062 
1063   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1064   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1065   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1066   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1067 
1068   // Widen the compare instruction.
1069   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1070   if (!InsertPt)
1071     return false;
1072   IRBuilder<> Builder(InsertPt);
1073   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1074 
1075   // Widen the other operand of the compare, if necessary.
1076   if (CastWidth < IVWidth) {
1077     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1078     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1079   }
1080   return true;
1081 }
1082 
1083 // widenIVUse avoids generating a trunc by evaluating the use as an AddRec. This
1084 // will not work when:
1085 //    1) SCEV traces back to an instruction inside the loop that SCEV cannot
1086 // expand, e.g. add %indvar, (load %addr)
1087 //    2) SCEV finds a loop variant, e.g. add %indvar, %loopvariant
1088 // When SCEV fails to avoid the trunc, we can still try an instruction-
1089 // combining approach to prove that the trunc is not required. This can be
1090 // further extended with other checks; for now we handle the following case
1091 // (sub can be "add" or "mul", and "nsw + sext" can be "nuw + zext"):
1092 //
1093 // Src:
1094 //   %c = sub nsw %b, %indvar
1095 //   %d = sext %c to i64
1096 // Dst:
1097 //   %indvar.ext1 = sext %indvar to i64
1098 //   %m = sext %b to i64
1099 //   %d = sub nsw i64 %m, %indvar.ext1
1100 // Therefore, as long as the result of the add/sub/mul is extended to the wide
1101 // type, no trunc is required regardless of how %b is generated. This pattern is
1102 // common when calculating addresses on 64-bit architectures.
1103 bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) {
1104   Instruction *NarrowUse = DU.NarrowUse;
1105   Instruction *NarrowDef = DU.NarrowDef;
1106   Instruction *WideDef = DU.WideDef;
1107 
1108   // Handle the common case of add<nsw/nuw>
1109   const unsigned OpCode = NarrowUse->getOpcode();
1110   // Only Add/Sub/Mul instructions are supported.
1111   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1112       OpCode != Instruction::Mul)
1113     return false;
1114 
1115   // The operand that is not defined by NarrowDef of DU. Let's call it the
1116   // other operand.
1117   assert((NarrowUse->getOperand(0) == NarrowDef ||
1118           NarrowUse->getOperand(1) == NarrowDef) &&
1119          "bad DU");
1120 
1121   const OverflowingBinaryOperator *OBO =
1122     cast<OverflowingBinaryOperator>(NarrowUse);
1123   ExtendKind ExtKind = getExtendKind(NarrowDef);
1124   bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap();
1125   bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap();
1126   if (!CanSignExtend && !CanZeroExtend)
1127     return false;
1128 
1129   // Verify that the defining operand is an AddRec.
1130   const SCEV *Op1 = SE->getSCEV(WideDef);
1131   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1132   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1133     return false;
1134 
1135   for (Use &U : NarrowUse->uses()) {
1136     Instruction *User = nullptr;
1137     if (ExtKind == SignExtended)
1138       User = dyn_cast<SExtInst>(U.getUser());
1139     else
1140       User = dyn_cast<ZExtInst>(U.getUser());
1141     if (!User || User->getType() != WideType)
1142       return false;
1143   }
1144 
1145   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1146 
1147   // Generate the widened use instruction.
1148   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1149                    ? WideDef
1150                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1151                                       ExtKind, NarrowUse);
1152   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1153                    ? WideDef
1154                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1155                                       ExtKind, NarrowUse);
1156 
1157   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1158   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1159                                         NarrowBO->getName());
1160   IRBuilder<> Builder(NarrowUse);
1161   Builder.Insert(WideBO);
1162   WideBO->copyIRFlags(NarrowBO);
1163   ExtendKindMap[NarrowUse] = ExtKind;
1164 
1165   for (Use &U : NarrowUse->uses()) {
1166     Instruction *User = nullptr;
1167     if (ExtKind == SignExtended)
1168       User = cast<SExtInst>(U.getUser());
1169     else
1170       User = cast<ZExtInst>(U.getUser());
1171     assert(User->getType() == WideType && "Checked before!");
1172     LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1173                       << *WideBO << "\n");
1174     ++NumElimExt;
1175     User->replaceAllUsesWith(WideBO);
1176     DeadInsts.emplace_back(User);
1177   }
1178   return true;
1179 }
1180 
1181 /// Determine whether an individual user of the narrow IV can be widened. If so,
1182 /// return the wide clone of the user.
1183 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1184   assert(ExtendKindMap.count(DU.NarrowDef) &&
1185          "Should already know the kind of extension used to widen NarrowDef");
1186 
1187   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1188   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1189     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1190       // For LCSSA phis, sink the truncate outside the loop.
1191       // After SimplifyCFG most loop exit targets have a single predecessor.
1192       // Otherwise fall back to a truncate within the loop.
1193       if (UsePhi->getNumOperands() != 1)
1194         truncateIVUse(DU, DT, LI);
1195       else {
1196         // Widening the PHI requires us to insert a trunc.  The logical place
1197         // for this trunc is in the same BB as the PHI.  This is not possible if
1198         // the BB is terminated by a catchswitch.
1199         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1200           return nullptr;
1201 
1202         PHINode *WidePhi =
1203           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1204                           UsePhi);
1205         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1206         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1207         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1208         UsePhi->replaceAllUsesWith(Trunc);
1209         DeadInsts.emplace_back(UsePhi);
1210         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1211                           << *WidePhi << "\n");
1212       }
1213       return nullptr;
1214     }
1215   }
1216 
1217   // This narrow use can be widened by a sext if it's non-negative or its narrow
1218   // def was widened by a sext. Same for zext.
1219   auto canWidenBySExt = [&]() {
1220     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1221   };
1222   auto canWidenByZExt = [&]() {
1223     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1224   };
1225 
1226   // Our raison d'etre! Eliminate sign and zero extension.
1227   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1228       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1229     Value *NewDef = DU.WideDef;
1230     if (DU.NarrowUse->getType() != WideType) {
1231       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1232       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1233       if (CastWidth < IVWidth) {
1234         // The cast isn't as wide as the IV, so insert a Trunc.
1235         IRBuilder<> Builder(DU.NarrowUse);
1236         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1237       }
1238       else {
1239         // A wider extend was hidden behind a narrower one. This may induce
1240         // another round of IV widening in which the intermediate IV becomes
1241         // dead. It should be very rare.
1242         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1243                           << " not wide enough to subsume " << *DU.NarrowUse
1244                           << "\n");
1245         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1246         NewDef = DU.NarrowUse;
1247       }
1248     }
1249     if (NewDef != DU.NarrowUse) {
1250       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1251                         << " replaced by " << *DU.WideDef << "\n");
1252       ++NumElimExt;
1253       DU.NarrowUse->replaceAllUsesWith(NewDef);
1254       DeadInsts.emplace_back(DU.NarrowUse);
1255     }
1256     // Now that the extend is gone, we want to expose its uses for potential
1257     // further simplification. We don't need to directly inform SimplifyIVUsers
1258     // of the new users, because their parent IV will be processed later as a
1259     // new loop phi. If we preserved IVUsers analysis, we would also want to
1260     // push the uses of WideDef here.
1261 
1262     // No further widening is needed. The deceased [sz]ext had done it for us.
1263     return nullptr;
1264   }
1265 
1266   // Does this user itself evaluate to a recurrence after widening?
1267   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1268   if (!WideAddRec.first)
1269     WideAddRec = getWideRecurrence(DU);
1270 
1271   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1272   if (!WideAddRec.first) {
1273     // If use is a loop condition, try to promote the condition instead of
1274     // truncating the IV first.
1275     if (widenLoopCompare(DU))
1276       return nullptr;
1277 
1278     // We are here about to generate a truncate instruction that may hurt
1279     // performance because the scalar evolution expression computed earlier
1280     // in WideAddRec.first does not indicate a polynomial induction expression.
1281     // In that case, look at the operands of the use instruction to determine
1282     // if we can still widen the use instead of truncating its operand.
1283     if (widenWithVariantUse(DU))
1284       return nullptr;
1285 
1286     // This user does not evaluate to a recurrence after widening, so don't
1287     // follow it. Instead insert a Trunc to kill off the original use,
1288     // eventually isolating the original narrow IV so it can be removed.
1289     truncateIVUse(DU, DT, LI);
1290     return nullptr;
1291   }
1292   // Assume block terminators cannot evaluate to a recurrence. We can't
1293   // insert a Trunc after a terminator if there happens to be a critical edge.
1294   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1295          "SCEV is not expected to evaluate a block terminator");
1296 
1297   // Reuse the IV increment that SCEVExpander created as long as it dominates
1298   // NarrowUse.
1299   Instruction *WideUse = nullptr;
1300   if (WideAddRec.first == WideIncExpr &&
1301       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1302     WideUse = WideInc;
1303   else {
1304     WideUse = cloneIVUser(DU, WideAddRec.first);
1305     if (!WideUse)
1306       return nullptr;
1307   }
1308   // Evaluation of WideAddRec ensured that the narrow expression could be
1309   // extended outside the loop without overflow. This suggests that the wide use
1310   // evaluates to the same expression as the extended narrow use, but doesn't
1311   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1312   // where it fails, we simply throw away the newly created wide use.
1313   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1314     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1315                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1316                       << "\n");
1317     DeadInsts.emplace_back(WideUse);
1318     return nullptr;
1319   }
1320 
1321   // If we reached this point, we are going to replace DU.NarrowUse with
1322   // WideUse, so reattach any DbgValues to the wide value now.
1323   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1324 
1325   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1326   // Returning WideUse pushes it on the worklist.
1327   return WideUse;
1328 }
1329 
1330 /// Add eligible users of NarrowDef to NarrowIVUsers.
1331 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1332   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1333   bool NonNegativeDef =
1334       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1335                            SE->getZero(NarrowSCEV->getType()));
1336   for (User *U : NarrowDef->users()) {
1337     Instruction *NarrowUser = cast<Instruction>(U);
1338 
1339     // Handle data flow merges and bizarre phi cycles.
1340     if (!Widened.insert(NarrowUser).second)
1341       continue;
1342 
1343     bool NonNegativeUse = false;
1344     if (!NonNegativeDef) {
1345       // We might have control-dependent range information for this context.
1346       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1347         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1348     }
1349 
1350     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1351                                NonNegativeDef || NonNegativeUse);
1352   }
1353 }
1354 
1355 /// Process a single induction variable. First use the SCEVExpander to create a
1356 /// wide induction variable that evaluates to the same recurrence as the
1357 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1358 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1359 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1360 ///
1361 /// It would be simpler to delete uses as they are processed, but we must avoid
1362 /// invalidating SCEV expressions.
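///
/// As an illustrative sketch (the exact IR depends on the loop), widening an
/// i32 IV whose i64 extension is known not to overflow might turn
///
///   %iv      = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add nsw i32 %iv, 1
///   %idx     = sext i32 %iv.next to i64
///
/// into a single i64 recurrence
///
///   %iv.wide      = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]
///   %iv.wide.next = add nsw i64 %iv.wide, 1
///
/// with the sext replaced by %iv.wide.next, after which the narrow phi and
/// the extend become dead and are queued for deletion.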
1363 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1364   // Bail if we disallowed widening.
1365   if (!AllowIVWidening)
1366     return nullptr;
1367 
1368   // Is this phi an induction variable?
1369   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1370   if (!AddRec)
1371     return nullptr;
1372 
1373   // Widen the induction variable expression.
1374   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1375                                ? SE->getSignExtendExpr(AddRec, WideType)
1376                                : SE->getZeroExtendExpr(AddRec, WideType);
1377 
1378   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1379          "Expect the new IV expression to preserve its type");
1380 
1381   // Can the IV be extended outside the loop without overflow?
1382   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1383   if (!AddRec || AddRec->getLoop() != L)
1384     return nullptr;
1385 
1386   // An AddRec must have loop-invariant operands. Since this AddRec is
1387   // materialized by a loop header phi, the expression cannot have any post-loop
1388   // operands, so they must dominate the loop header.
1389   assert(
1390       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1391       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1392       "Loop header phi recurrence inputs do not dominate the loop");
1393 
1394   // Iterate over IV uses (including transitive ones) looking for IV increments
1395   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1396   // the increment, calculate control-dependent range information based on
1397   // dominating conditions inside of the loop (e.g. a range check inside of the
1398   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1399   //
1400   // Control-dependent range information is later used to prove that a narrow
1401   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1402   // this on demand because when pushNarrowIVUsers needs this information, some
1403   // of the dominating conditions might already have been widened.
1404   if (UsePostIncrementRanges)
1405     calculatePostIncRanges(OrigPhi);
1406 
1407   // The rewriter provides a value for the desired IV expression. This may
1408   // either find an existing phi or materialize a new one. Either way, we
1409   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1410   // of the phi-SCC dominates the loop entry.
1411   Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
1412   Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
1413   // If the expanded value is not a phi node, for example a cast node such as
1414   // bitcast, inttoptr or ptrtoint, just skip it for now.
1415   if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
1416     // If the cast node is an inserted instruction without any users, remove
1417     // it to make sure the pass doesn't touch the function, since we cannot
1418     // widen the phi.
1419     if (ExpandInst->hasNUses(0) &&
1420         Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
1421       DeadInsts.emplace_back(ExpandInst);
1422     return nullptr;
1423   }
1424 
1425   // Remembering the WideIV increment generated by SCEVExpander allows
1426   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1427   // employ a general reuse mechanism because the call above is the only call to
1428   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1429   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1430     WideInc =
1431       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1432     WideIncExpr = SE->getSCEV(WideInc);
1433     // Propagate the debug location associated with the original loop increment
1434     // to the new (widened) increment.
1435     auto *OrigInc =
1436       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1437     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1438   }
1439 
1440   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1441   ++NumWidened;
1442 
1443   // Traverse the def-use chain using a worklist starting at the original IV.
1444   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1445 
1446   Widened.insert(OrigPhi);
1447   pushNarrowIVUsers(OrigPhi, WidePhi);
1448 
1449   while (!NarrowIVUsers.empty()) {
1450     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1451 
1452     // Process a def-use edge. This may replace the use, so don't hold a
1453     // use_iterator across it.
1454     Instruction *WideUse = widenIVUse(DU, Rewriter);
1455 
1456     // Follow all def-use edges from the previous narrow use.
1457     if (WideUse)
1458       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1459 
1460     // widenIVUse may have removed the def-use edge.
1461     if (DU.NarrowDef->use_empty())
1462       DeadInsts.emplace_back(DU.NarrowDef);
1463   }
1464 
1465   // Attach any debug information to the new PHI.
1466   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1467 
1468   return WidePhi;
1469 }
1470 
1471 /// Calculates control-dependent range for the given def at the given context
1472 /// by looking at dominating conditions inside of the loop
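///
/// For example (a hypothetical sketch), given %x = add nsw i32 %a, 1 and a
/// dominating condition establishing %a >=s 0 on the path to NarrowUser, the
/// signed range of %x in that context is constrained to be positive; this is
/// what later lets pushNarrowIVUsers treat the def as non-negative at that
/// particular user even when it is not known non-negative globally.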
1473 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1474                                     Instruction *NarrowUser) {
1475   using namespace llvm::PatternMatch;
1476 
1477   Value *NarrowDefLHS;
1478   const APInt *NarrowDefRHS;
1479   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1480                                  m_APInt(NarrowDefRHS))) ||
1481       !NarrowDefRHS->isNonNegative())
1482     return;
1483 
1484   auto UpdateRangeFromCondition = [&] (Value *Condition,
1485                                        bool TrueDest) {
1486     CmpInst::Predicate Pred;
1487     Value *CmpRHS;
1488     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1489                                  m_Value(CmpRHS))))
1490       return;
1491 
1492     CmpInst::Predicate P =
1493             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1494 
1495     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1496     auto CmpConstrainedLHSRange =
1497             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1498     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1499         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1500 
1501     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1502   };
1503 
1504   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1505     if (!HasGuards)
1506       return;
1507 
1508     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1509                                      Ctx->getParent()->rend())) {
1510       Value *C = nullptr;
1511       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1512         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1513     }
1514   };
1515 
1516   UpdateRangeFromGuards(NarrowUser);
1517 
1518   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1519   // If NarrowUserBB is statically unreachable, asking dominator queries may
1520   // yield surprising results (e.g. the block may not have a dom tree node).
1521   if (!DT->isReachableFromEntry(NarrowUserBB))
1522     return;
1523 
1524   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1525        L->contains(DTB->getBlock());
1526        DTB = DTB->getIDom()) {
1527     auto *BB = DTB->getBlock();
1528     auto *TI = BB->getTerminator();
1529     UpdateRangeFromGuards(TI);
1530 
1531     auto *BI = dyn_cast<BranchInst>(TI);
1532     if (!BI || !BI->isConditional())
1533       continue;
1534 
1535     auto *TrueSuccessor = BI->getSuccessor(0);
1536     auto *FalseSuccessor = BI->getSuccessor(1);
1537 
1538     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1539       return BBE.isSingleEdge() &&
1540              DT->dominates(BBE, NarrowUser->getParent());
1541     };
1542 
1543     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1544       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1545 
1546     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1547       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1548   }
1549 }
1550 
1551 /// Calculates PostIncRangeInfos map for the given IV
1552 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1553   SmallPtrSet<Instruction *, 16> Visited;
1554   SmallVector<Instruction *, 6> Worklist;
1555   Worklist.push_back(OrigPhi);
1556   Visited.insert(OrigPhi);
1557 
1558   while (!Worklist.empty()) {
1559     Instruction *NarrowDef = Worklist.pop_back_val();
1560 
1561     for (Use &U : NarrowDef->uses()) {
1562       auto *NarrowUser = cast<Instruction>(U.getUser());
1563 
1564       // Don't go looking outside the current loop.
1565       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1566       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1567         continue;
1568 
1569       if (!Visited.insert(NarrowUser).second)
1570         continue;
1571 
1572       Worklist.push_back(NarrowUser);
1573 
1574       calculatePostIncRange(NarrowDef, NarrowUser);
1575     }
1576   }
1577 }
1578 
1579 //===----------------------------------------------------------------------===//
1580 //  Live IV Reduction - Minimize IVs live across the loop.
1581 //===----------------------------------------------------------------------===//
1582 
1583 //===----------------------------------------------------------------------===//
1584 //  Simplification of IV users based on SCEV evaluation.
1585 //===----------------------------------------------------------------------===//
1586 
1587 namespace {
1588 
1589 class IndVarSimplifyVisitor : public IVVisitor {
1590   ScalarEvolution *SE;
1591   const TargetTransformInfo *TTI;
1592   PHINode *IVPhi;
1593 
1594 public:
1595   WideIVInfo WI;
1596 
1597   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1598                         const TargetTransformInfo *TTI,
1599                         const DominatorTree *DTree)
1600     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1601     DT = DTree;
1602     WI.NarrowIV = IVPhi;
1603   }
1604 
1605   // Implement the interface used by simplifyUsersOfIV.
1606   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1607 };
1608 
1609 } // end anonymous namespace
1610 
1611 /// Iteratively perform simplification on a worklist of IV users. Each
1612 /// successive simplification may push more users which may themselves be
1613 /// candidates for simplification.
1614 ///
1615 /// Sign/Zero extend elimination is interleaved with IV simplification.
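///
/// As a rough sketch of one outer round: every header phi is first run through
/// simplifyUsersOfIV; any phi whose users include a sign/zero extend to a
/// wider legal type (say i32 -> i64) records that type in its WideIVInfo,
/// WidenIV then replaces the narrow phi with a wide one, and the wide phi is
/// pushed back onto LoopPhis so its users get another round of simplification.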
1616 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1617                                        SCEVExpander &Rewriter,
1618                                        LoopInfo *LI) {
1619   SmallVector<WideIVInfo, 8> WideIVs;
1620 
1621   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1622           Intrinsic::getName(Intrinsic::experimental_guard));
1623   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1624 
1625   SmallVector<PHINode*, 8> LoopPhis;
1626   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1627     LoopPhis.push_back(cast<PHINode>(I));
1628   }
1629   // Each round of simplification iterates through the SimplifyIVUsers worklist
1630   // for all current phis, then determines whether any IVs can be
1631   // widened. Widening adds new phis to LoopPhis, inducing another round of
1632   // simplification on the wide IVs.
1633   bool Changed = false;
1634   while (!LoopPhis.empty()) {
1635     // Evaluate as many IV expressions as possible before widening any IVs. This
1636     // forces SCEV to set no-wrap flags before evaluating sign/zero
1637     // extension. The first time SCEV attempts to normalize sign/zero extension,
1638     // the result becomes final. So for the most predictable results, we delay
1639     // evaluation of sign/zero extension until needed, and avoid running
1640     // other SCEV based analysis prior to simplifyAndExtend.
1641     do {
1642       PHINode *CurrIV = LoopPhis.pop_back_val();
1643 
1644       // Information about sign/zero extensions of CurrIV.
1645       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1646 
1647       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
1648                                    &Visitor);
1649 
1650       if (Visitor.WI.WidestNativeType) {
1651         WideIVs.push_back(Visitor.WI);
1652       }
1653     } while(!LoopPhis.empty());
1654 
1655     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1656       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1657       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1658         Changed = true;
1659         LoopPhis.push_back(WidePhi);
1660       }
1661     }
1662   }
1663   return Changed;
1664 }
1665 
1666 //===----------------------------------------------------------------------===//
1667 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1668 //===----------------------------------------------------------------------===//
1669 
1670 /// Given a Value which is hoped to be part of an add recurrence in the given
1671 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1672 /// that this is less general than SCEV's AddRec checking.
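///
/// The shape being matched is roughly
///
///   %phi = phi i64 [ %start, %preheader ], [ %inc, %latch ]  ; in the header
///   %inc = add i64 %phi, %step                               ; %step loop-invariant
///
/// with add/sub accepted in either operand order, and a two-operand GEP
/// accepted in place of the add when the phi is its pointer operand.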
1673 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1674   Instruction *IncI = dyn_cast<Instruction>(IncV);
1675   if (!IncI)
1676     return nullptr;
1677 
1678   switch (IncI->getOpcode()) {
1679   case Instruction::Add:
1680   case Instruction::Sub:
1681     break;
1682   case Instruction::GetElementPtr:
1683     // An IV counter must preserve its type.
1684     if (IncI->getNumOperands() == 2)
1685       break;
1686     LLVM_FALLTHROUGH;
1687   default:
1688     return nullptr;
1689   }
1690 
1691   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1692   if (Phi && Phi->getParent() == L->getHeader()) {
1693     if (L->isLoopInvariant(IncI->getOperand(1)))
1694       return Phi;
1695     return nullptr;
1696   }
1697   if (IncI->getOpcode() == Instruction::GetElementPtr)
1698     return nullptr;
1699 
1700   // Allow add/sub to be commuted.
1701   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1702   if (Phi && Phi->getParent() == L->getHeader()) {
1703     if (L->isLoopInvariant(IncI->getOperand(0)))
1704       return Phi;
1705   }
1706   return nullptr;
1707 }
1708 
1709 /// Whether the current loop exit test is based on this value.  Currently this
1710 /// is limited to a direct use in the loop condition.
1711 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
1712   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1713   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1714   // TODO: Allow non-icmp loop test.
1715   if (!ICmp)
1716     return false;
1717 
1718   // TODO: Allow indirect use.
1719   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
1720 }
1721 
1722 /// linearFunctionTestReplace policy. Return true unless we can show that the
1723 /// current exit test is already sufficiently canonical.
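///
/// For instance, an exit test of the form 'icmp slt %iv, %n' still wants LFTR,
/// whereas 'icmp ne %iv, %n' with a loop-invariant %n and %iv a simple header
/// counter is already considered canonical and is left alone.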
1724 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
1725   assert(L->getLoopLatch() && "Must be in simplified form");
1726 
1727   // Avoid converting a constant or loop invariant test back to a runtime
1728   // test.  This is critical for when SCEV's cached ExitCount is less precise
1729   // than the current IR (such as after we've proven a particular exit is
1730   // actually dead and thus the BE count never reaches our ExitCount.)
1731   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1732   if (L->isLoopInvariant(BI->getCondition()))
1733     return false;
1734 
1735   // Do LFTR to simplify the exit condition to an ICMP.
1736   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1737   if (!Cond)
1738     return true;
1739 
1740   // Do LFTR to simplify the exit ICMP to EQ/NE
1741   ICmpInst::Predicate Pred = Cond->getPredicate();
1742   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1743     return true;
1744 
1745   // Look for a loop invariant RHS
1746   Value *LHS = Cond->getOperand(0);
1747   Value *RHS = Cond->getOperand(1);
1748   if (!L->isLoopInvariant(RHS)) {
1749     if (!L->isLoopInvariant(LHS))
1750       return true;
1751     std::swap(LHS, RHS);
1752   }
1753   // Look for a simple IV counter LHS
1754   PHINode *Phi = dyn_cast<PHINode>(LHS);
1755   if (!Phi)
1756     Phi = getLoopPhiForCounter(LHS, L);
1757 
1758   if (!Phi)
1759     return true;
1760 
1761   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1762   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1763   if (Idx < 0)
1764     return true;
1765 
1766   // Do LFTR if the exit condition's IV is *not* a simple counter.
1767   Value *IncV = Phi->getIncomingValue(Idx);
1768   return Phi != getLoopPhiForCounter(IncV, L);
1769 }
1770 
1771 /// Return true if undefined behavior would provably be executed on the path to
1772 /// OnPathTo if Root produced a poison result.  Note that this doesn't say
1773 /// anything about whether OnPathTo is actually executed or whether Root is
1774 /// actually poison.  This can be used to assess whether a new use of Root can
1775 /// be added at a location which is control equivalent with OnPathTo (such as
1776 /// immediately before it) without introducing UB which didn't previously
1777 /// exist.  Note that a false result conveys no information.
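///
/// As a sketch: if Root, assumed poison, reaches (through poison-propagating
/// instructions) an operand that must not be poison, say the pointer operand
/// of a store, and that instruction dominates OnPathTo, then executing the
/// path would be guaranteed UB and this returns true.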
1778 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
1779                                           Instruction *OnPathTo,
1780                                           DominatorTree *DT) {
1781   // Basic approach is to assume Root is poison, propagate poison forward
1782   // through all users we can easily track, and then check whether any of those
1783   // users are provably UB and must execute before our exiting block might
1784   // exit.
1785 
1786   // The set of all recursive users we've visited (which are assumed to all be
1787   // poison because of said visit)
1788   SmallSet<const Value *, 16> KnownPoison;
1789   SmallVector<const Instruction*, 16> Worklist;
1790   Worklist.push_back(Root);
1791   while (!Worklist.empty()) {
1792     const Instruction *I = Worklist.pop_back_val();
1793 
1794     // If we know this must trigger UB on a path leading to our target.
1795     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
1796       return true;
1797 
1798     // If we can't analyze propagation through this instruction, just skip it
1799     // and transitive users.  Safe as false is a conservative result.
1800     if (!propagatesPoison(cast<Operator>(I)) && I != Root)
1801       continue;
1802 
1803     if (KnownPoison.insert(I).second)
1804       for (const User *User : I->users())
1805         Worklist.push_back(cast<Instruction>(User));
1806   }
1807 
1808   // Might be non-UB, or might have a path we couldn't prove must execute on
1809   // the way to the exiting bb.
1810   return false;
1811 }
1812 
1813 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1814 /// down to checking that all operands are constant and listing instructions
1815 /// that may hide undef.
1816 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1817                                unsigned Depth) {
1818   if (isa<Constant>(V))
1819     return !isa<UndefValue>(V);
1820 
1821   if (Depth >= 6)
1822     return false;
1823 
1824   // Conservatively handle non-constant non-instructions. For example, Arguments
1825   // may be undef.
1826   Instruction *I = dyn_cast<Instruction>(V);
1827   if (!I)
1828     return false;
1829 
1830   // Load and return values may be undef.
1831   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1832     return false;
1833 
1834   // Optimistically handle other instructions.
1835   for (Value *Op : I->operands()) {
1836     if (!Visited.insert(Op).second)
1837       continue;
1838     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1839       return false;
1840   }
1841   return true;
1842 }
1843 
1844 /// Return true if the given value is concrete. We must prove that undef can
1845 /// never reach it.
1846 ///
1847 /// TODO: If we decide that this is a good approach to checking for undef, we
1848 /// may factor it into a common location.
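///
/// For example, the constant 7 is concrete, whereas undef, any load, or a
/// select with an undef arm cannot be shown concrete by this check.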
1849 static bool hasConcreteDef(Value *V) {
1850   SmallPtrSet<Value*, 8> Visited;
1851   Visited.insert(V);
1852   return hasConcreteDefImpl(V, Visited, 0);
1853 }
1854 
1855 /// Return true if this IV has any uses other than the (soon to be rewritten)
1856 /// loop exit test.
1857 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1858   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1859   Value *IncV = Phi->getIncomingValue(LatchIdx);
1860 
1861   for (User *U : Phi->users())
1862     if (U != Cond && U != IncV) return false;
1863 
1864   for (User *U : IncV->users())
1865     if (U != Cond && U != Phi) return false;
1866   return true;
1867 }
1868 
1869 /// Return true if the given phi is a "counter" in L.  A counter is an
1870 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
1871 /// step of 1.  Note that L must have exactly one latch.
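///
/// In IR terms, a counter looks roughly like
///
///   %i      = phi i64 [ %start, %preheader ], [ %i.next, %latch ]
///   %i.next = add i64 %i, 1
///
/// i.e. an affine addrec for this loop whose step is the constant 1.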
1872 static bool isLoopCounter(PHINode* Phi, Loop *L,
1873                           ScalarEvolution *SE) {
1874   assert(Phi->getParent() == L->getHeader());
1875   assert(L->getLoopLatch());
1876 
1877   if (!SE->isSCEVable(Phi->getType()))
1878     return false;
1879 
1880   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1881   if (!AR || AR->getLoop() != L || !AR->isAffine())
1882     return false;
1883 
1884   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1885   if (!Step || !Step->isOne())
1886     return false;
1887 
1888   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
1889   Value *IncV = Phi->getIncomingValue(LatchIdx);
1890   return (getLoopPhiForCounter(IncV, L) == Phi);
1891 }
1892 
1893 /// Search the loop header for a loop counter (an add rec w/step of one)
1894 /// suitable for use by LFTR.  If multiple counters are available, select the
1895 /// "best" one based on profitability heuristics.
1896 ///
1897 /// BECount may be an i8* pointer type. The pointer difference is already a
1898 /// valid count without scaling the address stride, so it remains a pointer
1899 /// expression as far as SCEV is concerned.
1900 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
1901                                 const SCEV *BECount,
1902                                 ScalarEvolution *SE, DominatorTree *DT) {
1903   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1904 
1905   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
1906 
1907   // Loop over all of the PHI nodes, looking for a simple counter.
1908   PHINode *BestPhi = nullptr;
1909   const SCEV *BestInit = nullptr;
1910   BasicBlock *LatchBlock = L->getLoopLatch();
1911   assert(LatchBlock && "Must be in simplified form");
1912   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1913 
1914   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1915     PHINode *Phi = cast<PHINode>(I);
1916     if (!isLoopCounter(Phi, L, SE))
1917       continue;
1918 
1919     // Avoid comparing an integer IV against a pointer Limit.
1920     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1921       continue;
1922 
1923     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1924 
1925     // AR may be a pointer type, while BECount is an integer type.
1926     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1927     // AR may not be a narrower type, or we may never exit.
1928     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1929     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
1930       continue;
1931 
1932     // Avoid reusing a potentially undef value to compute other values that may
1933     // have originally had a concrete definition.
1934     if (!hasConcreteDef(Phi)) {
1935       // We explicitly allow unknown phis as long as they are already used by
1936       // the loop exit test.  This is legal since performing LFTR could not
1937       // increase the number of undef users.
1938       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
1939       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
1940           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
1941         continue;
1942     }
1943 
1944     // Avoid introducing undefined behavior due to poison which didn't exist in
1945     // the original program.  (Annoyingly, the rules for poison and undef
1946     // propagation are distinct, so this does NOT cover the undef case above.)
1947     // We have to ensure that we don't introduce UB by introducing a use on an
1948     // iteration where said IV produces poison.  Our strategy here differs for
1949     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
1950     // see code in linearFunctionTestReplace.  For pointers, we restrict
1951     // transforms as there is no good way to reinfer inbounds once lost.
1952     if (!Phi->getType()->isIntegerTy() &&
1953         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
1954       continue;
1955 
1956     const SCEV *Init = AR->getStart();
1957 
1958     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
1959       // Don't force a live loop counter if another IV can be used.
1960       if (AlmostDeadIV(Phi, LatchBlock, Cond))
1961         continue;
1962 
1963       // Prefer to count from zero. This is a more "canonical" counter form. It
1964       // also prefers integer to pointer IVs.
1965       if (BestInit->isZero() != Init->isZero()) {
1966         if (BestInit->isZero())
1967           continue;
1968       }
1969       // If two IVs both count from zero or both count from nonzero then the
1970       // narrower is likely a dead phi that has been widened. Use the wider phi
1971       // to allow the other to be eliminated.
1972       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
1973         continue;
1974     }
1975     BestPhi = Phi;
1976     BestInit = Init;
1977   }
1978   return BestPhi;
1979 }
1980 
1981 /// Insert an IR expression which computes the value held by the IV IndVar
1982 /// (which must be a loop counter w/unit stride) after the backedge of loop L
1983 /// is taken ExitCount times.
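///
/// As a small worked example, for a unit-stride counter with start 7 and
/// ExitCount 93, the pre-increment limit is 7 + 93 == 100 and the
/// post-increment limit is 101; the caller then compares the corresponding
/// (pre- or post-increment) IV against that value with an eq/ne test.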
1984 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
1985                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
1986                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
1987   assert(isLoopCounter(IndVar, L, SE));
1988   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
1989   const SCEV *IVInit = AR->getStart();
1990 
1991   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
1992   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
1993   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
1994   // the existing GEPs whenever possible.
1995   if (IndVar->getType()->isPointerTy() &&
1996       !ExitCount->getType()->isPointerTy()) {
1997     // IVOffset will be the new GEP offset that is interpreted by GEP as a
1998     // signed value. ExitCount on the other hand represents the loop trip count,
1999     // which is an unsigned value. FindLoopCounter only allows induction
2000     // variables that have a positive unit stride of one. This means we don't
2001     // have to handle the case of negative offsets (yet) and just need to zero
2002     // extend ExitCount.
2003     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2004     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2005     if (UsePostInc)
2006       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2007 
2008     // Expand the code for the iteration count.
2009     assert(SE->isLoopInvariant(IVOffset, L) &&
2010            "Computed iteration count is not loop invariant!");
2011 
2012     // We could handle pointer IVs other than i8*, but we need to compensate for
2013     // gep index scaling.
2014     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2015                              cast<PointerType>(IndVar->getType())
2016                                  ->getElementType())->isOne() &&
2017            "unit stride pointer IV must be i8*");
2018 
2019     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2020     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2021     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2022   } else {
2023     // In any other case, convert both IVInit and ExitCount to integers before
2024     // comparing. This may result in SCEV expansion of pointers, but in practice
2025     // SCEV will fold the pointer arithmetic away as such:
2026     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2027     //
2028     // Valid Cases: (1) both are integers, which is the most common; (2) both
2029     // may be pointers for simple memset-style loops.
2030     //
2031     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2032     // were generated on top of case #2, which is not expected.
2033 
2034     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2035     // For unit stride, IVCount = Start + ExitCount with 2's complement
2036     // overflow.
2037 
2038     // For integer IVs, truncate the IV before computing IVInit + BECount,
2039     // unless we know a priori that the limit must be a constant when evaluated
2040     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2041     // of the IV in the loop over a (potentially) expensive expansion of the
2042     // widened exit count add(zext(add)) expression.
2043     if (SE->getTypeSizeInBits(IVInit->getType())
2044         > SE->getTypeSizeInBits(ExitCount->getType())) {
2045       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2046         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2047       else
2048         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2049     }
2050 
2051     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2052 
2053     if (UsePostInc)
2054       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2055 
2056     // Expand the code for the iteration count.
2057     assert(SE->isLoopInvariant(IVLimit, L) &&
2058            "Computed iteration count is not loop invariant!");
2059     // Ensure that we generate the same type as IndVar, or a smaller integer
2060     // type. In the presence of null pointer values, we have an integer type
2061     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2062     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2063       IndVar->getType() : ExitCount->getType();
2064     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2065     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2066   }
2067 }
2068 
2069 /// This method rewrites the exit condition of the loop to be a canonical !=
2070 /// comparison against the incremented loop induction variable.  This pass is
2071 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2072 /// determine a loop-invariant trip count of the loop, which is actually a much
2073 /// broader range than just linear tests.
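///
/// As an illustration (names are just for the sketch), an exit test such as
///
///   %cmp = icmp slt i32 %iv, %n
///   br i1 %cmp, label %loop, label %exit
///
/// is rewritten into a comparison of the chosen counter against the computed
/// limit, e.g.
///
///   %exitcond = icmp ne i32 %iv.next, %wide.trip.count
///
/// where the ne/eq predicate is picked according to which successor stays in
/// the loop.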
2074 bool IndVarSimplify::
2075 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2076                           const SCEV *ExitCount,
2077                           PHINode *IndVar, SCEVExpander &Rewriter) {
2078   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2079   assert(isLoopCounter(IndVar, L, SE));
2080   Instruction * const IncVar =
2081     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2082 
2083   // Initialize CmpIndVar to the preincremented IV.
2084   Value *CmpIndVar = IndVar;
2085   bool UsePostInc = false;
2086 
2087   // If the exiting block is the same as the backedge block, we prefer to
2088   // compare against the post-incremented value, otherwise we must compare
2089   // against the preincremented value.
2090   if (ExitingBB == L->getLoopLatch()) {
2091     // For pointer IVs, we chose not to strip inbounds, which requires us not
2092     // to add a potentially UB-introducing use.  We need to either a) show
2093     // the loop test we're modifying is already in post-inc form, or b) show
2094     // that adding a use must not introduce UB.
2095     bool SafeToPostInc =
2096         IndVar->getType()->isIntegerTy() ||
2097         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2098         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2099     if (SafeToPostInc) {
2100       UsePostInc = true;
2101       CmpIndVar = IncVar;
2102     }
2103   }
2104 
2105   // It may be necessary to drop nowrap flags on the incrementing instruction
2106   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2107   // case the increment might have previously been poison on the last iteration
2108   // only) or if LFTR switches to a different IV that was previously dynamically
2109   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2110   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2111   // check), because the pre-inc addrec flags may be adopted from the original
2112   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2113   // TODO: This handling is inaccurate for one case: If we switch to a
2114   // dynamically dead IV that wraps on the first loop iteration only, which is
2115   // not covered by the post-inc addrec. (If the new IV was not dynamically
2116   // dead, it could not be poison on the first iteration in the first place.)
2117   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2118     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2119     if (BO->hasNoUnsignedWrap())
2120       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2121     if (BO->hasNoSignedWrap())
2122       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2123   }
2124 
2125   Value *ExitCnt = genLoopLimit(
2126       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2127   assert(ExitCnt->getType()->isPointerTy() ==
2128              IndVar->getType()->isPointerTy() &&
2129          "genLoopLimit missed a cast");
2130 
2131   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2132   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2133   ICmpInst::Predicate P;
2134   if (L->contains(BI->getSuccessor(0)))
2135     P = ICmpInst::ICMP_NE;
2136   else
2137     P = ICmpInst::ICMP_EQ;
2138 
2139   IRBuilder<> Builder(BI);
2140 
2141   // The new loop exit condition should reuse the debug location of the
2142   // original loop exit condition.
2143   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2144     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2145 
2146   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2147   // avoid the expensive expansion of the limit expression in the wider type,
2148   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2149     // since we know (from the exit count bitwidth) that we can't self-wrap in
2150   // the narrower type.
2151   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2152   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2153   if (CmpIndVarSize > ExitCntSize) {
2154     assert(!CmpIndVar->getType()->isPointerTy() &&
2155            !ExitCnt->getType()->isPointerTy());
2156 
2157     // Before resorting to actually inserting the truncate, use the same
2158     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2159     // the other side of the comparison instead.  We still evaluate the limit
2160     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2161     // a truncate within it.
2162     bool Extended = false;
2163     const SCEV *IV = SE->getSCEV(CmpIndVar);
2164     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2165                                                   ExitCnt->getType());
2166     const SCEV *ZExtTrunc =
2167       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2168 
2169     if (ZExtTrunc == IV) {
2170       Extended = true;
2171       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2172                                    "wide.trip.count");
2173     } else {
2174       const SCEV *SExtTrunc =
2175         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2176       if (SExtTrunc == IV) {
2177         Extended = true;
2178         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2179                                      "wide.trip.count");
2180       }
2181     }
2182 
2183     if (Extended) {
2184       bool Discard;
2185       L->makeLoopInvariant(ExitCnt, Discard);
2186     } else
2187       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2188                                       "lftr.wideiv");
2189   }
2190   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2191                     << "      LHS:" << *CmpIndVar << '\n'
2192                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2193                     << "\n"
2194                     << "      RHS:\t" << *ExitCnt << "\n"
2195                     << "ExitCount:\t" << *ExitCount << "\n"
2196                     << "  was: " << *BI->getCondition() << "\n");
2197 
2198   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2199   Value *OrigCond = BI->getCondition();
2200   // It's tempting to use replaceAllUsesWith here to fully replace the old
2201   // comparison, but that's not immediately safe, since users of the old
2202   // comparison may not be dominated by the new comparison. Instead, just
2203   // update the branch to use the new comparison; in the common case this
2204   // will make the old comparison dead.
2205   BI->setCondition(Cond);
2206   DeadInsts.emplace_back(OrigCond);
2207 
2208   ++NumLFTR;
2209   return true;
2210 }
2211 
2212 //===----------------------------------------------------------------------===//
2213 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2214 //===----------------------------------------------------------------------===//
2215 
2216 /// If there's a single exit block, sink any loop-invariant values that
2217 /// were defined in the preheader but not used inside the loop into the
2218 /// exit block to reduce register pressure in the loop.
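///
/// For example (a hypothetical sketch), if the preheader computes
///
///   %p = getelementptr i8, i8* %base, i64 %off
///
/// and %p is only used in the exit block, it is moved there so it no longer
/// occupies a register across the loop body.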
2219 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2220   BasicBlock *ExitBlock = L->getExitBlock();
2221   if (!ExitBlock) return false;
2222 
2223   BasicBlock *Preheader = L->getLoopPreheader();
2224   if (!Preheader) return false;
2225 
2226   bool MadeAnyChanges = false;
2227   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2228   BasicBlock::iterator I(Preheader->getTerminator());
2229   while (I != Preheader->begin()) {
2230     --I;
2231     // New instructions were inserted at the end of the preheader.
2232     if (isa<PHINode>(I))
2233       break;
2234 
2235     // Don't move instructions which might have side effects, since the side
2236     // effects need to complete before instructions inside the loop.  Also don't
2237     // move instructions which might read memory, since the loop may modify
2238     // memory. Note that it's okay if the instruction might have undefined
2239     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2240     // block.
2241     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2242       continue;
2243 
2244     // Skip debug info intrinsics.
2245     if (isa<DbgInfoIntrinsic>(I))
2246       continue;
2247 
2248     // Skip eh pad instructions.
2249     if (I->isEHPad())
2250       continue;
2251 
2252     // Don't sink alloca: we never want to sink static alloca's out of the
2253     // entry block, and correctly sinking dynamic alloca's requires
2254     // checks for stacksave/stackrestore intrinsics.
2255     // FIXME: Refactor this check somehow?
2256     if (isa<AllocaInst>(I))
2257       continue;
2258 
2259     // Determine if there is a use in or before the loop (direct or
2260     // otherwise).
2261     bool UsedInLoop = false;
2262     for (Use &U : I->uses()) {
2263       Instruction *User = cast<Instruction>(U.getUser());
2264       BasicBlock *UseBB = User->getParent();
2265       if (PHINode *P = dyn_cast<PHINode>(User)) {
2266         unsigned i =
2267           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2268         UseBB = P->getIncomingBlock(i);
2269       }
2270       if (UseBB == Preheader || L->contains(UseBB)) {
2271         UsedInLoop = true;
2272         break;
2273       }
2274     }
2275 
2276     // If there is, the def must remain in the preheader.
2277     if (UsedInLoop)
2278       continue;
2279 
2280     // Otherwise, sink it to the exit block.
2281     Instruction *ToMove = &*I;
2282     bool Done = false;
2283 
2284     if (I != Preheader->begin()) {
2285       // Skip debug info intrinsics.
2286       do {
2287         --I;
2288       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2289 
2290       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2291         Done = true;
2292     } else {
2293       Done = true;
2294     }
2295 
2296     MadeAnyChanges = true;
2297     ToMove->moveBefore(*ExitBlock, InsertPt);
2298     if (Done) break;
2299     InsertPt = ToMove->getIterator();
2300   }
2301 
2302   return MadeAnyChanges;
2303 }
2304 
2305 // Returns true if the condition of \p BI being checked is invariant and can be
2306 // proved to be trivially true during at least the first \p MaxIter iterations.
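//
// For example (an illustrative case), with MaxIter == 10, a check
// 'icmp ult i64 %i, 100' where %i starts at 0 and steps by 1 is trivially
// true throughout the first 10 iterations, even though the compare itself is
// not loop-invariant.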
2307 static bool isTrivialCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE,
2308                           bool ProvingLoopExit, const SCEV *MaxIter) {
2309   ICmpInst::Predicate Pred;
2310   Value *LHS, *RHS;
2311   using namespace PatternMatch;
2312   BasicBlock *TrueSucc, *FalseSucc;
2313   if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
2314                       m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
2315     return false;
2316 
2317   assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
2318          "Not a loop exit!");
2319 
2320   // 'LHS pred RHS' should now mean that we stay in loop.
2321   if (L->contains(FalseSucc))
2322     Pred = CmpInst::getInversePredicate(Pred);
2323 
2324   // If we are proving loop exit, invert the predicate.
2325   if (ProvingLoopExit)
2326     Pred = CmpInst::getInversePredicate(Pred);
2327 
2328   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
2329   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
2330   // Can we prove it to be trivially true?
2331   if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI))
2332     return true;
2333 
2334   if (ProvingLoopExit)
2335     return false;
2336 
2337   ICmpInst::Predicate InvariantPred;
2338   const SCEV *InvariantLHS, *InvariantRHS;
2339 
2340   // Check if there is a loop-invariant predicate equivalent to our check.
2341   if (!SE->isLoopInvariantExitCondDuringFirstIterations(
2342            Pred, LHSS, RHSS, L, BI, MaxIter, InvariantPred, InvariantLHS,
2343            InvariantRHS))
2344     return false;
2345 
2346   // Can we prove it to be trivially true?
2347   return SE->isKnownPredicateAt(InvariantPred, InvariantLHS, InvariantRHS, BI);
2348 }
2349 
2350 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2351   SmallVector<BasicBlock*, 16> ExitingBlocks;
2352   L->getExitingBlocks(ExitingBlocks);
2353 
2354   // Remove all exits which aren't both rewriteable and execute on every
2355   // iteration.
2356   auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2357     // If our exiting block exits multiple loops, we can only rewrite the
2358     // innermost one.  Otherwise, we're changing how many times the innermost
2359     // loop runs before it exits.
2360     if (LI->getLoopFor(ExitingBB) != L)
2361       return true;
2362 
2363     // Can't rewrite non-branch yet.
2364     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2365     if (!BI)
2366       return true;
2367 
2368     // If already constant, nothing to do.
2369     if (isa<Constant>(BI->getCondition()))
2370       return true;
2371 
2372     // Likewise, the loop latch must be dominated by the exiting BB.
2373     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
2374       return true;
2375 
2376     return false;
2377   });
2378   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2379 
2380   if (ExitingBlocks.empty())
2381     return false;
2382 
2383   // Get a symbolic upper bound on the loop backedge taken count.
2384   const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
2385   if (isa<SCEVCouldNotCompute>(MaxExitCount))
2386     return false;
2387 
2388   // Visit our exit blocks in order of dominance. Since all of these exits
2389   // must dominate the latch, there is a total dominance order between them.
2391   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
2392                // std::sort sorts in ascending order, so we want the inverse of
2393                // the normal dominance relation.
2394                if (A == B) return false;
2395                if (DT->properlyDominates(A, B))
2396                  return true;
2397                else {
2398                  assert(DT->properlyDominates(B, A) &&
2399                         "expected total dominance order!");
2400                  return false;
2401                }
2402   });
2403 #ifndef NDEBUG
2404   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2405     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2406   }
2407 #endif
2408 
2409   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2410     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2411     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2412     auto *OldCond = BI->getCondition();
2413     auto *NewCond = ConstantInt::get(OldCond->getType(),
2414                                      IsTaken ? ExitIfTrue : !ExitIfTrue);
2415     BI->setCondition(NewCond);
2416     if (OldCond->use_empty())
2417       DeadInsts.emplace_back(OldCond);
2418   };
2419 
2420   bool Changed = false;
2421   SmallSet<const SCEV*, 8> DominatingExitCounts;
2422   for (BasicBlock *ExitingBB : ExitingBlocks) {
2423     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2424     if (isa<SCEVCouldNotCompute>(ExitCount)) {
2425       // Okay, we do not know the exit count here. Can we at least prove that it
2426       // will remain the same within iteration space?
2427       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2428       auto OptimizeCond = [&](bool Inverted) {
2429         if (isTrivialCond(L, BI, SE, Inverted, MaxExitCount)) {
2430           FoldExit(ExitingBB, Inverted);
2431           return true;
2432         }
2433         return false;
2434       };
2435       if (OptimizeCond(false) || OptimizeCond(true))
2436         Changed = true;
2437       continue;
2438     }
2439 
2440     // If we know we'd exit on the first iteration, rewrite the exit to
2441     // reflect this.  This does not imply the loop must exit through this
2442     // exit; there may be an earlier one taken on the first iteration.
2443     // TODO: Given we know the backedge can't be taken, we should go ahead
2444     // and break it.  Or at least, kill all the header phis and simplify.
2445     if (ExitCount->isZero()) {
2446       FoldExit(ExitingBB, true);
2447       Changed = true;
2448       continue;
2449     }
2450 
2451     // If we end up with a pointer exit count, bail.  Note that we can end up
2452     // with a pointer exit count for one exiting block, and not for another in
2453     // the same loop.
2454     if (!ExitCount->getType()->isIntegerTy() ||
2455         !MaxExitCount->getType()->isIntegerTy())
2456       continue;
2457 
2458     Type *WiderType =
2459       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2460     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2461     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2462     assert(MaxExitCount->getType() == ExitCount->getType());
2463 
2464     // Can we prove that some other exit must be taken strictly before this
2465     // one?
2466     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2467                                      MaxExitCount, ExitCount)) {
2468       FoldExit(ExitingBB, false);
2469       Changed = true;
2470       continue;
2471     }
2472 
2473     // As we run, keep track of which exit counts we've encountered.  If we
2474     // find a duplicate, we've found an exit which would have exited on the
2475     // exiting iteration, but (from the visit order) strictly follows another
2476     // which does the same and is thus dead.
2477     if (!DominatingExitCounts.insert(ExitCount).second) {
2478       FoldExit(ExitingBB, false);
2479       Changed = true;
2480       continue;
2481     }
2482 
2483     // TODO: There might be another opportunity to leverage SCEV's reasoning
2484     // here.  If we kept track of the min of dominating exits so far, we could
2485     // discharge exits with EC >= MDEC. This is less powerful than the existing
2486     // transform (since later exits aren't considered), but potentially more
2487     // powerful for any case where SCEV can prove a >=u b, but neither a == b
2488     // nor a >u b.  Such a case is not currently known.
2489   }
2490   return Changed;
2491 }
2492 
2493 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2494   SmallVector<BasicBlock*, 16> ExitingBlocks;
2495   L->getExitingBlocks(ExitingBlocks);
2496 
2497   // Finally, see if we can rewrite our exit conditions into a loop invariant
2498   // form. If we have a read-only loop, and we can tell that we must exit down
2499   // a path which does not need any of the values computed within the loop, we
2500   // can rewrite the loop to exit on the first iteration.  Note that this
2501   // doesn't either a) tell us the loop exits on the first iteration (unless
2502   // *all* exits are predicateable) or b) tell us *which* exit might be taken.
2503   // This transformation looks a lot like a restricted form of dead loop
2504   // elimination, but restricted to read-only loops and without necessarily
2505   // needing to kill the loop entirely.
2506   if (!LoopPredication)
2507     return false;
2508 
2509   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2510     return false;
2511 
2512   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2513   // through *explicit* control flow.  We have to eliminate the possibility of
2514   // implicit exits (see below) before we know it's truly exact.
2515   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2516   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2517       !SE->isLoopInvariant(ExactBTC, L) ||
2518       !isSafeToExpand(ExactBTC, *SE))
2519     return false;
2520 
2521   // If we end up with a pointer exit count, bail.  It may be unsized.
2522   if (!ExactBTC->getType()->isIntegerTy())
2523     return false;
2524 
2525   auto BadExit = [&](BasicBlock *ExitingBB) {
2526     // If our exiting block exits multiple loops, we can only rewrite the
2527     // innermost one.  Otherwise, we're changing how many times the innermost
2528     // loop runs before it exits.
2529     if (LI->getLoopFor(ExitingBB) != L)
2530       return true;
2531 
2532     // Can't rewrite non-branch yet.
2533     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2534     if (!BI)
2535       return true;
2536 
2537     // If already constant, nothing to do.
2538     if (isa<Constant>(BI->getCondition()))
2539       return true;
2540 
2541     // If the exit block has phis, we need to be able to compute the values
2542     // within the loop which contains them.  This assumes trivial LCSSA phis
2543     // have already been removed; TODO: generalize
2544     BasicBlock *ExitBlock =
2545     BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2546     if (!ExitBlock->phis().empty())
2547       return true;
2548 
2549     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2550     assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2551     if (!SE->isLoopInvariant(ExitCount, L) ||
2552         !isSafeToExpand(ExitCount, *SE))
2553       return true;
2554 
2555     // If we end up with a pointer exit count, bail.  It may be unsized.
2556     if (!ExitCount->getType()->isIntegerTy())
2557       return true;
2558 
2559     return false;
2560   };
2561 
2562   // If we have any exits which can't be predicated themselves, then we can't
2563   // predicate any exit which isn't guaranteed to execute before it.  Consider
2564   // two exits (a) and (b) which would both exit on the same iteration.  If we
2565   // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2566   // we could convert a loop from exiting through (a) to one exiting through
2567   // (b).  Note that this problem exists only for exits with the same exit
2568   // count, and we could be more aggressive when exit counts are known unequal.
2569   llvm::sort(ExitingBlocks,
2570             [&](BasicBlock *A, BasicBlock *B) {
2571               // std::sort sorts in ascending order, so we want the inverse of
2572               // the normal dominance relation, plus a tie breaker for blocks
2573               // unordered by dominance.
2574               if (DT->properlyDominates(A, B)) return true;
2575               if (DT->properlyDominates(B, A)) return false;
2576               return A->getName() < B->getName();
2577             });
2578   // Check to see if our exiting blocks form a total order (i.e. a linear chain
2579   // of exits before the backedge).  If they aren't, reasoning about reachability
2580   // is complicated and we choose not to do so for now.
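  // (For example, two exiting blocks in the two arms of an in-loop "if",
  // neither of which dominates the other, would fail this check.)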
2581   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2582     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2583       return false;
2584 
2585   // Given our sorted total order, we know that exit[j] must be evaluated
2586   // after all exit[i] such that j > i.
2587   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2588     if (BadExit(ExitingBlocks[i])) {
2589       ExitingBlocks.resize(i);
2590       break;
2591     }
2592 
2593   if (ExitingBlocks.empty())
2594     return false;
2595 
2596   // We rely on not being able to reach an exiting block on a later iteration
2597   // than its statically computed exit count.  The implementation of
2598   // getExitCount currently has this invariant, but assert it here so that
2599   // breakage is obvious if this ever changes.
2600   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2601         return DT->dominates(ExitingBB, L->getLoopLatch());
2602       }));
2603 
2604   // At this point, ExitingBlocks consists of only those blocks which are
2605   // predicatable.  Given that, we know we have at least one exit we can
2606   // predicate if the loop doesn't have side effects and doesn't have any
2607   // implicit exits (because then our exact BTC isn't actually exact).
2608   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
2609   // suggestions on how to improve this?  I can obviously bail out for outer
2610   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
2611   // is that enough for *all* side effects?
2612   for (BasicBlock *BB : L->blocks())
2613     for (auto &I : *BB)
2614       // TODO: use isGuaranteedToTransferExecutionToSuccessor() here
2615       if (I.mayHaveSideEffects() || I.mayThrow())
2616         return false;
2617 
2618   bool Changed = false;
2619   // Finally, do the actual predication for all predicatable blocks.  A couple
2620   // of notes here:
2621   // 1) We don't bother to constant fold dominated exits with identical exit
2622   //    counts; that's simply a form of CSE/equality propagation and we leave
2623   //    it for dedicated passes.
2624   // 2) We insert the comparison at the branch.  Hoisting introduces additional
2625   //    legality constraints and we leave that to dedicated logic.  We want to
2626   //    predicate even if we can't insert a loop invariant expression as
2627   //    peeling or unrolling will likely reduce the cost of the otherwise loop
2628   //    varying check.
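  // Informally, for an exit with count EC and exact backedge-taken count BTC,
  // the rewritten branch condition below is the loop-invariant comparison
  // "EC == BTC" (emitted as EQ or NE depending on which successor stays in
  // the loop); when EC is the very same SCEV as BTC, the condition is instead
  // folded to a constant that takes the exit.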
2629   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2630   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2631   Value *ExactBTCV = nullptr; // Lazily generated if needed.
2632   for (BasicBlock *ExitingBB : ExitingBlocks) {
2633     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2634 
2635     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2636     Value *NewCond;
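    // If this exit's count is literally the exact BTC, fold the condition to
    // the constant that takes the exit; otherwise emit the loop-invariant
    // test "this exit's count == exact BTC".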
2637     if (ExitCount == ExactBTC) {
2638       NewCond = L->contains(BI->getSuccessor(0)) ?
2639         B.getFalse() : B.getTrue();
2640     } else {
2641       Value *ECV = Rewriter.expandCodeFor(ExitCount);
2642       if (!ExactBTCV)
2643         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2644       Value *RHS = ExactBTCV;
2645       if (ECV->getType() != RHS->getType()) {
2646         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2647         ECV = B.CreateZExt(ECV, WiderTy);
2648         RHS = B.CreateZExt(RHS, WiderTy);
2649       }
2650       auto Pred = L->contains(BI->getSuccessor(0)) ?
2651         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2652       NewCond = B.CreateICmp(Pred, ECV, RHS);
2653     }
2654     Value *OldCond = BI->getCondition();
2655     BI->setCondition(NewCond);
2656     if (OldCond->use_empty())
2657       DeadInsts.emplace_back(OldCond);
2658     Changed = true;
2659   }
2660 
2661   return Changed;
2662 }
2663 
2664 //===----------------------------------------------------------------------===//
2665 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2666 //===----------------------------------------------------------------------===//
2667 
2668 bool IndVarSimplify::run(Loop *L) {
2669   // We need (and expect!) the incoming loop to be in LCSSA.
2670   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2671          "LCSSA required to run indvars!");
2672 
2673   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2674   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2675   //    canonicalization can be a pessimization without LSR to "clean up"
2676   //    afterwards.
2677   //  - We depend on having a preheader; in particular,
2678   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2679   //    and we're in trouble if we can't find the induction variable even when
2680   //    we've manually inserted one.
2681   //  - LFTR relies on having a single backedge.
2682   if (!L->isLoopSimplifyForm())
2683     return false;
2684 
2685 #ifndef NDEBUG
2686   // Used below for a consistency check only
2687   // Note: Since the result returned by ScalarEvolution may depend on the order
2688   // in which previous results are added to its cache, the call to
2689   // getBackedgeTakenCount() may change following SCEV queries.
2690   const SCEV *BackedgeTakenCount;
2691   if (VerifyIndvars)
2692     BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2693 #endif
2694 
2695   bool Changed = false;
2696   // If there are any floating-point recurrences, attempt to
2697   // transform them to use integer recurrences.
2698   Changed |= rewriteNonIntegerIVs(L);
2699 
2700   // Create a rewriter object which we'll use to transform the code with.
2701   SCEVExpander Rewriter(*SE, DL, "indvars");
2702 #ifndef NDEBUG
2703   Rewriter.setDebugType(DEBUG_TYPE);
2704 #endif
2705 
2706   // Eliminate redundant IV users.
2707   //
2708   // Simplification works best when run before other consumers of SCEV. We
2709   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2710   // other expressions involving loop IVs have been evaluated. This helps SCEV
2711   // set no-wrap flags before normalizing sign/zero extension.
2712   Rewriter.disableCanonicalMode();
2713   Changed |= simplifyAndExtend(L, Rewriter, LI);
2714 
2715   // Check to see if we can compute the final value of any expressions
2716   // that are recurrent in the loop, and substitute the exit values from the
2717   // loop into any instructions outside of the loop that use the final values
2718   // of the current expressions.
2719   if (ReplaceExitValue != NeverRepl) {
2720     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
2721                                              ReplaceExitValue, DeadInsts)) {
2722       NumReplaced += Rewrites;
2723       Changed = true;
2724     }
2725   }
2726 
2727   // Eliminate redundant IV cycles.
2728   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2729 
2730   // Try to eliminate loop exits based on analyzable exit counts.
2731   if (optimizeLoopExits(L, Rewriter))  {
2732     Changed = true;
2733     // Given we've changed exit counts, notify SCEV
2734     SE->forgetLoop(L);
2735   }
2736 
2737   // Try to form loop invariant tests for loop exits by changing how many
2738   // iterations of the loop run when that is unobservable.
2739   if (predicateLoopExits(L, Rewriter)) {
2740     Changed = true;
2741     // Given we've changed exit counts, notify SCEV
2742     SE->forgetLoop(L);
2743   }
2744 
2745   // If we have a trip count expression, rewrite the loop's exit condition
2746   // using it.
2747   if (!DisableLFTR) {
2748     BasicBlock *PreHeader = L->getLoopPreheader();
2749     BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
2750 
2751     SmallVector<BasicBlock*, 16> ExitingBlocks;
2752     L->getExitingBlocks(ExitingBlocks);
2753     for (BasicBlock *ExitingBB : ExitingBlocks) {
2754       // Can't rewrite non-branch yet.
2755       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2756         continue;
2757 
2758       // If our exiting block exits multiple loops, we can only rewrite the
2759       // innermost one.  Otherwise, we're changing how many times the innermost
2760       // loop runs before it exits.
2761       if (LI->getLoopFor(ExitingBB) != L)
2762         continue;
2763 
2764       if (!needsLFTR(L, ExitingBB))
2765         continue;
2766 
2767       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2768       if (isa<SCEVCouldNotCompute>(ExitCount))
2769         continue;
2770 
2771       // This was handled above, but as we form SCEVs, we can sometimes refine
2772   // existing ones; this can allow exit counts which weren't zero when
2773   // optimizeLoopExits saw them to now fold to zero.  Arguably, we should iterate
2774       // until stable to handle cases like this better.
2775       if (ExitCount->isZero())
2776         continue;
2777 
2778       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2779       if (!IndVar)
2780         continue;
2781 
2782       // Avoid high cost expansions.  Note: This heuristic is questionable in
2783       // that our definition of "high cost" is not exactly principled.
2784       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2785                                        TTI, PreHeaderBR))
2786         continue;
2787 
2788       // Check preconditions for proper SCEVExpander operation. SCEV does not
2789       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2790       // any pass that uses the SCEVExpander must do it. This does not work
2791       // well for loop passes because SCEVExpander makes assumptions about
2792       // all loops, while LoopPassManager only forces the current loop to be
2793       // simplified.
2794       //
2795       // FIXME: SCEV expansion has no way to bail out, so the caller must
2796       // explicitly check any assumptions made by SCEV. Brittle.
2797       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2798       if (!AR || AR->getLoop()->getLoopPreheader())
2799         Changed |= linearFunctionTestReplace(L, ExitingBB,
2800                                              ExitCount, IndVar,
2801                                              Rewriter);
2802     }
2803   }
2804   // Clear the rewriter cache, because values that are in the rewriter's cache
2805   // can be deleted in the loop below, causing the AssertingVH in the cache to
2806   // trigger.
2807   Rewriter.clear();
2808 
2809   // Now that we're done iterating through lists, clean up any instructions
2810   // which are now dead.
2811   while (!DeadInsts.empty())
2812     if (Instruction *Inst =
2813             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2814       Changed |=
2815           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2816 
2817   // The Rewriter may not be used from this point on.
2818 
2819   // Loop-invariant instructions in the preheader that aren't used in the
2820   // loop may be sunk below the loop to reduce register pressure.
2821   Changed |= sinkUnusedInvariants(L);
2822 
2823   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2824   // trip count and therefore can further simplify exit values in addition to
2825   // rewriteLoopExitValues.
2826   Changed |= rewriteFirstIterationLoopExitValues(L);
2827 
2828   // Clean up dead instructions.
2829   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2830 
2831   // Check a post-condition.
2832   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2833          "Indvars did not preserve LCSSA!");
2834 
2835   // Verify that LFTR, and any other changes, have not interfered with SCEV's
2836   // ability to compute trip count.  We may have *changed* the exit count, but
2837   // only by reducing it.
2838 #ifndef NDEBUG
2839   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2840     SE->forgetLoop(L);
2841     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2842     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2843         SE->getTypeSizeInBits(NewBECount->getType()))
2844       NewBECount = SE->getTruncateOrNoop(NewBECount,
2845                                          BackedgeTakenCount->getType());
2846     else
2847       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2848                                                  NewBECount->getType());
2849     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2850                                  NewBECount) && "indvars must preserve SCEV");
2851   }
2852   if (VerifyMemorySSA && MSSAU)
2853     MSSAU->getMemorySSA()->verifyMemorySSA();
2854 #endif
2855 
2856   return Changed;
2857 }
2858 
2859 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2860                                           LoopStandardAnalysisResults &AR,
2861                                           LPMUpdater &) {
2862   Function *F = L.getHeader()->getParent();
2863   const DataLayout &DL = F->getParent()->getDataLayout();
2864 
2865   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA);
2866   if (!IVS.run(&L))
2867     return PreservedAnalyses::all();
2868 
2869   auto PA = getLoopPassPreservedAnalyses();
2870   PA.preserveSet<CFGAnalyses>();
2871   if (AR.MSSA)
2872     PA.preserve<MemorySSAAnalysis>();
2873   return PA;
2874 }
2875 
2876 namespace {
2877 
2878 struct IndVarSimplifyLegacyPass : public LoopPass {
2879   static char ID; // Pass identification, replacement for typeid
2880 
2881   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2882     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2883   }
2884 
2885   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2886     if (skipLoop(L))
2887       return false;
2888 
2889     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2890     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2891     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2892     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2893     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2894     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2895     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2896     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2897     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2898     MemorySSA *MSSA = nullptr;
2899     if (MSSAAnalysis)
2900       MSSA = &MSSAAnalysis->getMSSA();
2901 
2902     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA);
2903     return IVS.run(L);
2904   }
2905 
2906   void getAnalysisUsage(AnalysisUsage &AU) const override {
2907     AU.setPreservesCFG();
2908     AU.addPreserved<MemorySSAWrapperPass>();
2909     getLoopAnalysisUsage(AU);
2910   }
2911 };
2912 
2913 } // end anonymous namespace
2914 
2915 char IndVarSimplifyLegacyPass::ID = 0;
2916 
2917 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2918                       "Induction Variable Simplification", false, false)
2919 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2920 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2921                     "Induction Variable Simplification", false, false)
2922 
2923 Pass *llvm::createIndVarSimplifyPass() {
2924   return new IndVarSimplifyLegacyPass();
2925 }
2926