//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}
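// For illustration only (block and value names below are not from this file):
// if Def feeds a PHI in %merge from predecessor blocks %a and %b, the code
// above picks the terminator of the nearest common dominator of %a and %b,
// then walks up the dominator tree until it reaches a block in the same loop
// as Def, so the chosen insertion point never sits in a deeper loop than Def
// itself.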

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }
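
  // Worked example of the checks above (illustrative, not from the source):
  // for a loop equivalent to 'for (double i = 0.0; i != 3.0; i += 2.0)' we get
  // InitValue = 0, ExitValue = 3, IncValue = 2, so Range = 3 and Leftover = 1.
  // Since the exit test is an equality comparison and Leftover != 0, the
  // integer IV condition could wrap around and behave differently from the FP
  // IV, so the transformation is rejected.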

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}
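
// As a rough sketch of the rewrite performed above (value names are
// illustrative, not taken from this file), a floating-point IV such as
//
//   %iv = phi double [ 0.0, %preheader ], [ %iv.next, %backedge ]
//   %iv.next = fadd double %iv, 1.0
//   %cmp = fcmp olt double %iv.next, 1.0e4
//
// becomes an integer IV, plus a single conversion if %iv still has FP users:
//
//   %iv.int = phi i32 [ 0, %preheader ], [ %iv.next.int, %backedge ]
//   %iv.next.int = add i32 %iv.int, 1
//   %cmp = icmp slt i32 %iv.next.int, 10000
//   %indvar.conv = sitofp i32 %iv.int to double  ; replaces remaining FP uses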

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop-invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is taken on the
/// first loop iteration. This lets us predict exit values of PHI nodes that
/// live in the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}
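
// Illustrative example (names are not from this file): given a header PHI
//   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
// and an exit taken under a loop-invariant condition from a block that
// dominates the latch, the LCSSA phi in the exit block
//   %lcssa = phi i32 [ %iv, %exiting ]
// can only observe the first-iteration value of %iv, so its incoming value is
// rewritten to the preheader value 0.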

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by visitIVCast and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute the cost of all instructions on the induction variable more
  // comprehensively when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
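
// For example (illustrative only): given a narrow IV 'i32 %iv' and a user
// 'sext i32 %iv to i64' on a target where i64 is a legal integer type and an
// i64 add is no more expensive than an i32 add, WI.WidestNativeType becomes
// i64 and WI.IsSigned is set to true. A later zext user of the same IV is
// then ignored, because the sign of the first extend seen wins.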

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not, we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantUse(NarrowIVDefUse DU);
  void widenWithVariantUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace
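
// As a rough end-to-end sketch of what WidenIV produces (illustrative, not
// taken from this file): widening
//   %iv = phi i32 [ 0, %ph ], [ %iv.next, %latch ]
//   %iv.next = add nsw i32 %iv, 1
//   %idx = sext i32 %iv.next to i64
// with WideType == i64 creates
//   %iv.wide = phi i64 [ 0, %ph ], [ %iv.wide.next, %latch ]
//   %iv.wide.next = add nsw i64 %iv.wide, 1
// and redirects users of %idx to the wide increment, leaving the narrow IV
// dead, or inserts a trunc of the wide IV where a use cannot be widened.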

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}
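
// For instance (illustrative): if NarrowOper is invariant in both the
// innermost loop containing Use and its parent loop, the [sz]ext created
// above lands in the preheader of the outer of those two loops, so it is
// computed once rather than on every iteration.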

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Swap the operands back to their original order for non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}
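
// Illustrative example (not from this file): if the narrow use is
//   %u = add nsw i32 %iv, %n
// and %iv was widened with a sign extend, the code above combines the SCEV of
// the already-widened def with sext(%n). If that simplifies to an add
// recurrence on this loop, {AddRec, SignExtended} is returned and the use can
// later be cloned in the wide type without inserting a trunc.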

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand).  The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

// widenIVUse avoids generating a trunc by evaluating the use as an AddRec.
// This does not work when:
//    1) SCEV traces back to an instruction inside the loop that SCEV cannot
//       expand, e.g. add %indvar, (load %addr)
//    2) SCEV finds a loop variant, e.g. add %indvar, %loopvariant
// When SCEV fails to avoid the trunc, we can still try an instruction
// combining approach to prove that the trunc is not required. This can be
// further extended with other instruction combining checks, but for now we
// handle the following case (sub can be "add" and "mul", "nsw + sext" can be
// "nuw + zext"):
//
// Src:
//   %c = sub nsw %b, %indvar
//   %d = sext %c to i64
// Dst:
//   %indvar.ext1 = sext %indvar to i64
//   %m = sext %b to i64
//   %d = sub nsw i64 %m, %indvar.ext1
// Therefore, as long as the result of the add/sub/mul is extended to the wide
// type, no trunc is required regardless of how %b is generated. This pattern
// is common when calculating addresses on 64-bit architectures.
bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap();
  bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap();
  if (!CanSignExtend && !CanZeroExtend)
    return false;

  // Verify that the defining operand is an AddRec.
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;

  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  }

  return true;
}

/// Special case for widening with a loop-variant operand (see
/// WidenIV::widenWithVariantUse). This is the code generation part.
void WidenIV::widenWithVariantUseCodegen(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  ExtendKind ExtKind = getExtendKind(NarrowDef);
  assert(ExtKind != Unknown && "Unknown ExtKind not handled");
  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generating a widening use instruction.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      ExtKind, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      ExtKind, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  ExtendKindMap[NarrowUse] = ExtKind;

  for (Use &U : NarrowUse->uses()) {
    Instruction *User = nullptr;
    if (ExtKind == SignExtended)
      User = cast<SExtInst>(U.getUser());
    else
      User = cast<ZExtInst>(U.getUser());
    assert(User->getType() == WideType && "Checked before!");
    LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                      << *WideBO << "\n");
    ++NumElimExt;
    User->replaceAllUsesWith(WideBO);
    DeadInsts.emplace_back(User);
  }
}

/// Determine whether an individual user of the narrow IV can be widened. If so,
/// return the wide clone of the user.
Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  assert(ExtendKindMap.count(DU.NarrowDef) &&
         "Should already know the kind of extension used to widen NarrowDef");

  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
      // For LCSSA phis, sink the truncate outside the loop.
      // After SimplifyCFG most loop exit targets have a single predecessor.
      // Otherwise fall back to a truncate within the loop.
      if (UsePhi->getNumOperands() != 1)
        truncateIVUse(DU, DT, LI);
      else {
        // Widening the PHI requires us to insert a trunc.  The logical place
        // for this trunc is in the same BB as the PHI.  This is not possible if
        // the BB is terminated by a catchswitch.
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
          return nullptr;

        PHINode *WidePhi =
          PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
                          UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
                          << *WidePhi << "\n");
      }
      return nullptr;
    }
  }

  // This narrow use can be widened by a sext if it's non-negative or its narrow
  // def was widened by a sext. Same for zext.
  auto canWidenBySExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
  };
  auto canWidenByZExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
  };

  // Our raison d'etre! Eliminate sign and zero extension.
  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
                          << " not wide enough to subsume " << *DU.NarrowUse
                          << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
                        << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }

  // Does this user itself evaluate to a recurrence after widening?
  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
  if (!WideAddRec.first)
    WideAddRec = getWideRecurrence(DU);

  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
  if (!WideAddRec.first) {
    // If use is a loop condition, try to promote the condition instead of
    // truncating the IV first.
    if (widenLoopCompare(DU))
      return nullptr;

    // We are here about to generate a truncate instruction that may hurt
    // performance because the scalar evolution expression computed earlier
    // in WideAddRec.first does not indicate a polynomial induction expression.
    // In that case, look at the operands of the use instruction to determine
    // if we can still widen the use instead of truncating its operand.
    if (widenWithVariantUse(DU)) {
      widenWithVariantUseCodegen(DU);
      return nullptr;
    }

    // This user does not evaluate to a recurrence after widening, so don't
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    truncateIVUse(DU, DT, LI);
    return nullptr;
  }
1310   // Assume block terminators cannot evaluate to a recurrence. We can't
1311   // insert a Trunc after a terminator if there happens to be a critical edge.
1312   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1313          "SCEV is not expected to evaluate a block terminator");
1314 
1315   // Reuse the IV increment that SCEVExpander created as long as it dominates
1316   // NarrowUse.
1317   Instruction *WideUse = nullptr;
1318   if (WideAddRec.first == WideIncExpr &&
1319       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1320     WideUse = WideInc;
1321   else {
1322     WideUse = cloneIVUser(DU, WideAddRec.first);
1323     if (!WideUse)
1324       return nullptr;
1325   }
1326   // Evaluation of WideAddRec ensured that the narrow expression could be
1327   // extended outside the loop without overflow. This suggests that the wide use
1328   // evaluates to the same expression as the extended narrow use, but doesn't
1329   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1330   // where it fails, we simply throw away the newly created wide use.
1331   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1332     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1333                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1334                       << "\n");
1335     DeadInsts.emplace_back(WideUse);
1336     return nullptr;
1337   }
1338 
1339   // If we reached this point, then we are going to replace
1340   // DU.NarrowUse with WideUse. Reattach any DbgValues then.
1341   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1342 
1343   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1344   // Returning WideUse pushes it on the worklist.
1345   return WideUse;
1346 }
1347 
1348 /// Add eligible users of NarrowDef to NarrowIVUsers.
1349 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1350   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1351   bool NonNegativeDef =
1352       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1353                            SE->getZero(NarrowSCEV->getType()));
1354   for (User *U : NarrowDef->users()) {
1355     Instruction *NarrowUser = cast<Instruction>(U);
1356 
1357     // Handle data flow merges and bizarre phi cycles.
1358     if (!Widened.insert(NarrowUser).second)
1359       continue;
1360 
1361     bool NonNegativeUse = false;
1362     if (!NonNegativeDef) {
1363       // We might have control-dependent range information for this context.
1364       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1365         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1366     }
1367 
1368     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1369                                NonNegativeDef || NonNegativeUse);
1370   }
1371 }
1372 
1373 /// Process a single induction variable. First use the SCEVExpander to create a
1374 /// wide induction variable that evaluates to the same recurrence as the
1375 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1376 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1377 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1378 ///
1379 /// It would be simpler to delete uses as they are processed, but we must avoid
1380 /// invalidating SCEV expressions.
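///
/// Schematic example (illustrative only): a narrow IV
///   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
/// whose in-loop extensions are all 'sext ... to i64' may be replaced by
///   %iv.wide = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]
/// after which the narrow phi, its increment and the extends become dead.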
1381 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1382   // Bail if we disallowed widening.
1383   if(!AllowIVWidening)
1384     return nullptr;
1385 
1386   // Is this phi an induction variable?
1387   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1388   if (!AddRec)
1389     return nullptr;
1390 
1391   // Widen the induction variable expression.
1392   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1393                                ? SE->getSignExtendExpr(AddRec, WideType)
1394                                : SE->getZeroExtendExpr(AddRec, WideType);
1395 
1396   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1397          "Expect the new IV expression to preserve its type");
1398 
1399   // Can the IV be extended outside the loop without overflow?
1400   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1401   if (!AddRec || AddRec->getLoop() != L)
1402     return nullptr;
1403 
1404   // An AddRec must have loop-invariant operands. Since this AddRec is
1405   // materialized by a loop header phi, the expression cannot have any post-loop
1406   // operands, so they must dominate the loop header.
1407   assert(
1408       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1409       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1410       "Loop header phi recurrence inputs do not dominate the loop");
1411 
1412   // Iterate over IV uses (including transitive ones) looking for IV increments
1413   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1414   // the increment, calculate control-dependent range information based on
1415   // dominating conditions inside of the loop (e.g. a range check inside of the
1416   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1417   //
1418   // Control-dependent range information is later used to prove that a narrow
1419   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1420   // this on demand because when pushNarrowIVUsers needs this information some
1421   // of the dominating conditions might be already widened.
1422   if (UsePostIncrementRanges)
1423     calculatePostIncRanges(OrigPhi);
1424 
1425   // The rewriter provides a value for the desired IV expression. This may
1426   // either find an existing phi or materialize a new one. Either way, we
1427   // expect a well-formed cyclic phi-with-increments, i.e. any operand not part
1428   // of the phi-SCC dominates the loop entry.
1429   Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
1430   Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
1431   // If the expanded value is not a phi node (for example a cast such as
1432   // bitcast, inttoptr or ptrtoint), just skip it for now.
1433   if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) {
1434     // If the cast node is an inserted instruction without any users, remove it
1435     // to make sure the pass doesn't touch the function, as we cannot widen the
1436     // phi.
1437     if (ExpandInst->hasNUses(0) &&
1438         Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst)))
1439       DeadInsts.emplace_back(ExpandInst);
1440     return nullptr;
1441   }
1442 
1443   // Remembering the WideIV increment generated by SCEVExpander allows
1444   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1445   // employ a general reuse mechanism because the call above is the only call to
1446   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1447   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1448     WideInc =
1449       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1450     WideIncExpr = SE->getSCEV(WideInc);
1451     // Propagate the debug location associated with the original loop increment
1452     // to the new (widened) increment.
1453     auto *OrigInc =
1454       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1455     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1456   }
1457 
1458   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1459   ++NumWidened;
1460 
1461   // Traverse the def-use chain using a worklist starting at the original IV.
1462   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1463 
1464   Widened.insert(OrigPhi);
1465   pushNarrowIVUsers(OrigPhi, WidePhi);
1466 
1467   while (!NarrowIVUsers.empty()) {
1468     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1469 
1470     // Process a def-use edge. This may replace the use, so don't hold a
1471     // use_iterator across it.
1472     Instruction *WideUse = widenIVUse(DU, Rewriter);
1473 
1474     // Follow all def-use edges from the previous narrow use.
1475     if (WideUse)
1476       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1477 
1478     // widenIVUse may have removed the def-use edge.
1479     if (DU.NarrowDef->use_empty())
1480       DeadInsts.emplace_back(DU.NarrowDef);
1481   }
1482 
1483   // Attach any debug information to the new PHI.
1484   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1485 
1486   return WidePhi;
1487 }
1488 
1489 /// Calculates control-dependent range for the given def at the given context
1490 /// by looking at dominating conditions inside of the loop
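///
/// For example (a sketch, not from a specific test): if the def is
/// '%inc = add nsw i32 %x, 1' and a dominating in-loop branch establishes
/// '%x >=s 0' on the path to the user, then %inc is known to lie in
/// [1, INT32_MAX] at that user, i.e. it is non-negative there.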
1491 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1492                                     Instruction *NarrowUser) {
1493   using namespace llvm::PatternMatch;
1494 
1495   Value *NarrowDefLHS;
1496   const APInt *NarrowDefRHS;
1497   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1498                                  m_APInt(NarrowDefRHS))) ||
1499       !NarrowDefRHS->isNonNegative())
1500     return;
1501 
1502   auto UpdateRangeFromCondition = [&] (Value *Condition,
1503                                        bool TrueDest) {
1504     CmpInst::Predicate Pred;
1505     Value *CmpRHS;
1506     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1507                                  m_Value(CmpRHS))))
1508       return;
1509 
1510     CmpInst::Predicate P =
1511             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1512 
1513     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1514     auto CmpConstrainedLHSRange =
1515             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1516     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1517         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1518 
1519     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1520   };
1521 
1522   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1523     if (!HasGuards)
1524       return;
1525 
1526     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1527                                      Ctx->getParent()->rend())) {
1528       Value *C = nullptr;
1529       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1530         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1531     }
1532   };
1533 
1534   UpdateRangeFromGuards(NarrowUser);
1535 
1536   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1537   // If NarrowUserBB is statically unreachable, asking dominator queries may
1538   // yield surprising results (e.g. the block may not have a dom tree node).
1539   if (!DT->isReachableFromEntry(NarrowUserBB))
1540     return;
1541 
1542   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1543        L->contains(DTB->getBlock());
1544        DTB = DTB->getIDom()) {
1545     auto *BB = DTB->getBlock();
1546     auto *TI = BB->getTerminator();
1547     UpdateRangeFromGuards(TI);
1548 
1549     auto *BI = dyn_cast<BranchInst>(TI);
1550     if (!BI || !BI->isConditional())
1551       continue;
1552 
1553     auto *TrueSuccessor = BI->getSuccessor(0);
1554     auto *FalseSuccessor = BI->getSuccessor(1);
1555 
1556     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1557       return BBE.isSingleEdge() &&
1558              DT->dominates(BBE, NarrowUser->getParent());
1559     };
1560 
1561     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1562       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1563 
1564     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1565       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1566   }
1567 }
1568 
1569 /// Calculates PostIncRangeInfos map for the given IV
1570 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1571   SmallPtrSet<Instruction *, 16> Visited;
1572   SmallVector<Instruction *, 6> Worklist;
1573   Worklist.push_back(OrigPhi);
1574   Visited.insert(OrigPhi);
1575 
1576   while (!Worklist.empty()) {
1577     Instruction *NarrowDef = Worklist.pop_back_val();
1578 
1579     for (Use &U : NarrowDef->uses()) {
1580       auto *NarrowUser = cast<Instruction>(U.getUser());
1581 
1582       // Don't go looking outside the current loop.
1583       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1584       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1585         continue;
1586 
1587       if (!Visited.insert(NarrowUser).second)
1588         continue;
1589 
1590       Worklist.push_back(NarrowUser);
1591 
1592       calculatePostIncRange(NarrowDef, NarrowUser);
1593     }
1594   }
1595 }
1596 
1597 //===----------------------------------------------------------------------===//
1598 //  Live IV Reduction - Minimize IVs live across the loop.
1599 //===----------------------------------------------------------------------===//
1600 
1601 //===----------------------------------------------------------------------===//
1602 //  Simplification of IV users based on SCEV evaluation.
1603 //===----------------------------------------------------------------------===//
1604 
1605 namespace {
1606 
1607 class IndVarSimplifyVisitor : public IVVisitor {
1608   ScalarEvolution *SE;
1609   const TargetTransformInfo *TTI;
1610   PHINode *IVPhi;
1611 
1612 public:
1613   WideIVInfo WI;
1614 
1615   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1616                         const TargetTransformInfo *TTI,
1617                         const DominatorTree *DTree)
1618     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1619     DT = DTree;
1620     WI.NarrowIV = IVPhi;
1621   }
1622 
1623   // Implement the interface used by simplifyUsersOfIV.
1624   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1625 };
1626 
1627 } // end anonymous namespace
1628 
1629 /// Iteratively perform simplification on a worklist of IV users. Each
1630 /// successive simplification may push more users which may themselves be
1631 /// candidates for simplification.
1632 ///
1633 /// Sign/Zero extend elimination is interleaved with IV simplification.
1634 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1635                                        SCEVExpander &Rewriter,
1636                                        LoopInfo *LI) {
1637   SmallVector<WideIVInfo, 8> WideIVs;
1638 
1639   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1640           Intrinsic::getName(Intrinsic::experimental_guard));
1641   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1642 
1643   SmallVector<PHINode*, 8> LoopPhis;
1644   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1645     LoopPhis.push_back(cast<PHINode>(I));
1646   }
1647   // Each round of simplification iterates through the SimplifyIVUsers worklist
1648   // for all current phis, then determines whether any IVs can be
1649   // widened. Widening adds new phis to LoopPhis, inducing another round of
1650   // simplification on the wide IVs.
1651   bool Changed = false;
1652   while (!LoopPhis.empty()) {
1653     // Evaluate as many IV expressions as possible before widening any IVs. This
1654     // forces SCEV to set no-wrap flags before evaluating sign/zero
1655     // extension. The first time SCEV attempts to normalize sign/zero extension,
1656     // the result becomes final. So for the most predictable results, we delay
1657     // evaluation of sign/zero extension until needed, and avoid running
1658     // other SCEV based analysis prior to simplifyAndExtend.
1659     do {
1660       PHINode *CurrIV = LoopPhis.pop_back_val();
1661 
1662       // Information about sign/zero extensions of CurrIV.
1663       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1664 
1665       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
1666                                    &Visitor);
1667 
1668       if (Visitor.WI.WidestNativeType) {
1669         WideIVs.push_back(Visitor.WI);
1670       }
1671     } while(!LoopPhis.empty());
1672 
1673     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1674       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1675       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1676         Changed = true;
1677         LoopPhis.push_back(WidePhi);
1678       }
1679     }
1680   }
1681   return Changed;
1682 }
1683 
1684 //===----------------------------------------------------------------------===//
1685 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1686 //===----------------------------------------------------------------------===//
1687 
1688 /// Given a Value which is hoped to be part of an add recurrence in the given
1689 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1690 /// that this is less general than SCEV's AddRec checking.
1691 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1692   Instruction *IncI = dyn_cast<Instruction>(IncV);
1693   if (!IncI)
1694     return nullptr;
1695 
1696   switch (IncI->getOpcode()) {
1697   case Instruction::Add:
1698   case Instruction::Sub:
1699     break;
1700   case Instruction::GetElementPtr:
1701     // An IV counter must preserve its type.
1702     if (IncI->getNumOperands() == 2)
1703       break;
1704     LLVM_FALLTHROUGH;
1705   default:
1706     return nullptr;
1707   }
1708 
1709   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1710   if (Phi && Phi->getParent() == L->getHeader()) {
1711     if (L->isLoopInvariant(IncI->getOperand(1)))
1712       return Phi;
1713     return nullptr;
1714   }
1715   if (IncI->getOpcode() == Instruction::GetElementPtr)
1716     return nullptr;
1717 
1718   // Allow add/sub to be commuted.
1719   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1720   if (Phi && Phi->getParent() == L->getHeader()) {
1721     if (L->isLoopInvariant(IncI->getOperand(0)))
1722       return Phi;
1723   }
1724   return nullptr;
1725 }
1726 
1727 /// Whether the current loop exit test is based on this value.  Currently this
1728 /// is limited to a direct use in the loop condition.
1729 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
1730   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1731   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1732   // TODO: Allow non-icmp loop test.
1733   if (!ICmp)
1734     return false;
1735 
1736   // TODO: Allow indirect use.
1737   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
1738 }
1739 
1740 /// linearFunctionTestReplace policy. Return true unless we can show that the
1741 /// current exit test is already sufficiently canonical.
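///
/// For example (schematic), an exit test that already has the form
///   icmp ne i64 %i.next, %n    ; %n loop-invariant, %i a simple counter
/// is considered canonical and left alone, whereas a test such as
///   icmp slt i64 %i, %n
/// would be rewritten by LFTR.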
1742 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
1743   assert(L->getLoopLatch() && "Must be in simplified form");
1744 
1745   // Avoid converting a constant or loop invariant test back to a runtime
1746   // test.  This is critical for when SCEV's cached ExitCount is less precise
1747   // than the current IR (such as after we've proven a particular exit is
1748   // actually dead and thus the BE count never reaches our ExitCount.)
1749   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1750   if (L->isLoopInvariant(BI->getCondition()))
1751     return false;
1752 
1753   // Do LFTR to simplify the exit condition to an ICMP.
1754   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1755   if (!Cond)
1756     return true;
1757 
1758   // Do LFTR to simplify the exit ICMP to EQ/NE
1759   ICmpInst::Predicate Pred = Cond->getPredicate();
1760   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1761     return true;
1762 
1763   // Look for a loop invariant RHS
1764   Value *LHS = Cond->getOperand(0);
1765   Value *RHS = Cond->getOperand(1);
1766   if (!L->isLoopInvariant(RHS)) {
1767     if (!L->isLoopInvariant(LHS))
1768       return true;
1769     std::swap(LHS, RHS);
1770   }
1771   // Look for a simple IV counter LHS
1772   PHINode *Phi = dyn_cast<PHINode>(LHS);
1773   if (!Phi)
1774     Phi = getLoopPhiForCounter(LHS, L);
1775 
1776   if (!Phi)
1777     return true;
1778 
1779   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1780   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1781   if (Idx < 0)
1782     return true;
1783 
1784   // Do LFTR if the exit condition's IV is *not* a simple counter.
1785   Value *IncV = Phi->getIncomingValue(Idx);
1786   return Phi != getLoopPhiForCounter(IncV, L);
1787 }
1788 
1789 /// Return true if undefined behavior would provably be executed on the path to
1790 /// OnPathTo if Root produced a poison result.  Note that this doesn't say
1791 /// anything about whether OnPathTo is actually executed or whether Root is
1792 /// actually poison.  This can be used to assess whether a new use of Root can
1793 /// be added at a location which is control equivalent with OnPathTo (such as
1794 /// immediately before it) without introducing UB which didn't previously
1795 /// exist.  Note that a false result conveys no information.
1796 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
1797                                           Instruction *OnPathTo,
1798                                           DominatorTree *DT) {
1799   // Basic approach is to assume Root is poison, propagate poison forward
1800   // through all users we can easily track, and then check whether any of those
1801   // users are provably UB and must execute before our exiting block might
1802   // exit.
1803 
1804   // The set of all recursive users we've visited (which are assumed to all be
1805   // poison because of said visit)
1806   SmallSet<const Value *, 16> KnownPoison;
1807   SmallVector<const Instruction*, 16> Worklist;
1808   Worklist.push_back(Root);
1809   while (!Worklist.empty()) {
1810     const Instruction *I = Worklist.pop_back_val();
1811 
1812     // If we know this must trigger UB on a path leading our target.
1813     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
1814       return true;
1815 
1816     // If we can't analyze propagation through this instruction, just skip it
1817     // and transitive users.  Safe as false is a conservative result.
1818     if (!propagatesPoison(cast<Operator>(I)) && I != Root)
1819       continue;
1820 
1821     if (KnownPoison.insert(I).second)
1822       for (const User *User : I->users())
1823         Worklist.push_back(cast<Instruction>(User));
1824   }
1825 
1826   // Might be non-UB, or might have a path we couldn't prove must execute on
1827   // way to exiting bb.
1828   return false;
1829 }
1830 
1831 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1832 /// down to checking that all operands are constant and listing instructions
1833 /// that may hide undef.
1834 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1835                                unsigned Depth) {
1836   if (isa<Constant>(V))
1837     return !isa<UndefValue>(V);
1838 
1839   if (Depth >= 6)
1840     return false;
1841 
1842   // Conservatively handle non-constant non-instructions. For example, Arguments
1843   // may be undef.
1844   Instruction *I = dyn_cast<Instruction>(V);
1845   if (!I)
1846     return false;
1847 
1848   // Load and return values may be undef.
1849   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1850     return false;
1851 
1852   // Optimistically handle other instructions.
1853   for (Value *Op : I->operands()) {
1854     if (!Visited.insert(Op).second)
1855       continue;
1856     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1857       return false;
1858   }
1859   return true;
1860 }
1861 
1862 /// Return true if the given value is concrete. We must prove that undef can
1863 /// never reach it.
1864 ///
1865 /// TODO: If we decide that this is a good approach to checking for undef, we
1866 /// may factor it into a common location.
1867 static bool hasConcreteDef(Value *V) {
1868   SmallPtrSet<Value*, 8> Visited;
1869   Visited.insert(V);
1870   return hasConcreteDefImpl(V, Visited, 0);
1871 }
1872 
1873 /// Return true if this IV has any uses other than the (soon to be rewritten)
1874 /// loop exit test.
1875 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1876   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1877   Value *IncV = Phi->getIncomingValue(LatchIdx);
1878 
1879   for (User *U : Phi->users())
1880     if (U != Cond && U != IncV) return false;
1881 
1882   for (User *U : IncV->users())
1883     if (U != Cond && U != Phi) return false;
1884   return true;
1885 }
1886 
1887 /// Return true if the given phi is a "counter" in L.  A counter is an
1888 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
1889 /// step of 1.  Note that L must have exactly one latch.
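///
/// A typical counter (shown schematically) is:
///   %i = phi i64 [ %start, %preheader ], [ %i.next, %latch ]
///   %i.next = add i64 %i, 1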
1890 static bool isLoopCounter(PHINode* Phi, Loop *L,
1891                           ScalarEvolution *SE) {
1892   assert(Phi->getParent() == L->getHeader());
1893   assert(L->getLoopLatch());
1894 
1895   if (!SE->isSCEVable(Phi->getType()))
1896     return false;
1897 
1898   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1899   if (!AR || AR->getLoop() != L || !AR->isAffine())
1900     return false;
1901 
1902   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1903   if (!Step || !Step->isOne())
1904     return false;
1905 
1906   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
1907   Value *IncV = Phi->getIncomingValue(LatchIdx);
1908   return (getLoopPhiForCounter(IncV, L) == Phi);
1909 }
1910 
1911 /// Search the loop header for a loop counter (an add rec w/ step of one)
1912 /// suitable for use by LFTR.  If multiple counters are available, select the
1913 /// "best" one based on profitability heuristics.
1914 ///
1915 /// BECount may be an i8* pointer type. The pointer difference is already a
1916 /// valid count without scaling the address stride, so it remains a pointer
1917 /// expression as far as SCEV is concerned.
1918 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
1919                                 const SCEV *BECount,
1920                                 ScalarEvolution *SE, DominatorTree *DT) {
1921   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1922 
1923   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
1924 
1925   // Loop over all of the PHI nodes, looking for a simple counter.
1926   PHINode *BestPhi = nullptr;
1927   const SCEV *BestInit = nullptr;
1928   BasicBlock *LatchBlock = L->getLoopLatch();
1929   assert(LatchBlock && "Must be in simplified form");
1930   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1931 
1932   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1933     PHINode *Phi = cast<PHINode>(I);
1934     if (!isLoopCounter(Phi, L, SE))
1935       continue;
1936 
1937     // Avoid comparing an integer IV against a pointer Limit.
1938     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1939       continue;
1940 
1941     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1942 
1943     // AR may be a pointer type, while BECount is an integer type.
1944     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1945     // AR may not be a narrower type, or we may never exit.
1946     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1947     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
1948       continue;
1949 
1950     // Avoid reusing a potentially undef value to compute other values that may
1951     // have originally had a concrete definition.
1952     if (!hasConcreteDef(Phi)) {
1953       // We explicitly allow unknown phis as long as they are already used by
1954       // the loop exit test.  This is legal since performing LFTR could not
1955       // increase the number of undef users.
1956       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
1957       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
1958           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
1959         continue;
1960     }
1961 
1962     // Avoid introducing undefined behavior due to poison which didn't exist in
1963     // the original program.  (Annoyingly, the rules for poison and undef
1964     // propagation are distinct, so this does NOT cover the undef case above.)
1965     // We have to ensure that we don't introduce UB by introducing a use on an
1966     // iteration where said IV produces poison.  Our strategy here differs for
1967     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
1968     // see code in linearFunctionTestReplace.  For pointers, we restrict
1969     // transforms as there is no good way to reinfer inbounds once lost.
1970     if (!Phi->getType()->isIntegerTy() &&
1971         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
1972       continue;
1973 
1974     const SCEV *Init = AR->getStart();
1975 
1976     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
1977       // Don't force a live loop counter if another IV can be used.
1978       if (AlmostDeadIV(Phi, LatchBlock, Cond))
1979         continue;
1980 
1981       // Prefer to count-from-zero. This is a more "canonical" counter form. It
1982       // also prefers integer to pointer IVs.
1983       if (BestInit->isZero() != Init->isZero()) {
1984         if (BestInit->isZero())
1985           continue;
1986       }
1987       // If two IVs both count from zero or both count from nonzero then the
1988       // narrower is likely a dead phi that has been widened. Use the wider phi
1989       // to allow the other to be eliminated.
1990       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
1991         continue;
1992     }
1993     BestPhi = Phi;
1994     BestInit = Init;
1995   }
1996   return BestPhi;
1997 }
1998 
1999 /// Insert an IR expression which computes the value held by the IV IndVar
2000 /// (which must be a loop counter w/ unit stride) after the backedge of loop L
2001 /// is taken ExitCount times.
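///
/// For a unit-stride counter the limit is computed as (sketch, using 2's
/// complement arithmetic):
///   Limit = Start + ExitCount        ; comparing against the pre-inc IV
///   Limit = Start + ExitCount + 1    ; comparing against the post-inc IV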
2002 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2003                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
2004                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2005   assert(isLoopCounter(IndVar, L, SE));
2006   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2007   const SCEV *IVInit = AR->getStart();
2008 
2009   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2010   // finds a valid pointer IV. Zero extend ExitCount in order to materialize a
2011   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2012   // the existing GEPs whenever possible.
2013   if (IndVar->getType()->isPointerTy() &&
2014       !ExitCount->getType()->isPointerTy()) {
2015     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2016     // signed value. ExitCount on the other hand represents the loop trip count,
2017     // which is an unsigned value. FindLoopCounter only allows induction
2018     // variables that have a positive unit stride of one. This means we don't
2019     // have to handle the case of negative offsets (yet) and just need to zero
2020     // extend ExitCount.
2021     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2022     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2023     if (UsePostInc)
2024       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2025 
2026     // Expand the code for the iteration count.
2027     assert(SE->isLoopInvariant(IVOffset, L) &&
2028            "Computed iteration count is not loop invariant!");
2029 
2030     // We could handle pointer IVs other than i8*, but we need to compensate for
2031     // gep index scaling.
2032     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2033                              cast<PointerType>(IndVar->getType())
2034                                  ->getElementType())->isOne() &&
2035            "unit stride pointer IV must be i8*");
2036 
2037     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2038     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2039     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2040   } else {
2041     // In any other case, convert both IVInit and ExitCount to integers before
2042     // comparing. This may result in SCEV expansion of pointers, but in practice
2043     // SCEV will fold the pointer arithmetic away as such:
2044     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2045     //
2046     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2047     // for simple memset-style loops.
2048     //
2049     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2050     // were generated on top of case #2, which is not expected.
2051 
2052     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2053     // For unit stride, IVCount = Start + ExitCount with 2's complement
2054     // overflow.
2055 
2056     // For integer IVs, truncate the IV before computing IVInit + BECount,
2057     // unless we know a priori that the limit must be a constant when evaluated
2058     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2059     // of the IV in the loop over a (potentially) expensive expansion of the
2060     // widened exit count add(zext(add)) expression.
2061     if (SE->getTypeSizeInBits(IVInit->getType())
2062         > SE->getTypeSizeInBits(ExitCount->getType())) {
2063       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2064         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2065       else
2066         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2067     }
2068 
2069     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2070 
2071     if (UsePostInc)
2072       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2073 
2074     // Expand the code for the iteration count.
2075     assert(SE->isLoopInvariant(IVLimit, L) &&
2076            "Computed iteration count is not loop invariant!");
2077     // Ensure that we generate the same type as IndVar, or a smaller integer
2078     // type. In the presence of null pointer values, we have an integer type
2079     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2080     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2081       IndVar->getType() : ExitCount->getType();
2082     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2083     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2084   }
2085 }
2086 
2087 /// This method rewrites the exit condition of the loop to be a canonical !=
2088 /// comparison against the incremented loop induction variable.  This pass is
2089 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2090 /// determine a loop-invariant trip count of the loop, which is actually a much
2091 /// broader range than just linear tests.
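///
/// Schematically (illustrative only), an exit test such as
///   %cmp = icmp slt i64 %i.next, %n
/// becomes
///   %exitcond = icmp ne i64 %i.next, %limit
/// (or icmp eq, depending on which successor stays in the loop), where %limit
/// is the value produced by genLoopLimit above.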
2092 bool IndVarSimplify::
2093 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2094                           const SCEV *ExitCount,
2095                           PHINode *IndVar, SCEVExpander &Rewriter) {
2096   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2097   assert(isLoopCounter(IndVar, L, SE));
2098   Instruction * const IncVar =
2099     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2100 
2101   // Initialize CmpIndVar to the preincremented IV.
2102   Value *CmpIndVar = IndVar;
2103   bool UsePostInc = false;
2104 
2105   // If the exiting block is the same as the backedge block, we prefer to
2106   // compare against the post-incremented value, otherwise we must compare
2107   // against the preincremented value.
2108   if (ExitingBB == L->getLoopLatch()) {
2109     // For pointer IVs, we chose to not strip inbounds which requires us not
2110     // to add a potentially UB introducing use.  We need to either a) show
2111     // the loop test we're modifying is already in post-inc form, or b) show
2112     // that adding a use must not introduce UB.
2113     bool SafeToPostInc =
2114         IndVar->getType()->isIntegerTy() ||
2115         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2116         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2117     if (SafeToPostInc) {
2118       UsePostInc = true;
2119       CmpIndVar = IncVar;
2120     }
2121   }
2122 
2123   // It may be necessary to drop nowrap flags on the incrementing instruction
2124   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2125   // case the increment might have previously been poison on the last iteration
2126   // only) or if LFTR switches to a different IV that was previously dynamically
2127   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2128   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2129   // check), because the pre-inc addrec flags may be adopted from the original
2130   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2131   // TODO: This handling is inaccurate for one case: If we switch to a
2132   // dynamically dead IV that wraps on the first loop iteration only, which is
2133   // not covered by the post-inc addrec. (If the new IV was not dynamically
2134   // dead, it could not be poison on the first iteration in the first place.)
2135   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2136     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2137     if (BO->hasNoUnsignedWrap())
2138       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2139     if (BO->hasNoSignedWrap())
2140       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2141   }
2142 
2143   Value *ExitCnt = genLoopLimit(
2144       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2145   assert(ExitCnt->getType()->isPointerTy() ==
2146              IndVar->getType()->isPointerTy() &&
2147          "genLoopLimit missed a cast");
2148 
2149   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2150   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2151   ICmpInst::Predicate P;
2152   if (L->contains(BI->getSuccessor(0)))
2153     P = ICmpInst::ICMP_NE;
2154   else
2155     P = ICmpInst::ICMP_EQ;
2156 
2157   IRBuilder<> Builder(BI);
2158 
2159   // The new loop exit condition should reuse the debug location of the
2160   // original loop exit condition.
2161   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2162     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2163 
2164   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2165   // avoid the expensive expansion of the limit expression in the wider type,
2166   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2167   // since we know (from the exit count bitwidth) that we can't self-wrap in
2168   // the narrower type.
2169   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2170   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2171   if (CmpIndVarSize > ExitCntSize) {
2172     assert(!CmpIndVar->getType()->isPointerTy() &&
2173            !ExitCnt->getType()->isPointerTy());
2174 
2175     // Before resorting to actually inserting the truncate, use the same
2176     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2177     // the other side of the comparison instead.  We still evaluate the limit
2178     // in the narrower bitwidth; we just prefer a zext/sext outside the loop to
2179     // a truncate within it.
2180     bool Extended = false;
2181     const SCEV *IV = SE->getSCEV(CmpIndVar);
2182     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2183                                                   ExitCnt->getType());
2184     const SCEV *ZExtTrunc =
2185       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2186 
2187     if (ZExtTrunc == IV) {
2188       Extended = true;
2189       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2190                                    "wide.trip.count");
2191     } else {
2192       const SCEV *SExtTrunc =
2193         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2194       if (SExtTrunc == IV) {
2195         Extended = true;
2196         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2197                                      "wide.trip.count");
2198       }
2199     }
2200 
2201     if (Extended) {
2202       bool Discard;
2203       L->makeLoopInvariant(ExitCnt, Discard);
2204     } else
2205       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2206                                       "lftr.wideiv");
2207   }
2208   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2209                     << "      LHS:" << *CmpIndVar << '\n'
2210                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2211                     << "\n"
2212                     << "      RHS:\t" << *ExitCnt << "\n"
2213                     << "ExitCount:\t" << *ExitCount << "\n"
2214                     << "  was: " << *BI->getCondition() << "\n");
2215 
2216   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2217   Value *OrigCond = BI->getCondition();
2218   // It's tempting to use replaceAllUsesWith here to fully replace the old
2219   // comparison, but that's not immediately safe, since users of the old
2220   // comparison may not be dominated by the new comparison. Instead, just
2221   // update the branch to use the new comparison; in the common case this
2222   // will make old comparison dead.
2223   BI->setCondition(Cond);
2224   DeadInsts.emplace_back(OrigCond);
2225 
2226   ++NumLFTR;
2227   return true;
2228 }
2229 
2230 //===----------------------------------------------------------------------===//
2231 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2232 //===----------------------------------------------------------------------===//
2233 
2234 /// If there's a single exit block, sink any loop-invariant values that
2235 /// were defined in the preheader but not used inside the loop into the
2236 /// exit block to reduce register pressure in the loop.
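///
/// For example (schematic): an address like
///   %p = getelementptr i8, i8* %base, i64 %off
/// computed in the preheader but only used after the loop is moved into the
/// exit block, shortening its live range across the loop body.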
2237 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2238   BasicBlock *ExitBlock = L->getExitBlock();
2239   if (!ExitBlock) return false;
2240 
2241   BasicBlock *Preheader = L->getLoopPreheader();
2242   if (!Preheader) return false;
2243 
2244   bool MadeAnyChanges = false;
2245   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2246   BasicBlock::iterator I(Preheader->getTerminator());
2247   while (I != Preheader->begin()) {
2248     --I;
2249     // New instructions were inserted at the end of the preheader.
2250     if (isa<PHINode>(I))
2251       break;
2252 
2253     // Don't move instructions which might have side effects, since the side
2254     // effects need to complete before instructions inside the loop.  Also don't
2255     // move instructions which might read memory, since the loop may modify
2256     // memory. Note that it's okay if the instruction might have undefined
2257     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2258     // block.
2259     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2260       continue;
2261 
2262     // Skip debug info intrinsics.
2263     if (isa<DbgInfoIntrinsic>(I))
2264       continue;
2265 
2266     // Skip eh pad instructions.
2267     if (I->isEHPad())
2268       continue;
2269 
2270     // Don't sink alloca: we never want to sink static allocas out of the
2271     // entry block, and correctly sinking dynamic allocas requires
2272     // checks for stacksave/stackrestore intrinsics.
2273     // FIXME: Refactor this check somehow?
2274     if (isa<AllocaInst>(I))
2275       continue;
2276 
2277     // Determine if there is a use in or before the loop (direct or
2278     // otherwise).
2279     bool UsedInLoop = false;
2280     for (Use &U : I->uses()) {
2281       Instruction *User = cast<Instruction>(U.getUser());
2282       BasicBlock *UseBB = User->getParent();
2283       if (PHINode *P = dyn_cast<PHINode>(User)) {
2284         unsigned i =
2285           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2286         UseBB = P->getIncomingBlock(i);
2287       }
2288       if (UseBB == Preheader || L->contains(UseBB)) {
2289         UsedInLoop = true;
2290         break;
2291       }
2292     }
2293 
2294     // If there is, the def must remain in the preheader.
2295     if (UsedInLoop)
2296       continue;
2297 
2298     // Otherwise, sink it to the exit block.
2299     Instruction *ToMove = &*I;
2300     bool Done = false;
2301 
2302     if (I != Preheader->begin()) {
2303       // Skip debug info intrinsics.
2304       do {
2305         --I;
2306       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2307 
2308       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2309         Done = true;
2310     } else {
2311       Done = true;
2312     }
2313 
2314     MadeAnyChanges = true;
2315     ToMove->moveBefore(*ExitBlock, InsertPt);
2316     if (Done) break;
2317     InsertPt = ToMove->getIterator();
2318   }
2319 
2320   return MadeAnyChanges;
2321 }
2322 
2323 // Returns true if the condition of \p BI being checked is invariant and can be
2324 // proved to be trivially true.
2325 static bool isTrivialCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE,
2326                           bool ProvingLoopExit) {
2327   ICmpInst::Predicate Pred;
2328   Value *LHS, *RHS;
2329   using namespace PatternMatch;
2330   BasicBlock *TrueSucc, *FalseSucc;
2331   if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
2332                       m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
2333     return false;
2334 
2335   assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
2336          "Not a loop exit!");
2337 
2338   // 'LHS pred RHS' should now mean that we stay in loop.
2339   if (L->contains(FalseSucc))
2340     Pred = CmpInst::getInversePredicate(Pred);
2341 
2342   // If we are proving loop exit, invert the predicate.
2343   if (ProvingLoopExit)
2344     Pred = CmpInst::getInversePredicate(Pred);
2345 
2346   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
2347   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
2348   // Can we prove it to be trivially true?
2349   if (SE->isKnownPredicate(Pred, LHSS, RHSS))
2350     return true;
2351 
2352   return false;
2353 }
2354 
2355 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2356   SmallVector<BasicBlock*, 16> ExitingBlocks;
2357   L->getExitingBlocks(ExitingBlocks);
2358 
2359   // Remove all exits which aren't both rewritable and guaranteed to execute
2360   // on every iteration.
2361   auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2362     // If our exiting block exits multiple loops, we can only rewrite the
2363     // innermost one.  Otherwise, we're changing how many times the innermost
2364     // loop runs before it exits.
2365     if (LI->getLoopFor(ExitingBB) != L)
2366       return true;
2367 
2368     // Can't rewrite non-branch yet.
2369     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2370     if (!BI)
2371       return true;
2372 
2373     // If already constant, nothing to do.
2374     if (isa<Constant>(BI->getCondition()))
2375       return true;
2376 
2377     // Likewise, the loop latch must be dominated by the exiting BB.
2378     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
2379       return true;
2380 
2381     return false;
2382   });
2383   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2384 
2385   if (ExitingBlocks.empty())
2386     return false;
2387 
2388   // Get a symbolic upper bound on the loop backedge taken count.
2389   const SCEV *MaxExitCount = SE->computeMaxBackedgeTakenCount(L);
2390   if (isa<SCEVCouldNotCompute>(MaxExitCount))
2391     return false;
2392 
2393   // Visit our exit blocks in order of dominance. Since all exits must
2394   // dominate the latch, we know there is a total dominance order
2395   // between them.
2396   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
2397                // std::sort sorts in ascending order, so we want the inverse of
2398                // the normal dominance relation.
2399                if (A == B) return false;
2400                if (DT->properlyDominates(A, B))
2401                  return true;
2402                else {
2403                  assert(DT->properlyDominates(B, A) &&
2404                         "expected total dominance order!");
2405                  return false;
2406                }
2407   });
2408 #ifndef NDEBUG
2409   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2410     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2411   }
2412 #endif
2413 
2414   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2415     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2416     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2417     auto *OldCond = BI->getCondition();
2418     auto *NewCond = ConstantInt::get(OldCond->getType(),
2419                                      IsTaken ? ExitIfTrue : !ExitIfTrue);
2420     BI->setCondition(NewCond);
2421     if (OldCond->use_empty())
2422       DeadInsts.emplace_back(OldCond);
2423   };
2424 
2425   bool Changed = false;
2426   SmallSet<const SCEV*, 8> DominatingExitCounts;
2427   for (BasicBlock *ExitingBB : ExitingBlocks) {
2428     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2429     if (isa<SCEVCouldNotCompute>(ExitCount)) {
2430       // Okay, we do not know the exit count here. Can we at least prove that it
2431       // will remain the same within iteration space?
2432       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2433       if (isTrivialCond(L, BI, SE, false)) {
2434         FoldExit(ExitingBB, false);
2435         Changed = true;
2436       }
2437       if (isTrivialCond(L, BI, SE, true)) {
2438         FoldExit(ExitingBB, true);
2439         Changed = true;
2440       }
2441       continue;
2442     }
2443 
2444     // If we know we'd exit on the first iteration, rewrite the exit to
2445     // reflect this.  This does not imply the loop must exit through this
2446     // exit; there may be an earlier one taken on the first iteration.
2447     // TODO: Given we know the backedge can't be taken, we should go ahead
2448     // and break it.  Or at least, kill all the header phis and simplify.
2449     if (ExitCount->isZero()) {
2450       FoldExit(ExitingBB, true);
2451       Changed = true;
2452       continue;
2453     }
2454 
2455     // If we end up with a pointer exit count, bail.  Note that we can end up
2456     // with a pointer exit count for one exiting block, and not for another in
2457     // the same loop.
2458     if (!ExitCount->getType()->isIntegerTy() ||
2459         !MaxExitCount->getType()->isIntegerTy())
2460       continue;
2461 
2462     Type *WiderType =
2463       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2464     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2465     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2466     assert(MaxExitCount->getType() == ExitCount->getType());
2467 
2468     // Can we prove that some other exit must be taken strictly before this
2469     // one?
2470     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2471                                      MaxExitCount, ExitCount)) {
2472       FoldExit(ExitingBB, false);
2473       Changed = true;
2474       continue;
2475     }
2476 
2477     // As we run, keep track of which exit counts we've encountered.  If we
2478     // find a duplicate, we've found an exit which would have exited on the
2479     // exiting iteration, but (from the visit order) strictly follows another
2480     // which does the same and is thus dead.
2481     if (!DominatingExitCounts.insert(ExitCount).second) {
2482       FoldExit(ExitingBB, false);
2483       Changed = true;
2484       continue;
2485     }
2486 
2487     // TODO: There might be another opportunity to leverage SCEV's reasoning
2488     // here.  If we kept track of the min of dominating exits so far, we could
2489     // discharge exits with EC >= MDEC. This is less powerful than the existing
2490     // transform (since later exits aren't considered), but potentially more
2491     // powerful for any case where SCEV can prove a >=u b, but neither a == b
2492     // or a >u b.  Such a case is not currently known.
2493   }
2494   return Changed;
2495 }
2496 
2497 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2498   SmallVector<BasicBlock*, 16> ExitingBlocks;
2499   L->getExitingBlocks(ExitingBlocks);
2500 
2501   // Finally, see if we can rewrite our exit conditions into a loop invariant
2502   // form. If we have a read-only loop, and we can tell that we must exit down
2503   // a path which does not need any of the values computed within the loop, we
2504   // can rewrite the loop to exit on the first iteration.  Note that this
2505   // neither a) tells us the loop exits on the first iteration (unless
2506   // *all* exits are predicatable) nor b) tells us *which* exit might be taken.
2507   // This transformation looks a lot like a restricted form of dead loop
2508   // elimination, but restricted to read-only loops and without necessarily
2509   // needing to kill the loop entirely.
2510   if (!LoopPredication)
2511     return false;
2512 
2513   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2514     return false;
2515 
2516   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2517   // through *explicit* control flow.  We have to eliminate the possibility of
2518   // implicit exits (see below) before we know it's truly exact.
2519   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2520   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2521       !SE->isLoopInvariant(ExactBTC, L) ||
2522       !isSafeToExpand(ExactBTC, *SE))
2523     return false;
2524 
2525   // If we end up with a pointer exit count, bail.  It may be unsized.
2526   if (!ExactBTC->getType()->isIntegerTy())
2527     return false;
2528 
2529   auto BadExit = [&](BasicBlock *ExitingBB) {
2530     // If our exiting block exits multiple loops, we can only rewrite the
2531     // innermost one.  Otherwise, we're changing how many times the innermost
2532     // loop runs before it exits.
2533     if (LI->getLoopFor(ExitingBB) != L)
2534       return true;
2535 
2536     // Can't rewrite non-branch yet.
2537     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2538     if (!BI)
2539       return true;
2540 
2541     // If already constant, nothing to do.
2542     if (isa<Constant>(BI->getCondition()))
2543       return true;
2544 
2545     // If the exit block has phis, we need to be able to compute the values
2546     // within the loop which contains them.  This assumes trivially lcssa phis
2547     // have already been removed; TODO: generalize
2548     BasicBlock *ExitBlock =
2549     BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2550     if (!ExitBlock->phis().empty())
2551       return true;
2552 
2553     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2554     assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2555     if (!SE->isLoopInvariant(ExitCount, L) ||
2556         !isSafeToExpand(ExitCount, *SE))
2557       return true;
2558 
2559     // If we end up with a pointer exit count, bail.  It may be unsized.
2560     if (!ExitCount->getType()->isIntegerTy())
2561       return true;
2562 
2563     return false;
2564   };
2565 
2566   // If we have any exits which can't be predicated themselves, then we can't
2567   // predicate any exit which isn't guaranteed to execute before it.  Consider
2568   // two exits (a) and (b) which would both exit on the same iteration.  If we
2569   // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2570   // we could convert a loop from exiting through (a) to one exiting through
2571   // (b).  Note that this problem exists only for exits with the same exit
2572   // count, and we could be more aggressive when exit counts are known inequal.
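  // Illustrative sketch (hypothetical CFG): if the blocks are ordered
  //   header (a) -> body (b) -> latch
  // and both (a) and (b) would exit on iteration N, predicating only (b)
  // could make the loop leave through (b)'s exit block even though the
  // original loop always left through (a)'s.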
2573   llvm::sort(ExitingBlocks,
2574             [&](BasicBlock *A, BasicBlock *B) {
2575               // std::sort sorts in ascending order, so we want the inverse of
2576               // the normal dominance relation, plus a tie breaker for blocks
2577               // unordered by dominance.
2578               if (DT->properlyDominates(A, B)) return true;
2579               if (DT->properlyDominates(B, A)) return false;
2580               return A->getName() < B->getName();
2581             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to do so for now.
2585   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2586     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2587       return false;
2588 
  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
2591   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2592     if (BadExit(ExitingBlocks[i])) {
2593       ExitingBlocks.resize(i);
2594       break;
2595     }
2596 
2597   if (ExitingBlocks.empty())
2598     return false;
2599 
  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
2604   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2605         return DT->dominates(ExitingBB, L->getLoopLatch());
2606       }));
2607 
  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // TODO: As structured, this scan is O(I^2) for loop nests.  We could bail
  // out for outer loops, but that seems less than ideal.  MemorySSA can find
  // memory writes; is that enough for *all* side effects?
2616   for (BasicBlock *BB : L->blocks())
2617     for (auto &I : *BB)
      // TODO: use isGuaranteedToTransferExecutionToSuccessor
2619       if (I.mayHaveSideEffects() || I.mayThrow())
2620         return false;
2621 
2622   bool Changed = false;
2623   // Finally, do the actual predication for all predicatable blocks.  A couple
2624   // of notes here:
2625   // 1) We don't bother to constant fold dominated exits with identical exit
2626   //    counts; that's simply a form of CSE/equality propagation and we leave
2627   //    it for dedicated passes.
2628   // 2) We insert the comparison at the branch.  Hoisting introduces additional
2629   //    legality constraints and we leave that to dedicated logic.  We want to
2630   //    predicate even if we can't insert a loop invariant expression as
2631   //    peeling or unrolling will likely reduce the cost of the otherwise loop
2632   //    varying check.
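  // Illustrative sketch of the rewrite performed below (names invented for
  // exposition): an exiting branch
  //   br i1 %loop.varying.cond, label %in.loop, label %exit
  // either gets a constant condition (when this exit's count is the same SCEV
  // as the exact BTC), or has its condition replaced with a comparison of two
  // loop-invariant expansions:
  //   %pred = icmp ne iN %this.exit.count, %exact.btc
  //   br i1 %pred, label %in.loop, label %exit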
2633   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2634   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2635   Value *ExactBTCV = nullptr; // Lazily generated if needed.
2636   for (BasicBlock *ExitingBB : ExitingBlocks) {
2637     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2638 
2639     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2640     Value *NewCond;
2641     if (ExitCount == ExactBTC) {
2642       NewCond = L->contains(BI->getSuccessor(0)) ?
2643         B.getFalse() : B.getTrue();
2644     } else {
2645       Value *ECV = Rewriter.expandCodeFor(ExitCount);
2646       if (!ExactBTCV)
2647         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2648       Value *RHS = ExactBTCV;
2649       if (ECV->getType() != RHS->getType()) {
2650         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2651         ECV = B.CreateZExt(ECV, WiderTy);
2652         RHS = B.CreateZExt(RHS, WiderTy);
2653       }
2654       auto Pred = L->contains(BI->getSuccessor(0)) ?
2655         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2656       NewCond = B.CreateICmp(Pred, ECV, RHS);
2657     }
2658     Value *OldCond = BI->getCondition();
2659     BI->setCondition(NewCond);
2660     if (OldCond->use_empty())
2661       DeadInsts.emplace_back(OldCond);
2662     Changed = true;
2663   }
2664 
2665   return Changed;
2666 }
2667 
2668 //===----------------------------------------------------------------------===//
2669 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2670 //===----------------------------------------------------------------------===//
2671 
2672 bool IndVarSimplify::run(Loop *L) {
2673   // We need (and expect!) the incoming loop to be in LCSSA.
2674   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2675          "LCSSA required to run indvars!");
2676 
2677   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2678   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2679   //    canonicalization can be a pessimization without LSR to "clean up"
2680   //    afterwards.
2681   //  - We depend on having a preheader; in particular,
2682   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2683   //    and we're in trouble if we can't find the induction variable even when
2684   //    we've manually inserted one.
2685   //  - LFTR relies on having a single backedge.
2686   if (!L->isLoopSimplifyForm())
2687     return false;
2688 
2689 #ifndef NDEBUG
2690   // Used below for a consistency check only
2691   // Note: Since the result returned by ScalarEvolution may depend on the order
2692   // in which previous results are added to its cache, the call to
2693   // getBackedgeTakenCount() may change following SCEV queries.
2694   const SCEV *BackedgeTakenCount;
2695   if (VerifyIndvars)
2696     BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2697 #endif
2698 
2699   bool Changed = false;
2700   // If there are any floating-point recurrences, attempt to
2701   // transform them to use integer recurrences.
2702   Changed |= rewriteNonIntegerIVs(L);
2703 
2704   // Create a rewriter object which we'll use to transform the code with.
2705   SCEVExpander Rewriter(*SE, DL, "indvars");
2706 #ifndef NDEBUG
2707   Rewriter.setDebugType(DEBUG_TYPE);
2708 #endif
2709 
2710   // Eliminate redundant IV users.
2711   //
2712   // Simplification works best when run before other consumers of SCEV. We
2713   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2714   // other expressions involving loop IVs have been evaluated. This helps SCEV
2715   // set no-wrap flags before normalizing sign/zero extension.
2716   Rewriter.disableCanonicalMode();
2717   Changed |= simplifyAndExtend(L, Rewriter, LI);
2718 
2719   // Check to see if we can compute the final value of any expressions
2720   // that are recurrent in the loop, and substitute the exit values from the
2721   // loop into any instructions outside of the loop that use the final values
2722   // of the current expressions.
2723   if (ReplaceExitValue != NeverRepl) {
2724     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
2725                                              ReplaceExitValue, DeadInsts)) {
2726       NumReplaced += Rewrites;
2727       Changed = true;
2728     }
2729   }
2730 
2731   // Eliminate redundant IV cycles.
2732   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2733 
2734   // Try to eliminate loop exits based on analyzeable exit counts
2735   if (optimizeLoopExits(L, Rewriter))  {
2736     Changed = true;
2737     // Given we've changed exit counts, notify SCEV
2738     SE->forgetLoop(L);
2739   }
2740 
2741   // Try to form loop invariant tests for loop exits by changing how many
2742   // iterations of the loop run when that is unobservable.
2743   if (predicateLoopExits(L, Rewriter)) {
2744     Changed = true;
2745     // Given we've changed exit counts, notify SCEV
2746     SE->forgetLoop(L);
2747   }
2748 
2749   // If we have a trip count expression, rewrite the loop's exit condition
2750   // using it.
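  // Illustrative sketch of what LFTR does (names invented): an exiting
  // compare on a loop-varying value such as
  //   %cmp = icmp slt i64 %iv.next, %n
  // is replaced with an (in)equality test of the chosen loop counter against
  // a value expanded from the computed exit count, e.g.
  //   %cmp = icmp ne i64 %counter.next, %exit.value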
2751   if (!DisableLFTR) {
2752     BasicBlock *PreHeader = L->getLoopPreheader();
2753     BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
2754 
2755     SmallVector<BasicBlock*, 16> ExitingBlocks;
2756     L->getExitingBlocks(ExitingBlocks);
2757     for (BasicBlock *ExitingBB : ExitingBlocks) {
2758       // Can't rewrite non-branch yet.
2759       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2760         continue;
2761 
      // If our exiting block exits multiple loops, we can only rewrite the
2763       // innermost one.  Otherwise, we're changing how many times the innermost
2764       // loop runs before it exits.
2765       if (LI->getLoopFor(ExitingBB) != L)
2766         continue;
2767 
2768       if (!needsLFTR(L, ExitingBB))
2769         continue;
2770 
2771       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2772       if (isa<SCEVCouldNotCompute>(ExitCount))
2773         continue;
2774 
      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts which weren't zero when
      // optimizeLoopExits saw them to now be folded to zero.  Arguably, we
      // should iterate until stable to handle cases like this better.
2779       if (ExitCount->isZero())
2780         continue;
2781 
2782       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2783       if (!IndVar)
2784         continue;
2785 
2786       // Avoid high cost expansions.  Note: This heuristic is questionable in
2787       // that our definition of "high cost" is not exactly principled.
2788       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2789                                        TTI, PreHeaderBR))
2790         continue;
2791 
2792       // Check preconditions for proper SCEVExpander operation. SCEV does not
2793       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2794       // any pass that uses the SCEVExpander must do it. This does not work
2795       // well for loop passes because SCEVExpander makes assumptions about
2796       // all loops, while LoopPassManager only forces the current loop to be
2797       // simplified.
2798       //
2799       // FIXME: SCEV expansion has no way to bail out, so the caller must
2800       // explicitly check any assumptions made by SCEV. Brittle.
2801       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2802       if (!AR || AR->getLoop()->getLoopPreheader())
2803         Changed |= linearFunctionTestReplace(L, ExitingBB,
2804                                              ExitCount, IndVar,
2805                                              Rewriter);
2806     }
2807   }
2808   // Clear the rewriter cache, because values that are in the rewriter's cache
2809   // can be deleted in the loop below, causing the AssertingVH in the cache to
2810   // trigger.
2811   Rewriter.clear();
2812 
2813   // Now that we're done iterating through lists, clean up any instructions
2814   // which are now dead.
2815   while (!DeadInsts.empty())
2816     if (Instruction *Inst =
2817             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2818       Changed |=
2819           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2820 
2821   // The Rewriter may not be used from this point on.
2822 
2823   // Loop-invariant instructions in the preheader that aren't used in the
2824   // loop may be sunk below the loop to reduce register pressure.
2825   Changed |= sinkUnusedInvariants(L);
2826 
2827   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2828   // trip count and therefore can further simplify exit values in addition to
2829   // rewriteLoopExitValues.
2830   Changed |= rewriteFirstIterationLoopExitValues(L);
2831 
2832   // Clean up dead instructions.
2833   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2834 
2835   // Check a post-condition.
2836   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2837          "Indvars did not preserve LCSSA!");
2838 
  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
2842 #ifndef NDEBUG
2843   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2844     SE->forgetLoop(L);
2845     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2846     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2847         SE->getTypeSizeInBits(NewBECount->getType()))
2848       NewBECount = SE->getTruncateOrNoop(NewBECount,
2849                                          BackedgeTakenCount->getType());
2850     else
2851       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2852                                                  NewBECount->getType());
2853     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2854                                  NewBECount) && "indvars must preserve SCEV");
2855   }
2856   if (VerifyMemorySSA && MSSAU)
2857     MSSAU->getMemorySSA()->verifyMemorySSA();
2858 #endif
2859 
2860   return Changed;
2861 }
2862 
2863 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2864                                           LoopStandardAnalysisResults &AR,
2865                                           LPMUpdater &) {
2866   Function *F = L.getHeader()->getParent();
2867   const DataLayout &DL = F->getParent()->getDataLayout();
2868 
2869   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA);
2870   if (!IVS.run(&L))
2871     return PreservedAnalyses::all();
2872 
2873   auto PA = getLoopPassPreservedAnalyses();
2874   PA.preserveSet<CFGAnalyses>();
2875   if (AR.MSSA)
2876     PA.preserve<MemorySSAAnalysis>();
2877   return PA;
2878 }
2879 
2880 namespace {
2881 
2882 struct IndVarSimplifyLegacyPass : public LoopPass {
2883   static char ID; // Pass identification, replacement for typeid
2884 
2885   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2886     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2887   }
2888 
2889   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2890     if (skipLoop(L))
2891       return false;
2892 
2893     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2894     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2895     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2896     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2897     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2898     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2899     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2900     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2901     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2902     MemorySSA *MSSA = nullptr;
2903     if (MSSAAnalysis)
2904       MSSA = &MSSAAnalysis->getMSSA();
2905 
2906     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA);
2907     return IVS.run(L);
2908   }
2909 
2910   void getAnalysisUsage(AnalysisUsage &AU) const override {
2911     AU.setPreservesCFG();
2912     AU.addPreserved<MemorySSAWrapperPass>();
2913     getLoopAnalysisUsage(AU);
2914   }
2915 };
2916 
2917 } // end anonymous namespace
2918 
2919 char IndVarSimplifyLegacyPass::ID = 0;
2920 
2921 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2922                       "Induction Variable Simplification", false, false)
2923 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2924 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2925                     "Induction Variable Simplification", false, false)
2926 
2927 Pass *llvm::createIndVarSimplifyPass() {
2928   return new IndVarSimplifyLegacyPass();
2929 }
2930