//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

namespace {
struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool Changed = false;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  void handleFloatingPointIV(Loop *L, PHINode *PN);
  void rewriteNonIntegerIVs(Loop *L);

  void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  void rewriteFirstIterationLoopExitValues(Loop *L);

  Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
                                   PHINode *IndVar, SCEVExpander &Rewriter);

  void sinkUnusedInvariants(Loop *L);

  Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
                            Instruction *InsertPt, Type *Ty);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};
} // anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
          << *FromBase << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);
    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }
  assert(InsertPt && "Missing phi operand");

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

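  // Hoist the insertion point out of any inner loops that do not contain the
  // def: walk up the dominator tree from InsertPt until we reach a block whose
  // innermost enclosing loop is the def's loop, and insert at its terminator.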
  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
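  // convertToInteger fills UIntVal with a signed 64-bit conversion rounded
  // toward zero; isExact is set only when the float was an integral value, so
  // the bit pattern can safely be reinterpreted as an int64_t below.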
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, insert a corresponding
/// integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
///
void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
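  // A loop-header phi in a simplified loop has exactly two incoming edges.
  // L->contains() is 1 exactly when incoming block 0 is the backedge, so
  // IncomingEdge indexes the preheader edge and XOR-ing with 1 yields the
  // backedge index.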
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return;

  // Find the exit condition, which is an fcmp.  If it doesn't exist, or if it
  // isn't used solely by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return;

  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  Changed = true;
}

void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  //
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (BasicBlock::iterator I = Header->begin();
       PHINode *PN = dyn_cast<PHINode>(I); ++I)
    PHIs.push_back(PN);

  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
}

namespace {
// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;
  unsigned Ith;  // Ith incoming value.
  Value *Val;    // Exit value after expansion.
  bool HighCost; // True if expansion is high cost.

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};
} // anonymous namespace

Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
                                          Loop *L, Instruction *InsertPt,
                                          Type *ResultTy) {
  // Before expanding S into an expensive LLVM expression, see if we can use an
  // already existing value as the expansion for S.
  if (Value *ExistingValue = Rewriter.getExactExistingExpansion(S, InsertPt, L))
    if (ExistingValue->getType() == ResultTy)
      return ExistingValue;

  // We didn't find anything, fall back to using SCEVExpander.
  return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
}

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an instruction, it cannot be an
        // expression computed inside the loop, so skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.
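        // Asking for the SCEV at the parent loop's scope folds L's
        // recurrences down to their final (exit) values.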
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (!SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if:
        //  - it is definitely used inside the loop in a way which cannot be
        //    optimized away.
        //  - no use outside of the loop can take advantage of hoisting the
        //    computation out of the loop
        if (ExitValue->getSCEVType() >= scMulExpr) {
          unsigned NumHardInternalUses = 0;
          unsigned NumSoftExternalUses = 0;
          unsigned NumUses = 0;
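          // Only inspect a small, fixed number of uses (the scans below stop
          // once NumUses exceeds 6) so this profitability heuristic stays
          // cheap on instructions with huge use lists.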
          for (auto IB = Inst->user_begin(), IE = Inst->user_end();
               IB != IE && NumUses <= 6; ++IB) {
            Instruction *UseInstr = cast<Instruction>(*IB);
            unsigned Opc = UseInstr->getOpcode();
            NumUses++;
            if (L->contains(UseInstr)) {
              if (Opc == Instruction::Call || Opc == Instruction::Ret)
                NumHardInternalUses++;
            } else {
              if (Opc == Instruction::PHI) {
                // Do not count the Phi as a use. LCSSA may have inserted
                // plenty of trivial ones.
                NumUses--;
                for (auto PB = UseInstr->user_begin(),
                          PE = UseInstr->user_end();
                     PB != PE && NumUses <= 6; ++PB, ++NumUses) {
                  unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
                  if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
                    NumSoftExternalUses++;
                }
                continue;
              }
              if (Opc != Instruction::Call && Opc != Instruction::Ret)
                NumSoftExternalUses++;
            }
          }
          if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
            continue;
        }

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal =
            expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType());

        DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
                     << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, schedule it for deletion. Don't delete
    // it immediately, to avoid invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop-invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  auto *LoopHeader = L->getHeader();
  assert(LoopHeader && "Invalid loop");

  for (auto *ExitBB : ExitBlocks) {
    BasicBlock::iterator BBI = ExitBB->begin();
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    while (auto *PN = dyn_cast<PHINode>(BBI++)) {
      for (unsigned IncomingValIdx = 0, E = PN->getNumIncomingValues();
          IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN->getIncomingBlock(IncomingValIdx);

        // We currently only support loop exits from the loop header. If the
        // incoming block is not the loop header, we would need to recursively
        // check that all conditions on the path starting from the loop header
        // are loop invariant. Additional support might be added in the future.
        if (IncomingBB != LoopHeader)
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal =
            dyn_cast<PHINode>(PN->getIncomingValue(IncomingValIdx));

        // Only deal with PHIs.
        if (!ExitVal)
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == LoopHeader &&
                 "ExitVal must be in loop header");
          PN->setIncomingValue(IncomingValIdx,
              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
}

/// Check whether it is possible to delete the loop after rewriting the exit
/// values. If it is possible, ignore ReplaceExitValue and do the rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {

  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // The LoopDeletion pass can delete a loop even when ExitingBlocks.size() > 1,
  // but we forgo the multiple-ExitingBlocks case here for simplicity.
  // TODO: If we see a testcase where a loop with multiple exiting blocks can
  // be deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (any_of(*BB, [](Instruction &I) { return I.mayHaveSideEffects(); }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {
// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by visitIVCast and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;
  Type *WidestNativeType = nullptr; // Widest integer type created by a [sz]ext.
  bool IsSigned = false;            // Was a sext user seen before a zext?
};
} // anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
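  // Only consider widths the target supports natively; widening to an illegal
  // integer type would just be split apart again by type legalization.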
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
///
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi;
  Instruction *WideInc;
  const SCEV *WideIncExpr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *, 16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };
  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  typedef std::pair<AssertingVH<Value>, AssertingVH<Instruction>> DefUserPair;
  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), WidePhi(nullptr), WideInc(nullptr),
        WideIncExpr(nullptr), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  typedef std::pair<const SCEVAddRecExpr *, ExtendKind> WidenedRecTy;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
} // anonymous namespace

/// Perform a quick domtree based check for loop invariance assuming that V is
/// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
/// purpose.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
  Instruction *Inst = dyn_cast<Instruction>(V);
  if (!Inst)
    return true;

  return DT->properlyDominates(Inst->getParent(), L->getHeader());
}

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
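  // Note that SCEV has no dedicated subtract expression; getMinusSCEV models
  // LHS - RHS as LHS + (-1 * RHS).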
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {

  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported so far.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification
/// after widening its type? In other words, can the extend be safely hoisted
/// out of the loop, with SCEV reducing the value to a recurrence on the same
/// loop? If so, return the extended recurrence and the kind of extension used.
/// Otherwise return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
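  // A never-negative narrow value sign- and zero-extends to the same wide
  // value, so we are free to try the sign extension first and fall back to
  // the zero extension; all that matters is whether SCEV can fold the result
  // into an AddRec for this loop.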
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef
        << " for user " << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(
      getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand).  The extend operation is hoisted into the
/// loop preheader as far as possible.
1283 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1284   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1285   if (!Cmp)
1286     return false;
1287 
1288   // We can legally widen the comparison in the following two cases:
1289   //
1290   //  - The signedness of the IV extension and comparison match
1291   //
1292   //  - The narrow IV is always positive (and thus its sign extension is equal
1293   //    to its zero extension).  For instance, let's say we're zero extending
1294   //    %narrow for the following use
1295   //
1296   //      icmp slt i32 %narrow, %val   ... (A)
1297   //
1298   //    and %narrow is always positive.  Then
1299   //
1300   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1301   //          == icmp slt i32 zext(%narrow), sext(%val)
1302   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1303   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1304     return false;
1305 
1306   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1307   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1308   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1309   assert (CastWidth <= IVWidth && "Unexpected width while widening compare.");
1310 
1311   // Widen the compare instruction.
1312   IRBuilder<> Builder(
1313       getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
1314   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1315 
1316   // Widen the other operand of the compare, if necessary.
1317   if (CastWidth < IVWidth) {
1318     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1319     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1320   }
1321   return true;
1322 }
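     // Hedged example of the rewrite above, assuming a sign-extended i32 IV
     // widened to i64:
     //   %c = icmp slt i32 %iv, %limit
     // becomes
     //   %limit.sext = sext i32 %limit to i64
     //   %c = icmp slt i64 %iv.wide, %limit.sext
     // with the sext of %limit hoisted toward the preheader when its operand
     // allows it.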
1323 
1324 /// Determine whether an individual user of the narrow IV can be widened. If so,
1325 /// return the wide clone of the user.
1326 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1327   assert(ExtendKindMap.count(DU.NarrowDef) &&
1328          "Should already know the kind of extension used to widen NarrowDef");
1329 
1330   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1331   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1332     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1333       // For LCSSA phis, sink the truncate outside the loop.
1334       // After SimplifyCFG most loop exit targets have a single predecessor.
1335       // Otherwise fall back to a truncate within the loop.
1336       if (UsePhi->getNumOperands() != 1)
1337         truncateIVUse(DU, DT, LI);
1338       else {
1339         // Widening the PHI requires us to insert a trunc.  The logical place
1340         // for this trunc is in the same BB as the PHI.  This is not possible if
1341         // the BB is terminated by a catchswitch.
1342         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1343           return nullptr;
1344 
1345         PHINode *WidePhi =
1346           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1347                           UsePhi);
1348         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1349         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1350         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1351         UsePhi->replaceAllUsesWith(Trunc);
1352         DeadInsts.emplace_back(UsePhi);
1353         DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
1354               << " to " << *WidePhi << "\n");
1355       }
1356       return nullptr;
1357     }
1358   }
1359 
1360   // This narrow use can be widened by a sext if it's non-negative or its narrow
1361   // def was widened by a sext. Same for zext.
1362   auto canWidenBySExt = [&]() {
1363     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1364   };
1365   auto canWidenByZExt = [&]() {
1366     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1367   };
1368 
1369   // Our raison d'etre! Eliminate sign and zero extension.
1370   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1371       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1372     Value *NewDef = DU.WideDef;
1373     if (DU.NarrowUse->getType() != WideType) {
1374       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1375       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1376       if (CastWidth < IVWidth) {
1377         // The cast isn't as wide as the IV, so insert a Trunc.
1378         IRBuilder<> Builder(DU.NarrowUse);
1379         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1380       }
1381       else {
1382         // A wider extend was hidden behind a narrower one. This may induce
1383         // another round of IV widening in which the intermediate IV becomes
1384         // dead. It should be very rare.
1385         DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1386               << " not wide enough to subsume " << *DU.NarrowUse << "\n");
1387         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1388         NewDef = DU.NarrowUse;
1389       }
1390     }
1391     if (NewDef != DU.NarrowUse) {
1392       DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1393             << " replaced by " << *DU.WideDef << "\n");
1394       ++NumElimExt;
1395       DU.NarrowUse->replaceAllUsesWith(NewDef);
1396       DeadInsts.emplace_back(DU.NarrowUse);
1397     }
1398     // Now that the extend is gone, we want to expose its uses for potential
1399     // further simplification. We don't need to directly inform SimplifyIVUsers
1400     // of the new users, because their parent IV will be processed later as a
1401     // new loop phi. If we preserved IVUsers analysis, we would also want to
1402     // push the uses of WideDef here.
1403 
1404     // No further widening is needed. The deceased [sz]ext had done it for us.
1405     return nullptr;
1406   }
1407 
1408   // Does this user itself evaluate to a recurrence after widening?
1409   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1410   if (!WideAddRec.first)
1411     WideAddRec = getWideRecurrence(DU);
1412 
1413   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1414   if (!WideAddRec.first) {
1415     // If use is a loop condition, try to promote the condition instead of
1416     // truncating the IV first.
1417     if (widenLoopCompare(DU))
1418       return nullptr;
1419 
1420     // This user does not evaluate to a recurrence after widening, so don't
1421     // follow it. Instead insert a Trunc to kill off the original use,
1422     // eventually isolating the original narrow IV so it can be removed.
1423     truncateIVUse(DU, DT, LI);
1424     return nullptr;
1425   }
1426   // Assume block terminators cannot evaluate to a recurrence. We can't
1427   // insert a Trunc after a terminator if there happens to be a critical edge.
1428   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1429          "SCEV is not expected to evaluate a block terminator");
1430 
1431   // Reuse the IV increment that SCEVExpander created as long as it dominates
1432   // NarrowUse.
1433   Instruction *WideUse = nullptr;
1434   if (WideAddRec.first == WideIncExpr &&
1435       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1436     WideUse = WideInc;
1437   else {
1438     WideUse = cloneIVUser(DU, WideAddRec.first);
1439     if (!WideUse)
1440       return nullptr;
1441   }
1442   // Evaluation of WideAddRec ensured that the narrow expression could be
1443   // extended outside the loop without overflow. This suggests that the wide use
1444   // evaluates to the same expression as the extended narrow use, but doesn't
1445   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1446   // where it fails, we simply throw away the newly created wide use.
1447   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1448     DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
1449           << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first << "\n");
1450     DeadInsts.emplace_back(WideUse);
1451     return nullptr;
1452   }
1453 
1454   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1455   // Returning WideUse pushes it on the worklist.
1456   return WideUse;
1457 }
1458 
1459 /// Add eligible users of NarrowDef to NarrowIVUsers.
1460 ///
1461 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1462   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1463   bool NonNegativeDef =
1464       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1465                            SE->getConstant(NarrowSCEV->getType(), 0));
1466   for (User *U : NarrowDef->users()) {
1467     Instruction *NarrowUser = cast<Instruction>(U);
1468 
1469     // Handle data flow merges and bizarre phi cycles.
1470     if (!Widened.insert(NarrowUser).second)
1471       continue;
1472 
1473     bool NonNegativeUse = false;
1474     if (!NonNegativeDef) {
1475       // We might have control-dependent range information for this context.
1476       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1477         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1478     }
1479 
1480     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1481                                NonNegativeDef || NonNegativeUse);
1482   }
1483 }
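     // Note (illustrative): a def such as the i32 recurrence {0,+,1}<nsw> is
     // provably non-negative by SCEV alone. When SCEV cannot prove this, a
     // range computed by calculatePostIncRange (e.g. under a dominating
     // 'icmp slt i32 %iv, 100' guard) may still mark the individual def-use
     // edge as never-negative for that particular user.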
1484 
1485 /// Process a single induction variable. First use the SCEVExpander to create a
1486 /// wide induction variable that evaluates to the same recurrence as the
1487 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1488 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1489 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1490 ///
1491 /// It would be simpler to delete uses as they are processed, but we must avoid
1492 /// invalidating SCEV expressions.
1493 ///
1494 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1495   // Is this phi an induction variable?
1496   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1497   if (!AddRec)
1498     return nullptr;
1499 
1500   // Widen the induction variable expression.
1501   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1502                                ? SE->getSignExtendExpr(AddRec, WideType)
1503                                : SE->getZeroExtendExpr(AddRec, WideType);
1504 
1505   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1506          "Expect the new IV expression to preserve its type");
1507 
1508   // Can the IV be extended outside the loop without overflow?
1509   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1510   if (!AddRec || AddRec->getLoop() != L)
1511     return nullptr;
1512 
1513   // An AddRec must have loop-invariant operands. Since this AddRec is
1514   // materialized by a loop header phi, the expression cannot have any post-loop
1515   // operands, so they must dominate the loop header.
1516   assert(
1517       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1518       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1519       "Loop header phi recurrence inputs do not dominate the loop");
1520 
1521   // Iterate over IV uses (including transitive ones) looking for IV increments
1522   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1523   // the increment, calculate control-dependent range information based on
1524   // dominating conditions inside the loop (e.g. a range check inside the
1525   // loop). Calculated ranges are stored in the PostIncRangeInfos map.
1526   //
1527   // Control-dependent range information is later used to prove that a narrow
1528   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1529   // this on demand because when pushNarrowIVUsers needs this information some
1530   // of the dominating conditions might be already widened.
1531   if (UsePostIncrementRanges)
1532     calculatePostIncRanges(OrigPhi);
1533 
1534   // The rewriter provides a value for the desired IV expression. This may
1535   // either find an existing phi or materialize a new one. Either way, we
1536   // expect a well-formed cyclic phi-with-increments, i.e. any operand not part
1537   // of the phi-SCC dominates the loop entry.
1538   Instruction *InsertPt = &L->getHeader()->front();
1539   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1540 
1541   // Remembering the WideIV increment generated by SCEVExpander allows
1542   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1543   // employ a general reuse mechanism because the call above is the only call to
1544   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1545   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1546     WideInc =
1547       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1548     WideIncExpr = SE->getSCEV(WideInc);
1549     // Propagate the debug location associated with the original loop increment
1550     // to the new (widened) increment.
1551     auto *OrigInc =
1552       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1553     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1554   }
1555 
1556   DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1557   ++NumWidened;
1558 
1559   // Traverse the def-use chain using a worklist starting at the original IV.
1560   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1561 
1562   Widened.insert(OrigPhi);
1563   pushNarrowIVUsers(OrigPhi, WidePhi);
1564 
1565   while (!NarrowIVUsers.empty()) {
1566     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1567 
1568     // Process a def-use edge. This may replace the use, so don't hold a
1569     // use_iterator across it.
1570     Instruction *WideUse = widenIVUse(DU, Rewriter);
1571 
1572     // Follow all def-use edges from the previous narrow use.
1573     if (WideUse)
1574       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1575 
1576     // widenIVUse may have removed the def-use edge.
1577     if (DU.NarrowDef->use_empty())
1578       DeadInsts.emplace_back(DU.NarrowDef);
1579   }
1580   return WidePhi;
1581 }
1582 
1583 /// Calculates the control-dependent range for the given def at the given
1584 /// context by looking at dominating conditions inside the loop.
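     /// Hedged example: given a post-increment def '%x = add nsw i32 %iv, 1'
     /// whose user sits under a dominating 'icmp slt i32 %iv, %n' guard, the
     /// range of %x at that user is narrowed to
     /// makeAllowedICmpRegion(slt, range(%n)) + 1, which can establish that %x
     /// is non-negative even when the global SCEV range of %x cannot.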
1585 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1586                                     Instruction *NarrowUser) {
1587   using namespace llvm::PatternMatch;
1588 
1589   Value *NarrowDefLHS;
1590   const APInt *NarrowDefRHS;
1591   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1592                                  m_APInt(NarrowDefRHS))) ||
1593       !NarrowDefRHS->isNonNegative())
1594     return;
1595 
1596   auto UpdateRangeFromCondition = [&] (Value *Condition,
1597                                        bool TrueDest) {
1598     CmpInst::Predicate Pred;
1599     Value *CmpRHS;
1600     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1601                                  m_Value(CmpRHS))))
1602       return;
1603 
1604     CmpInst::Predicate P =
1605             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1606 
1607     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1608     auto CmpConstrainedLHSRange =
1609             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1610     auto NarrowDefRange =
1611             CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1612 
1613     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1614   };
1615 
1616   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1617     if (!HasGuards)
1618       return;
1619 
1620     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1621                                      Ctx->getParent()->rend())) {
1622       Value *C = nullptr;
1623       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1624         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1625     }
1626   };
1627 
1628   UpdateRangeFromGuards(NarrowUser);
1629 
1630   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1631   // If NarrowUserBB is statically unreachable, asking dominator queries may
1632   // yield surprising results (e.g. the block may not have a dom tree node).
1633   if (!DT->isReachableFromEntry(NarrowUserBB))
1634     return;
1635 
1636   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1637        L->contains(DTB->getBlock());
1638        DTB = DTB->getIDom()) {
1639     auto *BB = DTB->getBlock();
1640     auto *TI = BB->getTerminator();
1641     UpdateRangeFromGuards(TI);
1642 
1643     auto *BI = dyn_cast<BranchInst>(TI);
1644     if (!BI || !BI->isConditional())
1645       continue;
1646 
1647     auto *TrueSuccessor = BI->getSuccessor(0);
1648     auto *FalseSuccessor = BI->getSuccessor(1);
1649 
1650     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1651       return BBE.isSingleEdge() &&
1652              DT->dominates(BBE, NarrowUser->getParent());
1653     };
1654 
1655     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1656       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1657 
1658     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1659       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1660   }
1661 }
1662 
1663 /// Calculates PostIncRangeInfos map for the given IV
1664 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1665   SmallPtrSet<Instruction *, 16> Visited;
1666   SmallVector<Instruction *, 6> Worklist;
1667   Worklist.push_back(OrigPhi);
1668   Visited.insert(OrigPhi);
1669 
1670   while (!Worklist.empty()) {
1671     Instruction *NarrowDef = Worklist.pop_back_val();
1672 
1673     for (Use &U : NarrowDef->uses()) {
1674       auto *NarrowUser = cast<Instruction>(U.getUser());
1675 
1676       // Don't go looking outside the current loop.
1677       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1678       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1679         continue;
1680 
1681       if (!Visited.insert(NarrowUser).second)
1682         continue;
1683 
1684       Worklist.push_back(NarrowUser);
1685 
1686       calculatePostIncRange(NarrowDef, NarrowUser);
1687     }
1688   }
1689 }
1690 
1691 //===----------------------------------------------------------------------===//
1692 //  Live IV Reduction - Minimize IVs live across the loop.
1693 //===----------------------------------------------------------------------===//
1694 
1695 
1696 //===----------------------------------------------------------------------===//
1697 //  Simplification of IV users based on SCEV evaluation.
1698 //===----------------------------------------------------------------------===//
1699 
1700 namespace {
1701 class IndVarSimplifyVisitor : public IVVisitor {
1702   ScalarEvolution *SE;
1703   const TargetTransformInfo *TTI;
1704   PHINode *IVPhi;
1705 
1706 public:
1707   WideIVInfo WI;
1708 
1709   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1710                         const TargetTransformInfo *TTI,
1711                         const DominatorTree *DTree)
1712     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1713     DT = DTree;
1714     WI.NarrowIV = IVPhi;
1715   }
1716 
1717   // Implement the interface used by simplifyUsersOfIV.
1718   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1719 };
1720 }
1721 
1722 /// Iteratively perform simplification on a worklist of IV users. Each
1723 /// successive simplification may push more users which may themselves be
1724 /// candidates for simplification.
1725 ///
1726 /// Sign/Zero extend elimination is interleaved with IV simplification.
1727 ///
1728 void IndVarSimplify::simplifyAndExtend(Loop *L,
1729                                        SCEVExpander &Rewriter,
1730                                        LoopInfo *LI) {
1731   SmallVector<WideIVInfo, 8> WideIVs;
1732 
1733   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1734           Intrinsic::getName(Intrinsic::experimental_guard));
1735   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1736 
1737   SmallVector<PHINode*, 8> LoopPhis;
1738   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1739     LoopPhis.push_back(cast<PHINode>(I));
1740   }
1741   // Each round of simplification iterates through the SimplifyIVUsers worklist
1742   // for all current phis, then determines whether any IVs can be
1743   // widened. Widening adds new phis to LoopPhis, inducing another round of
1744   // simplification on the wide IVs.
1745   while (!LoopPhis.empty()) {
1746     // Evaluate as many IV expressions as possible before widening any IVs. This
1747     // forces SCEV to set no-wrap flags before evaluating sign/zero
1748     // extension. The first time SCEV attempts to normalize sign/zero extension,
1749     // the result becomes final. So for the most predictable results, we delay
1750     // evaluation of sign/zero extension until needed, and avoid running other
1751     // SCEV-based analyses prior to simplifyAndExtend.
1752     do {
1753       PHINode *CurrIV = LoopPhis.pop_back_val();
1754 
1755       // Information about sign/zero extensions of CurrIV.
1756       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1757 
1758       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, &Visitor);
1759 
1760       if (Visitor.WI.WidestNativeType) {
1761         WideIVs.push_back(Visitor.WI);
1762       }
1763     } while (!LoopPhis.empty());
1764 
1765     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1766       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1767       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1768         Changed = true;
1769         LoopPhis.push_back(WidePhi);
1770       }
1771     }
1772   }
1773 }
1774 
1775 //===----------------------------------------------------------------------===//
1776 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1777 //===----------------------------------------------------------------------===//
1778 
1779 /// Return true if this loop's backedge taken count expression can be safely and
1780 /// cheaply expanded into an instruction sequence that can be used by
1781 /// linearFunctionTestReplace.
1782 ///
1783 /// TODO: This fails for pointer-type loop counters with greater than one byte
1784 /// strides, consequently preventing LFTR from running. For the purpose of LFTR
1785 /// we could skip this check in the case that the LFTR loop counter (chosen by
1786 /// FindLoopCounter) is also pointer type. Instead, we could directly convert
1787 /// the loop test to an inequality test by checking the target data's alignment
1788 /// of element types (given that the initial pointer value originates from or is
1789 /// used by an ABI-constrained operation, as opposed to inttoptr/ptrtoint).
1790 /// However, we don't yet have a strong motivation for converting loop tests
1791 /// into inequality tests.
1792 static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
1793                                         SCEVExpander &Rewriter) {
1794   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1795   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
1796       BackedgeTakenCount->isZero())
1797     return false;
1798 
1799   if (!L->getExitingBlock())
1800     return false;
1801 
1802   // Can't rewrite non-branch yet.
1803   if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
1804     return false;
1805 
1806   if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
1807     return false;
1808 
1809   return true;
1810 }
1811 
1812 /// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
1813 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
1814   Instruction *IncI = dyn_cast<Instruction>(IncV);
1815   if (!IncI)
1816     return nullptr;
1817 
1818   switch (IncI->getOpcode()) {
1819   case Instruction::Add:
1820   case Instruction::Sub:
1821     break;
1822   case Instruction::GetElementPtr:
1823     // An IV counter must preserve its type.
1824     if (IncI->getNumOperands() == 2)
1825       break;
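         // Otherwise fall through and reject GEPs with more than one index.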
1826   default:
1827     return nullptr;
1828   }
1829 
1830   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1831   if (Phi && Phi->getParent() == L->getHeader()) {
1832     if (isLoopInvariant(IncI->getOperand(1), L, DT))
1833       return Phi;
1834     return nullptr;
1835   }
1836   if (IncI->getOpcode() == Instruction::GetElementPtr)
1837     return nullptr;
1838 
1839   // Allow add/sub to be commuted.
1840   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1841   if (Phi && Phi->getParent() == L->getHeader()) {
1842     if (isLoopInvariant(IncI->getOperand(0), L, DT))
1843       return Phi;
1844   }
1845   return nullptr;
1846 }
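     // E.g. (hypothetical IR): given
     //   %iv      = phi i64 [ 0, %preheader ], [ %iv.next, %latch ]
     //   %iv.next = add i64 %iv, %step
     // getLoopPhiForCounter(%iv.next) returns %iv provided %step is loop
     // invariant; a GEP increment qualifies only in its two-operand
     // (single-index) form, and only with the phi as the pointer operand.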
1847 
1848 /// Return the compare guarding the loop latch, or NULL for unrecognized tests.
1849 static ICmpInst *getLoopTest(Loop *L) {
1850   assert(L->getExitingBlock() && "expected loop exit");
1851 
1852   BasicBlock *LatchBlock = L->getLoopLatch();
1853   // Don't bother with LFTR if the loop is not properly simplified.
1854   if (!LatchBlock)
1855     return nullptr;
1856 
1857   BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
1858   assert(BI && "expected exit branch");
1859 
1860   return dyn_cast<ICmpInst>(BI->getCondition());
1861 }
1862 
1863 /// linearFunctionTestReplace policy. Return true unless we can show that the
1864 /// current exit test is already sufficiently canonical.
1865 static bool needsLFTR(Loop *L, DominatorTree *DT) {
1866   // Do LFTR to simplify the exit condition to an ICMP.
1867   ICmpInst *Cond = getLoopTest(L);
1868   if (!Cond)
1869     return true;
1870 
1871   // Do LFTR to simplify the exit ICMP to EQ/NE
1872   ICmpInst::Predicate Pred = Cond->getPredicate();
1873   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1874     return true;
1875 
1876   // Look for a loop invariant RHS
1877   Value *LHS = Cond->getOperand(0);
1878   Value *RHS = Cond->getOperand(1);
1879   if (!isLoopInvariant(RHS, L, DT)) {
1880     if (!isLoopInvariant(LHS, L, DT))
1881       return true;
1882     std::swap(LHS, RHS);
1883   }
1884   // Look for a simple IV counter LHS
1885   PHINode *Phi = dyn_cast<PHINode>(LHS);
1886   if (!Phi)
1887     Phi = getLoopPhiForCounter(LHS, L, DT);
1888 
1889   if (!Phi)
1890     return true;
1891 
1892   // Do LFTR if the PHI node is defined in the loop, but is *not* a counter.
1893   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1894   if (Idx < 0)
1895     return true;
1896 
1897   // Do LFTR if the exit condition's IV is *not* a simple counter.
1898   Value *IncV = Phi->getIncomingValue(Idx);
1899   return Phi != getLoopPhiForCounter(IncV, L, DT);
1900 }
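     // For instance (sketch), a loop whose latch test already reads
     //   %iv.next  = add i64 %iv, 1
     //   %exitcond = icmp ne i64 %iv.next, %n   ; %n loop invariant
     // is left alone, while any slt/ult-style exit test triggers LFTR.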
1901 
1902 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1903 /// down to checking that all operands are constant and listing instructions
1904 /// that may hide undef.
1905 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1906                                unsigned Depth) {
1907   if (isa<Constant>(V))
1908     return !isa<UndefValue>(V);
1909 
1910   if (Depth >= 6)
1911     return false;
1912 
1913   // Conservatively handle non-constant non-instructions. For example, Arguments
1914   // may be undef.
1915   Instruction *I = dyn_cast<Instruction>(V);
1916   if (!I)
1917     return false;
1918 
1919   // Load and return values may be undef.
1920   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1921     return false;
1922 
1923   // Optimistically handle other instructions.
1924   for (Value *Op : I->operands()) {
1925     if (!Visited.insert(Op).second)
1926       continue;
1927     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1928       return false;
1929   }
1930   return true;
1931 }
1932 
1933 /// Return true if the given value is concrete. We must prove that undef can
1934 /// never reach it.
1935 ///
1936 /// TODO: If we decide that this is a good approach to checking for undef, we
1937 /// may factor it into a common location.
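     /// For instance (illustrative): 'add i32 %a, 1' is rejected when %a is a
     /// function argument, since arguments may be bound to undef, whereas an
     /// instruction computed entirely from non-undef constants is accepted.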
1938 static bool hasConcreteDef(Value *V) {
1939   SmallPtrSet<Value*, 8> Visited;
1940   Visited.insert(V);
1941   return hasConcreteDefImpl(V, Visited, 0);
1942 }
1943 
1944 /// Return true if this IV has any uses other than the (soon to be rewritten)
1945 /// loop exit test.
1946 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1947   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1948   Value *IncV = Phi->getIncomingValue(LatchIdx);
1949 
1950   for (User *U : Phi->users())
1951     if (U != Cond && U != IncV) return false;
1952 
1953   for (User *U : IncV->users())
1954     if (U != Cond && U != Phi) return false;
1955   return true;
1956 }
1957 
1958 /// Find an affine IV in canonical form.
1959 ///
1960 /// BECount may be an i8* pointer type. The pointer difference is already a
1961 /// valid count without scaling the address stride, so it remains a pointer
1962 /// expression as far as SCEV is concerned.
1963 ///
1964 /// Currently only valid for LFTR. See the comments on hasConcreteDef above.
1965 ///
1966 /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
1967 ///
1968 /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
1969 /// This is difficult in general for SCEV because of potential overflow. But we
1970 /// could at least handle constant BECounts.
1971 static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
1972                                 ScalarEvolution *SE, DominatorTree *DT) {
1973   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1974 
1975   Value *Cond =
1976     cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
1977 
1978   // Loop over all of the PHI nodes, looking for a simple counter.
1979   PHINode *BestPhi = nullptr;
1980   const SCEV *BestInit = nullptr;
1981   BasicBlock *LatchBlock = L->getLoopLatch();
1982   assert(LatchBlock && "needsLFTR should guarantee a loop latch");
1983   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1984 
1985   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1986     PHINode *Phi = cast<PHINode>(I);
1987     if (!SE->isSCEVable(Phi->getType()))
1988       continue;
1989 
1990     // Avoid comparing an integer IV against a pointer Limit.
1991     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1992       continue;
1993 
1994     const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1995     if (!AR || AR->getLoop() != L || !AR->isAffine())
1996       continue;
1997 
1998     // AR may be a pointer type, while BECount is an integer type.
1999     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2000     // AR must not be a narrower type, or we may never exit.
2001     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2002     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2003       continue;
2004 
2005     const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2006     if (!Step || !Step->isOne())
2007       continue;
2008 
2009     int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2010     Value *IncV = Phi->getIncomingValue(LatchIdx);
2011     if (getLoopPhiForCounter(IncV, L, DT) != Phi)
2012       continue;
2013 
2014     // Avoid reusing a potentially undef value to compute other values that may
2015     // have originally had a concrete definition.
2016     if (!hasConcreteDef(Phi)) {
2017       // We explicitly allow unknown phis as long as they are already used by
2018       // the loop test. In this case we assume that performing LFTR could not
2019       // increase the number of undef users.
2020       if (ICmpInst *Cond = getLoopTest(L)) {
2021         if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
2022             Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
2023           continue;
2024         }
2025       }
2026     }
2027     const SCEV *Init = AR->getStart();
2028 
2029     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2030       // Don't force a live loop counter if another IV can be used.
2031       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2032         continue;
2033 
2034       // Prefer to count from zero. This is a more "canonical" counter form. It
2035       // also prefers integer over pointer IVs.
2036       if (BestInit->isZero() != Init->isZero()) {
2037         if (BestInit->isZero())
2038           continue;
2039       }
2040       // If two IVs both count from zero or both count from nonzero then the
2041       // narrower is likely a dead phi that has been widened. Use the wider phi
2042       // to allow the other to be eliminated.
2043       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2044         continue;
2045     }
2046     BestPhi = Phi;
2047     BestInit = Init;
2048   }
2049   return BestPhi;
2050 }
2051 
2052 /// Help linearFunctionTestReplace by generating a value that holds the RHS of
2053 /// the new loop test.
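     /// Hedged sketch of the common integer case handled below: with
     /// IVInit = 5 and IVCount = %n, the emitted limit is 'add i64 5, %n', and
     /// linearFunctionTestReplace then compares the IV against that limit with
     /// an eq/ne test.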
2054 static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
2055                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2056   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2057   assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
2058   const SCEV *IVInit = AR->getStart();
2059 
2060   // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
2061   // finds a valid pointer IV. Zero extend IVCount in order to materialize a
2062   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2063   // the existing GEPs whenever possible.
2064   if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
2065     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2066     // signed value. IVCount on the other hand represents the loop trip count,
2067     // which is an unsigned value. FindLoopCounter only allows induction
2068     // variables that have a positive unit stride. This means we don't
2069     // have to handle the case of negative offsets (yet) and just need to zero
2070     // extend IVCount.
2071     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2072     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
2073 
2074     // Expand the code for the iteration count.
2075     assert(SE->isLoopInvariant(IVOffset, L) &&
2076            "Computed iteration count is not loop invariant!");
2077     BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2078     Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
2079 
2080     Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
2081     assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
2082     // We could handle pointer IVs other than i8*, but we need to compensate for
2083     // gep index scaling. See canExpandBackedgeTakenCount comments.
2084     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2085                              cast<PointerType>(GEPBase->getType())
2086                                  ->getElementType())->isOne() &&
2087            "unit stride pointer IV must be i8*");
2088 
2089     IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
2090     return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
2091   } else {
2092     // In any other case, convert both IVInit and IVCount to integers before
2093     // comparing. This may result in SCEV expansion of pointers, but in practice
2094     // SCEV will fold the pointer arithmetic away as such:
2095     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2096     //
2097     // Valid cases: (1) both are integers, which is most common; (2) both may
2098     // be pointers for simple memset-style loops.
2099     //
2100     // IVInit integer and IVCount pointer would only occur if a canonical IV
2101     // were generated on top of case #2, which is not expected.
2102 
2103     const SCEV *IVLimit = nullptr;
2104     // For unit stride, IVCount = Start + BECount with 2's complement overflow.
2105     // For non-zero Start, compute IVCount here.
2106     if (AR->getStart()->isZero())
2107       IVLimit = IVCount;
2108     else {
2109       assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2110       const SCEV *IVInit = AR->getStart();
2111 
2112       // For integer IVs, truncate the IV before computing IVInit + BECount.
2113       if (SE->getTypeSizeInBits(IVInit->getType())
2114           > SE->getTypeSizeInBits(IVCount->getType()))
2115         IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
2116 
2117       IVLimit = SE->getAddExpr(IVInit, IVCount);
2118     }
2119     // Expand the code for the iteration count.
2120     BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2121     IRBuilder<> Builder(BI);
2122     assert(SE->isLoopInvariant(IVLimit, L) &&
2123            "Computed iteration count is not loop invariant!");
2124     // Ensure that we generate the same type as IndVar, or a smaller integer
2125     // type. In the presence of null pointer values, we have an integer type
2126     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2127     Type *LimitTy = IVCount->getType()->isPointerTy() ?
2128       IndVar->getType() : IVCount->getType();
2129     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2130   }
2131 }
2132 
2133 /// This method rewrites the exit condition of the loop to be a canonical !=
2134 /// comparison against the incremented loop induction variable.  This pass is
2135 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2136 /// determine a loop-invariant trip count of the loop, which is actually a much
2137 /// broader range than just linear tests.
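     /// Sketched effect on hypothetical IR: an exit branch controlled by
     ///   %cmp = icmp slt i64 %iv, %n
     /// is rewritten to the canonical form
     ///   %exitcond = icmp ne i64 %iv.next, %limit
     /// where %limit is the value produced by genLoopLimit above.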
2138 Value *IndVarSimplify::
2139 linearFunctionTestReplace(Loop *L,
2140                           const SCEV *BackedgeTakenCount,
2141                           PHINode *IndVar,
2142                           SCEVExpander &Rewriter) {
2143   assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
2144 
2145   // Initialize CmpIndVar and IVCount to their preincremented values.
2146   Value *CmpIndVar = IndVar;
2147   const SCEV *IVCount = BackedgeTakenCount;
2148 
2149   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2150 
2151   // If the exiting block is the same as the backedge block, we prefer to
2152   // compare against the post-incremented value; otherwise we must compare
2153   // against the pre-incremented value.
2154   if (L->getExitingBlock() == L->getLoopLatch()) {
2155     // Add one to the "backedge-taken" count to get the trip count.
2156     // This addition may overflow, which is valid as long as the comparison is
2157     // truncated to BackedgeTakenCount->getType().
2158     IVCount = SE->getAddExpr(BackedgeTakenCount,
2159                              SE->getOne(BackedgeTakenCount->getType()));
2160     // The BackedgeTaken expression contains the number of times that the
2161     // backedge branches to the loop header.  This is one less than the
2162     // number of times the loop executes, so use the incremented indvar.
2163     CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
2164   }
2165 
2166   Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
2167   assert(ExitCnt->getType()->isPointerTy() ==
2168              IndVar->getType()->isPointerTy() &&
2169          "genLoopLimit missed a cast");
2170 
2171   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2172   BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2173   ICmpInst::Predicate P;
2174   if (L->contains(BI->getSuccessor(0)))
2175     P = ICmpInst::ICMP_NE;
2176   else
2177     P = ICmpInst::ICMP_EQ;
2178 
2179   DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2180                << "      LHS:" << *CmpIndVar << '\n'
2181                << "       op:\t"
2182                << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
2183                << "      RHS:\t" << *ExitCnt << "\n"
2184                << "  IVCount:\t" << *IVCount << "\n");
2185 
2186   IRBuilder<> Builder(BI);
2187 
2188   // The new loop exit condition should reuse the debug location of the
2189   // original loop exit condition.
2190   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2191     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2192 
2193   // LFTR can ignore IV overflow and truncate to the width of
2194   // BECount. This avoids materializing the add(zext(add)) expression.
2195   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2196   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2197   if (CmpIndVarSize > ExitCntSize) {
2198     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2199     const SCEV *ARStart = AR->getStart();
2200     const SCEV *ARStep = AR->getStepRecurrence(*SE);
2201     // For constant IVCount, avoid truncation.
2202     if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
2203       const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
2204       APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
2205       // Note that the post-inc value of BackedgeTakenCount may have overflowed
2206       // above such that IVCount is now zero.
2207       if (IVCount != BackedgeTakenCount && Count == 0) {
2208         Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
2209         ++Count;
2210       }
2211       else
2212         Count = Count.zext(CmpIndVarSize);
2213       APInt NewLimit;
2214       if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
2215         NewLimit = Start - Count;
2216       else
2217         NewLimit = Start + Count;
2218       ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
2219 
2220       DEBUG(dbgs() << "  Widen RHS:\t" << *ExitCnt << "\n");
2221     } else {
2222       // We try to extend the trip count first. If that doesn't work, we
2223       // truncate the IV. Zext(trunc(IV)) == IV implies the equivalence of
2224       // Trunc(IV) == ExitCnt and IV == zext(ExitCnt); similarly for sext. If
2225       // either holds, extend the trip count; otherwise truncate the IV.
2226       bool Extended = false;
2227       const SCEV *IV = SE->getSCEV(CmpIndVar);
2228       const SCEV *ZExtTrunc =
2229            SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2230                                                      ExitCnt->getType()),
2231                                  CmpIndVar->getType());
2232 
2233       if (ZExtTrunc == IV) {
2234         Extended = true;
2235         ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2236                                      "wide.trip.count");
2237       } else {
2238         const SCEV *SExtTrunc =
2239           SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2240                                                     ExitCnt->getType()),
2241                                 CmpIndVar->getType());
2242         if (SExtTrunc == IV) {
2243           Extended = true;
2244           ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2245                                        "wide.trip.count");
2246         }
2247       }
2248 
2249       if (!Extended)
2250         CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2251                                         "lftr.wideiv");
2252     }
2253   }
2254   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2255   Value *OrigCond = BI->getCondition();
2256   // It's tempting to use replaceAllUsesWith here to fully replace the old
2257   // comparison, but that's not immediately safe, since users of the old
2258   // comparison may not be dominated by the new comparison. Instead, just
2259   // update the branch to use the new comparison; in the common case this
2260   // will make old comparison dead.
2261   BI->setCondition(Cond);
2262   DeadInsts.push_back(OrigCond);
2263 
2264   ++NumLFTR;
2265   Changed = true;
2266   return Cond;
2267 }
2268 
2269 //===----------------------------------------------------------------------===//
2270 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2271 //===----------------------------------------------------------------------===//
2272 
2273 /// If there's a single exit block, sink any loop-invariant values that
2274 /// were defined in the preheader but not used inside the loop into the
2275 /// exit block to reduce register pressure in the loop.
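     /// For example (illustrative): a preheader-only computation such as
     ///   %base = getelementptr i8, i8* %p, i64 16
     /// whose uses all lie in the exit block is moved into that exit block,
     /// shortening its live range across the loop body.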
2276 void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2277   BasicBlock *ExitBlock = L->getExitBlock();
2278   if (!ExitBlock) return;
2279 
2280   BasicBlock *Preheader = L->getLoopPreheader();
2281   if (!Preheader) return;
2282 
2283   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2284   BasicBlock::iterator I(Preheader->getTerminator());
2285   while (I != Preheader->begin()) {
2286     --I;
2287     // New instructions were inserted at the end of the preheader.
2288     if (isa<PHINode>(I))
2289       break;
2290 
2291     // Don't move instructions which might have side effects, since the side
2292     // effects need to complete before instructions inside the loop.  Also don't
2293     // move instructions which might read memory, since the loop may modify
2294     // memory. Note that it's okay if the instruction might have undefined
2295     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2296     // block.
2297     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2298       continue;
2299 
2300     // Skip debug info intrinsics.
2301     if (isa<DbgInfoIntrinsic>(I))
2302       continue;
2303 
2304     // Skip eh pad instructions.
2305     if (I->isEHPad())
2306       continue;
2307 
2308     // Don't sink alloca: we never want to sink static alloca's out of the
2309     // entry block, and correctly sinking dynamic alloca's requires
2310     // checks for stacksave/stackrestore intrinsics.
2311     // FIXME: Refactor this check somehow?
2312     if (isa<AllocaInst>(I))
2313       continue;
2314 
2315     // Determine if there is a use in or before the loop (direct or
2316     // otherwise).
2317     bool UsedInLoop = false;
2318     for (Use &U : I->uses()) {
2319       Instruction *User = cast<Instruction>(U.getUser());
2320       BasicBlock *UseBB = User->getParent();
2321       if (PHINode *P = dyn_cast<PHINode>(User)) {
2322         unsigned i =
2323           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2324         UseBB = P->getIncomingBlock(i);
2325       }
2326       if (UseBB == Preheader || L->contains(UseBB)) {
2327         UsedInLoop = true;
2328         break;
2329       }
2330     }
2331 
2332     // If there is, the def must remain in the preheader.
2333     if (UsedInLoop)
2334       continue;
2335 
2336     // Otherwise, sink it to the exit block.
2337     Instruction *ToMove = &*I;
2338     bool Done = false;
2339 
2340     if (I != Preheader->begin()) {
2341       // Skip debug info intrinsics.
2342       do {
2343         --I;
2344       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2345 
2346       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2347         Done = true;
2348     } else {
2349       Done = true;
2350     }
2351 
2352     ToMove->moveBefore(*ExitBlock, InsertPt);
2353     if (Done) break;
2354     InsertPt = ToMove->getIterator();
2355   }
2356 }
2357 
2358 //===----------------------------------------------------------------------===//
2359 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2360 //===----------------------------------------------------------------------===//
2361 
2362 bool IndVarSimplify::run(Loop *L) {
2363   // We need (and expect!) the incoming loop to be in LCSSA.
2364   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2365          "LCSSA required to run indvars!");
2366 
2367   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2368   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2369   //    canonicalization can be a pessimization without LSR to "clean up"
2370   //    afterwards.
2371   //  - We depend on having a preheader; in particular,
2372   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2373   //    and we're in trouble if we can't find the induction variable even when
2374   //    we've manually inserted one.
2375   //  - LFTR relies on having a single backedge.
2376   if (!L->isLoopSimplifyForm())
2377     return false;
2378 
2379   // If there are any floating-point recurrences, attempt to
2380   // transform them to use integer recurrences.
2381   rewriteNonIntegerIVs(L);
2382 
2383   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2384 
2385   // Create a rewriter object which we'll use to transform the code with.
2386   SCEVExpander Rewriter(*SE, DL, "indvars");
2387 #ifndef NDEBUG
2388   Rewriter.setDebugType(DEBUG_TYPE);
2389 #endif
2390 
2391   // Eliminate redundant IV users.
2392   //
2393   // Simplification works best when run before other consumers of SCEV. We
2394   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2395   // other expressions involving loop IVs have been evaluated. This helps SCEV
2396   // set no-wrap flags before normalizing sign/zero extension.
2397   Rewriter.disableCanonicalMode();
2398   simplifyAndExtend(L, Rewriter, LI);
2399 
2400   // Check to see if this loop has a computable loop-invariant execution count.
2401   // If so, this means that we can compute the final value of any expressions
2402   // that are recurrent in the loop, and substitute the exit values from the
2403   // loop into any instructions outside of the loop that use the final values of
2404   // the current expressions.
2405   //
2406   if (ReplaceExitValue != NeverRepl &&
2407       !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2408     rewriteLoopExitValues(L, Rewriter);
2409 
2410   // Eliminate redundant IV cycles.
2411   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2412 
2413   // If we have a trip count expression, rewrite the loop's exit condition
2414   // using it.  We can currently only handle loops with a single exit.
2415   if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) {
2416     PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
2417     if (IndVar) {
2418       // Check preconditions for proper SCEVExpander operation. SCEV does not
2419       // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
2420       // pass that uses the SCEVExpander must do it. This does not work well for
2421       // loop passes because SCEVExpander makes assumptions about all loops,
2422       // while LoopPassManager only forces the current loop to be simplified.
2423       //
2424       // FIXME: SCEV expansion has no way to bail out, so the caller must
2425       // explicitly check any assumptions made by SCEV. Brittle.
2426       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
2427       if (!AR || AR->getLoop()->getLoopPreheader())
2428         (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
2429                                         Rewriter);
2430     }
2431   }
2432   // Clear the rewriter cache, because values that are in the rewriter's cache
2433   // can be deleted in the loop below, causing the AssertingVH in the cache to
2434   // trigger.
2435   Rewriter.clear();
2436 
2437   // Now that we're done iterating through lists, clean up any instructions
2438   // which are now dead.
2439   while (!DeadInsts.empty())
2440     if (Instruction *Inst =
2441             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2442       RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
2443 
2444   // The Rewriter may not be used from this point on.
2445 
2446   // Loop-invariant instructions in the preheader that aren't used in the
2447   // loop may be sunk below the loop to reduce register pressure.
2448   sinkUnusedInvariants(L);
2449 
2450   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2451   // trip count and therefore can further simplify exit values in addition to
2452   // rewriteLoopExitValues.
2453   rewriteFirstIterationLoopExitValues(L);
2454 
2455   // Clean up dead instructions.
2456   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2457 
2458   // Check a post-condition.
2459   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2460          "Indvars did not preserve LCSSA!");
2461 
2462   // Verify that LFTR and any other changes have not interfered with SCEV's
2463   // ability to compute the trip count.
2464 #ifndef NDEBUG
2465   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2466     SE->forgetLoop(L);
2467     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2468     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2469         SE->getTypeSizeInBits(NewBECount->getType()))
2470       NewBECount = SE->getTruncateOrNoop(NewBECount,
2471                                          BackedgeTakenCount->getType());
2472     else
2473       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2474                                                  NewBECount->getType());
2475     assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2476   }
2477 #endif
2478 
2479   return Changed;
2480 }
2481 
2482 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2483                                           LoopStandardAnalysisResults &AR,
2484                                           LPMUpdater &) {
2485   Function *F = L.getHeader()->getParent();
2486   const DataLayout &DL = F->getParent()->getDataLayout();
2487 
2488   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2489   if (!IVS.run(&L))
2490     return PreservedAnalyses::all();
2491 
2492   auto PA = getLoopPassPreservedAnalyses();
2493   PA.preserveSet<CFGAnalyses>();
2494   return PA;
2495 }
2496 
2497 namespace {
2498 struct IndVarSimplifyLegacyPass : public LoopPass {
2499   static char ID; // Pass identification, replacement for typeid
2500   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2501     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2502   }
2503 
2504   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2505     if (skipLoop(L))
2506       return false;
2507 
2508     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2509     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2510     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2511     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2512     auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
2513     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2514     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2515     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2516 
2517     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2518     return IVS.run(L);
2519   }
2520 
2521   void getAnalysisUsage(AnalysisUsage &AU) const override {
2522     AU.setPreservesCFG();
2523     getLoopAnalysisUsage(AU);
2524   }
2525 };
2526 }
2527 
2528 char IndVarSimplifyLegacyPass::ID = 0;
2529 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2530                       "Induction Variable Simplification", false, false)
2531 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2532 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2533                     "Induction Variable Simplification", false, false)
2534 
2535 Pass *llvm::createIndVarSimplifyPass() {
2536   return new IndVarSimplifyLegacyPass();
2537 }
2538