//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

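// Strategies for rewriteLoopExitValues(), selectable via -replexitval: never
// rewrite, rewrite only when expansion is cheap, rewrite only when the loop
// def is likely dead afterwards, or rewrite whenever possible.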
enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  bool optimizeLoopExits(Loop *L);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

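  // InsertPt may sit in a loop nested more deeply than DefI's loop. Walk up
  // the dominator tree to the nearest dominating block that belongs to DefI's
  // loop, so the rewritten use is not computed inside an inner loop.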
  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
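  // convertToInteger with rmTowardZero reports opOK and sets isExact only
  // when the value converts without rounding, i.e. the float is an integer
  // that fits in 64 bits.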
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, insert a corresponding
/// integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }
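
  // For example, a loop exit test 'fcmp olt double %i.next, 1.0e4' maps to
  // ICMP_SLT here and is later recreated as 'icmp slt i32 %i.next.int, 10000'
  // against the integer exit value.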

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that the equivalent integer operations would fail to detect.  Check for
  // this now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;
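    // For example, with InitValue = 0, ExitValue = 3 and IncValue = 2, the IV
    // takes the values 0, 2, 4, ... and never equals 3, so an EQ/NE exit test
    // would never fire.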

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if expanding the exit value is high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Treat instructions that may have side effects as "hard" uses which will
    // not be eliminated easily.
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an instruction or is not
        // defined in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which cannot be optimized
        // away.  Avoid doing so unless we know we have a value which computes
        // the ExitValue already.  TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // The LoopDeletion pass can delete a loop even when ExitingBlocks.size() > 1,
  // but we skip the multiple-ExitingBlocks case here for simplicity.
  // TODO: If we see a testcase where multiple ExitingBlocks can be deleted
  // after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by visitIVCast and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

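  // The kind of extension used to widen a narrow instruction: by zext, by
  // sext, or not yet determined.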
  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported so far.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW
  // flags. This instruction may be guarded by control flow that the no-wrap
  // behavior depends on. Non-control-equivalent instructions can be mapped to
  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
  // semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}
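
// Example for getExtendedOperandRecurrence: given 'add nsw i32 %narrow.iv,
// %step' with a sign-extended narrow IV, sext distributes over the nsw add,
// so the widened expression (wide.iv + sext(%step)) is still an AddRec on
// this loop.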

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}
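
// Example for truncateIVUse: a use such as 'urem i32 %narrow.iv, 7' is not
// cloneable, so it is rewritten to 'urem i32 (trunc i64 %wide.iv to i32), 7',
// leaving the narrow IV itself free to be eliminated later.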

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

1376 /// If the narrow use is an instruction whose two operands are the defining
1377 /// instruction of DU and a load instruction, then we have the following:
1378 /// if the load is hoisted outside the loop, we do not reach this function,
1379 /// because scalar evolution analysis works fine in widenIVUse with variables
1380 /// hoisted outside the loop, and efficient code is subsequently generated
1381 /// without emitting truncate instructions. But when the load is not hoisted
1382 /// (whether due to a limitation in alias analysis or to a true legality
1383 /// constraint), scalar evolution cannot proceed with loop-variant values and
1384 /// inefficient code is generated. This function handles the non-hoisted-load
1385 /// special case by making the optimization generate the same kind of code for
1386 /// hoisted and non-hoisted loads (widen the use and eliminate the sign/zero
1387 /// extend instruction). This special case matters especially when the
1388 /// induction variables affect the addressing mode in code generation.
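     ///
     /// A sketch of the pattern handled here (illustrative):
     ///   %load = load i32, i32* %p        ; not hoisted out of the loop
     ///   %add  = add nsw i32 %iv, %load
     ///   %sext = sext i32 %add to i64     ; every user of %add is such a sext
     /// which widenWithVariantLoadUseCodegen rewrites to
     ///   %ext      = sext i32 %load to i64
     ///   %add.wide = add nsw i64 %iv.wide, %ext
     /// replacing each %sext with %add.wide.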
1389 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1390   Instruction *NarrowUse = DU.NarrowUse;
1391   Instruction *NarrowDef = DU.NarrowDef;
1392   Instruction *WideDef = DU.WideDef;
1393 
1394   // Handle the common case of add<nsw/nuw>
1395   const unsigned OpCode = NarrowUse->getOpcode();
1396   // Only Add/Sub/Mul instructions are supported.
1397   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1398       OpCode != Instruction::Mul)
1399     return false;
1400 
1401   // The operand that is not defined by NarrowDef of DU. Let's call it the
1402   // other operand.
1403   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1404   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1405          "bad DU");
1406 
1407   const SCEV *ExtendOperExpr = nullptr;
1408   const OverflowingBinaryOperator *OBO =
1409     cast<OverflowingBinaryOperator>(NarrowUse);
1410   ExtendKind ExtKind = getExtendKind(NarrowDef);
1411   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1412     ExtendOperExpr = SE->getSignExtendExpr(
1413       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1414   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1415     ExtendOperExpr = SE->getZeroExtendExpr(
1416       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1417   else
1418     return false;
1419 
1420   // We are interested in the other operand being a load instruction.
1421   // But we should look into relaxing this restriction later on.
1422   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1423   if (I && I->getOpcode() != Instruction::Load)
1424     return false;
1425 
1426   // Verify that the defining operand is an AddRec.
1427   const SCEV *Op1 = SE->getSCEV(WideDef);
1428   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1429   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1430     return false;
1431   // Verify that the other operand is an extend.
1432   if (ExtKind == SignExtended) {
1433     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1434       return false;
1435   } else {
1436     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1437       return false;
1438   }
1439 
1440   if (ExtKind == SignExtended) {
1441     for (Use &U : NarrowUse->uses()) {
1442       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1443       if (!User || User->getType() != WideType)
1444         return false;
1445     }
1446   } else { // ExtKind == ZeroExtended
1447     for (Use &U : NarrowUse->uses()) {
1448       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1449       if (!User || User->getType() != WideType)
1450         return false;
1451     }
1452   }
1453 
1454   return true;
1455 }
1456 
1457 /// Special Case for widening with variant Loads (see
1458 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1459 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1460   Instruction *NarrowUse = DU.NarrowUse;
1461   Instruction *NarrowDef = DU.NarrowDef;
1462   Instruction *WideDef = DU.WideDef;
1463 
1464   ExtendKind ExtKind = getExtendKind(NarrowDef);
1465 
1466   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1467 
1468   // Generate the widened use instruction.
1469   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1470                    ? WideDef
1471                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1472                                       ExtKind, NarrowUse);
1473   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1474                    ? WideDef
1475                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1476                                       ExtKind, NarrowUse);
1477 
1478   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1479   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1480                                         NarrowBO->getName());
1481   IRBuilder<> Builder(NarrowUse);
1482   Builder.Insert(WideBO);
1483   WideBO->copyIRFlags(NarrowBO);
1484 
1485   if (ExtKind == SignExtended)
1486     ExtendKindMap[NarrowUse] = SignExtended;
1487   else
1488     ExtendKindMap[NarrowUse] = ZeroExtended;
1489 
1490   // Update the Use.
1491   if (ExtKind == SignExtended) {
1492     for (Use &U : NarrowUse->uses()) {
1493       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1494       if (User && User->getType() == WideType) {
1495         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1496                           << *WideBO << "\n");
1497         ++NumElimExt;
1498         User->replaceAllUsesWith(WideBO);
1499         DeadInsts.emplace_back(User);
1500       }
1501     }
1502   } else { // ExtKind == ZeroExtended
1503     for (Use &U : NarrowUse->uses()) {
1504       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1505       if (User && User->getType() == WideType) {
1506         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1507                           << *WideBO << "\n");
1508         ++NumElimExt;
1509         User->replaceAllUsesWith(WideBO);
1510         DeadInsts.emplace_back(User);
1511       }
1512     }
1513   }
1514 }
1515 
1516 /// Determine whether an individual user of the narrow IV can be widened. If so,
1517 /// return the wide clone of the user.
1518 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1519   assert(ExtendKindMap.count(DU.NarrowDef) &&
1520          "Should already know the kind of extension used to widen NarrowDef");
1521 
1522   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1523   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1524     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1525       // For LCSSA phis, sink the truncate outside the loop.
1526       // After SimplifyCFG most loop exit targets have a single predecessor.
1527       // Otherwise fall back to a truncate within the loop.
1528       if (UsePhi->getNumOperands() != 1)
1529         truncateIVUse(DU, DT, LI);
1530       else {
1531         // Widening the PHI requires us to insert a trunc.  The logical place
1532         // for this trunc is in the same BB as the PHI.  This is not possible if
1533         // the BB is terminated by a catchswitch.
1534         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1535           return nullptr;
1536 
1537         PHINode *WidePhi =
1538           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1539                           UsePhi);
1540         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1541         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1542         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1543         UsePhi->replaceAllUsesWith(Trunc);
1544         DeadInsts.emplace_back(UsePhi);
1545         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1546                           << *WidePhi << "\n");
1547       }
1548       return nullptr;
1549     }
1550   }
1551 
1552   // This narrow use can be widened by a sext if it's non-negative or its narrow
1553   // def was widened by a sext. Same for zext.
1554   auto canWidenBySExt = [&]() {
1555     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1556   };
1557   auto canWidenByZExt = [&]() {
1558     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1559   };
1560 
1561   // Our raison d'etre! Eliminate sign and zero extension.
1562   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1563       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1564     Value *NewDef = DU.WideDef;
1565     if (DU.NarrowUse->getType() != WideType) {
1566       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1567       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1568       if (CastWidth < IVWidth) {
1569         // The cast isn't as wide as the IV, so insert a Trunc.
1570         IRBuilder<> Builder(DU.NarrowUse);
1571         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1572       }
1573       else {
1574         // A wider extend was hidden behind a narrower one. This may induce
1575         // another round of IV widening in which the intermediate IV becomes
1576         // dead. It should be very rare.
1577         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1578                           << " not wide enough to subsume " << *DU.NarrowUse
1579                           << "\n");
1580         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1581         NewDef = DU.NarrowUse;
1582       }
1583     }
1584     if (NewDef != DU.NarrowUse) {
1585       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1586                         << " replaced by " << *DU.WideDef << "\n");
1587       ++NumElimExt;
1588       DU.NarrowUse->replaceAllUsesWith(NewDef);
1589       DeadInsts.emplace_back(DU.NarrowUse);
1590     }
1591     // Now that the extend is gone, we want to expose its uses for potential
1592     // further simplification. We don't need to directly inform SimplifyIVUsers
1593     // of the new users, because their parent IV will be processed later as a
1594     // new loop phi. If we preserved IVUsers analysis, we would also want to
1595     // push the uses of WideDef here.
1596 
1597     // No further widening is needed. The deceased [sz]ext had done it for us.
1598     return nullptr;
1599   }
1600 
1601   // Does this user itself evaluate to a recurrence after widening?
1602   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1603   if (!WideAddRec.first)
1604     WideAddRec = getWideRecurrence(DU);
1605 
1606   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1607   if (!WideAddRec.first) {
1608     // If use is a loop condition, try to promote the condition instead of
1609     // truncating the IV first.
1610     if (widenLoopCompare(DU))
1611       return nullptr;
1612 
1613     // We are about to generate a truncate instruction here that may hurt
1614     // performance because the scalar evolution expression computed earlier
1615     // in WideAddRec.first does not indicate a polynomial induction expression.
1616     // In that case, look at the operands of the use instruction to determine
1617     // if we can still widen the use instead of truncating its operand.
1618     if (widenWithVariantLoadUse(DU)) {
1619       widenWithVariantLoadUseCodegen(DU);
1620       return nullptr;
1621     }
1622 
1623     // This user does not evaluate to a recurrence after widening, so don't
1624     // follow it. Instead insert a Trunc to kill off the original use,
1625     // eventually isolating the original narrow IV so it can be removed.
1626     truncateIVUse(DU, DT, LI);
1627     return nullptr;
1628   }
1629   // Assume block terminators cannot evaluate to a recurrence. We can't
1630   // insert a Trunc after a terminator if there happens to be a critical edge.
1631   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1632          "SCEV is not expected to evaluate a block terminator");
1633 
1634   // Reuse the IV increment that SCEVExpander created as long as it dominates
1635   // NarrowUse.
1636   Instruction *WideUse = nullptr;
1637   if (WideAddRec.first == WideIncExpr &&
1638       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1639     WideUse = WideInc;
1640   else {
1641     WideUse = cloneIVUser(DU, WideAddRec.first);
1642     if (!WideUse)
1643       return nullptr;
1644   }
1645   // Evaluation of WideAddRec ensured that the narrow expression could be
1646   // extended outside the loop without overflow. This suggests that the wide use
1647   // evaluates to the same expression as the extended narrow use, but doesn't
1648   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1649   // where it fails, we simply throw away the newly created wide use.
1650   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1651     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1652                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1653                       << "\n");
1654     DeadInsts.emplace_back(WideUse);
1655     return nullptr;
1656   }
1657 
1658   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1659   // Returning WideUse pushes it on the worklist.
1660   return WideUse;
1661 }
1662 
1663 /// Add eligible users of NarrowDef to NarrowIVUsers.
1664 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1665   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1666   bool NonNegativeDef =
1667       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1668                            SE->getConstant(NarrowSCEV->getType(), 0));
1669   for (User *U : NarrowDef->users()) {
1670     Instruction *NarrowUser = cast<Instruction>(U);
1671 
1672     // Handle data flow merges and bizarre phi cycles.
1673     if (!Widened.insert(NarrowUser).second)
1674       continue;
1675 
1676     bool NonNegativeUse = false;
1677     if (!NonNegativeDef) {
1678       // We might have control-dependent range information for this context.
1679       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1680         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1681     }
1682 
1683     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1684                                NonNegativeDef || NonNegativeUse);
1685   }
1686 }
1687 
1688 /// Process a single induction variable. First use the SCEVExpander to create a
1689 /// wide induction variable that evaluates to the same recurrence as the
1690 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1691 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1692 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1693 ///
1694 /// It would be simpler to delete uses as they are processed, but we must avoid
1695 /// invalidating SCEV expressions.
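     ///
     /// Sketch of the overall effect (illustrative): an i32 IV
     ///   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
     /// that provably does not wrap when extended is replaced by
     ///   %iv.wide = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]
     /// and the sext/zext instructions applied to %iv and its users go dead.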
1696 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1697   // Is this phi an induction variable?
1698   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1699   if (!AddRec)
1700     return nullptr;
1701 
1702   // Widen the induction variable expression.
1703   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1704                                ? SE->getSignExtendExpr(AddRec, WideType)
1705                                : SE->getZeroExtendExpr(AddRec, WideType);
1706 
1707   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1708          "Expect the new IV expression to preserve its type");
1709 
1710   // Can the IV be extended outside the loop without overflow?
1711   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1712   if (!AddRec || AddRec->getLoop() != L)
1713     return nullptr;
1714 
1715   // An AddRec must have loop-invariant operands. Since this AddRec is
1716   // materialized by a loop header phi, the expression cannot have any post-loop
1717   // operands, so they must dominate the loop header.
1718   assert(
1719       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1720       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1721       "Loop header phi recurrence inputs do not dominate the loop");
1722 
1723   // Iterate over IV uses (including transitive ones) looking for IV increments
1724   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1725   // the increment, calculate control-dependent range information based on
1726   // dominating conditions inside of the loop (e.g. a range check inside of the
1727   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1728   //
1729   // Control-dependent range information is later used to prove that a narrow
1730   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1731   // this on demand because when pushNarrowIVUsers needs this information some
1732   // of the dominating conditions might be already widened.
1733   if (UsePostIncrementRanges)
1734     calculatePostIncRanges(OrigPhi);
1735 
1736   // The rewriter provides a value for the desired IV expression. This may
1737   // either find an existing phi or materialize a new one. Either way, we
1738   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1739   // of the phi-SCC dominates the loop entry.
1740   Instruction *InsertPt = &L->getHeader()->front();
1741   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1742 
1743   // Remembering the WideIV increment generated by SCEVExpander allows
1744   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1745   // employ a general reuse mechanism because the call above is the only call to
1746   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1747   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1748     WideInc =
1749       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1750     WideIncExpr = SE->getSCEV(WideInc);
1751     // Propagate the debug location associated with the original loop increment
1752     // to the new (widened) increment.
1753     auto *OrigInc =
1754       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1755     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1756   }
1757 
1758   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1759   ++NumWidened;
1760 
1761   // Traverse the def-use chain using a worklist starting at the original IV.
1762   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1763 
1764   Widened.insert(OrigPhi);
1765   pushNarrowIVUsers(OrigPhi, WidePhi);
1766 
1767   while (!NarrowIVUsers.empty()) {
1768     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1769 
1770     // Process a def-use edge. This may replace the use, so don't hold a
1771     // use_iterator across it.
1772     Instruction *WideUse = widenIVUse(DU, Rewriter);
1773 
1774     // Follow all def-use edges from the previous narrow use.
1775     if (WideUse)
1776       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1777 
1778     // widenIVUse may have removed the def-use edge.
1779     if (DU.NarrowDef->use_empty())
1780       DeadInsts.emplace_back(DU.NarrowDef);
1781   }
1782 
1783   // Attach any debug information to the new PHI. Since OrigPhi and WidePhi
1784   // evaluate the same recurrence, we can just copy the debug info over.
1785   SmallVector<DbgValueInst *, 1> DbgValues;
1786   llvm::findDbgValues(DbgValues, OrigPhi);
1787   auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
1788                                      ValueAsMetadata::get(WidePhi));
1789   for (auto &DbgValue : DbgValues)
1790     DbgValue->setOperand(0, MDPhi);
1791   return WidePhi;
1792 }
1793 
1794 /// Calculates control-dependent range for the given def at the given context
1795 /// by looking at dominating conditions inside of the loop.
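     ///
     /// For example (illustrative): given the post-increment
     ///   %iv.next = add nsw i32 %iv, 1
     /// a dominating condition 'icmp sge i32 %iv, 0' on the path to the user
     /// constrains %iv to [0, INT_MAX] and therefore %iv.next to [1, INT_MAX],
     /// proving %iv.next non-negative at that use.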
1796 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1797                                     Instruction *NarrowUser) {
1798   using namespace llvm::PatternMatch;
1799 
1800   Value *NarrowDefLHS;
1801   const APInt *NarrowDefRHS;
1802   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1803                                  m_APInt(NarrowDefRHS))) ||
1804       !NarrowDefRHS->isNonNegative())
1805     return;
1806 
1807   auto UpdateRangeFromCondition = [&] (Value *Condition,
1808                                        bool TrueDest) {
1809     CmpInst::Predicate Pred;
1810     Value *CmpRHS;
1811     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1812                                  m_Value(CmpRHS))))
1813       return;
1814 
1815     CmpInst::Predicate P =
1816             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1817 
1818     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1819     auto CmpConstrainedLHSRange =
1820             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1821     auto NarrowDefRange =
1822             CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1823 
1824     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1825   };
1826 
1827   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1828     if (!HasGuards)
1829       return;
1830 
1831     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1832                                      Ctx->getParent()->rend())) {
1833       Value *C = nullptr;
1834       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1835         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1836     }
1837   };
1838 
1839   UpdateRangeFromGuards(NarrowUser);
1840 
1841   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1842   // If NarrowUserBB is statically unreachable, asking dominator queries may
1843   // yield surprising results (e.g., the block may not have a dom tree node).
1844   if (!DT->isReachableFromEntry(NarrowUserBB))
1845     return;
1846 
1847   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1848        L->contains(DTB->getBlock());
1849        DTB = DTB->getIDom()) {
1850     auto *BB = DTB->getBlock();
1851     auto *TI = BB->getTerminator();
1852     UpdateRangeFromGuards(TI);
1853 
1854     auto *BI = dyn_cast<BranchInst>(TI);
1855     if (!BI || !BI->isConditional())
1856       continue;
1857 
1858     auto *TrueSuccessor = BI->getSuccessor(0);
1859     auto *FalseSuccessor = BI->getSuccessor(1);
1860 
1861     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1862       return BBE.isSingleEdge() &&
1863              DT->dominates(BBE, NarrowUser->getParent());
1864     };
1865 
1866     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1867       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1868 
1869     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1870       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1871   }
1872 }
1873 
1874 /// Calculates PostIncRangeInfos map for the given IV
1875 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1876   SmallPtrSet<Instruction *, 16> Visited;
1877   SmallVector<Instruction *, 6> Worklist;
1878   Worklist.push_back(OrigPhi);
1879   Visited.insert(OrigPhi);
1880 
1881   while (!Worklist.empty()) {
1882     Instruction *NarrowDef = Worklist.pop_back_val();
1883 
1884     for (Use &U : NarrowDef->uses()) {
1885       auto *NarrowUser = cast<Instruction>(U.getUser());
1886 
1887       // Don't go looking outside the current loop.
1888       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1889       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1890         continue;
1891 
1892       if (!Visited.insert(NarrowUser).second)
1893         continue;
1894 
1895       Worklist.push_back(NarrowUser);
1896 
1897       calculatePostIncRange(NarrowDef, NarrowUser);
1898     }
1899   }
1900 }
1901 
1902 //===----------------------------------------------------------------------===//
1903 //  Live IV Reduction - Minimize IVs live across the loop.
1904 //===----------------------------------------------------------------------===//
1905 
1906 //===----------------------------------------------------------------------===//
1907 //  Simplification of IV users based on SCEV evaluation.
1908 //===----------------------------------------------------------------------===//
1909 
1910 namespace {
1911 
1912 class IndVarSimplifyVisitor : public IVVisitor {
1913   ScalarEvolution *SE;
1914   const TargetTransformInfo *TTI;
1915   PHINode *IVPhi;
1916 
1917 public:
1918   WideIVInfo WI;
1919 
1920   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1921                         const TargetTransformInfo *TTI,
1922                         const DominatorTree *DTree)
1923     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1924     DT = DTree;
1925     WI.NarrowIV = IVPhi;
1926   }
1927 
1928   // Implement the interface used by simplifyUsersOfIV.
1929   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1930 };
1931 
1932 } // end anonymous namespace
1933 
1934 /// Iteratively perform simplification on a worklist of IV users. Each
1935 /// successive simplification may push more users which may themselves be
1936 /// candidates for simplification.
1937 ///
1938 /// Sign/Zero extend elimination is interleaved with IV simplification.
1939 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1940                                        SCEVExpander &Rewriter,
1941                                        LoopInfo *LI) {
1942   SmallVector<WideIVInfo, 8> WideIVs;
1943 
1944   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1945           Intrinsic::getName(Intrinsic::experimental_guard));
1946   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1947 
1948   SmallVector<PHINode*, 8> LoopPhis;
1949   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1950     LoopPhis.push_back(cast<PHINode>(I));
1951   }
1952   // Each round of simplification iterates through the SimplifyIVUsers worklist
1953   // for all current phis, then determines whether any IVs can be
1954   // widened. Widening adds new phis to LoopPhis, inducing another round of
1955   // simplification on the wide IVs.
1956   bool Changed = false;
1957   while (!LoopPhis.empty()) {
1958     // Evaluate as many IV expressions as possible before widening any IVs. This
1959     // forces SCEV to set no-wrap flags before evaluating sign/zero
1960     // extension. The first time SCEV attempts to normalize sign/zero extension,
1961     // the result becomes final. So for the most predictable results, we delay
1962     // evaluation of sign/zero extension until needed, and avoid running
1963     // other SCEV based analysis prior to simplifyAndExtend.
1964     do {
1965       PHINode *CurrIV = LoopPhis.pop_back_val();
1966 
1967       // Information about sign/zero extensions of CurrIV.
1968       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1969 
1970       Changed |=
1971           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1972 
1973       if (Visitor.WI.WidestNativeType) {
1974         WideIVs.push_back(Visitor.WI);
1975       }
1976     } while(!LoopPhis.empty());
1977 
1978     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1979       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1980       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1981         Changed = true;
1982         LoopPhis.push_back(WidePhi);
1983       }
1984     }
1985   }
1986   return Changed;
1987 }
1988 
1989 //===----------------------------------------------------------------------===//
1990 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1991 //===----------------------------------------------------------------------===//
1992 
1993 /// Given a Value which is hoped to be part of an add recurrence in the given
1994 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1995 /// that this is less general than SCEV's AddRec checking.
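     ///
     /// For example (illustrative): passing the increment
     ///   %iv.next = add i32 %iv, %step   ; %step loop-invariant
     /// returns the header phi %iv, while a value lacking this simple
     /// phi-plus-invariant structure returns null.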
1996 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1997   Instruction *IncI = dyn_cast<Instruction>(IncV);
1998   if (!IncI)
1999     return nullptr;
2000 
2001   switch (IncI->getOpcode()) {
2002   case Instruction::Add:
2003   case Instruction::Sub:
2004     break;
2005   case Instruction::GetElementPtr:
2006     // An IV counter must preserve its type.
2007     if (IncI->getNumOperands() == 2)
2008       break;
2009     LLVM_FALLTHROUGH;
2010   default:
2011     return nullptr;
2012   }
2013 
2014   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2015   if (Phi && Phi->getParent() == L->getHeader()) {
2016     if (L->isLoopInvariant(IncI->getOperand(1)))
2017       return Phi;
2018     return nullptr;
2019   }
2020   if (IncI->getOpcode() == Instruction::GetElementPtr)
2021     return nullptr;
2022 
2023   // Allow add/sub to be commuted.
2024   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2025   if (Phi && Phi->getParent() == L->getHeader()) {
2026     if (L->isLoopInvariant(IncI->getOperand(0)))
2027       return Phi;
2028   }
2029   return nullptr;
2030 }
2031 
2032 /// Whether the current loop exit test is based on this value.  Currently this
2033 /// is limited to a direct use in the loop condition.
2034 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2035   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2036   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2037   // TODO: Allow non-icmp loop test.
2038   if (!ICmp)
2039     return false;
2040 
2041   // TODO: Allow indirect use.
2042   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2043 }
2044 
2045 /// linearFunctionTestReplace policy. Return true unless we can show that the
2046 /// current exit test is already sufficiently canonical.
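     ///
     /// The canonical form LFTR produces (illustrative) is an EQ/NE comparison
     /// of a simple counter against a loop-invariant limit, e.g.
     ///   %exitcond = icmp ne i32 %iv.next, %limit
     /// so an exit test already in that shape does not need LFTR.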
2047 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2048   assert(L->getLoopLatch() && "Must be in simplified form");
2049 
2050   // Avoid converting a constant or loop invariant test back to a runtime
2051   // test.  This is critical for when SCEV's cached ExitCount is less precise
2052   // than the current IR (such as after we've proven a particular exit is
2053   // actually dead and thus the BE count never reaches our ExitCount.)
2054   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2055   if (L->isLoopInvariant(BI->getCondition()))
2056     return false;
2057 
2058   // Do LFTR to simplify the exit condition to an ICMP.
2059   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2060   if (!Cond)
2061     return true;
2062 
2063   // Do LFTR to simplify the exit ICMP to EQ/NE
2064   ICmpInst::Predicate Pred = Cond->getPredicate();
2065   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2066     return true;
2067 
2068   // Look for a loop invariant RHS
2069   Value *LHS = Cond->getOperand(0);
2070   Value *RHS = Cond->getOperand(1);
2071   if (!L->isLoopInvariant(RHS)) {
2072     if (!L->isLoopInvariant(LHS))
2073       return true;
2074     std::swap(LHS, RHS);
2075   }
2076   // Look for a simple IV counter LHS
2077   PHINode *Phi = dyn_cast<PHINode>(LHS);
2078   if (!Phi)
2079     Phi = getLoopPhiForCounter(LHS, L);
2080 
2081   if (!Phi)
2082     return true;
2083 
2084   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2085   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2086   if (Idx < 0)
2087     return true;
2088 
2089   // Do LFTR if the exit condition's IV is *not* a simple counter.
2090   Value *IncV = Phi->getIncomingValue(Idx);
2091   return Phi != getLoopPhiForCounter(IncV, L);
2092 }
2093 
2094 /// Return true if undefined behavior would provably be executed on the path to
2095 /// OnPathTo if Root produced a poison result.  Note that this doesn't say
2096 /// anything about whether OnPathTo is actually executed or whether Root is
2097 /// actually poison.  This can be used to assess whether a new use of Root can
2098 /// be added at a location which is control equivalent with OnPathTo (such as
2099 /// immediately before it) without introducing UB which didn't previously
2100 /// exist.  Note that a false result conveys no information.
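     ///
     /// For example (illustrative): if Root feeds the pointer operand of a
     /// load that dominates OnPathTo, poison at Root would propagate to the
     /// load's address and trigger UB, so the result is true.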
2101 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2102                                           Instruction *OnPathTo,
2103                                           DominatorTree *DT) {
2104   // The basic approach is to assume Root is poison, propagate poison forward
2105   // through all users we can easily track, and then check whether any of those
2106   // users provably trigger UB and must execute before our exiting block might
2107   // exit.
2108 
2109   // The set of all recursive users we've visited (which are assumed to all be
2110   // poison because of said visit)
2111   SmallSet<const Value *, 16> KnownPoison;
2112   SmallVector<const Instruction*, 16> Worklist;
2113   Worklist.push_back(Root);
2114   while (!Worklist.empty()) {
2115     const Instruction *I = Worklist.pop_back_val();
2116 
2117     // If we know this must trigger UB on a path leading to our target.
2118     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2119       return true;
2120 
2121     // If we can't analyze propagation through this instruction, just skip it
2122     // and transitive users.  Safe as false is a conservative result.
2123     if (!propagatesFullPoison(I) && I != Root)
2124       continue;
2125 
2126     if (KnownPoison.insert(I).second)
2127       for (const User *User : I->users())
2128         Worklist.push_back(cast<Instruction>(User));
2129   }
2130 
2131   // Might be non-UB, or might have a path we couldn't prove must execute on
2132   // way to exiting bb.
2133   return false;
2134 }
2135 
2136 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2137 /// down to checking that all operands are constant and listing instructions
2138 /// that may hide undef.
2139 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2140                                unsigned Depth) {
2141   if (isa<Constant>(V))
2142     return !isa<UndefValue>(V);
2143 
2144   if (Depth >= 6)
2145     return false;
2146 
2147   // Conservatively handle non-constant non-instructions. For example, Arguments
2148   // may be undef.
2149   Instruction *I = dyn_cast<Instruction>(V);
2150   if (!I)
2151     return false;
2152 
2153   // Load and return values may be undef.
2154   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2155     return false;
2156 
2157   // Optimistically handle other instructions.
2158   for (Value *Op : I->operands()) {
2159     if (!Visited.insert(Op).second)
2160       continue;
2161     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2162       return false;
2163   }
2164   return true;
2165 }
2166 
2167 /// Return true if the given value is concrete. We must prove that undef can
2168 /// never reach it.
2169 ///
2170 /// TODO: If we decide that this is a good approach to checking for undef, we
2171 /// may factor it into a common location.
2172 static bool hasConcreteDef(Value *V) {
2173   SmallPtrSet<Value*, 8> Visited;
2174   Visited.insert(V);
2175   return hasConcreteDefImpl(V, Visited, 0);
2176 }
2177 
2178 /// Return true if this IV has any uses other than the (soon to be rewritten)
2179 /// loop exit test.
2180 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2181   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2182   Value *IncV = Phi->getIncomingValue(LatchIdx);
2183 
2184   for (User *U : Phi->users())
2185     if (U != Cond && U != IncV) return false;
2186 
2187   for (User *U : IncV->users())
2188     if (U != Cond && U != Phi) return false;
2189   return true;
2190 }
2191 
2192 /// Return true if the given phi is a "counter" in L.  A counter is an
2193 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2194 /// step of 1.  Note that L must have exactly one latch.
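     ///
     /// In SCEV terms (illustrative), a counter is an affine add recurrence
     ///   {%start,+,1}<%L>
     /// whose latch increment feeds back into the header phi.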
2195 static bool isLoopCounter(PHINode* Phi, Loop *L,
2196                           ScalarEvolution *SE) {
2197   assert(Phi->getParent() == L->getHeader());
2198   assert(L->getLoopLatch());
2199 
2200   if (!SE->isSCEVable(Phi->getType()))
2201     return false;
2202 
2203   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2204   if (!AR || AR->getLoop() != L || !AR->isAffine())
2205     return false;
2206 
2207   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2208   if (!Step || !Step->isOne())
2209     return false;
2210 
2211   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2212   Value *IncV = Phi->getIncomingValue(LatchIdx);
2213   return (getLoopPhiForCounter(IncV, L) == Phi);
2214 }
2215 
2216 /// Search the loop header for a loop counter (an add rec w/step of one)
2217 /// suitable for use by LFTR.  If multiple counters are available, select the
2218 /// "best" one based on profitability heuristics.
2219 ///
2220 /// BECount may be an i8* pointer type. The pointer difference is already
2221 /// valid count without scaling the address stride, so it remains a pointer
2222 /// expression as far as SCEV is concerned.
2223 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2224                                 const SCEV *BECount,
2225                                 ScalarEvolution *SE, DominatorTree *DT) {
2226   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2227 
2228   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2229 
2230   // Loop over all of the PHI nodes, looking for a simple counter.
2231   PHINode *BestPhi = nullptr;
2232   const SCEV *BestInit = nullptr;
2233   BasicBlock *LatchBlock = L->getLoopLatch();
2234   assert(LatchBlock && "Must be in simplified form");
2235   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2236 
2237   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2238     PHINode *Phi = cast<PHINode>(I);
2239     if (!isLoopCounter(Phi, L, SE))
2240       continue;
2241 
2242     // Avoid comparing an integer IV against a pointer Limit.
2243     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2244       continue;
2245 
2246     const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2247 
2248     // AR may be a pointer type, while BECount is an integer type.
2249     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2250     // AR may not be a narrower type, or we may never exit.
2251     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2252     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2253       continue;
2254 
2255     // Avoid reusing a potentially undef value to compute other values that may
2256     // have originally had a concrete definition.
2257     if (!hasConcreteDef(Phi)) {
2258       // We explicitly allow unknown phis as long as they are already used by
2259       // the loop exit test.  This is legal since performing LFTR could not
2260       // increase the number of undef users.
2261       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
2262       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
2263           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
2264         continue;
2265     }
2266 
2267     // Avoid introducing undefined behavior due to poison which didn't exist in
2268     // the original program.  (Annoyingly, the rules for poison and undef
2269     // propagation are distinct, so this does NOT cover the undef case above.)
2270     // We have to ensure that we don't introduce UB by introducing a use on an
2271     // iteration where said IV produces poison.  Our strategy here differs for
2272     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
2273     // see code in linearFunctionTestReplace.  For pointers, we restrict
2274     // transforms as there is no good way to reinfer inbounds once lost.
2275     if (!Phi->getType()->isIntegerTy() &&
2276         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2277       continue;
2278 
2279     const SCEV *Init = AR->getStart();
2280 
2281     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2282       // Don't force a live loop counter if another IV can be used.
2283       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2284         continue;
2285 
2286       // Prefer to count-from-zero. This is a more "canonical" counter form. It
2287       // also prefers integer to pointer IVs.
2288       if (BestInit->isZero() != Init->isZero()) {
2289         if (BestInit->isZero())
2290           continue;
2291       }
2292       // If two IVs both count from zero or both count from nonzero then the
2293       // narrower is likely a dead phi that has been widened. Use the wider phi
2294       // to allow the other to be eliminated.
2295       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2296         continue;
2297     }
2298     BestPhi = Phi;
2299     BestInit = Init;
2300   }
2301   return BestPhi;
2302 }
2303 
2304 /// Insert an IR expression which computes the value held by the IV IndVar
2305 /// (which must be a loop counter w/unit stride) after the backedge of loop L
2306 /// is taken ExitCount times.
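     ///
     /// For a unit-stride counter this is (sketch, 2's complement arithmetic):
     ///   IVLimit = Start + ExitCount       ; pre-increment comparison
     ///   IVLimit = Start + ExitCount + 1   ; post-increment comparison
     /// e.g. a counter starting at 3 whose backedge is taken 10 times yields a
     /// limit of 13 (pre-increment) or 14 (post-increment).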
2307 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2308                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
2309                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2310   assert(isLoopCounter(IndVar, L, SE));
2311   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2312   const SCEV *IVInit = AR->getStart();
2313 
2314   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2315   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2316   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2317   // the existing GEPs whenever possible.
2318   if (IndVar->getType()->isPointerTy() &&
2319       !ExitCount->getType()->isPointerTy()) {
2320     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2321     // signed value. ExitCount on the other hand represents the loop trip count,
2322     // which is an unsigned value. FindLoopCounter only allows induction
2323     // variables that have a positive unit stride of one. This means we don't
2324     // have to handle the case of negative offsets (yet) and just need to zero
2325     // extend ExitCount.
2326     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2327     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2328     if (UsePostInc)
2329       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2330 
2331     // Expand the code for the iteration count.
2332     assert(SE->isLoopInvariant(IVOffset, L) &&
2333            "Computed iteration count is not loop invariant!");
2334 
2335     // We could handle pointer IVs other than i8*, but we need to compensate for
2336     // gep index scaling.
2337     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2338                              cast<PointerType>(IndVar->getType())
2339                                  ->getElementType())->isOne() &&
2340            "unit stride pointer IV must be i8*");
2341 
2342     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2343     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2344     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2345   } else {
2346     // In any other case, convert both IVInit and ExitCount to integers before
2347     // comparing. This may result in SCEV expansion of pointers, but in practice
2348     // SCEV will fold the pointer arithmetic away as such:
2349     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2350     //
2351     // Valid cases: (1) both are integers, which is most common; (2) both may
2352     // be pointers, for simple memset-style loops.
2353     //
2354     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2355     // were generated on top of case #2, which is not expected.
2356 
2357     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2358     // For unit stride, IVCount = Start + ExitCount with 2's complement
2359     // overflow.
2360 
2361     // For integer IVs, truncate the IV before computing IVInit + BECount,
2362     // unless we know a priori that the limit must be a constant when evaluated
2363     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2364     // of the IV in the loop over a (potentially) expensive expansion of the
2365     // widened exit count add(zext(add)) expression.
2366     if (SE->getTypeSizeInBits(IVInit->getType())
2367         > SE->getTypeSizeInBits(ExitCount->getType())) {
2368       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2369         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2370       else
2371         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2372     }
2373 
2374     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2375 
2376     if (UsePostInc)
2377       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2378 
2379     // Expand the code for the iteration count.
2380     assert(SE->isLoopInvariant(IVLimit, L) &&
2381            "Computed iteration count is not loop invariant!");
2382     // Ensure that we generate the same type as IndVar, or a smaller integer
2383     // type. In the presence of null pointer values, we have an integer type
2384     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2385     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2386       IndVar->getType() : ExitCount->getType();
2387     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2388     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2389   }
2390 }
2391 
2392 /// This method rewrites the exit condition of the loop to be a canonical !=
2393 /// comparison against the incremented loop induction variable.  This pass is
2394 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2395 /// determine a loop-invariant trip count of the loop, which is actually a much
2396 /// broader range than just linear tests.
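     ///
     /// Sketch of the rewrite (illustrative):
     ///   %exitcond = icmp slt i32 %iv, %n         ; original exit test
     /// becomes
     ///   %exitcond = icmp ne i32 %iv.next, %limit
     /// where %limit is the loop-invariant value produced by genLoopLimit.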
2397 bool IndVarSimplify::
2398 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2399                           const SCEV *ExitCount,
2400                           PHINode *IndVar, SCEVExpander &Rewriter) {
2401   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2402   assert(isLoopCounter(IndVar, L, SE));
2403   Instruction * const IncVar =
2404     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2405 
2406   // Initialize CmpIndVar to the preincremented IV.
2407   Value *CmpIndVar = IndVar;
2408   bool UsePostInc = false;
2409 
2410   // If the exiting block is the same as the backedge block, we prefer to
2411   // compare against the post-incremented value, otherwise we must compare
2412   // against the preincremented value.
2413   if (ExitingBB == L->getLoopLatch()) {
2414     // For pointer IVs, we chose not to strip inbounds, which requires us not
2415     // to add a potentially UB-introducing use.  We need to either a) show
2416     // the loop test we're modifying is already in post-inc form, or b) show
2417     // that adding a use must not introduce UB.
2418     bool SafeToPostInc =
2419         IndVar->getType()->isIntegerTy() ||
2420         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2421         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2422     if (SafeToPostInc) {
2423       UsePostInc = true;
2424       CmpIndVar = IncVar;
2425     }
2426   }
2427 
2428   // It may be necessary to drop nowrap flags on the incrementing instruction
2429   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2430   // case the increment might have previously been poison on the last iteration
2431   // only) or if LFTR switches to a different IV that was previously dynamically
2432   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2433   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2434   // check), because the pre-inc addrec flags may be adopted from the original
2435   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2436   // TODO: This handling is inaccurate for one case: If we switch to a
2437   // dynamically dead IV that wraps on the first loop iteration only, which is
2438   // not covered by the post-inc addrec. (If the new IV was not dynamically
2439   // dead, it could not be poison on the first iteration in the first place.)
2440   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2441     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2442     if (BO->hasNoUnsignedWrap())
2443       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2444     if (BO->hasNoSignedWrap())
2445       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2446   }
2447 
2448   Value *ExitCnt = genLoopLimit(
2449       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2450   assert(ExitCnt->getType()->isPointerTy() ==
2451              IndVar->getType()->isPointerTy() &&
2452          "genLoopLimit missed a cast");
2453 
2454   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2455   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2456   ICmpInst::Predicate P;
2457   if (L->contains(BI->getSuccessor(0)))
2458     P = ICmpInst::ICMP_NE;
2459   else
2460     P = ICmpInst::ICMP_EQ;
2461 
2462   IRBuilder<> Builder(BI);
2463 
2464   // The new loop exit condition should reuse the debug location of the
2465   // original loop exit condition.
2466   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2467     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2468 
2469   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2470   // avoid the expensive expansion of the limit expression in the wider type,
2471   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2472   // since we know (from the exit count bitwidth), that we can't self-wrap in
2473   // the narrower type.
2474   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2475   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2476   if (CmpIndVarSize > ExitCntSize) {
2477     assert(!CmpIndVar->getType()->isPointerTy() &&
2478            !ExitCnt->getType()->isPointerTy());
2479 
2480     // Before resorting to actually inserting the truncate, use the same
2481     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2482     // the other side of the comparison instead.  We still evaluate the limit
2483     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2484     // a truncate within in.
2485     bool Extended = false;
2486     const SCEV *IV = SE->getSCEV(CmpIndVar);
2487     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2488                                                   ExitCnt->getType());
2489     const SCEV *ZExtTrunc =
2490       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2491 
2492     if (ZExtTrunc == IV) {
2493       Extended = true;
2494       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2495                                    "wide.trip.count");
2496     } else {
2497       const SCEV *SExtTrunc =
2498         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2499       if (SExtTrunc == IV) {
2500         Extended = true;
2501         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2502                                      "wide.trip.count");
2503       }
2504     }
2505 
2506     if (Extended) {
2507       bool Discard;
2508       L->makeLoopInvariant(ExitCnt, Discard);
2509     } else
2510       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2511                                       "lftr.wideiv");
2512   }
2513   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2514                     << "      LHS:" << *CmpIndVar << '\n'
2515                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2516                     << "\n"
2517                     << "      RHS:\t" << *ExitCnt << "\n"
2518                     << "ExitCount:\t" << *ExitCount << "\n"
2519                     << "  was: " << *BI->getCondition() << "\n");
2520 
2521   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2522   Value *OrigCond = BI->getCondition();
2523   // It's tempting to use replaceAllUsesWith here to fully replace the old
2524   // comparison, but that's not immediately safe, since users of the old
2525   // comparison may not be dominated by the new comparison. Instead, just
2526   // update the branch to use the new comparison; in the common case this
2527   // will make old comparison dead.
2528   BI->setCondition(Cond);
2529   DeadInsts.push_back(OrigCond);
2530 
2531   ++NumLFTR;
2532   return true;
2533 }
2534 
2535 //===----------------------------------------------------------------------===//
2536 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2537 //===----------------------------------------------------------------------===//
2538 
2539 /// If there's a single exit block, sink any loop-invariant values that
2540 /// were defined in the preheader but not used inside the loop into the
2541 /// exit block to reduce register pressure in the loop.
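     ///
     /// For example (illustrative): a preheader computation
     ///   %sum = add i32 %a, %b
     /// whose only users sit after the loop is moved into the exit block, so
     /// its value no longer has to live in a register across the loop body.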
2542 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2543   BasicBlock *ExitBlock = L->getExitBlock();
2544   if (!ExitBlock) return false;
2545 
2546   BasicBlock *Preheader = L->getLoopPreheader();
2547   if (!Preheader) return false;
2548 
2549   bool MadeAnyChanges = false;
2550   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2551   BasicBlock::iterator I(Preheader->getTerminator());
2552   while (I != Preheader->begin()) {
2553     --I;
2554     // New instructions were inserted at the end of the preheader.
2555     if (isa<PHINode>(I))
2556       break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before any instructions inside the loop run.
    // Also don't move instructions which might read memory, since the loop
    // may modify memory. Note that it's okay if the instruction might have
    // undefined behavior: LoopSimplify guarantees that the preheader
    // dominates the exit block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink allocas: we never want to sink static allocas out of the
    // entry block, and correctly sinking dynamic allocas requires checking
    // for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
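      // A use in a PHI counts as a use in the corresponding incoming block,
      // not in the block containing the PHI itself.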
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

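    // Step the iterator off ToMove before moving it, so the backward walk
    // over the preheader stays valid; Done records that we've reached the
    // top of the block.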
    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Form an expression for the maximum exit count possible for this loop. We
  // merge the max and exact information to approximate a version of
  // getMaxBackedgeTakenCount which isn't restricted to just constants.
  // TODO: factor this out as a version of getMaxBackedgeTakenCount which
  // isn't guaranteed to return a constant.
  SmallVector<const SCEV*, 4> ExitCounts;
  const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L);
  if (!isa<SCEVCouldNotCompute>(MaxConstEC))
    ExitCounts.push_back(MaxConstEC);
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (!isa<SCEVCouldNotCompute>(ExitCount)) {
      assert(DT->dominates(ExitingBB, L->getLoopLatch()) &&
             "We should only have known counts for exiting blocks that "
             "dominate latch!");
      ExitCounts.push_back(ExitCount);
    }
  }
  if (ExitCounts.empty())
    return false;
  const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts);
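  // For example (a sketch): if one exit is taken after at most %n iterations
  // and another after at most 10, MaxExitCount is umin(%n, 10); no exit can
  // be taken later than that.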

  bool Changed = false;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      continue;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      continue;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount))
      continue;

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this.  This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // TODO: Given we know the backedge can't be taken, we should go ahead
    // and break it.  Or at least, kill all the header phis and simplify.
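    // For example (a sketch): if this exit is controlled by "%i uge %n" and
    // SCEV proves %n == 0, the exit count folds to zero and the conditional
    // branch below is rewritten to exit unconditionally.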
    if (ExitCount->isZero()) {
      bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
      auto *OldCond = BI->getCondition();
      auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) :
        ConstantInt::getFalse(OldCond->getType());
      BI->setCondition(NewCond);
      if (OldCond->use_empty())
        DeadInsts.push_back(OldCond);
      Changed = true;
      continue;
    }

    // If we end up with a pointer exit count, bail.  Note that we can end up
    // with a pointer exit count for one exiting block, and not for another in
    // the same loop.
    if (!ExitCount->getType()->isIntegerTy() ||
        !MaxExitCount->getType()->isIntegerTy())
      continue;

    Type *WiderType =
      SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());
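    // Both counts are now integers of a common width; e.g. an i32 ExitCount
    // compared against an i64 MaxExitCount is first zero-extended to i64 so
    // the unsigned comparison below is well-typed.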

    // Can we prove that some other exit must be taken strictly before this
    // one?  TODO: handle cases where ule is known, and equality is covered
    // by a dominating exit.
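    // For example (a sketch): if MaxExitCount is umin(%n, 10) and this
    // exit's count is provably larger (say %n + 20), another exit always
    // fires first; this exit is never taken and its branch can be folded to
    // keep the loop running.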
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
      auto *OldCond = BI->getCondition();
      auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) :
        ConstantInt::getTrue(OldCond->getType());
      BI->setCondition(NewCond);
      if (OldCond->use_empty())
        DeadInsts.push_back(OldCond);
      Changed = true;
      continue;
    }

    // TODO: If we can prove that the exiting iteration is equal to the exit
    // count for this exit and that no previous exit opportunities exist
    // within the loop, then we can discharge all other exits.  (May fall out
    // of previous TODO.)

    // TODO: If we can't prove any relation between our exit count and the
    // loop's exit count, but taking this exit doesn't require actually
    // running the loop (i.e. no side effects, no computed values used in
    // exit), then we can replace the exit test with a loop invariant test
    // which exits on the first iteration.
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");
  bool Changed = false;

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

#ifndef NDEBUG
  // Used below for a consistency check only
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl)
    Changed |= rewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  Changed |= optimizeLoopExits(L);

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}
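
// A sketch of how this pass is typically reached under the new pass manager
// (the exact pipeline spelling may vary between LLVM versions):
//   opt -passes='loop(indvars)' -S input.ll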

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}