1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
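//
//      As an illustrative sketch of point 2 (hypothetical source, not output
//      of this pass): in
//        'int j = 0; for (i = 0; i != n; ++i) j += 4; use(j);'
//      the use of j after the loop can be rewritten to 'use(4*n)', after which
//      the loop has no remaining live users and becomes dead.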
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/ScalarEvolution.h"
42 #include "llvm/Analysis/ScalarEvolutionExpander.h"
43 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
44 #include "llvm/Analysis/TargetLibraryInfo.h"
45 #include "llvm/Analysis/TargetTransformInfo.h"
46 #include "llvm/Analysis/ValueTracking.h"
47 #include "llvm/IR/BasicBlock.h"
48 #include "llvm/IR/Constant.h"
49 #include "llvm/IR/ConstantRange.h"
50 #include "llvm/IR/Constants.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Dominators.h"
54 #include "llvm/IR/Function.h"
55 #include "llvm/IR/IRBuilder.h"
56 #include "llvm/IR/InstrTypes.h"
57 #include "llvm/IR/Instruction.h"
58 #include "llvm/IR/Instructions.h"
59 #include "llvm/IR/IntrinsicInst.h"
60 #include "llvm/IR/Intrinsics.h"
61 #include "llvm/IR/Module.h"
62 #include "llvm/IR/Operator.h"
63 #include "llvm/IR/PassManager.h"
64 #include "llvm/IR/PatternMatch.h"
65 #include "llvm/IR/Type.h"
66 #include "llvm/IR/Use.h"
67 #include "llvm/IR/User.h"
68 #include "llvm/IR/Value.h"
69 #include "llvm/IR/ValueHandle.h"
70 #include "llvm/InitializePasses.h"
71 #include "llvm/Pass.h"
72 #include "llvm/Support/Casting.h"
73 #include "llvm/Support/CommandLine.h"
74 #include "llvm/Support/Compiler.h"
75 #include "llvm/Support/Debug.h"
76 #include "llvm/Support/ErrorHandling.h"
77 #include "llvm/Support/MathExtras.h"
78 #include "llvm/Support/raw_ostream.h"
79 #include "llvm/Transforms/Scalar.h"
80 #include "llvm/Transforms/Scalar/LoopPassManager.h"
81 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
82 #include "llvm/Transforms/Utils/Local.h"
83 #include "llvm/Transforms/Utils/LoopUtils.h"
84 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
85 #include <cassert>
86 #include <cstdint>
87 #include <utility>
88 
89 using namespace llvm;
90 
91 #define DEBUG_TYPE "indvars"
92 
93 STATISTIC(NumWidened     , "Number of indvars widened");
94 STATISTIC(NumReplaced    , "Number of exit values replaced");
95 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
96 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
97 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
98 
99 // Trip count verification can be enabled by default under NDEBUG if we
100 // implement a strong expression equivalence checker in SCEV. Until then, we
101 // use the verify-indvars flag, which may assert in some cases.
102 static cl::opt<bool> VerifyIndvars(
103   "verify-indvars", cl::Hidden,
104   cl::desc("Verify the ScalarEvolution result after running indvars"));
105 
106 static cl::opt<ReplaceExitVal> ReplaceExitValue(
107     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
108     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
109     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
110                clEnumValN(OnlyCheapRepl, "cheap",
111                           "only replace exit value when the cost is cheap"),
112                clEnumValN(NoHardUse, "noharduse",
113                           "only replace exit values when loop def likely dead"),
114                clEnumValN(AlwaysRepl, "always",
115                           "always replace exit value whenever possible")));
116 
117 static cl::opt<bool> UsePostIncrementRanges(
118   "indvars-post-increment-ranges", cl::Hidden,
119   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
120   cl::init(true));
121 
122 static cl::opt<bool>
123 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
124             cl::desc("Disable Linear Function Test Replace optimization"));
125 
126 static cl::opt<bool>
127 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
128                 cl::desc("Predicate conditions in read only loops"));
129 
130 namespace {
131 
132 struct RewritePhi;
133 
134 class IndVarSimplify {
135   LoopInfo *LI;
136   ScalarEvolution *SE;
137   DominatorTree *DT;
138   const DataLayout &DL;
139   TargetLibraryInfo *TLI;
140   const TargetTransformInfo *TTI;
141 
142   SmallVector<WeakTrackingVH, 16> DeadInsts;
143 
144   bool handleFloatingPointIV(Loop *L, PHINode *PH);
145   bool rewriteNonIntegerIVs(Loop *L);
146 
147   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
148   /// Try to eliminate loop exits based on analyzable exit counts.
149   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
150   /// Try to form loop invariant tests for loop exits by changing how many
151   /// iterations of the loop run when that is unobservable.
152   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
153 
154   bool rewriteFirstIterationLoopExitValues(Loop *L);
155 
156   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
157                                  const SCEV *ExitCount,
158                                  PHINode *IndVar, SCEVExpander &Rewriter);
159 
160   bool sinkUnusedInvariants(Loop *L);
161 
162 public:
163   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
164                  const DataLayout &DL, TargetLibraryInfo *TLI,
165                  TargetTransformInfo *TTI)
166       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
167 
168   bool run(Loop *L);
169 };
170 
171 } // end anonymous namespace
172 
173 /// Determine the insertion point for this user. By default, insert immediately
174 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
175 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
176 /// common dominator for the incoming blocks. A nullptr can be returned if no
177 /// viable location is found: it may happen if User is a PHI and Def only comes
178 /// to this PHI from unreachable blocks.
179 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
180                                           DominatorTree *DT, LoopInfo *LI) {
181   PHINode *PHI = dyn_cast<PHINode>(User);
182   if (!PHI)
183     return User;
184 
185   Instruction *InsertPt = nullptr;
186   for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
187     if (PHI->getIncomingValue(i) != Def)
188       continue;
189 
190     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
191 
192     if (!DT->isReachableFromEntry(InsertBB))
193       continue;
194 
195     if (!InsertPt) {
196       InsertPt = InsertBB->getTerminator();
197       continue;
198     }
199     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
200     InsertPt = InsertBB->getTerminator();
201   }
202 
203   // If we have skipped all inputs, it means that Def only comes to Phi from
204   // unreachable blocks.
205   if (!InsertPt)
206     return nullptr;
207 
208   auto *DefI = dyn_cast<Instruction>(Def);
209   if (!DefI)
210     return InsertPt;
211 
212   assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
213 
214   auto *L = LI->getLoopFor(DefI->getParent());
215   assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
216 
217   for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
218     if (LI->getLoopFor(DTN->getBlock()) == L)
219       return DTN->getBlock()->getTerminator();
220 
221   llvm_unreachable("DefI dominates InsertPt!");
222 }
223 
224 //===----------------------------------------------------------------------===//
225 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
226 //===----------------------------------------------------------------------===//
227 
228 /// Convert APF to an integer, if possible.
229 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
230   bool isExact = false;
231   // See if we can convert this to an int64_t
232   uint64_t UIntVal;
233   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
234                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
235       !isExact)
236     return false;
237   IntVal = UIntVal;
238   return true;
239 }
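
// Illustrative behavior of ConvertToSInt (hypothetical values, not an
// exhaustive specification):
//   ConvertToSInt(APFloat(42.0), I) -> true,  I == 42
//   ConvertToSInt(APFloat(2.5),  I) -> false  (truncation toward zero is inexact)
//   ConvertToSInt(APFloat(1e30), I) -> false  (value does not fit in 64 bits)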
240 
241 /// If the loop has a floating-point induction variable, then insert a
242 /// corresponding integer induction variable if possible.
243 /// For example,
244 /// for(double i = 0; i < 10000; ++i)
245 ///   bar(i)
246 /// is converted into
247 /// for(int i = 0; i < 10000; ++i)
248 ///   bar((double)i);
249 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
250   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
251   unsigned BackEdge     = IncomingEdge^1;
252 
253   // Check incoming value.
254   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
255 
256   int64_t InitValue;
257   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
258     return false;
259 
260   // Check the IV increment. Reject this PN if the increment operation is not
261   // an add or if the increment value cannot be represented by an integer.
262   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
263   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
264 
265   // If this is not an add of the PHI with a constantfp, or if the constant fp
266   // is not an integer, bail out.
267   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
268   int64_t IncValue;
269   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
270       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
271     return false;
272 
273   // Check Incr uses. One user is PN and the other user is an exit condition
274   // used by the conditional terminator.
275   Value::user_iterator IncrUse = Incr->user_begin();
276   Instruction *U1 = cast<Instruction>(*IncrUse++);
277   if (IncrUse == Incr->user_end()) return false;
278   Instruction *U2 = cast<Instruction>(*IncrUse++);
279   if (IncrUse != Incr->user_end()) return false;
280 
281   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
282   // only used by a branch, we can't transform it.
283   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
284   if (!Compare)
285     Compare = dyn_cast<FCmpInst>(U2);
286   if (!Compare || !Compare->hasOneUse() ||
287       !isa<BranchInst>(Compare->user_back()))
288     return false;
289 
290   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
291 
292   // We need to verify that the branch actually controls the iteration count
293   // of the loop.  If not, the new IV can overflow and no one will notice.
294   // The branch block must be in the loop and one of the successors must be out
295   // of the loop.
296   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
297   if (!L->contains(TheBr->getParent()) ||
298       (L->contains(TheBr->getSuccessor(0)) &&
299        L->contains(TheBr->getSuccessor(1))))
300     return false;
301 
302   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
303   // transform it.
304   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
305   int64_t ExitValue;
306   if (ExitValueVal == nullptr ||
307       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
308     return false;
309 
310   // Find new predicate for integer comparison.
311   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
312   switch (Compare->getPredicate()) {
313   default: return false;  // Unknown comparison.
314   case CmpInst::FCMP_OEQ:
315   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
316   case CmpInst::FCMP_ONE:
317   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
318   case CmpInst::FCMP_OGT:
319   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
320   case CmpInst::FCMP_OGE:
321   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
322   case CmpInst::FCMP_OLT:
323   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
324   case CmpInst::FCMP_OLE:
325   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
326   }
327 
328   // We convert the floating point induction variable to a signed i32 value if
329   // we can.  This is only safe if the comparison will not overflow in a way
330   // that won't be trapped by the integer equivalent operations.  Check for this
331   // now.
332   // TODO: We could use i64 if it is native and the range requires it.
333 
334   // The start/stride/exit values must all fit in signed i32.
335   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
336     return false;
337 
338   // If not actually striding (add x, 0.0), avoid touching the code.
339   if (IncValue == 0)
340     return false;
341 
342   // Positive and negative strides have different safety conditions.
343   if (IncValue > 0) {
344     // If we have a positive stride, we require the init to be less than the
345     // exit value.
346     if (InitValue >= ExitValue)
347       return false;
348 
349     uint32_t Range = uint32_t(ExitValue-InitValue);
350     // Check for infinite loop, either:
351     // while (i <= Exit) or until (i > Exit)
352     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
353       if (++Range == 0) return false;  // Range overflows.
354     }
355 
356     unsigned Leftover = Range % uint32_t(IncValue);
357 
358     // If this is an equality comparison, we require that the strided value
359     // exactly land on the exit value, otherwise the IV condition will wrap
360     // around and do things the fp IV wouldn't.
361     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
362         Leftover != 0)
363       return false;
364 
365     // If the stride would wrap around the i32 before exiting, we can't
366     // transform the IV.
367     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
368       return false;
369   } else {
370     // If we have a negative stride, we require the init to be greater than the
371     // exit value.
372     if (InitValue <= ExitValue)
373       return false;
374 
375     uint32_t Range = uint32_t(InitValue-ExitValue);
376     // Check for infinite loop, either:
377     // while (i >= Exit) or until (i < Exit)
378     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
379       if (++Range == 0) return false;  // Range overflows.
380     }
381 
382     unsigned Leftover = Range % uint32_t(-IncValue);
383 
384     // If this is an equality comparison, we require that the strided value
385     // exactly land on the exit value, otherwise the IV condition will wrap
386     // around and do things the fp IV wouldn't.
387     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
388         Leftover != 0)
389       return false;
390 
391     // If the stride would wrap around the i32 before exiting, we can't
392     // transform the IV.
393     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
394       return false;
395   }
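  // A worked illustration of the checks above (hypothetical values): with
  // init = 0, exit = 10, stride = 3 and predicate SLT, Range = 10 and
  // Leftover = 1; the compare is not an equality and exit + stride = 13 does
  // not overflow i32, so the conversion is allowed. With predicate NE instead,
  // Leftover != 0 means the integer IV would step past 10 and eventually wrap,
  // behaving differently from the fp IV, so we bail out.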
396 
397   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
398 
399   // Insert new integer induction variable.
400   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
401   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
402                       PN->getIncomingBlock(IncomingEdge));
403 
404   Value *NewAdd =
405     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
406                               Incr->getName()+".int", Incr);
407   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
408 
409   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
410                                       ConstantInt::get(Int32Ty, ExitValue),
411                                       Compare->getName());
412 
413   // In the following deletions, PN may become dead and may be deleted.
414   // Use a WeakTrackingVH to observe whether this happens.
415   WeakTrackingVH WeakPH = PN;
416 
417   // Delete the old floating point exit comparison.  The branch starts using the
418   // new comparison.
419   NewCompare->takeName(Compare);
420   Compare->replaceAllUsesWith(NewCompare);
421   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
422 
423   // Delete the old floating point increment.
424   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
425   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
426 
427   // If the FP induction variable still has uses, this is because something else
428   // in the loop uses its value.  In order to canonicalize the induction
429   // variable, we chose to eliminate the IV and rewrite it in terms of an
430   // int->fp cast.
431   //
432   // We give preference to sitofp over uitofp because it is faster on most
433   // platforms.
434   if (WeakPH) {
435     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
436                                  &*PN->getParent()->getFirstInsertionPt());
437     PN->replaceAllUsesWith(Conv);
438     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
439   }
440   return true;
441 }
442 
443 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
444   // First step.  Check to see if there are any floating-point recurrences.
445   // If there are, change them into integer recurrences, permitting analysis by
446   // the SCEV routines.
447   BasicBlock *Header = L->getHeader();
448 
449   SmallVector<WeakTrackingVH, 8> PHIs;
450   for (PHINode &PN : Header->phis())
451     PHIs.push_back(&PN);
452 
453   bool Changed = false;
454   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
455     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
456       Changed |= handleFloatingPointIV(L, PN);
457 
458   // If the loop previously had a floating-point IV, ScalarEvolution
459   // may not have been able to compute a trip count. Now that we've done some
460   // re-writing, the trip count may be computable.
461   if (Changed)
462     SE->forgetLoop(L);
463   return Changed;
464 }
465 
466 //===---------------------------------------------------------------------===//
467 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
468 // they will exit at the first iteration.
469 //===---------------------------------------------------------------------===//
470 
471 /// Check to see if this loop has loop invariant conditions which lead to loop
472 /// exits. If so, we know that if the exit path is taken, it is at the first
473 /// loop iteration. This lets us predict exit values of PHI nodes that live in
474 /// the loop header.
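/// A hypothetical shape of such a loop (illustrative IR, not from a test):
///   header:
///     %v = phi i32 [ 0, %preheader ], [ %v.next, %latch ]
///     br i1 %inv.cond, label %exit, label %latch
///   ...
///   exit:
///     %lcssa = phi i32 [ %v, %header ]
/// Because %inv.cond is loop-invariant and the branch in %header executes on
/// every iteration, the exit can only be taken on the first iteration, so
/// %lcssa can be rewritten to the preheader value 0.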
475 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
476   // Verify the input to the pass is already in LCSSA form.
477   assert(L->isLCSSAForm(*DT));
478 
479   SmallVector<BasicBlock *, 8> ExitBlocks;
480   L->getUniqueExitBlocks(ExitBlocks);
481 
482   bool MadeAnyChanges = false;
483   for (auto *ExitBB : ExitBlocks) {
484     // If there are no more PHI nodes in this exit block, then no more
485     // values defined inside the loop are used on this path.
486     for (PHINode &PN : ExitBB->phis()) {
487       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
488            IncomingValIdx != E; ++IncomingValIdx) {
489         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
490 
491         // Can we prove that the exit must run on the first iteration if it
492         // runs at all?  (i.e. early exits are fine for our purposes, but
493         // traces which lead to this exit being taken on the 2nd iteration
494         // aren't.)  Note that this is about whether the exit branch is
495         // executed, not about whether it is taken.
496         if (!L->getLoopLatch() ||
497             !DT->dominates(IncomingBB, L->getLoopLatch()))
498           continue;
499 
500         // Get condition that leads to the exit path.
501         auto *TermInst = IncomingBB->getTerminator();
502 
503         Value *Cond = nullptr;
504         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
505           // Must be a conditional branch, otherwise the block
506           // should not be in the loop.
507           Cond = BI->getCondition();
508         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
509           Cond = SI->getCondition();
510         else
511           continue;
512 
513         if (!L->isLoopInvariant(Cond))
514           continue;
515 
516         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
517 
518         // Only deal with PHIs in the loop header.
519         if (!ExitVal || ExitVal->getParent() != L->getHeader())
520           continue;
521 
522         // If ExitVal is a PHI on the loop header, then we know its
523         // value along this exit because the exit can only be taken
524         // on the first iteration.
525         auto *LoopPreheader = L->getLoopPreheader();
526         assert(LoopPreheader && "Invalid loop");
527         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
528         if (PreheaderIdx != -1) {
529           assert(ExitVal->getParent() == L->getHeader() &&
530                  "ExitVal must be in loop header");
531           MadeAnyChanges = true;
532           PN.setIncomingValue(IncomingValIdx,
533                               ExitVal->getIncomingValue(PreheaderIdx));
534         }
535       }
536     }
537   }
538   return MadeAnyChanges;
539 }
540 
541 //===----------------------------------------------------------------------===//
542 //  IV Widening - Extend the width of an IV to cover its widest uses.
543 //===----------------------------------------------------------------------===//
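
// A schematic before/after for this section (illustrative IR, not taken from
// an actual test case):
//
//   ; before: a 32-bit IV whose sign extension feeds a 64-bit address
//   %iv      = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
//   %iv.ext  = sext i32 %iv to i64
//   %addr    = getelementptr double, double* %p, i64 %iv.ext
//   %iv.next = add nsw i32 %iv, 1
//
//   ; after: a single 64-bit IV, the sext has been eliminated
//   %iv.wide      = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]
//   %addr         = getelementptr double, double* %p, i64 %iv.wide
//   %iv.wide.next = add nsw i64 %iv.wide, 1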
544 
545 namespace {
546 
547 // Collect information about induction variables that are used by sign/zero
548 // extend operations. This information is recorded by CollectExtend and provides
549 // the input to WidenIV.
550 struct WideIVInfo {
551   PHINode *NarrowIV = nullptr;
552 
553   // Widest integer type created by a [sz]ext.
554   Type *WidestNativeType = nullptr;
555 
556   // Was a sext user seen before a zext?
557   bool IsSigned = false;
558 };
559 
560 } // end anonymous namespace
561 
562 /// Update information about the induction variable that is extended by this
563 /// sign or zero extend operation. This is used to determine the final width of
564 /// the IV before actually widening it.
565 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
566                         const TargetTransformInfo *TTI) {
567   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
568   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
569     return;
570 
571   Type *Ty = Cast->getType();
572   uint64_t Width = SE->getTypeSizeInBits(Ty);
573   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
574     return;
575 
576   // Check that `Cast` actually extends the induction variable (we rely on this
577   // later).  This takes care of cases where `Cast` is extending a truncation of
578   // the narrow induction variable, and thus can end up being narrower than the
579   // "narrow" induction variable.
580   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
581   if (NarrowIVWidth >= Width)
582     return;
583 
584   // Cast is either an sext or zext up to this point.
585   // We should not widen an indvar if arithmetic on the wider indvar is more
586   // expensive than that on the narrower indvar. We check only the cost of ADD
587   // because at least an ADD is required to increment the induction variable. We
588   // could compute more comprehensively the cost of all instructions on the
589   // induction variable when necessary.
590   if (TTI &&
591       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
592           TTI->getArithmeticInstrCost(Instruction::Add,
593                                       Cast->getOperand(0)->getType())) {
594     return;
595   }
596 
597   if (!WI.WidestNativeType) {
598     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
599     WI.IsSigned = IsSigned;
600     return;
601   }
602 
603   // We extend the IV to satisfy the sign of its first user, arbitrarily.
604   if (WI.IsSigned != IsSigned)
605     return;
606 
607   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
608     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
609 }
610 
611 namespace {
612 
613 /// Record a link in the Narrow IV def-use chain along with the WideIV that
614 /// computes the same value as the Narrow IV def.  This avoids caching Use*
615 /// pointers.
616 struct NarrowIVDefUse {
617   Instruction *NarrowDef = nullptr;
618   Instruction *NarrowUse = nullptr;
619   Instruction *WideDef = nullptr;
620 
621   // True if the narrow def is never negative.  Tracking this information lets
622   // us use a sign extension instead of a zero extension or vice versa, when
623   // profitable and legal.
624   bool NeverNegative = false;
625 
626   NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
627                  bool NeverNegative)
628       : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
629         NeverNegative(NeverNegative) {}
630 };
631 
632 /// The goal of this transform is to remove sign and zero extends without
633 /// creating any new induction variables. To do this, it creates a new phi of
634 /// the wider type and redirects all users, either removing extends or inserting
635 /// truncs whenever we stop propagating the type.
636 class WidenIV {
637   // Parameters
638   PHINode *OrigPhi;
639   Type *WideType;
640 
641   // Context
642   LoopInfo        *LI;
643   Loop            *L;
644   ScalarEvolution *SE;
645   DominatorTree   *DT;
646 
647   // Does the module have any calls to the llvm.experimental.guard intrinsic
648   // at all? If not, we can avoid scanning instructions looking for guards.
649   bool HasGuards;
650 
651   // Result
652   PHINode *WidePhi = nullptr;
653   Instruction *WideInc = nullptr;
654   const SCEV *WideIncExpr = nullptr;
655   SmallVectorImpl<WeakTrackingVH> &DeadInsts;
656 
657   SmallPtrSet<Instruction *,16> Widened;
658   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
659 
660   enum ExtendKind { ZeroExtended, SignExtended, Unknown };
661 
662   // A map tracking the kind of extension used to widen each narrow IV
663   // and narrow IV user.
664   // Key: pointer to a narrow IV or IV user.
665   // Value: the kind of extension used to widen this Instruction.
666   DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
667 
668   using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
669 
670   // A map with control-dependent ranges for post increment IV uses. The key is
671   // a pair of IV def and a use of this def denoting the context. The value is
672   // a ConstantRange representing possible values of the def at the given
673   // context.
674   DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
675 
676   Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
677                                               Instruction *UseI) {
678     DefUserPair Key(Def, UseI);
679     auto It = PostIncRangeInfos.find(Key);
680     return It == PostIncRangeInfos.end()
681                ? Optional<ConstantRange>(None)
682                : Optional<ConstantRange>(It->second);
683   }
684 
685   void calculatePostIncRanges(PHINode *OrigPhi);
686   void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
687 
688   void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
689     DefUserPair Key(Def, UseI);
690     auto It = PostIncRangeInfos.find(Key);
691     if (It == PostIncRangeInfos.end())
692       PostIncRangeInfos.insert({Key, R});
693     else
694       It->second = R.intersectWith(It->second);
695   }
696 
697 public:
698   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
699           DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
700           bool HasGuards)
701       : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
702         L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
703         HasGuards(HasGuards), DeadInsts(DI) {
704     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
705     ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
706   }
707 
708   PHINode *createWideIV(SCEVExpander &Rewriter);
709 
710 protected:
711   Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
712                           Instruction *Use);
713 
714   Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
715   Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
716                                      const SCEVAddRecExpr *WideAR);
717   Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
718 
719   ExtendKind getExtendKind(Instruction *I);
720 
721   using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
722 
723   WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
724 
725   WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
726 
727   const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
728                               unsigned OpCode) const;
729 
730   Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
731 
732   bool widenLoopCompare(NarrowIVDefUse DU);
733   bool widenWithVariantLoadUse(NarrowIVDefUse DU);
734   void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
735 
736   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
737 };
738 
739 } // end anonymous namespace
740 
741 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
742                                  bool IsSigned, Instruction *Use) {
743   // Set the debug location and conservative insertion point.
744   IRBuilder<> Builder(Use);
745   // Hoist the insertion point into loop preheaders as far as possible.
746   for (const Loop *L = LI->getLoopFor(Use->getParent());
747        L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
748        L = L->getParentLoop())
749     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
750 
751   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
752                     Builder.CreateZExt(NarrowOper, WideType);
753 }
754 
755 /// Instantiate a wide operation to replace a narrow operation. This only needs
756 /// to handle operations that can evaluate to a SCEVAddRec. It can safely return
757 /// 0 for any operation we decide not to clone.
758 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
759                                   const SCEVAddRecExpr *WideAR) {
760   unsigned Opcode = DU.NarrowUse->getOpcode();
761   switch (Opcode) {
762   default:
763     return nullptr;
764   case Instruction::Add:
765   case Instruction::Mul:
766   case Instruction::UDiv:
767   case Instruction::Sub:
768     return cloneArithmeticIVUser(DU, WideAR);
769 
770   case Instruction::And:
771   case Instruction::Or:
772   case Instruction::Xor:
773   case Instruction::Shl:
774   case Instruction::LShr:
775   case Instruction::AShr:
776     return cloneBitwiseIVUser(DU);
777   }
778 }
779 
780 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
781   Instruction *NarrowUse = DU.NarrowUse;
782   Instruction *NarrowDef = DU.NarrowDef;
783   Instruction *WideDef = DU.WideDef;
784 
785   LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
786 
787   // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
788   // about the narrow operand yet so must insert a [sz]ext. It is probably loop
789   // invariant and will be folded or hoisted. If it actually comes from a
790   // widened IV, it should be removed during a future call to widenIVUse.
791   bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
792   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
793                    ? WideDef
794                    : createExtendInst(NarrowUse->getOperand(0), WideType,
795                                       IsSigned, NarrowUse);
796   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
797                    ? WideDef
798                    : createExtendInst(NarrowUse->getOperand(1), WideType,
799                                       IsSigned, NarrowUse);
800 
801   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
802   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
803                                         NarrowBO->getName());
804   IRBuilder<> Builder(NarrowUse);
805   Builder.Insert(WideBO);
806   WideBO->copyIRFlags(NarrowBO);
807   return WideBO;
808 }
809 
810 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
811                                             const SCEVAddRecExpr *WideAR) {
812   Instruction *NarrowUse = DU.NarrowUse;
813   Instruction *NarrowDef = DU.NarrowDef;
814   Instruction *WideDef = DU.WideDef;
815 
816   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
817 
818   unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
819 
820   // We're trying to find X such that
821   //
822   //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
823   //
824   // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
825   // and check using SCEV if any of them are correct.
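  //
  // For instance (hypothetical), if the narrow use is
  //   %u = sub nsw i32 %narrowdef, %a
  // and NarrowDef was widened with a sext, we first try X = sext(%a) and then
  // X = zext(%a), accepting whichever choice makes 'sub(WideDef, X)' equal to
  // WideAR according to SCEV.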
826 
827   // Returns true if extending NonIVNarrowDef according to `SignExt` is a
828   // correct solution to X.
829   auto GuessNonIVOperand = [&](bool SignExt) {
830     const SCEV *WideLHS;
831     const SCEV *WideRHS;
832 
833     auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
834       if (SignExt)
835         return SE->getSignExtendExpr(S, Ty);
836       return SE->getZeroExtendExpr(S, Ty);
837     };
838 
839     if (IVOpIdx == 0) {
840       WideLHS = SE->getSCEV(WideDef);
841       const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
842       WideRHS = GetExtend(NarrowRHS, WideType);
843     } else {
844       const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
845       WideLHS = GetExtend(NarrowLHS, WideType);
846       WideRHS = SE->getSCEV(WideDef);
847     }
848 
849     // WideUse is "WideDef `op.wide` X" as described in the comment.
850     const SCEV *WideUse = nullptr;
851 
852     switch (NarrowUse->getOpcode()) {
853     default:
854       llvm_unreachable("No other possibility!");
855 
856     case Instruction::Add:
857       WideUse = SE->getAddExpr(WideLHS, WideRHS);
858       break;
859 
860     case Instruction::Mul:
861       WideUse = SE->getMulExpr(WideLHS, WideRHS);
862       break;
863 
864     case Instruction::UDiv:
865       WideUse = SE->getUDivExpr(WideLHS, WideRHS);
866       break;
867 
868     case Instruction::Sub:
869       WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
870       break;
871     }
872 
873     return WideUse == WideAR;
874   };
875 
876   bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
877   if (!GuessNonIVOperand(SignExtend)) {
878     SignExtend = !SignExtend;
879     if (!GuessNonIVOperand(SignExtend))
880       return nullptr;
881   }
882 
883   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
884                    ? WideDef
885                    : createExtendInst(NarrowUse->getOperand(0), WideType,
886                                       SignExtend, NarrowUse);
887   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
888                    ? WideDef
889                    : createExtendInst(NarrowUse->getOperand(1), WideType,
890                                       SignExtend, NarrowUse);
891 
892   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
893   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
894                                         NarrowBO->getName());
895 
896   IRBuilder<> Builder(NarrowUse);
897   Builder.Insert(WideBO);
898   WideBO->copyIRFlags(NarrowBO);
899   return WideBO;
900 }
901 
902 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
903   auto It = ExtendKindMap.find(I);
904   assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
905   return It->second;
906 }
907 
908 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
909                                      unsigned OpCode) const {
910   if (OpCode == Instruction::Add)
911     return SE->getAddExpr(LHS, RHS);
912   if (OpCode == Instruction::Sub)
913     return SE->getMinusSCEV(LHS, RHS);
914   if (OpCode == Instruction::Mul)
915     return SE->getMulExpr(LHS, RHS);
916 
917   llvm_unreachable("Unsupported opcode.");
918 }
919 
920 /// No-wrap operations can transfer sign extension of their result to their
921 /// operands. Generate the SCEV value for the widened operation without
922 /// actually modifying the IR yet. If the expression after extending the
923 /// operands is an AddRec for this loop, return the AddRec and the kind of
924 /// extension used.
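/// For example (hypothetical), when widening the use '%u = add nsw i32 %n, %c'
/// of a sign-extended narrow IV %n, the loop-invariant operand %c is sign
/// extended as well, and 'WideDef + (sext %c)' is still an AddRec on this
/// loop, so the add can be rebuilt on the wide IV.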
925 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
926   // Handle the common case of add<nsw/nuw>
927   const unsigned OpCode = DU.NarrowUse->getOpcode();
928   // Only Add/Sub/Mul instructions supported yet.
929   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
930       OpCode != Instruction::Mul)
931     return {nullptr, Unknown};
932 
933   // One operand (NarrowDef) has already been extended to WideDef. Now determine
934   // if extending the other will lead to a recurrence.
935   const unsigned ExtendOperIdx =
936       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
937   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
938 
939   const SCEV *ExtendOperExpr = nullptr;
940   const OverflowingBinaryOperator *OBO =
941     cast<OverflowingBinaryOperator>(DU.NarrowUse);
942   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
943   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
944     ExtendOperExpr = SE->getSignExtendExpr(
945       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
946   else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
947     ExtendOperExpr = SE->getZeroExtendExpr(
948       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
949   else
950     return {nullptr, Unknown};
951 
952   // When creating this SCEV expr, don't apply the current operation's NSW or NUW
953   // flags. This instruction may be guarded by control flow that the no-wrap
954   // behavior depends on. Non-control-equivalent instructions can be mapped to
955   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
956   // semantics to those operations.
957   const SCEV *lhs = SE->getSCEV(DU.WideDef);
958   const SCEV *rhs = ExtendOperExpr;
959 
960   // Let's swap operands to the initial order for the case of non-commutative
961   // operations, like SUB. See PR21014.
962   if (ExtendOperIdx == 0)
963     std::swap(lhs, rhs);
964   const SCEVAddRecExpr *AddRec =
965       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
966 
967   if (!AddRec || AddRec->getLoop() != L)
968     return {nullptr, Unknown};
969 
970   return {AddRec, ExtKind};
971 }
972 
973 /// Is this instruction potentially interesting for further simplification after
974 /// widening its type? In other words, can the extend be safely hoisted out of
975 /// the loop with SCEV reducing the value to a recurrence on the same loop? If
976 /// so, return the extended recurrence and the kind of extension used. Otherwise
977 /// return {nullptr, Unknown}.
978 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
979   if (!SE->isSCEVable(DU.NarrowUse->getType()))
980     return {nullptr, Unknown};
981 
982   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
983   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
984       SE->getTypeSizeInBits(WideType)) {
985     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
986     // index. So don't follow this use.
987     return {nullptr, Unknown};
988   }
989 
990   const SCEV *WideExpr;
991   ExtendKind ExtKind;
992   if (DU.NeverNegative) {
993     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
994     if (isa<SCEVAddRecExpr>(WideExpr))
995       ExtKind = SignExtended;
996     else {
997       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
998       ExtKind = ZeroExtended;
999     }
1000   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1001     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1002     ExtKind = SignExtended;
1003   } else {
1004     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1005     ExtKind = ZeroExtended;
1006   }
1007   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1008   if (!AddRec || AddRec->getLoop() != L)
1009     return {nullptr, Unknown};
1010   return {AddRec, ExtKind};
1011 }
1012 
1013 /// This IV user cannot be widened. Replace this use of the original narrow IV
1014 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1015 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1016   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1017   if (!InsertPt)
1018     return;
1019   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1020                     << *DU.NarrowUse << "\n");
1021   IRBuilder<> Builder(InsertPt);
1022   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1023   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1024 }
1025 
1026 /// If the narrow use is a compare instruction, then widen the compare
1027 /// (and possibly the other operand). The extend operation is hoisted into the
1028 /// loop preheader as far as possible.
1029 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1030   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1031   if (!Cmp)
1032     return false;
1033 
1034   // We can legally widen the comparison in the following two cases:
1035   //
1036   //  - The signedness of the IV extension and comparison match
1037   //
1038   //  - The narrow IV is always positive (and thus its sign extension is equal
1039   //    to its zero extension).  For instance, let's say we're zero extending
1040   //    %narrow for the following use
1041   //
1042   //      icmp slt i32 %narrow, %val   ... (A)
1043   //
1044   //    and %narrow is always positive.  Then
1045   //
1046   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1047   //          == icmp slt i32 zext(%narrow), sext(%val)
1048   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1049   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1050     return false;
1051 
1052   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1053   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1054   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1055   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1056 
1057   // Widen the compare instruction.
1058   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1059   if (!InsertPt)
1060     return false;
1061   IRBuilder<> Builder(InsertPt);
1062   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1063 
1064   // Widen the other operand of the compare, if necessary.
1065   if (CastWidth < IVWidth) {
1066     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1067     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1068   }
1069   return true;
1070 }
1071 
1072 /// If the narrow use is an instruction whose two operands are the defining
1073 /// instruction of DU and a load instruction, then we have the following
1074 /// situation: if the load is hoisted outside the loop, we never reach this
1075 /// function, because scalar evolution handles loop-invariant operands in
1076 /// widenIVUse and efficient code is generated without emitting truncate
1077 /// instructions. But when the load is not hoisted (whether because of a
1078 /// limitation in alias analysis or a genuine legality constraint), scalar
1079 /// evolution cannot reason about the loop-variant value and inefficient code
1080 /// is generated. This function handles the non-hoisted-load special case by
1081 /// making the optimization generate the same kind of code for hoisted and
1082 /// non-hoisted loads (widen the use and eliminate the sign extend
1083 /// instruction). This matters especially when the induction variables affect
1084 /// the addressing mode in code generation.
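/// A hypothetical instance of the pattern handled here: the narrow IV feeds
///   %l = load i32, i32* %p          ; loop-variant, not hoistable
///   %s = add nsw i32 %iv.narrow, %l
///   %e = sext i32 %s to i64
/// If every user of %s is such a sign extend to the wide type, the add can be
/// rebuilt on the widened IV and the extends removed.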
1085 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1086   Instruction *NarrowUse = DU.NarrowUse;
1087   Instruction *NarrowDef = DU.NarrowDef;
1088   Instruction *WideDef = DU.WideDef;
1089 
1090   // Handle the common case of add<nsw/nuw>
1091   const unsigned OpCode = NarrowUse->getOpcode();
1092   // Only Add/Sub/Mul instructions are supported.
1093   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1094       OpCode != Instruction::Mul)
1095     return false;
1096 
1097   // The operand that is not defined by NarrowDef of DU. Let's call it the
1098   // other operand.
1099   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1100   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1101          "bad DU");
1102 
1103   const SCEV *ExtendOperExpr = nullptr;
1104   const OverflowingBinaryOperator *OBO =
1105     cast<OverflowingBinaryOperator>(NarrowUse);
1106   ExtendKind ExtKind = getExtendKind(NarrowDef);
1107   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1108     ExtendOperExpr = SE->getSignExtendExpr(
1109       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1110   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1111     ExtendOperExpr = SE->getZeroExtendExpr(
1112       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1113   else
1114     return false;
1115 
1116   // We are interested in the other operand being a load instruction.
1117   // But, we should look into relaxing this restriction later on.
1118   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1119   if (I && I->getOpcode() != Instruction::Load)
1120     return false;
1121 
1122   // Verifying that Defining operand is an AddRec
1123   const SCEV *Op1 = SE->getSCEV(WideDef);
1124   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1125   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1126     return false;
1127   // Verify that the other operand is an extend.
1128   if (ExtKind == SignExtended) {
1129     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1130       return false;
1131   } else {
1132     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1133       return false;
1134   }
1135 
1136   if (ExtKind == SignExtended) {
1137     for (Use &U : NarrowUse->uses()) {
1138       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1139       if (!User || User->getType() != WideType)
1140         return false;
1141     }
1142   } else { // ExtKind == ZeroExtended
1143     for (Use &U : NarrowUse->uses()) {
1144       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1145       if (!User || User->getType() != WideType)
1146         return false;
1147     }
1148   }
1149 
1150   return true;
1151 }
1152 
1153 /// Special Case for widening with variant Loads (see
1154 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1155 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1156   Instruction *NarrowUse = DU.NarrowUse;
1157   Instruction *NarrowDef = DU.NarrowDef;
1158   Instruction *WideDef = DU.WideDef;
1159 
1160   ExtendKind ExtKind = getExtendKind(NarrowDef);
1161 
1162   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1163 
1164   // Generating a widening use instruction.
1165   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1166                    ? WideDef
1167                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1168                                       ExtKind, NarrowUse);
1169   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1170                    ? WideDef
1171                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1172                                       ExtKind, NarrowUse);
1173 
1174   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1175   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1176                                         NarrowBO->getName());
1177   IRBuilder<> Builder(NarrowUse);
1178   Builder.Insert(WideBO);
1179   WideBO->copyIRFlags(NarrowBO);
1180 
1181   assert(ExtKind != Unknown && "Unknown ExtKind not handled");
1182 
1183   ExtendKindMap[NarrowUse] = ExtKind;
1184 
1185   for (Use &U : NarrowUse->uses()) {
1186     Instruction *User = nullptr;
1187     if (ExtKind == SignExtended)
1188       User = dyn_cast<SExtInst>(U.getUser());
1189     else
1190       User = dyn_cast<ZExtInst>(U.getUser());
1191     if (User && User->getType() == WideType) {
1192       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1193                         << *WideBO << "\n");
1194       ++NumElimExt;
1195       User->replaceAllUsesWith(WideBO);
1196       DeadInsts.emplace_back(User);
1197     }
1198   }
1199 }
1200 
1201 /// Determine whether an individual user of the narrow IV can be widened. If so,
1202 /// return the wide clone of the user.
1203 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1204   assert(ExtendKindMap.count(DU.NarrowDef) &&
1205          "Should already know the kind of extension used to widen NarrowDef");
1206 
1207   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1208   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1209     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1210       // For LCSSA phis, sink the truncate outside the loop.
1211       // After SimplifyCFG most loop exit targets have a single predecessor.
1212       // Otherwise fall back to a truncate within the loop.
1213       if (UsePhi->getNumOperands() != 1)
1214         truncateIVUse(DU, DT, LI);
1215       else {
1216         // Widening the PHI requires us to insert a trunc.  The logical place
1217         // for this trunc is in the same BB as the PHI.  This is not possible if
1218         // the BB is terminated by a catchswitch.
1219         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1220           return nullptr;
1221 
1222         PHINode *WidePhi =
1223           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1224                           UsePhi);
1225         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1226         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1227         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1228         UsePhi->replaceAllUsesWith(Trunc);
1229         DeadInsts.emplace_back(UsePhi);
1230         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1231                           << *WidePhi << "\n");
1232       }
1233       return nullptr;
1234     }
1235   }
1236 
1237   // This narrow use can be widened by a sext if it's non-negative or its narrow
1238   // def was widened by a sext. Same for zext.
1239   auto canWidenBySExt = [&]() {
1240     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1241   };
1242   auto canWidenByZExt = [&]() {
1243     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1244   };
1245 
1246   // Our raison d'etre! Eliminate sign and zero extension.
1247   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1248       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1249     Value *NewDef = DU.WideDef;
1250     if (DU.NarrowUse->getType() != WideType) {
1251       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1252       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1253       if (CastWidth < IVWidth) {
1254         // The cast isn't as wide as the IV, so insert a Trunc.
1255         IRBuilder<> Builder(DU.NarrowUse);
1256         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1257       }
1258       else {
1259         // A wider extend was hidden behind a narrower one. This may induce
1260         // another round of IV widening in which the intermediate IV becomes
1261         // dead. It should be very rare.
1262         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1263                           << " not wide enough to subsume " << *DU.NarrowUse
1264                           << "\n");
1265         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1266         NewDef = DU.NarrowUse;
1267       }
1268     }
1269     if (NewDef != DU.NarrowUse) {
1270       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1271                         << " replaced by " << *DU.WideDef << "\n");
1272       ++NumElimExt;
1273       DU.NarrowUse->replaceAllUsesWith(NewDef);
1274       DeadInsts.emplace_back(DU.NarrowUse);
1275     }
1276     // Now that the extend is gone, we want to expose its uses for potential
1277     // further simplification. We don't need to directly inform SimplifyIVUsers
1278     // of the new users, because their parent IV will be processed later as a
1279     // new loop phi. If we preserved IVUsers analysis, we would also want to
1280     // push the uses of WideDef here.
1281 
1282     // No further widening is needed. The deceased [sz]ext had done it for us.
1283     return nullptr;
1284   }
1285 
1286   // Does this user itself evaluate to a recurrence after widening?
1287   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1288   if (!WideAddRec.first)
1289     WideAddRec = getWideRecurrence(DU);
1290 
1291   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1292   if (!WideAddRec.first) {
1293     // If use is a loop condition, try to promote the condition instead of
1294     // truncating the IV first.
1295     if (widenLoopCompare(DU))
1296       return nullptr;
1297 
1298     // We are here about to generate a truncate instruction that may hurt
1299     // performance because the scalar evolution expression computed earlier
1300     // in WideAddRec.first does not indicate a polynomial induction expression.
1301     // In that case, look at the operands of the use instruction to determine
1302     // if we can still widen the use instead of truncating its operand.
1303     if (widenWithVariantLoadUse(DU)) {
1304       widenWithVariantLoadUseCodegen(DU);
1305       return nullptr;
1306     }
1307 
1308     // This user does not evaluate to a recurrence after widening, so don't
1309     // follow it. Instead insert a Trunc to kill off the original use,
1310     // eventually isolating the original narrow IV so it can be removed.
1311     truncateIVUse(DU, DT, LI);
1312     return nullptr;
1313   }
1314   // Assume block terminators cannot evaluate to a recurrence. We can't
1315   // insert a Trunc after a terminator if there happens to be a critical edge.
1316   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1317          "SCEV is not expected to evaluate a block terminator");
1318 
1319   // Reuse the IV increment that SCEVExpander created as long as it dominates
1320   // NarrowUse.
1321   Instruction *WideUse = nullptr;
1322   if (WideAddRec.first == WideIncExpr &&
1323       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1324     WideUse = WideInc;
1325   else {
1326     WideUse = cloneIVUser(DU, WideAddRec.first);
1327     if (!WideUse)
1328       return nullptr;
1329   }
1330   // Evaluation of WideAddRec ensured that the narrow expression could be
1331   // extended outside the loop without overflow. This suggests that the wide use
1332   // evaluates to the same expression as the extended narrow use, but doesn't
1333   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1334   // where it fails, we simply throw away the newly created wide use.
1335   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1336     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1337                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1338                       << "\n");
1339     DeadInsts.emplace_back(WideUse);
1340     return nullptr;
1341   }
1342 
1343   // If we have reached this point then we are going to replace
1344   // DU.NarrowUse with WideUse, so reattach any DbgValues now.
1345   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1346 
1347   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1348   // Returning WideUse pushes it on the worklist.
1349   return WideUse;
1350 }
1351 
1352 /// Add eligible users of NarrowDef to NarrowIVUsers.
1353 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1354   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1355   bool NonNegativeDef =
1356       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1357                            SE->getConstant(NarrowSCEV->getType(), 0));
1358   for (User *U : NarrowDef->users()) {
1359     Instruction *NarrowUser = cast<Instruction>(U);
1360 
1361     // Handle data flow merges and bizarre phi cycles.
1362     if (!Widened.insert(NarrowUser).second)
1363       continue;
1364 
1365     bool NonNegativeUse = false;
1366     if (!NonNegativeDef) {
1367       // We might have control-dependent range information for this context.
1368       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1369         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1370     }
1371 
1372     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1373                                NonNegativeDef || NonNegativeUse);
1374   }
1375 }
1376 
1377 /// Process a single induction variable. First use the SCEVExpander to create a
1378 /// wide induction variable that evaluates to the same recurrence as the
1379 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1380 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1381 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1382 ///
1383 /// It would be simpler to delete uses as they are processed, but we must avoid
1384 /// invalidating SCEV expressions.
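///
/// Illustrative example: an i32 counter phi whose value is only consumed
/// under a sign extend can be replaced by an i64 phi, after which the
///   %idx = sext i32 %iv to i64
/// in the loop body becomes dead and its users take the wide phi directly.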
1385 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1386   // Is this phi an induction variable?
1387   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1388   if (!AddRec)
1389     return nullptr;
1390 
1391   // Widen the induction variable expression.
1392   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1393                                ? SE->getSignExtendExpr(AddRec, WideType)
1394                                : SE->getZeroExtendExpr(AddRec, WideType);
1395 
1396   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1397          "Expect the new IV expression to preserve its type");
1398 
1399   // Can the IV be extended outside the loop without overflow?
1400   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1401   if (!AddRec || AddRec->getLoop() != L)
1402     return nullptr;
1403 
1404   // An AddRec must have loop-invariant operands. Since this AddRec is
1405   // materialized by a loop header phi, the expression cannot have any post-loop
1406   // operands, so they must dominate the loop header.
1407   assert(
1408       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1409       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1410       "Loop header phi recurrence inputs do not dominate the loop");
1411 
1412   // Iterate over IV uses (including transitive ones) looking for IV increments
1413   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1414   // the increment, calculate control-dependent range information based on
1415   // dominating conditions inside of the loop (e.g. a range check inside of the
1416   // loop). Calculated ranges are stored in the PostIncRangeInfos map.
1417   //
1418   // Control-dependent range information is later used to prove that a narrow
1419   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1420   // this on demand because when pushNarrowIVUsers needs this information some
1421   // of the dominating conditions might be already widened.
1422   if (UsePostIncrementRanges)
1423     calculatePostIncRanges(OrigPhi);
1424 
1425   // The rewriter provides a value for the desired IV expression. This may
1426   // either find an existing phi or materialize a new one. Either way, we
1427   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1428   // of the phi-SCC dominates the loop entry.
1429   Instruction *InsertPt = &L->getHeader()->front();
1430   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1431 
1432   // Remembering the WideIV increment generated by SCEVExpander allows
1433   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1434   // employ a general reuse mechanism because the call above is the only call to
1435   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1436   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1437     WideInc =
1438       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1439     WideIncExpr = SE->getSCEV(WideInc);
1440     // Propagate the debug location associated with the original loop increment
1441     // to the new (widened) increment.
1442     auto *OrigInc =
1443       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1444     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1445   }
1446 
1447   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1448   ++NumWidened;
1449 
1450   // Traverse the def-use chain using a worklist starting at the original IV.
1451   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1452 
1453   Widened.insert(OrigPhi);
1454   pushNarrowIVUsers(OrigPhi, WidePhi);
1455 
1456   while (!NarrowIVUsers.empty()) {
1457     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1458 
1459     // Process a def-use edge. This may replace the use, so don't hold a
1460     // use_iterator across it.
1461     Instruction *WideUse = widenIVUse(DU, Rewriter);
1462 
1463     // Follow all def-use edges from the previous narrow use.
1464     if (WideUse)
1465       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1466 
1467     // widenIVUse may have removed the def-use edge.
1468     if (DU.NarrowDef->use_empty())
1469       DeadInsts.emplace_back(DU.NarrowDef);
1470   }
1471 
1472   // Attach any debug information to the new PHI.
1473   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1474 
1475   return WidePhi;
1476 }
1477 
1478 /// Calculates control-dependent range for the given def at the given context
1479 /// by looking at dominating conditions inside of the loop
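///
/// For example, if NarrowDef is 'add nsw i32 %i, 1' and the use is dominated
/// by a branch on 'icmp slt i32 %i, %n' taken on its true edge, the range of
/// %i implied by that compare (shifted by the increment) is recorded for this
/// particular def/use pair (illustrative sketch of the logic below).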
1480 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1481                                     Instruction *NarrowUser) {
1482   using namespace llvm::PatternMatch;
1483 
1484   Value *NarrowDefLHS;
1485   const APInt *NarrowDefRHS;
1486   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1487                                  m_APInt(NarrowDefRHS))) ||
1488       !NarrowDefRHS->isNonNegative())
1489     return;
1490 
1491   auto UpdateRangeFromCondition = [&] (Value *Condition,
1492                                        bool TrueDest) {
1493     CmpInst::Predicate Pred;
1494     Value *CmpRHS;
1495     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1496                                  m_Value(CmpRHS))))
1497       return;
1498 
1499     CmpInst::Predicate P =
1500             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1501 
1502     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1503     auto CmpConstrainedLHSRange =
1504             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1505     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1506         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1507 
1508     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1509   };
1510 
1511   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1512     if (!HasGuards)
1513       return;
1514 
1515     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1516                                      Ctx->getParent()->rend())) {
1517       Value *C = nullptr;
1518       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1519         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1520     }
1521   };
1522 
1523   UpdateRangeFromGuards(NarrowUser);
1524 
1525   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1526   // If NarrowUserBB is statically unreachable, asking dominator queries may
1527   // yield surprising results (e.g. the block may not have a dom tree node).
1528   if (!DT->isReachableFromEntry(NarrowUserBB))
1529     return;
1530 
1531   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1532        L->contains(DTB->getBlock());
1533        DTB = DTB->getIDom()) {
1534     auto *BB = DTB->getBlock();
1535     auto *TI = BB->getTerminator();
1536     UpdateRangeFromGuards(TI);
1537 
1538     auto *BI = dyn_cast<BranchInst>(TI);
1539     if (!BI || !BI->isConditional())
1540       continue;
1541 
1542     auto *TrueSuccessor = BI->getSuccessor(0);
1543     auto *FalseSuccessor = BI->getSuccessor(1);
1544 
1545     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1546       return BBE.isSingleEdge() &&
1547              DT->dominates(BBE, NarrowUser->getParent());
1548     };
1549 
1550     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1551       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1552 
1553     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1554       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1555   }
1556 }
1557 
1558 /// Calculates PostIncRangeInfos map for the given IV
1559 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1560   SmallPtrSet<Instruction *, 16> Visited;
1561   SmallVector<Instruction *, 6> Worklist;
1562   Worklist.push_back(OrigPhi);
1563   Visited.insert(OrigPhi);
1564 
1565   while (!Worklist.empty()) {
1566     Instruction *NarrowDef = Worklist.pop_back_val();
1567 
1568     for (Use &U : NarrowDef->uses()) {
1569       auto *NarrowUser = cast<Instruction>(U.getUser());
1570 
1571       // Don't go looking outside the current loop.
1572       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1573       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1574         continue;
1575 
1576       if (!Visited.insert(NarrowUser).second)
1577         continue;
1578 
1579       Worklist.push_back(NarrowUser);
1580 
1581       calculatePostIncRange(NarrowDef, NarrowUser);
1582     }
1583   }
1584 }
1585 
1586 //===----------------------------------------------------------------------===//
1587 //  Live IV Reduction - Minimize IVs live across the loop.
1588 //===----------------------------------------------------------------------===//
1589 
1590 //===----------------------------------------------------------------------===//
1591 //  Simplification of IV users based on SCEV evaluation.
1592 //===----------------------------------------------------------------------===//
1593 
1594 namespace {
1595 
1596 class IndVarSimplifyVisitor : public IVVisitor {
1597   ScalarEvolution *SE;
1598   const TargetTransformInfo *TTI;
1599   PHINode *IVPhi;
1600 
1601 public:
1602   WideIVInfo WI;
1603 
1604   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1605                         const TargetTransformInfo *TTI,
1606                         const DominatorTree *DTree)
1607     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1608     DT = DTree;
1609     WI.NarrowIV = IVPhi;
1610   }
1611 
1612   // Implement the interface used by simplifyUsersOfIV.
1613   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1614 };
1615 
1616 } // end anonymous namespace
1617 
1618 /// Iteratively perform simplification on a worklist of IV users. Each
1619 /// successive simplification may push more users which may themselves be
1620 /// candidates for simplification.
1621 ///
1622 /// Sign/Zero extend elimination is interleaved with IV simplification.
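///
/// For example, simplifying the users of an i32 IV may show that it can be
/// widened to i64; the resulting wide phi is pushed back onto the worklist so
/// its users get another round of simplification (illustrative summary of the
/// loop below).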
1623 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1624                                        SCEVExpander &Rewriter,
1625                                        LoopInfo *LI) {
1626   SmallVector<WideIVInfo, 8> WideIVs;
1627 
1628   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1629           Intrinsic::getName(Intrinsic::experimental_guard));
1630   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1631 
1632   SmallVector<PHINode*, 8> LoopPhis;
1633   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1634     LoopPhis.push_back(cast<PHINode>(I));
1635   }
1636   // Each round of simplification iterates through the SimplifyIVUsers worklist
1637   // for all current phis, then determines whether any IVs can be
1638   // widened. Widening adds new phis to LoopPhis, inducing another round of
1639   // simplification on the wide IVs.
1640   bool Changed = false;
1641   while (!LoopPhis.empty()) {
1642     // Evaluate as many IV expressions as possible before widening any IVs. This
1643     // forces SCEV to set no-wrap flags before evaluating sign/zero
1644     // extension. The first time SCEV attempts to normalize sign/zero extension,
1645     // the result becomes final. So for the most predictable results, we delay
1646     // evaluation of sign/zero extension until needed, and avoid running
1647     // other SCEV based analysis prior to simplifyAndExtend.
1648     do {
1649       PHINode *CurrIV = LoopPhis.pop_back_val();
1650 
1651       // Information about sign/zero extensions of CurrIV.
1652       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1653 
1654       Changed |=
1655           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1656 
1657       if (Visitor.WI.WidestNativeType) {
1658         WideIVs.push_back(Visitor.WI);
1659       }
1660     } while(!LoopPhis.empty());
1661 
1662     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1663       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1664       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1665         Changed = true;
1666         LoopPhis.push_back(WidePhi);
1667       }
1668     }
1669   }
1670   return Changed;
1671 }
1672 
1673 //===----------------------------------------------------------------------===//
1674 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1675 //===----------------------------------------------------------------------===//
1676 
1677 /// Given a Value which is hoped to be part of an add recurrence in the given
1678 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1679 /// that this is less general than SCEV's AddRec checking.
1680 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1681   Instruction *IncI = dyn_cast<Instruction>(IncV);
1682   if (!IncI)
1683     return nullptr;
1684 
1685   switch (IncI->getOpcode()) {
1686   case Instruction::Add:
1687   case Instruction::Sub:
1688     break;
1689   case Instruction::GetElementPtr:
1690     // An IV counter must preserve its type.
1691     if (IncI->getNumOperands() == 2)
1692       break;
1693     LLVM_FALLTHROUGH;
1694   default:
1695     return nullptr;
1696   }
1697 
1698   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1699   if (Phi && Phi->getParent() == L->getHeader()) {
1700     if (L->isLoopInvariant(IncI->getOperand(1)))
1701       return Phi;
1702     return nullptr;
1703   }
1704   if (IncI->getOpcode() == Instruction::GetElementPtr)
1705     return nullptr;
1706 
1707   // Allow add/sub to be commuted.
1708   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1709   if (Phi && Phi->getParent() == L->getHeader()) {
1710     if (L->isLoopInvariant(IncI->getOperand(0)))
1711       return Phi;
1712   }
1713   return nullptr;
1714 }
1715 
1716 /// Whether the current loop exit test is based on this value.  Currently this
1717 /// is limited to a direct use in the loop condition.
1718 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
1719   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1720   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1721   // TODO: Allow non-icmp loop test.
1722   if (!ICmp)
1723     return false;
1724 
1725   // TODO: Allow indirect use.
1726   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
1727 }
1728 
1729 /// linearFunctionTestReplace policy. Return true unless we can show that the
1730 /// current exit test is already sufficiently canonical.
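///
/// For instance, an exit test of the form 'icmp ne %iv.next, %n', where %iv is
/// a simple counter phi and %n is loop invariant, is already canonical and is
/// left alone (illustrative example).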
1731 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
1732   assert(L->getLoopLatch() && "Must be in simplified form");
1733 
1734   // Avoid converting a constant or loop invariant test back to a runtime
1735   // test.  This is critical for when SCEV's cached ExitCount is less precise
1736   // than the current IR (such as after we've proven a particular exit is
1737   // actually dead and thus the BE count never reaches our ExitCount.)
1738   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1739   if (L->isLoopInvariant(BI->getCondition()))
1740     return false;
1741 
1742   // Do LFTR to simplify the exit condition to an ICMP.
1743   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1744   if (!Cond)
1745     return true;
1746 
1747   // Do LFTR to simplify the exit ICMP to EQ/NE
1748   ICmpInst::Predicate Pred = Cond->getPredicate();
1749   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1750     return true;
1751 
1752   // Look for a loop invariant RHS
1753   Value *LHS = Cond->getOperand(0);
1754   Value *RHS = Cond->getOperand(1);
1755   if (!L->isLoopInvariant(RHS)) {
1756     if (!L->isLoopInvariant(LHS))
1757       return true;
1758     std::swap(LHS, RHS);
1759   }
1760   // Look for a simple IV counter LHS
1761   PHINode *Phi = dyn_cast<PHINode>(LHS);
1762   if (!Phi)
1763     Phi = getLoopPhiForCounter(LHS, L);
1764 
1765   if (!Phi)
1766     return true;
1767 
1768   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1769   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1770   if (Idx < 0)
1771     return true;
1772 
1773   // Do LFTR if the exit condition's IV is *not* a simple counter.
1774   Value *IncV = Phi->getIncomingValue(Idx);
1775   return Phi != getLoopPhiForCounter(IncV, L);
1776 }
1777 
1778 /// Return true if undefined behavior would provably be executed on the path to
1779 /// OnPathTo if Root produced a poison result.  Note that this doesn't say
1780 /// anything about whether OnPathTo is actually executed or whether Root is
1781 /// actually poison.  This can be used to assess whether a new use of Root can
1782 /// be added at a location which is control equivalent with OnPathTo (such as
1783 /// immediately before it) without introducing UB which didn't previously
1784 /// exist.  Note that a false result conveys no information.
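///
/// For example, if Root (assumed poison) feeds the pointer operand of a store
/// that dominates OnPathTo, executing that path would be UB, so the function
/// returns true (illustrative example).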
1785 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
1786                                           Instruction *OnPathTo,
1787                                           DominatorTree *DT) {
1788   // Basic approach is to assume Root is poison, propagate poison forward
1789   // through all users we can easily track, and then check whether any of those
1790   // users are provably UB and must execute before our exiting block might
1791   // exit.
1792 
1793   // The set of all recursive users we've visited (which are assumed to all be
1794   // poison because of said visit)
1795   SmallSet<const Value *, 16> KnownPoison;
1796   SmallVector<const Instruction*, 16> Worklist;
1797   Worklist.push_back(Root);
1798   while (!Worklist.empty()) {
1799     const Instruction *I = Worklist.pop_back_val();
1800 
1801     // If we know this must trigger UB on a path leading to our target.
1802     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
1803       return true;
1804 
1805     // If we can't analyze propagation through this instruction, just skip it
1806     // and transitive users.  Safe as false is a conservative result.
1807     if (!propagatesFullPoison(I) && I != Root)
1808       continue;
1809 
1810     if (KnownPoison.insert(I).second)
1811       for (const User *User : I->users())
1812         Worklist.push_back(cast<Instruction>(User));
1813   }
1814 
1815   // Might be non-UB, or might have a path we couldn't prove must execute on
1816   // way to exiting bb.
1817   return false;
1818 }
1819 
1820 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1821 /// down to checking that all operands are constant and listing instructions
1822 /// that may hide undef.
1823 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1824                                unsigned Depth) {
1825   if (isa<Constant>(V))
1826     return !isa<UndefValue>(V);
1827 
1828   if (Depth >= 6)
1829     return false;
1830 
1831   // Conservatively handle non-constant non-instructions. For example, Arguments
1832   // may be undef.
1833   Instruction *I = dyn_cast<Instruction>(V);
1834   if (!I)
1835     return false;
1836 
1837   // Load and return values may be undef.
1838   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1839     return false;
1840 
1841   // Optimistically handle other instructions.
1842   for (Value *Op : I->operands()) {
1843     if (!Visited.insert(Op).second)
1844       continue;
1845     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1846       return false;
1847   }
1848   return true;
1849 }
1850 
1851 /// Return true if the given value is concrete. We must prove that undef can
1852 /// never reach it.
1853 ///
1854 /// TODO: If we decide that this is a good approach to checking for undef, we
1855 /// may factor it into a common location.
1856 static bool hasConcreteDef(Value *V) {
1857   SmallPtrSet<Value*, 8> Visited;
1858   Visited.insert(V);
1859   return hasConcreteDefImpl(V, Visited, 0);
1860 }
1861 
1862 /// Return true if this IV has any uses other than the (soon to be rewritten)
1863 /// loop exit test.
1864 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1865   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1866   Value *IncV = Phi->getIncomingValue(LatchIdx);
1867 
1868   for (User *U : Phi->users())
1869     if (U != Cond && U != IncV) return false;
1870 
1871   for (User *U : IncV->users())
1872     if (U != Cond && U != Phi) return false;
1873   return true;
1874 }
1875 
1876 /// Return true if the given phi is a "counter" in L.  A counter is an
1877 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
1878 /// step of 1.  Note that L must have exactly one latch.
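///
/// For example, 'phi i32 [ %start, %preheader ], [ %next, %latch ]' with
/// '%next = add i32 %phi, 1' qualifies; a phi stepping by 2 does not
/// (illustrative example).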
1879 static bool isLoopCounter(PHINode* Phi, Loop *L,
1880                           ScalarEvolution *SE) {
1881   assert(Phi->getParent() == L->getHeader());
1882   assert(L->getLoopLatch());
1883 
1884   if (!SE->isSCEVable(Phi->getType()))
1885     return false;
1886 
1887   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1888   if (!AR || AR->getLoop() != L || !AR->isAffine())
1889     return false;
1890 
1891   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1892   if (!Step || !Step->isOne())
1893     return false;
1894 
1895   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
1896   Value *IncV = Phi->getIncomingValue(LatchIdx);
1897   return (getLoopPhiForCounter(IncV, L) == Phi);
1898 }
1899 
1900 /// Search the loop header for a loop counter (an add rec w/ step of one)
1901 /// suitable for use by LFTR.  If multiple counters are available, select the
1902 /// "best" one based on profitability heuristics.
1903 ///
1904 /// BECount may be an i8* pointer type. The pointer difference is already a
1905 /// valid count without scaling the address stride, so it remains a pointer
1906 /// expression as far as SCEV is concerned.
1907 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
1908                                 const SCEV *BECount,
1909                                 ScalarEvolution *SE, DominatorTree *DT) {
1910   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1911 
1912   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
1913 
1914   // Loop over all of the PHI nodes, looking for a simple counter.
1915   PHINode *BestPhi = nullptr;
1916   const SCEV *BestInit = nullptr;
1917   BasicBlock *LatchBlock = L->getLoopLatch();
1918   assert(LatchBlock && "Must be in simplified form");
1919   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1920 
1921   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1922     PHINode *Phi = cast<PHINode>(I);
1923     if (!isLoopCounter(Phi, L, SE))
1924       continue;
1925 
1926     // Avoid comparing an integer IV against a pointer Limit.
1927     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1928       continue;
1929 
1930     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1931 
1932     // AR may be a pointer type, while BECount is an integer type.
1933     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1934     // AR may not be a narrower type, or we may never exit.
1935     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1936     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
1937       continue;
1938 
1939     // Avoid reusing a potentially undef value to compute other values that may
1940     // have originally had a concrete definition.
1941     if (!hasConcreteDef(Phi)) {
1942       // We explicitly allow unknown phis as long as they are already used by
1943       // the loop exit test.  This is legal since performing LFTR could not
1944       // increase the number of undef users.
1945       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
1946       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
1947           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
1948         continue;
1949     }
1950 
1951     // Avoid introducing undefined behavior due to poison which didn't exist in
1952     // the original program.  (Annoyingly, the rules for poison and undef
1953     // propagation are distinct, so this does NOT cover the undef case above.)
1954     // We have to ensure that we don't introduce UB by introducing a use on an
1955     // iteration where said IV produces poison.  Our strategy here differs for
1956     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
1957     // see code in linearFunctionTestReplace.  For pointers, we restrict
1958     // transforms as there is no good way to reinfer inbounds once lost.
1959     if (!Phi->getType()->isIntegerTy() &&
1960         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
1961       continue;
1962 
1963     const SCEV *Init = AR->getStart();
1964 
1965     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
1966       // Don't force a live loop counter if another IV can be used.
1967       if (AlmostDeadIV(Phi, LatchBlock, Cond))
1968         continue;
1969 
1970       // Prefer to count-from-zero. This is a more "canonical" counter form. It
1971       // also prefers integer to pointer IVs.
1972       if (BestInit->isZero() != Init->isZero()) {
1973         if (BestInit->isZero())
1974           continue;
1975       }
1976       // If two IVs both count from zero or both count from nonzero then the
1977       // narrower is likely a dead phi that has been widened. Use the wider phi
1978       // to allow the other to be eliminated.
1979       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
1980         continue;
1981     }
1982     BestPhi = Phi;
1983     BestInit = Init;
1984   }
1985   return BestPhi;
1986 }
1987 
1988 /// Insert an IR expression which computes the value held by the IV IndVar
1989 /// (which must be a loop counter w/unit stride) after the backedge of loop L
1990 /// is taken ExitCount times.
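///
/// Roughly, for a unit-stride counter starting at Start the limit is
/// Start + ExitCount, plus one more if the post-incremented value is the one
/// being compared (illustrative summary; see the pointer and integer cases
/// below).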
1991 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
1992                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
1993                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
1994   assert(isLoopCounter(IndVar, L, SE));
1995   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
1996   const SCEV *IVInit = AR->getStart();
1997 
1998   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
1999   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2000   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2001   // the existing GEPs whenever possible.
2002   if (IndVar->getType()->isPointerTy() &&
2003       !ExitCount->getType()->isPointerTy()) {
2004     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2005     // signed value. ExitCount on the other hand represents the loop trip count,
2006     // which is an unsigned value. FindLoopCounter only allows induction
2007     // variables that have a positive stride of one. This means we don't
2008     // have to handle the case of negative offsets (yet) and just need to zero
2009     // extend ExitCount.
2010     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2011     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2012     if (UsePostInc)
2013       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2014 
2015     // Expand the code for the iteration count.
2016     assert(SE->isLoopInvariant(IVOffset, L) &&
2017            "Computed iteration count is not loop invariant!");
2018 
2019     // We could handle pointer IVs other than i8*, but we need to compensate for
2020     // gep index scaling.
2021     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2022                              cast<PointerType>(IndVar->getType())
2023                                  ->getElementType())->isOne() &&
2024            "unit stride pointer IV must be i8*");
2025 
2026     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2027     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2028     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2029   } else {
2030     // In any other case, convert both IVInit and ExitCount to integers before
2031     // comparing. This may result in SCEV expansion of pointers, but in practice
2032     // SCEV will fold the pointer arithmetic away as such:
2033     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2034     //
2035     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2036     // for simple memset-style loops.
2037     //
2038     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2039     // were generated on top of case #2, which is not expected.
2040 
2041     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2042     // For unit stride, IVCount = Start + ExitCount with 2's complement
2043     // overflow.
2044 
2045     // For integer IVs, truncate the IV before computing IVInit + BECount,
2046     // unless we know a priori that the limit must be a constant when evaluated
2047     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2048     // of the IV in the loop over a (potentially) expensive expansion of the
2049     // widened exit count add(zext(add)) expression.
2050     if (SE->getTypeSizeInBits(IVInit->getType())
2051         > SE->getTypeSizeInBits(ExitCount->getType())) {
2052       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2053         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2054       else
2055         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2056     }
2057 
2058     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2059 
2060     if (UsePostInc)
2061       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2062 
2063     // Expand the code for the iteration count.
2064     assert(SE->isLoopInvariant(IVLimit, L) &&
2065            "Computed iteration count is not loop invariant!");
2066     // Ensure that we generate the same type as IndVar, or a smaller integer
2067     // type. In the presence of null pointer values, we have an integer type
2068     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2069     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2070       IndVar->getType() : ExitCount->getType();
2071     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2072     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2073   }
2074 }
2075 
2076 /// This method rewrites the exit condition of the loop to be a canonical !=
2077 /// comparison against the incremented loop induction variable.  This pass is
2078 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2079 /// determine a loop-invariant trip count of the loop, which is actually a much
2080 /// broader range than just linear tests.
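///
/// For example (illustrative), an exit test 'icmp slt i32 %iv, %n' can be
/// rewritten as 'icmp ne i32 %iv.next, %limit' once SCEV has computed the
/// loop-invariant trip count used to form %limit.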
2081 bool IndVarSimplify::
2082 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2083                           const SCEV *ExitCount,
2084                           PHINode *IndVar, SCEVExpander &Rewriter) {
2085   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2086   assert(isLoopCounter(IndVar, L, SE));
2087   Instruction * const IncVar =
2088     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2089 
2090   // Initialize CmpIndVar to the preincremented IV.
2091   Value *CmpIndVar = IndVar;
2092   bool UsePostInc = false;
2093 
2094   // If the exiting block is the same as the backedge block, we prefer to
2095   // compare against the post-incremented value, otherwise we must compare
2096   // against the preincremented value.
2097   if (ExitingBB == L->getLoopLatch()) {
2098     // For pointer IVs, we chose not to strip inbounds, which requires us not
2099     // to add a potentially UB-introducing use.  We need to either a) show
2100     // the loop test we're modifying is already in post-inc form, or b) show
2101     // that adding a use must not introduce UB.
2102     bool SafeToPostInc =
2103         IndVar->getType()->isIntegerTy() ||
2104         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2105         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2106     if (SafeToPostInc) {
2107       UsePostInc = true;
2108       CmpIndVar = IncVar;
2109     }
2110   }
2111 
2112   // It may be necessary to drop nowrap flags on the incrementing instruction
2113   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2114   // case the increment might have previously been poison on the last iteration
2115   // only) or if LFTR switches to a different IV that was previously dynamically
2116   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2117   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2118   // check), because the pre-inc addrec flags may be adopted from the original
2119   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2120   // TODO: This handling is inaccurate for one case: If we switch to a
2121   // dynamically dead IV that wraps on the first loop iteration only, which is
2122   // not covered by the post-inc addrec. (If the new IV was not dynamically
2123   // dead, it could not be poison on the first iteration in the first place.)
2124   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2125     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2126     if (BO->hasNoUnsignedWrap())
2127       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2128     if (BO->hasNoSignedWrap())
2129       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2130   }
2131 
2132   Value *ExitCnt = genLoopLimit(
2133       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2134   assert(ExitCnt->getType()->isPointerTy() ==
2135              IndVar->getType()->isPointerTy() &&
2136          "genLoopLimit missed a cast");
2137 
2138   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2139   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2140   ICmpInst::Predicate P;
2141   if (L->contains(BI->getSuccessor(0)))
2142     P = ICmpInst::ICMP_NE;
2143   else
2144     P = ICmpInst::ICMP_EQ;
2145 
2146   IRBuilder<> Builder(BI);
2147 
2148   // The new loop exit condition should reuse the debug location of the
2149   // original loop exit condition.
2150   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2151     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2152 
2153   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2154   // avoid the expensive expansion of the limit expression in the wider type,
2155   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2156   // since we know (from the exit count bitwidth) that we can't self-wrap in
2157   // the narrower type.
2158   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2159   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2160   if (CmpIndVarSize > ExitCntSize) {
2161     assert(!CmpIndVar->getType()->isPointerTy() &&
2162            !ExitCnt->getType()->isPointerTy());
2163 
2164     // Before resorting to actually inserting the truncate, use the same
2165     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2166     // the other side of the comparison instead.  We still evaluate the limit
2167     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2168     // a truncate within it.
2169     bool Extended = false;
2170     const SCEV *IV = SE->getSCEV(CmpIndVar);
2171     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2172                                                   ExitCnt->getType());
2173     const SCEV *ZExtTrunc =
2174       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2175 
2176     if (ZExtTrunc == IV) {
2177       Extended = true;
2178       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2179                                    "wide.trip.count");
2180     } else {
2181       const SCEV *SExtTrunc =
2182         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2183       if (SExtTrunc == IV) {
2184         Extended = true;
2185         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2186                                      "wide.trip.count");
2187       }
2188     }
2189 
2190     if (Extended) {
2191       bool Discard;
2192       L->makeLoopInvariant(ExitCnt, Discard);
2193     } else
2194       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2195                                       "lftr.wideiv");
2196   }
2197   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2198                     << "      LHS:" << *CmpIndVar << '\n'
2199                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2200                     << "\n"
2201                     << "      RHS:\t" << *ExitCnt << "\n"
2202                     << "ExitCount:\t" << *ExitCount << "\n"
2203                     << "  was: " << *BI->getCondition() << "\n");
2204 
2205   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2206   Value *OrigCond = BI->getCondition();
2207   // It's tempting to use replaceAllUsesWith here to fully replace the old
2208   // comparison, but that's not immediately safe, since users of the old
2209   // comparison may not be dominated by the new comparison. Instead, just
2210   // update the branch to use the new comparison; in the common case this
2211   // will make old comparison dead.
2212   BI->setCondition(Cond);
2213   DeadInsts.push_back(OrigCond);
2214 
2215   ++NumLFTR;
2216   return true;
2217 }
2218 
2219 //===----------------------------------------------------------------------===//
2220 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2221 //===----------------------------------------------------------------------===//
2222 
2223 /// If there's a single exit block, sink any loop-invariant values that
2224 /// were defined in the preheader but not used inside the loop into the
2225 /// exit block to reduce register pressure in the loop.
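///
/// For example, a loop-invariant computation emitted in the preheader whose
/// only users sit after the loop can be moved into the exit block, so its
/// value does not occupy a register across the loop body (illustrative).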
2226 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2227   BasicBlock *ExitBlock = L->getExitBlock();
2228   if (!ExitBlock) return false;
2229 
2230   BasicBlock *Preheader = L->getLoopPreheader();
2231   if (!Preheader) return false;
2232 
2233   bool MadeAnyChanges = false;
2234   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2235   BasicBlock::iterator I(Preheader->getTerminator());
2236   while (I != Preheader->begin()) {
2237     --I;
2238     // New instructions were inserted at the end of the preheader.
2239     if (isa<PHINode>(I))
2240       break;
2241 
2242     // Don't move instructions which might have side effects, since the side
2243     // effects need to complete before instructions inside the loop.  Also don't
2244     // move instructions which might read memory, since the loop may modify
2245     // memory. Note that it's okay if the instruction might have undefined
2246     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2247     // block.
2248     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2249       continue;
2250 
2251     // Skip debug info intrinsics.
2252     if (isa<DbgInfoIntrinsic>(I))
2253       continue;
2254 
2255     // Skip eh pad instructions.
2256     if (I->isEHPad())
2257       continue;
2258 
2259     // Don't sink alloca: we never want to sink static alloca's out of the
2260     // entry block, and correctly sinking dynamic alloca's requires
2261     // checks for stacksave/stackrestore intrinsics.
2262     // FIXME: Refactor this check somehow?
2263     if (isa<AllocaInst>(I))
2264       continue;
2265 
2266     // Determine if there is a use in or before the loop (direct or
2267     // otherwise).
2268     bool UsedInLoop = false;
2269     for (Use &U : I->uses()) {
2270       Instruction *User = cast<Instruction>(U.getUser());
2271       BasicBlock *UseBB = User->getParent();
2272       if (PHINode *P = dyn_cast<PHINode>(User)) {
2273         unsigned i =
2274           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2275         UseBB = P->getIncomingBlock(i);
2276       }
2277       if (UseBB == Preheader || L->contains(UseBB)) {
2278         UsedInLoop = true;
2279         break;
2280       }
2281     }
2282 
2283     // If there is, the def must remain in the preheader.
2284     if (UsedInLoop)
2285       continue;
2286 
2287     // Otherwise, sink it to the exit block.
2288     Instruction *ToMove = &*I;
2289     bool Done = false;
2290 
2291     if (I != Preheader->begin()) {
2292       // Skip debug info intrinsics.
2293       do {
2294         --I;
2295       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2296 
2297       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2298         Done = true;
2299     } else {
2300       Done = true;
2301     }
2302 
2303     MadeAnyChanges = true;
2304     ToMove->moveBefore(*ExitBlock, InsertPt);
2305     if (Done) break;
2306     InsertPt = ToMove->getIterator();
2307   }
2308 
2309   return MadeAnyChanges;
2310 }
2311 
2312 /// Return a symbolic upper bound for the backedge taken count of the loop.
2313 /// This is more general than getConstantMaxBackedgeTakenCount as it returns
2314 /// an arbitrary expression as opposed to only constants.
2315 /// TODO: Move into the ScalarEvolution class.
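///
/// For example, for a loop with two analyzable exits whose counts are %n and
/// %m, the result is umin(%n, %m) (illustrative; exact counts are preferred,
/// falling back to constant maximums per exit).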
2316 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
2317                                             DominatorTree &DT, Loop *L) {
2318   SmallVector<BasicBlock*, 16> ExitingBlocks;
2319   L->getExitingBlocks(ExitingBlocks);
2320 
2321   // Form an expression for the maximum exit count possible for this loop. We
2322   // merge the max and exact information to approximate a version of
2323   // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2324   SmallVector<const SCEV*, 4> ExitCounts;
2325   for (BasicBlock *ExitingBB : ExitingBlocks) {
2326     const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
2327     if (isa<SCEVCouldNotCompute>(ExitCount))
2328       ExitCount = SE.getExitCount(L, ExitingBB,
2329                                   ScalarEvolution::ConstantMaximum);
2330     if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2331       assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
2332              "We should only have known counts for exiting blocks that "
2333              "dominate latch!");
2334       ExitCounts.push_back(ExitCount);
2335     }
2336   }
2337   if (ExitCounts.empty())
2338     return SE.getCouldNotCompute();
2339   return SE.getUMinFromMismatchedTypes(ExitCounts);
2340 }
2341 
2342 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2343   SmallVector<BasicBlock*, 16> ExitingBlocks;
2344   L->getExitingBlocks(ExitingBlocks);
2345 
2346   // Remove all exits which aren't both rewritable and analyzable.
2347   auto NewEnd = llvm::remove_if(ExitingBlocks,
2348                                 [&](BasicBlock *ExitingBB) {
2349     // If our exiting block exits multiple loops, we can only rewrite the
2350     // innermost one.  Otherwise, we're changing how many times the innermost
2351     // loop runs before it exits.
2352     if (LI->getLoopFor(ExitingBB) != L)
2353       return true;
2354 
2355     // Can't rewrite non-branch yet.
2356     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2357     if (!BI)
2358       return true;
2359 
2360     // If already constant, nothing to do.
2361     if (isa<Constant>(BI->getCondition()))
2362       return true;
2363 
2364     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2365     if (isa<SCEVCouldNotCompute>(ExitCount))
2366       return true;
2367     return false;
2368    });
2369   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2370 
2371   if (ExitingBlocks.empty())
2372     return false;
2373 
2374   // Get a symbolic upper bound on the loop backedge taken count.
2375   const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2376   if (isa<SCEVCouldNotCompute>(MaxExitCount))
2377     return false;
2378 
2379   // Visit our exit blocks in order of dominance.  We know from the fact that
2380   // all exits (left) are analyzable that there must be a total dominance order
2381   // between them as each must dominate the latch.  The visit order only
2382   // matters for the provably equal case.
2383   llvm::sort(ExitingBlocks,
2384              [&](BasicBlock *A, BasicBlock *B) {
2385                // std::sort sorts in ascending order, so we want the inverse of
2386                // the normal dominance relation.
2387                if (DT->properlyDominates(A, B)) return true;
2388                if (DT->properlyDominates(B, A)) return false;
2389                llvm_unreachable("expected total dominance order!");
2390              });
2391 #ifndef NDEBUG
2392   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2393     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2394   }
2395 #endif
2396 
2397   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2398     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2399     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2400     auto *OldCond = BI->getCondition();
2401     auto *NewCond = ConstantInt::get(OldCond->getType(),
2402                                      IsTaken ? ExitIfTrue : !ExitIfTrue);
2403     BI->setCondition(NewCond);
2404     if (OldCond->use_empty())
2405       DeadInsts.push_back(OldCond);
2406   };
2407 
2408   bool Changed = false;
2409   SmallSet<const SCEV*, 8> DominatingExitCounts;
2410   for (BasicBlock *ExitingBB : ExitingBlocks) {
2411     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2412     assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2413 
2414     // If we know we'd exit on the first iteration, rewrite the exit to
2415     // reflect this.  This does not imply the loop must exit through this
2416     // exit; there may be an earlier one taken on the first iteration.
2417     // TODO: Given we know the backedge can't be taken, we should go ahead
2418     // and break it.  Or at least, kill all the header phis and simplify.
2419     if (ExitCount->isZero()) {
2420       FoldExit(ExitingBB, true);
2421       Changed = true;
2422       continue;
2423     }
2424 
2425     // If we end up with a pointer exit count, bail.  Note that we can end up
2426     // with a pointer exit count for one exiting block, and not for another in
2427     // the same loop.
2428     if (!ExitCount->getType()->isIntegerTy() ||
2429         !MaxExitCount->getType()->isIntegerTy())
2430       continue;
2431 
2432     Type *WiderType =
2433       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2434     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2435     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2436     assert(MaxExitCount->getType() == ExitCount->getType());
2437 
2438     // Can we prove that some other exit must be taken strictly before this
2439     // one?
2440     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2441                                      MaxExitCount, ExitCount)) {
2442       FoldExit(ExitingBB, false);
2443       Changed = true;
2444       continue;
2445     }
2446 
2447     // As we run, keep track of which exit counts we've encountered.  If we
2448     // find a duplicate, we've found an exit which would have exited on the
2449     // exiting iteration, but (from the visit order) strictly follows another
2450     // which does the same and is thus dead.
2451     if (!DominatingExitCounts.insert(ExitCount).second) {
2452       FoldExit(ExitingBB, false);
2453       Changed = true;
2454       continue;
2455     }
2456 
2457     // TODO: There might be another opportunity to leverage SCEV's reasoning
2458     // here.  If we kept track of the min of dominating exits so far, we could
2459     // discharge exits with EC >= MDEC. This is less powerful than the existing
2460     // transform (since later exits aren't considered), but potentially more
2461     // powerful for any case where SCEV can prove a >=u b, but neither a == b
2462     // nor a >u b.  Such a case is not currently known.
2463   }
2464   return Changed;
2465 }
2466 
2467 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2468   SmallVector<BasicBlock*, 16> ExitingBlocks;
2469   L->getExitingBlocks(ExitingBlocks);
2470 
2471   bool Changed = false;
2472 
2473   // Finally, see if we can rewrite our exit conditions into a loop invariant
2474   // form.  If we have a read-only loop, and we can tell that we must exit down
2475   // a path which does not need any of the values computed within the loop, we
2476   // can rewrite the loop to exit on the first iteration.  Note that this
2477   // doesn't either a) tell us the loop exits on the first iteration (unless
2478   // *all* exits are predicateable) or b) tell us *which* exit might be taken.
2479   // This transformation looks a lot like a restricted form of dead loop
2480   // elimination, but restricted to read-only loops and without necessarily
2481   // needing to kill the loop entirely.
2482   if (!LoopPredication)
2483     return Changed;
2484 
2485   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2486     return Changed;
2487 
2488   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2489   // through *explicit* control flow.  We have to eliminate the possibility of
2490   // implicit exits (see below) before we know it's truly exact.
2491   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2492   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2493       !SE->isLoopInvariant(ExactBTC, L) ||
2494       !isSafeToExpand(ExactBTC, *SE))
2495     return Changed;
2496 
2497   // If we end up with a pointer exit count, bail.  It may be unsized.
2498   if (!ExactBTC->getType()->isIntegerTy())
2499     return Changed;
2500 
2501   auto BadExit = [&](BasicBlock *ExitingBB) {
2502     // If our exiting block exits multiple loops, we can only rewrite the
2503     // innermost one.  Otherwise, we're changing how many times the innermost
2504     // loop runs before it exits.
2505     if (LI->getLoopFor(ExitingBB) != L)
2506       return true;
2507 
2508     // Can't rewrite non-branch yet.
2509     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2510     if (!BI)
2511       return true;
2512 
2513     // If already constant, nothing to do.
2514     if (isa<Constant>(BI->getCondition()))
2515       return true;
2516 
2517     // If the exit block has phis, we need to be able to compute the values
2518     // within the loop which contains them.  This assumes trivially lcssa phis
2519     // have already been removed; TODO: generalize
2520     BasicBlock *ExitBlock =
2521         BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2522     if (!ExitBlock->phis().empty())
2523       return true;
2524 
2525     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2526     assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2527     if (!SE->isLoopInvariant(ExitCount, L) ||
2528         !isSafeToExpand(ExitCount, *SE))
2529       return true;
2530 
2531     // If we end up with a pointer exit count, bail.  It may be unsized.
2532     if (!ExitCount->getType()->isIntegerTy())
2533       return true;
2534 
2535     return false;
2536   };
2537 
2538   // If we have any exits which can't be predicated themselves, then we can't
2539   // predicate any exit which isn't guaranteed to execute before it.  Consider
2540   // two exits (a) and (b) which would both exit on the same iteration.  If we
2541   // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2542   // we could convert a loop from exiting through (a) to one exiting through
2543   // (b).  Note that this problem exists only for exits with the same exit
2544   // count, and we could be more aggressive when exit counts are known unequal.
2545   llvm::sort(ExitingBlocks,
2546             [&](BasicBlock *A, BasicBlock *B) {
2547               // std::sort sorts in ascending order, so we want the inverse of
2548               // the normal dominance relation, plus a tie breaker for blocks
2549               // unordered by dominance.
2550               if (DT->properlyDominates(A, B)) return true;
2551               if (DT->properlyDominates(B, A)) return false;
2552               return A->getName() < B->getName();
2553             });
2554   // Check to see if our exit blocks are a total order (i.e. a linear chain of
2555   // exits before the backedge).  If they aren't, reasoning about reachability
2556   // is complicated and we choose not to do so for now.
2557   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2558     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2559       return Changed;
2560 
2561   // Given our sorted total order, we know that exit[j] must be evaluated
2562   // after all exit[i] such that i < j.
2563   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2564     if (BadExit(ExitingBlocks[i])) {
2565       ExitingBlocks.resize(i);
2566       break;
2567     }
2568 
2569   if (ExitingBlocks.empty())
2570     return Changed;
2571 
2572   // We rely on not being able to reach an exiting block on a later iteration
2573   // than its statically computed exit count.  The implementation of
2574   // getExitCount currently has this invariant, but assert it here so that
2575   // breakage is obvious if this ever changes.
2576   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2577         return DT->dominates(ExitingBB, L->getLoopLatch());
2578       }));
2579 
2580   // At this point, ExitingBlocks consists of only those blocks which are
2581   // predicatable.  Given that, we know we have at least one exit we can
2582   // predicate if the loop doesn't have side effects and doesn't have any
2583   // implicit exits (because then our exact BTC isn't actually exact).
2584   // TODO: As structured, this is O(I^2) for loop nests.  We could bail out
2585   // for outer loops, but that seems less than ideal.  MemorySSA can find
2586   // memory writes; is that enough for *all* side effects?
2588   for (BasicBlock *BB : L->blocks())
2589     for (auto &I : *BB)
2590       // TODO: isGuaranteedToTransferExecutionToSuccessor
2591       if (I.mayHaveSideEffects() || I.mayThrow())
2592         return Changed;
2593 
2594   // Finally, do the actual predication for all predicatable blocks.  A couple
2595   // of notes here:
2596   // 1) We don't bother to constant fold dominated exits with identical exit
2597   //    counts; that's simply a form of CSE/equality propagation and we leave
2598   //    it for dedicated passes.
2599   // 2) We insert the comparison at the branch.  Hoisting introduces additional
2600   //    legality constraints and we leave that to dedicated logic.  We want to
2601   //    predicate even if we can't insert a loop invariant expression as
2602   //    peeling or unrolling will likely reduce the cost of the otherwise loop
2603   //    varying check.
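  //
  // Rough shape of the rewrite performed below: for each remaining exit i,
  // the branch condition becomes the loop-invariant comparison
  //   take the exit  <=>  ExitCount(i) == ExactBTC
  // so the exit fires the first time its block is reached exactly when it
  // would have been the terminating exit; when ExitCount(i) is literally
  // ExactBTC, the condition folds to a constant.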
2604   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2605   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2606   Value *ExactBTCV = nullptr; // Lazily generated if needed.
2607   for (BasicBlock *ExitingBB : ExitingBlocks) {
2608     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2609 
2610     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2611     Value *NewCond;
2612     if (ExitCount == ExactBTC) {
2613       NewCond = L->contains(BI->getSuccessor(0)) ?
2614         B.getFalse() : B.getTrue();
2615     } else {
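      // Keep branching back into the loop while this exit's count differs
      // from the exact BTC, and take the exit the first time the two
      // expansions compare equal.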
2616       Value *ECV = Rewriter.expandCodeFor(ExitCount);
2617       if (!ExactBTCV)
2618         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2619       Value *RHS = ExactBTCV;
2620       if (ECV->getType() != RHS->getType()) {
2621         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2622         ECV = B.CreateZExt(ECV, WiderTy);
2623         RHS = B.CreateZExt(RHS, WiderTy);
2624       }
2625       auto Pred = L->contains(BI->getSuccessor(0)) ?
2626         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2627       NewCond = B.CreateICmp(Pred, ECV, RHS);
2628     }
2629     Value *OldCond = BI->getCondition();
2630     BI->setCondition(NewCond);
2631     if (OldCond->use_empty())
2632       DeadInsts.push_back(OldCond);
2633     Changed = true;
2634   }
2635 
2636   return Changed;
2637 }
2638 
2639 //===----------------------------------------------------------------------===//
2640 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2641 //===----------------------------------------------------------------------===//
2642 
2643 bool IndVarSimplify::run(Loop *L) {
2644   // We need (and expect!) the incoming loop to be in LCSSA.
2645   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2646          "LCSSA required to run indvars!");
2647   bool Changed = false;
2648 
2649   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2650   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2651   //    canonicalization can be a pessimization without LSR to "clean up"
2652   //    afterwards.
2653   //  - We depend on having a preheader; in particular,
2654   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2655   //    and we're in trouble if we can't find the induction variable even when
2656   //    we've manually inserted one.
2657   //  - LFTR relies on having a single backedge.
2658   if (!L->isLoopSimplifyForm())
2659     return false;
2660 
2661 #ifndef NDEBUG
2662   // Used below for a consistency check only
2663   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2664 #endif
2665 
2666   // If there are any floating-point recurrences, attempt to
2667   // transform them to use integer recurrences.
2668   Changed |= rewriteNonIntegerIVs(L);
2669 
2670   // Create a rewriter object which we'll use to transform the code with.
2671   SCEVExpander Rewriter(*SE, DL, "indvars");
2672 #ifndef NDEBUG
2673   Rewriter.setDebugType(DEBUG_TYPE);
2674 #endif
2675 
2676   // Eliminate redundant IV users.
2677   //
2678   // Simplification works best when run before other consumers of SCEV. We
2679   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2680   // other expressions involving loop IVs have been evaluated. This helps SCEV
2681   // set no-wrap flags before normalizing sign/zero extension.
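  //
  // For example (hypothetical): proving that an IV increment like 'i + 1'
  // cannot overflow (nsw) is what later allows 'sext(i + 1)' to be folded
  // into a wider recurrence instead of being left as an opaque sign-extend;
  // forming the extension's SCEV before the flag is known can miss that.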
2682   Rewriter.disableCanonicalMode();
2683   Changed |= simplifyAndExtend(L, Rewriter, LI);
2684 
2685   // Check to see if we can compute the final value of any expressions
2686   // that are recurrent in the loop, and substitute the exit values from the
2687   // loop into any instructions outside of the loop that use the final values
2688   // of the current expressions.
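  // For illustration (hypothetical): given
  //   for (i = 0; i != n; ++i) {}
  //   use(i);
  // the use after the loop can be rewritten to use 'n' directly, removing
  // the dependence on the IV's final value.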
2689   if (ReplaceExitValue != NeverRepl) {
2690     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, Rewriter, DT,
2691                                              ReplaceExitValue, DeadInsts)) {
2692       NumReplaced += Rewrites;
2693       Changed = true;
2694     }
2695   }
2696 
2697   // Eliminate redundant IV cycles.
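  // (For example, hypothetically, an i32 counter and an i64 counter that
  // both start at zero and step by one are congruent; one can be rewritten
  // in terms of the other.)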
2698   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2699 
2700   // Try to eliminate loop exits based on analyzable exit counts.
2701   if (optimizeLoopExits(L, Rewriter))  {
2702     Changed = true;
2703     // Given we've changed exit counts, notify SCEV
2704     SE->forgetLoop(L);
2705   }
2706 
2707   // Try to form loop invariant tests for loop exits by changing how many
2708   // iterations of the loop run when that is unobservable.
2709   if (predicateLoopExits(L, Rewriter)) {
2710     Changed = true;
2711     // Given we've changed exit counts, notify SCEV
2712     SE->forgetLoop(L);
2713   }
2714 
2715   // If we have a trip count expression, rewrite the loop's exit condition
2716   // using it.
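  // For illustration (hypothetical, typical case only): with a unit-stride
  // counter and a computable exit count, a loop-varying test such as
  // 'i < n' may be replaced by the canonical '!=' test against a value
  // expanded from the trip count (roughly 'i != smax(n, 0)').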
2717   if (!DisableLFTR) {
2718     SmallVector<BasicBlock*, 16> ExitingBlocks;
2719     L->getExitingBlocks(ExitingBlocks);
2720     for (BasicBlock *ExitingBB : ExitingBlocks) {
2721       // Can't rewrite non-branch yet.
2722       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2723         continue;
2724 
2725       // If our exiting block exits multiple loops, we can only rewrite the
2726       // innermost one.  Otherwise, we're changing how many times the innermost
2727       // loop runs before it exits.
2728       if (LI->getLoopFor(ExitingBB) != L)
2729         continue;
2730 
2731       if (!needsLFTR(L, ExitingBB))
2732         continue;
2733 
2734       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2735       if (isa<SCEVCouldNotCompute>(ExitCount))
2736         continue;
2737 
2738       // This was handled above, but as we form SCEVs, we can sometimes refine
2739       // existing ones; this allows exit counts to be folded to zero which
2740       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
2741       // until stable to handle cases like this better.
2742       if (ExitCount->isZero())
2743         continue;
2744 
2745       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2746       if (!IndVar)
2747         continue;
2748 
2749       // Avoid high cost expansions.  Note: This heuristic is questionable in
2750       // that our definition of "high cost" is not exactly principled.
2751       if (Rewriter.isHighCostExpansion(ExitCount, L))
2752         continue;
2753 
2754       // Check preconditions for proper SCEVExpander operation. SCEV does not
2755       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2756       // any pass that uses the SCEVExpander must do it. This does not work
2757       // well for loop passes because SCEVExpander makes assumptions about
2758       // all loops, while LoopPassManager only forces the current loop to be
2759       // simplified.
2760       //
2761       // FIXME: SCEV expansion has no way to bail out, so the caller must
2762       // explicitly check any assumptions made by SCEV. Brittle.
2763       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2764       if (!AR || AR->getLoop()->getLoopPreheader())
2765         Changed |= linearFunctionTestReplace(L, ExitingBB,
2766                                              ExitCount, IndVar,
2767                                              Rewriter);
2768     }
2769   }
2770   // Clear the rewriter cache, because values that are in the rewriter's cache
2771   // can be deleted in the loop below, causing the AssertingVH in the cache to
2772   // trigger.
2773   Rewriter.clear();
2774 
2775   // Now that we're done iterating through lists, clean up any instructions
2776   // which are now dead.
2777   while (!DeadInsts.empty())
2778     if (Instruction *Inst =
2779             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2780       Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
2781 
2782   // The Rewriter may not be used from this point on.
2783 
2784   // Loop-invariant instructions in the preheader that aren't used in the
2785   // loop may be sunk below the loop to reduce register pressure.
2786   Changed |= sinkUnusedInvariants(L);
2787 
2788   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2789   // trip count and therefore can further simplify exit values in addition to
2790   // rewriteLoopExitValues.
2791   Changed |= rewriteFirstIterationLoopExitValues(L);
2792 
2793   // Clean up dead instructions.
2794   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2795 
2796   // Check a post-condition.
2797   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2798          "Indvars did not preserve LCSSA!");
2799 
2800   // Verify that LFTR, and any other changes, have not interfered with SCEV's
2801   // ability to compute trip count.  We may have *changed* the exit count, but
2802   // only by reducing it.
2803 #ifndef NDEBUG
2804   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2805     SE->forgetLoop(L);
2806     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2807     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2808         SE->getTypeSizeInBits(NewBECount->getType()))
2809       NewBECount = SE->getTruncateOrNoop(NewBECount,
2810                                          BackedgeTakenCount->getType());
2811     else
2812       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2813                                                  NewBECount->getType());
2814     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2815                                  NewBECount) && "indvars must preserve SCEV");
2816   }
2817 #endif
2818 
2819   return Changed;
2820 }
2821 
2822 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2823                                           LoopStandardAnalysisResults &AR,
2824                                           LPMUpdater &) {
2825   Function *F = L.getHeader()->getParent();
2826   const DataLayout &DL = F->getParent()->getDataLayout();
2827 
2828   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2829   if (!IVS.run(&L))
2830     return PreservedAnalyses::all();
2831 
2832   auto PA = getLoopPassPreservedAnalyses();
2833   PA.preserveSet<CFGAnalyses>();
2834   return PA;
2835 }
2836 
2837 namespace {
2838 
2839 struct IndVarSimplifyLegacyPass : public LoopPass {
2840   static char ID; // Pass identification, replacement for typeid
2841 
2842   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2843     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2844   }
2845 
2846   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2847     if (skipLoop(L))
2848       return false;
2849 
2850     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2851     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2852     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2853     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2854     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2855     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2856     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2857     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2858 
2859     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2860     return IVS.run(L);
2861   }
2862 
2863   void getAnalysisUsage(AnalysisUsage &AU) const override {
2864     AU.setPreservesCFG();
2865     getLoopAnalysisUsage(AU);
2866   }
2867 };
2868 
2869 } // end anonymous namespace
2870 
2871 char IndVarSimplifyLegacyPass::ID = 0;
2872 
2873 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2874                       "Induction Variable Simplification", false, false)
2875 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2876 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2877                     "Induction Variable Simplification", false, false)
2878 
2879 Pass *llvm::createIndVarSimplifyPass() {
2880   return new IndVarSimplifyLegacyPass();
2881 }
2882