xref: /llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision 7eb70158e4d03d2c1fe430ce62aa76982a483d1b)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/MemorySSA.h"
42 #include "llvm/Analysis/MemorySSAUpdater.h"
43 #include "llvm/Analysis/ScalarEvolution.h"
44 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
45 #include "llvm/Analysis/TargetLibraryInfo.h"
46 #include "llvm/Analysis/TargetTransformInfo.h"
47 #include "llvm/Analysis/ValueTracking.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/ConstantRange.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Operator.h"
64 #include "llvm/IR/PassManager.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/Use.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/IR/ValueHandle.h"
71 #include "llvm/InitializePasses.h"
72 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/CommandLine.h"
75 #include "llvm/Support/Compiler.h"
76 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/ErrorHandling.h"
78 #include "llvm/Support/MathExtras.h"
79 #include "llvm/Support/raw_ostream.h"
80 #include "llvm/Transforms/Scalar.h"
81 #include "llvm/Transforms/Scalar/LoopPassManager.h"
82 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
83 #include "llvm/Transforms/Utils/Local.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
86 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
87 #include <cassert>
88 #include <cstdint>
89 #include <utility>
90 
91 using namespace llvm;
92 
93 #define DEBUG_TYPE "indvars"
94 
// Pass statistics (reported with -stats); each counter is bumped by the
// corresponding transformation implemented below.
95 STATISTIC(NumWidened     , "Number of indvars widened");
96 STATISTIC(NumReplaced    , "Number of exit values replaced");
97 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
98 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
99 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
100 
101 // Trip count verification can be enabled by default under NDEBUG if we
102 // implement a strong expression equivalence checker in SCEV. Until then, we
103 // use the verify-indvars flag, which may assert in some cases.
104 static cl::opt<bool> VerifyIndvars(
105     "verify-indvars", cl::Hidden,
106     cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
107              "effect in release builds. (Note: this adds additional SCEV "
108              "queries potentially changing the analysis result)"));
109 
// Strategy for rewriting an IV's exit value outside the loop; defaults to
// doing so only when the rewrite is cheap.
110 static cl::opt<ReplaceExitVal> ReplaceExitValue(
111     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
112     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
113     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
114                clEnumValN(OnlyCheapRepl, "cheap",
115                           "only replace exit value when the cost is cheap"),
116                clEnumValN(NoHardUse, "noharduse",
117                           "only replace exit values when loop def likely dead"),
118                clEnumValN(AlwaysRepl, "always",
119                           "always replace exit value whenever possible")));
120 
// Forwarded to createWideIV in simplifyAndExtend below.
121 static cl::opt<bool> UsePostIncrementRanges(
122   "indvars-post-increment-ranges", cl::Hidden,
123   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
124   cl::init(true));
125 
126 static cl::opt<bool>
127 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
128             cl::desc("Disable Linear Function Test Replace optimization"));
129 
130 static cl::opt<bool>
131 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
132                 cl::desc("Predicate conditions in read only loops"));
133 
// Checked in simplifyAndExtend; when false no IV widening is attempted.
134 static cl::opt<bool>
135 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
136                 cl::desc("Allow widening of indvars to eliminate s/zext"));
137 
138 namespace {
139 
140 struct RewritePhi;
141 
// Worker implementing the indvars transformation on a single loop.  Holds
// the analyses the pass consumes plus a list of instructions that become
// trivially dead while rewriting and must be cleaned up afterwards.
142 class IndVarSimplify {
143   LoopInfo *LI;
144   ScalarEvolution *SE;
145   DominatorTree *DT;
146   const DataLayout &DL;
147   TargetLibraryInfo *TLI;
148   const TargetTransformInfo *TTI;
  // Non-null only when a MemorySSA analysis was supplied to the constructor;
  // used to keep MemorySSA up to date as instructions are deleted.
149   std::unique_ptr<MemorySSAUpdater> MSSAU;
150 
  // Instructions made dead during rewriting (e.g. by simplifyUsersOfIV).
151   SmallVector<WeakTrackingVH, 16> DeadInsts;
152 
  // Replace the floating-point recurrence rooted at PH with an equivalent
  // integer recurrence, when provably safe.
153   bool handleFloatingPointIV(Loop *L, PHINode *PH);
  // Convert floating-point header recurrences to integer ones so SCEV can
  // analyze them.
154   bool rewriteNonIntegerIVs(Loop *L);
155 
  // Worklist-driven simplification of IV users, interleaved with widening
  // of IVs to eliminate sign/zero extensions.
156   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
157   /// Try to eliminate loop exits based on analyzeable exit counts
158   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
159   /// Try to form loop invariant tests for loop exits by changing how many
160   /// iterations of the loop run when that is unobservable.
161   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
162 
  // Rewrite exit-block phi operands for exits provably taken only on the
  // first iteration.
163   bool rewriteFirstIterationLoopExitValues(Loop *L);
164 
165   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
166                                  const SCEV *ExitCount,
167                                  PHINode *IndVar, SCEVExpander &Rewriter);
168 
169   bool sinkUnusedInvariants(Loop *L);
170 
171 public:
172   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
173                  const DataLayout &DL, TargetLibraryInfo *TLI,
174                  TargetTransformInfo *TTI, MemorySSA *MSSA)
175       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
176     if (MSSA)
177       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
178   }
179 
  // Run the transformation on L; returns true if the IR changed.
180   bool run(Loop *L);
181 };
182 
183 } // end anonymous namespace
184 
185 //===----------------------------------------------------------------------===//
186 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
187 //===----------------------------------------------------------------------===//
188 
189 /// Convert APF to an integer, if possible.
190 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
191   bool isExact = false;
192   // See if we can convert this to an int64_t
193   uint64_t UIntVal;
194   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
195                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
196       !isExact)
197     return false;
198   IntVal = UIntVal;
199   return true;
200 }
201 
202 /// If the loop has floating induction variable then insert corresponding
203 /// integer induction variable if possible.
204 /// For example,
205 /// for(double i = 0; i < 10000; ++i)
206 ///   bar(i)
207 /// is converted into
208 /// for(int i = 0; i < 10000; ++i)
209 ///   bar((double)i);
210 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  // A header phi has two incoming edges (preheader and back edge);
  // L->contains() is 0 or 1, so XOR with 1 selects the other index.
211   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
212   unsigned BackEdge     = IncomingEdge^1;
213 
214   // Check incoming value.
215   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
216 
217   int64_t InitValue;
218   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
219     return false;
220 
221   // Check IV increment. Reject this PN if increment operation is not
222   // an add or increment value can not be represented by an integer.
223   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
224   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
225 
226   // If this is not an add of the PHI with a constantfp, or if the constant fp
227   // is not an integer, bail out.
228   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
229   int64_t IncValue;
230   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
231       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
232     return false;
233 
234   // Check Incr uses. One user is PN and the other user is an exit condition
235   // used by the conditional terminator.
  // The two early returns below enforce "exactly two users" of Incr.
236   Value::user_iterator IncrUse = Incr->user_begin();
237   Instruction *U1 = cast<Instruction>(*IncrUse++);
238   if (IncrUse == Incr->user_end()) return false;
239   Instruction *U2 = cast<Instruction>(*IncrUse++);
240   if (IncrUse != Incr->user_end()) return false;
241 
242   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
243   // only used by a branch, we can't transform it.
244   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
245   if (!Compare)
246     Compare = dyn_cast<FCmpInst>(U2);
247   if (!Compare || !Compare->hasOneUse() ||
248       !isa<BranchInst>(Compare->user_back()))
249     return false;
250 
251   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
252 
253   // We need to verify that the branch actually controls the iteration count
254   // of the loop.  If not, the new IV can overflow and no one will notice.
255   // The branch block must be in the loop and one of the successors must be out
256   // of the loop.
257   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
258   if (!L->contains(TheBr->getParent()) ||
259       (L->contains(TheBr->getSuccessor(0)) &&
260        L->contains(TheBr->getSuccessor(1))))
261     return false;
262 
263   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
264   // transform it.
265   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
266   int64_t ExitValue;
267   if (ExitValueVal == nullptr ||
268       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
269     return false;
270 
271   // Find new predicate for integer comparison.
  // Ordered and unordered fp predicates map to the same signed integer
  // predicate: the operands are known integral constants, never NaN.
272   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
273   switch (Compare->getPredicate()) {
274   default: return false;  // Unknown comparison.
275   case CmpInst::FCMP_OEQ:
276   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
277   case CmpInst::FCMP_ONE:
278   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
279   case CmpInst::FCMP_OGT:
280   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
281   case CmpInst::FCMP_OGE:
282   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
283   case CmpInst::FCMP_OLT:
284   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
285   case CmpInst::FCMP_OLE:
286   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
287   }
288 
289   // We convert the floating point induction variable to a signed i32 value if
290   // we can.  This is only safe if the comparison will not overflow in a way
291   // that won't be trapped by the integer equivalent operations.  Check for this
292   // now.
293   // TODO: We could use i64 if it is native and the range requires it.
294 
295   // The start/stride/exit values must all fit in signed i32.
296   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
297     return false;
298 
299   // If not actually striding (add x, 0.0), avoid touching the code.
300   if (IncValue == 0)
301     return false;
302 
303   // Positive and negative strides have different safety conditions.
304   if (IncValue > 0) {
305     // If we have a positive stride, we require the init to be less than the
306     // exit value.
307     if (InitValue >= ExitValue)
308       return false;
309 
    // Both endpoints fit in i32, so their difference fits in u32.
310     uint32_t Range = uint32_t(ExitValue-InitValue);
311     // Check for infinite loop, either:
312     // while (i <= Exit) or until (i > Exit)
313     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
314       if (++Range == 0) return false;  // Range overflows.
315     }
316 
317     unsigned Leftover = Range % uint32_t(IncValue);
318 
319     // If this is an equality comparison, we require that the strided value
320     // exactly land on the exit value, otherwise the IV condition will wrap
321     // around and do things the fp IV wouldn't.
322     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
323         Leftover != 0)
324       return false;
325 
326     // If the stride would wrap around the i32 before exiting, we can't
327     // transform the IV.
328     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
329       return false;
330   } else {
331     // If we have a negative stride, we require the init to be greater than the
332     // exit value.
333     if (InitValue <= ExitValue)
334       return false;
335 
336     uint32_t Range = uint32_t(InitValue-ExitValue);
337     // Check for infinite loop, either:
338     // while (i >= Exit) or until (i < Exit)
339     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
340       if (++Range == 0) return false;  // Range overflows.
341     }
342 
343     unsigned Leftover = Range % uint32_t(-IncValue);
344 
345     // If this is an equality comparison, we require that the strided value
346     // exactly land on the exit value, otherwise the IV condition will wrap
347     // around and do things the fp IV wouldn't.
348     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
349         Leftover != 0)
350       return false;
351 
352     // If the stride would wrap around the i32 before exiting, we can't
353     // transform the IV.
354     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
355       return false;
356   }
357 
358   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
359 
360   // Insert new integer induction variable.
361   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
362   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
363                       PN->getIncomingBlock(IncomingEdge));
364 
365   Value *NewAdd =
366     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
367                               Incr->getName()+".int", Incr);
368   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
369 
370   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
371                                       ConstantInt::get(Int32Ty, ExitValue),
372                                       Compare->getName());
373 
374   // In the following deletions, PN may become dead and may be deleted.
375   // Use a WeakTrackingVH to observe whether this happens.
376   WeakTrackingVH WeakPH = PN;
377 
378   // Delete the old floating point exit comparison.  The branch starts using the
379   // new comparison.
380   NewCompare->takeName(Compare);
381   Compare->replaceAllUsesWith(NewCompare);
382   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
383 
384   // Delete the old floating point increment.
385   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
386   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
387 
388   // If the FP induction variable still has uses, this is because something else
389   // in the loop uses its value.  In order to canonicalize the induction
390   // variable, we chose to eliminate the IV and rewrite it in terms of an
391   // int->fp cast.
392   //
393   // We give preference to sitofp over uitofp because it is faster on most
394   // platforms.
395   if (WeakPH) {
396     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
397                                  &*PN->getParent()->getFirstInsertionPt());
398     PN->replaceAllUsesWith(Conv);
399     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
400   }
401   return true;
402 }
403 
404 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
405   // First step.  Check to see if there are any floating-point recurrences.
406   // If there are, change them into integer recurrences, permitting analysis by
407   // the SCEV routines.
408   BasicBlock *Header = L->getHeader();
409 
410   SmallVector<WeakTrackingVH, 8> PHIs;
411   for (PHINode &PN : Header->phis())
412     PHIs.push_back(&PN);
413 
414   bool Changed = false;
415   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
416     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
417       Changed |= handleFloatingPointIV(L, PN);
418 
419   // If the loop previously had floating-point IV, ScalarEvolution
420   // may not have been able to compute a trip count. Now that we've done some
421   // re-writing, the trip count may be computable.
422   if (Changed)
423     SE->forgetLoop(L);
424   return Changed;
425 }
426 
427 //===---------------------------------------------------------------------===//
428 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
429 // they will exit at the first iteration.
430 //===---------------------------------------------------------------------===//
431 
432 /// Check to see if this loop has loop invariant conditions which lead to loop
433 /// exits. If so, we know that if the exit path is taken, it is at the first
434 /// loop iteration. This lets us predict exit values of PHI nodes that live in
435 /// loop header.
436 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
437   // Verify the input to the pass is already in LCSSA form.
438   assert(L->isLCSSAForm(*DT));
439 
  // Each exit block appears once, even if reached from several exiting blocks.
440   SmallVector<BasicBlock *, 8> ExitBlocks;
441   L->getUniqueExitBlocks(ExitBlocks);
442 
443   bool MadeAnyChanges = false;
444   for (auto *ExitBB : ExitBlocks) {
445     // If there are no more PHI nodes in this exit block, then no more
446     // values defined inside the loop are used on this path.
447     for (PHINode &PN : ExitBB->phis()) {
448       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
449            IncomingValIdx != E; ++IncomingValIdx) {
450         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
451 
452         // Can we prove that the exit must run on the first iteration if it
453         // runs at all?  (i.e. early exits are fine for our purposes, but
454         // traces which lead to this exit being taken on the 2nd iteration
455         // aren't.)  Note that this is about whether the exit branch is
456         // executed, not about whether it is taken.
457         if (!L->getLoopLatch() ||
458             !DT->dominates(IncomingBB, L->getLoopLatch()))
459           continue;
460 
461         // Get condition that leads to the exit path.
462         auto *TermInst = IncomingBB->getTerminator();
463 
464         Value *Cond = nullptr;
465         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
466           // Must be a conditional branch, otherwise the block
467           // should not be in the loop.
468           Cond = BI->getCondition();
469         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
470           Cond = SI->getCondition();
471         else
472           continue;
473 
  // A loop-variant condition could be false on iteration one and true
  // later, so only invariant conditions pin the exit to iteration one.
474         if (!L->isLoopInvariant(Cond))
475           continue;
476 
477         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
478 
479         // Only deal with PHIs in the loop header.
480         if (!ExitVal || ExitVal->getParent() != L->getHeader())
481           continue;
482 
483         // If ExitVal is a PHI on the loop header, then we know its
484         // value along this exit because the exit can only be taken
485         // on the first iteration.
486         auto *LoopPreheader = L->getLoopPreheader();
487         assert(LoopPreheader && "Invalid loop");
488         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
489         if (PreheaderIdx != -1) {
490           assert(ExitVal->getParent() == L->getHeader() &&
491                  "ExitVal must be in loop header");
492           MadeAnyChanges = true;
  // On iteration one the header phi still carries its preheader value.
493           PN.setIncomingValue(IncomingValIdx,
494                               ExitVal->getIncomingValue(PreheaderIdx));
495         }
496       }
497     }
498   }
499   return MadeAnyChanges;
500 }
501 
502 //===----------------------------------------------------------------------===//
503 //  IV Widening - Extend the width of an IV to cover its widest uses.
504 //===----------------------------------------------------------------------===//
505 
506 /// Update information about the induction variable that is extended by this
507 /// sign or zero extend operation. This is used to determine the final width of
508 /// the IV before actually widening it.
509 static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
510                         ScalarEvolution *SE,
511                         const TargetTransformInfo *TTI) {
512   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
513   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
514     return;
515 
516   Type *Ty = Cast->getType();
517   uint64_t Width = SE->getTypeSizeInBits(Ty);
518   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
519     return;
520 
521   // Check that `Cast` actually extends the induction variable (we rely on this
522   // later).  This takes care of cases where `Cast` is extending a truncation of
523   // the narrow induction variable, and thus can end up being narrower than the
524   // "narrow" induction variable.
525   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
526   if (NarrowIVWidth >= Width)
527     return;
528 
529   // Cast is either an sext or zext up to this point.
530   // We should not widen an indvar if arithmetics on the wider indvar are more
531   // expensive than those on the narrower indvar. We check only the cost of ADD
532   // because at least an ADD is required to increment the induction variable. We
533   // could compute more comprehensively the cost of all instructions on the
534   // induction variable when necessary.
535   if (TTI &&
536       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
537           TTI->getArithmeticInstrCost(Instruction::Add,
538                                       Cast->getOperand(0)->getType())) {
539     return;
540   }
541 
542   if (!WI.WidestNativeType) {
543     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
544     WI.IsSigned = IsSigned;
545     return;
546   }
547 
548   // We extend the IV to satisfy the sign of its first user, arbitrarily.
549   if (WI.IsSigned != IsSigned)
550     return;
551 
552   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
553     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
554 }
555 
556 //===----------------------------------------------------------------------===//
557 //  Live IV Reduction - Minimize IVs live across the loop.
558 //===----------------------------------------------------------------------===//
559 
560 //===----------------------------------------------------------------------===//
561 //  Simplification of IV users based on SCEV evaluation.
562 //===----------------------------------------------------------------------===//
563 
564 namespace {
565 
  // IVVisitor callback handed to simplifyUsersOfIV: as the IV's users are
  // simplified, it records in WI the widest sign/zero extension of the IV
  // encountered (via visitIVCast above), driving later widening decisions.
566 class IndVarSimplifyVisitor : public IVVisitor {
567   ScalarEvolution *SE;
568   const TargetTransformInfo *TTI;
569   PHINode *IVPhi;
570 
571 public:
  // Accumulated widening info for IVPhi; read by simplifyAndExtend.
572   WideIVInfo WI;
573 
574   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
575                         const TargetTransformInfo *TTI,
576                         const DominatorTree *DTree)
577     : SE(SCEV), TTI(TTI), IVPhi(IV) {
    // DT is a member inherited from the IVVisitor base class.
578     DT = DTree;
579     WI.NarrowIV = IVPhi;
580   }
581 
582   // Implement the interface used by simplifyUsersOfIV.
583   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
584 };
585 
586 } // end anonymous namespace
587 
588 /// Iteratively perform simplification on a worklist of IV users. Each
589 /// successive simplification may push more users which may themselves be
590 /// candidates for simplification.
591 ///
592 /// Sign/Zero extend elimination is interleaved with IV simplification.
593 bool IndVarSimplify::simplifyAndExtend(Loop *L,
594                                        SCEVExpander &Rewriter,
595                                        LoopInfo *LI) {
596   SmallVector<WideIVInfo, 8> WideIVs;
597 
  // Whether the function uses guard intrinsics at all; forwarded to
  // createWideIV below.
598   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
599           Intrinsic::getName(Intrinsic::experimental_guard));
600   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
601 
  // Seed the worklist with every phi in the loop header.
602   SmallVector<PHINode*, 8> LoopPhis;
603   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
604     LoopPhis.push_back(cast<PHINode>(I));
605   }
606   // Each round of simplification iterates through the SimplifyIVUsers worklist
607   // for all current phis, then determines whether any IVs can be
608   // widened. Widening adds new phis to LoopPhis, inducing another round of
609   // simplification on the wide IVs.
610   bool Changed = false;
611   while (!LoopPhis.empty()) {
612     // Evaluate as many IV expressions as possible before widening any IVs. This
613     // forces SCEV to set no-wrap flags before evaluating sign/zero
614     // extension. The first time SCEV attempts to normalize sign/zero extension,
615     // the result becomes final. So for the most predictable results, we delay
616     // evaluation of sign/zero extend evaluation until needed, and avoid running
617     // other SCEV based analysis prior to simplifyAndExtend.
618     do {
619       PHINode *CurrIV = LoopPhis.pop_back_val();
620 
621       // Information about sign/zero extensions of CurrIV.
622       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
623 
624       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
625                                    &Visitor);
626 
  // A non-null WidestNativeType means at least one profitable extension
  // of CurrIV was seen; queue it for widening.
627       if (Visitor.WI.WidestNativeType) {
628         WideIVs.push_back(Visitor.WI);
629       }
630     } while(!LoopPhis.empty());
631 
632     // Continue if we disallowed widening.
633     if (!AllowIVWidening)
634       continue;
635 
636     for (; !WideIVs.empty(); WideIVs.pop_back()) {
637       unsigned ElimExt;
638       unsigned Widened;
639       if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
640                                           DT, DeadInsts, ElimExt, Widened,
641                                           HasGuards, UsePostIncrementRanges)) {
642         NumElimExt += ElimExt;
643         NumWidened += Widened;
644         Changed = true;
  // The widened phi itself becomes a simplification candidate.
645         LoopPhis.push_back(WidePhi);
646       }
647     }
648   }
649   return Changed;
650 }
651 
652 //===----------------------------------------------------------------------===//
653 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
654 //===----------------------------------------------------------------------===//
655 
656 /// Given an Value which is hoped to be part of an add recurance in the given
657 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
658 /// that this is less general than SCEVs AddRec checking.
659 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
660   Instruction *IncI = dyn_cast<Instruction>(IncV);
661   if (!IncI)
662     return nullptr;
663 
664   switch (IncI->getOpcode()) {
665   case Instruction::Add:
666   case Instruction::Sub:
667     break;
668   case Instruction::GetElementPtr:
669     // An IV counter must preserve its type.
670     if (IncI->getNumOperands() == 2)
671       break;
672     LLVM_FALLTHROUGH;
673   default:
674     return nullptr;
675   }
676 
677   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
678   if (Phi && Phi->getParent() == L->getHeader()) {
679     if (L->isLoopInvariant(IncI->getOperand(1)))
680       return Phi;
681     return nullptr;
682   }
683   if (IncI->getOpcode() == Instruction::GetElementPtr)
684     return nullptr;
685 
686   // Allow add/sub to be commuted.
687   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
688   if (Phi && Phi->getParent() == L->getHeader()) {
689     if (L->isLoopInvariant(IncI->getOperand(0)))
690       return Phi;
691   }
692   return nullptr;
693 }
694 
695 /// Whether the current loop exit test is based on this value.  Currently this
696 /// is limited to a direct use in the loop condition.
697 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
698   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
699   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
700   // TODO: Allow non-icmp loop test.
701   if (!ICmp)
702     return false;
703 
704   // TODO: Allow indirect use.
705   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
706 }
707 
708 /// linearFunctionTestReplace policy. Return true unless we can show that the
709 /// current exit test is already sufficiently canonical.
710 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
711   assert(L->getLoopLatch() && "Must be in simplified form");
712 
713   // Avoid converting a constant or loop invariant test back to a runtime
714   // test.  This is critical for when SCEV's cached ExitCount is less precise
715   // than the current IR (such as after we've proven a particular exit is
716   // actually dead and thus the BE count never reaches our ExitCount.)
717   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
718   if (L->isLoopInvariant(BI->getCondition()))
719     return false;
720 
721   // Do LFTR to simplify the exit condition to an ICMP.
722   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
723   if (!Cond)
724     return true;
725 
726   // Do LFTR to simplify the exit ICMP to EQ/NE
727   ICmpInst::Predicate Pred = Cond->getPredicate();
728   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
729     return true;
730 
731   // Look for a loop invariant RHS
732   Value *LHS = Cond->getOperand(0);
733   Value *RHS = Cond->getOperand(1);
734   if (!L->isLoopInvariant(RHS)) {
735     if (!L->isLoopInvariant(LHS))
736       return true;
737     std::swap(LHS, RHS);
738   }
739   // Look for a simple IV counter LHS
740   PHINode *Phi = dyn_cast<PHINode>(LHS);
741   if (!Phi)
742     Phi = getLoopPhiForCounter(LHS, L);
743 
744   if (!Phi)
745     return true;
746 
747   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
748   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
749   if (Idx < 0)
750     return true;
751 
752   // Do LFTR if the exit condition's IV is *not* a simple counter.
753   Value *IncV = Phi->getIncomingValue(Idx);
754   return Phi != getLoopPhiForCounter(IncV, L);
755 }
756 
/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.  Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison.  This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist.  Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users.  Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    // Only queue the users the first time we see an instruction; revisiting
    // adds no information and could cycle on circular use chains.
    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // way to exiting bb.
  return false;
}
798 
799 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
800 /// down to checking that all operands are constant and listing instructions
801 /// that may hide undef.
802 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
803                                unsigned Depth) {
804   if (isa<Constant>(V))
805     return !isa<UndefValue>(V);
806 
807   if (Depth >= 6)
808     return false;
809 
810   // Conservatively handle non-constant non-instructions. For example, Arguments
811   // may be undef.
812   Instruction *I = dyn_cast<Instruction>(V);
813   if (!I)
814     return false;
815 
816   // Load and return values may be undef.
817   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
818     return false;
819 
820   // Optimistically handle other instructions.
821   for (Value *Op : I->operands()) {
822     if (!Visited.insert(Op).second)
823       continue;
824     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
825       return false;
826   }
827   return true;
828 }
829 
830 /// Return true if the given value is concrete. We must prove that undef can
831 /// never reach it.
832 ///
833 /// TODO: If we decide that this is a good approach to checking for undef, we
834 /// may factor it into a common location.
835 static bool hasConcreteDef(Value *V) {
836   SmallPtrSet<Value*, 8> Visited;
837   Visited.insert(V);
838   return hasConcreteDefImpl(V, Visited, 0);
839 }
840 
841 /// Return true if this IV has any uses other than the (soon to be rewritten)
842 /// loop exit test.
843 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
844   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
845   Value *IncV = Phi->getIncomingValue(LatchIdx);
846 
847   for (User *U : Phi->users())
848     if (U != Cond && U != IncV) return false;
849 
850   for (User *U : IncV->users())
851     if (U != Cond && U != Phi) return false;
852   return true;
853 }
854 
/// Return true if the given phi is a "counter" in L.  A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1.  Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  // SCEV must be able to analyze the phi's type at all.
  if (!SE->isSCEVable(Phi->getType()))
    return false;

  // The phi must be an affine add recurrence of this very loop.
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  // ... with a constant step of exactly one.
  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  // The value flowing in from the latch must be recognized as the increment
  // of this same phi.
  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi);
}
878 
/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitable heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    // Remember the start value for the count-from-zero preference below.
    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    // This phi is (so far) the best candidate.
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
966 
/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    // With a post-inc test the limit is one past the final IV value.
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    // We could handle pointer IVs other than i8*, but we need to compensate for
    // gep index scaling.
    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
                             cast<PointerType>(IndVar->getType())
                                 ->getElementType())->isOne() &&
           "unit stride pointer IV must be i8*");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know apriori that the limit must be a constant when evaluated
    // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    // With a post-inc test the limit is one past the final IV value.
    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
      IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}
1054 
/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  // Materialize the value the IV holds when the loop exits.
  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within in.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  // Build the canonical exit test and swap it into the branch.
  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
1197 
1198 //===----------------------------------------------------------------------===//
1199 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1200 //===----------------------------------------------------------------------===//
1201 
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  // Walk the preheader backwards from its terminator, so later definitions
  // are considered (and sunk) before the earlier ones they may depend on.
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // A phi use counts as occurring in the block it flows in from, not in
      // the phi's own block.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    // Step the iterator back past ToMove (and any debug intrinsics) before
    // ToMove is unlinked from the preheader; if we reach the block start,
    // finish after this move.
    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    // Insert subsequent (earlier-in-preheader) instructions before this one,
    // preserving their original relative order in the exit block.
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}
1290 
/// Outcome of analyzeCond() for a single loop exit condition.
enum ExitCondAnalysisResult {
  CanBeRemoved,   // Condition proved trivially true; exit test can be folded.
  CannotOptimize  // Nothing proven; leave the exit test unchanged.
};
1295 
/// If the condition of BI is trivially true during at least first MaxIter
/// iterations, return CanBeRemoved.
/// Otherwise, return CannotOptimize.
static ExitCondAnalysisResult analyzeCond(const Loop *L, BranchInst *BI,
                                          ScalarEvolution *SE,
                                          bool ProvingLoopExit,
                                          const SCEV *MaxIter) {
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  using namespace PatternMatch;
  BasicBlock *TrueSucc, *FalseSucc;
  // Only conditional branches over an icmp are handled.
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return CannotOptimize;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving loop exit, invert the predicate.
  if (ProvingLoopExit)
    Pred = CmpInst::getInversePredicate(Pred);

  // Evaluate both operands in the scope of the loop.
  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI))
    return CanBeRemoved;

  if (ProvingLoopExit)
    return CannotOptimize;

  ICmpInst::Predicate InvariantPred;
  const SCEV *InvariantLHS, *InvariantRHS;

  // Check if there is a loop-invariant predicate equivalent to our check.
  if (!SE->isLoopInvariantExitCondDuringFirstIterations(
           Pred, LHSS, RHSS, L, BI, MaxIter, InvariantPred, InvariantLHS,
           InvariantRHS))
    return CannotOptimize;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(InvariantPred, InvariantLHS, InvariantRHS, BI))
    return CanBeRemoved;
  return CannotOptimize;
}
1345 
1346 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1347   SmallVector<BasicBlock*, 16> ExitingBlocks;
1348   L->getExitingBlocks(ExitingBlocks);
1349 
1350   // Remove all exits which aren't both rewriteable and execute on every
1351   // iteration.
1352   auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1353     // If our exitting block exits multiple loops, we can only rewrite the
1354     // innermost one.  Otherwise, we're changing how many times the innermost
1355     // loop runs before it exits.
1356     if (LI->getLoopFor(ExitingBB) != L)
1357       return true;
1358 
1359     // Can't rewrite non-branch yet.
1360     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1361     if (!BI)
1362       return true;
1363 
1364     // If already constant, nothing to do.
1365     if (isa<Constant>(BI->getCondition()))
1366       return true;
1367 
1368     // Likewise, the loop latch must be dominated by the exiting BB.
1369     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1370       return true;
1371 
1372     return false;
1373   });
1374   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
1375 
1376   if (ExitingBlocks.empty())
1377     return false;
1378 
1379   // Get a symbolic upper bound on the loop backedge taken count.
1380   const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
1381   if (isa<SCEVCouldNotCompute>(MaxExitCount))
1382     return false;
1383 
1384   // Visit our exit blocks in order of dominance. We know from the fact that
1385   // all exits must dominate the latch, so there is a total dominance order
1386   // between them.
1387   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1388                // std::sort sorts in ascending order, so we want the inverse of
1389                // the normal dominance relation.
1390                if (A == B) return false;
1391                if (DT->properlyDominates(A, B))
1392                  return true;
1393                else {
1394                  assert(DT->properlyDominates(B, A) &&
1395                         "expected total dominance order!");
1396                  return false;
1397                }
1398   });
1399 #ifdef ASSERT
1400   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1401     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1402   }
1403 #endif
1404 
1405   auto ReplaceExitCond = [&](BranchInst *BI, Value *NewCond) {
1406     auto *OldCond = BI->getCondition();
1407     BI->setCondition(NewCond);
1408     if (OldCond->use_empty())
1409       DeadInsts.emplace_back(OldCond);
1410   };
1411 
1412   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
1413     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1414     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1415     auto *OldCond = BI->getCondition();
1416     auto *NewCond = ConstantInt::get(OldCond->getType(),
1417                                      IsTaken ? ExitIfTrue : !ExitIfTrue);
1418     ReplaceExitCond(BI, NewCond);
1419   };
1420 
1421   bool Changed = false;
1422   bool SkipLastIter = false;
1423   SmallSet<const SCEV*, 8> DominatingExitCounts;
1424   for (BasicBlock *ExitingBB : ExitingBlocks) {
1425     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1426     if (isa<SCEVCouldNotCompute>(ExitCount)) {
1427       // Okay, we do not know the exit count here. Can we at least prove that it
1428       // will remain the same within iteration space?
1429       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1430       auto OptimizeCond = [this, L, BI, ExitingBB, MaxExitCount, &FoldExit](
1431           bool Inverted, bool SkipLastIter) {
1432         const SCEV *MaxIter = MaxExitCount;
1433         if (SkipLastIter) {
1434           const SCEV *One = SE->getOne(MaxIter->getType());
1435           MaxIter = SE->getMinusSCEV(MaxIter, One);
1436         }
1437         switch (analyzeCond(L, BI, SE, Inverted, MaxIter)) {
1438         case CanBeRemoved:
1439           FoldExit(ExitingBB, Inverted);
1440           return true;
1441         case CannotOptimize:
1442           return false;
1443         }
1444         llvm_unreachable("Unknown case!");
1445       };
1446 
1447       // TODO: We might have proved that we can skip the last iteration for
1448       // this check. In this case, we only want to check the condition on the
1449       // pre-last iteration (MaxExitCount - 1). However, there is a nasty
1450       // corner case:
1451       //
1452       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1453       //
1454       // If we could not prove that len != 0, then we also could not prove that
1455       // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1456       // OptimizeCond will likely not prove anything for it, even if it could
1457       // prove the same fact for len.
1458       //
1459       // As a temporary solution, we query both last and pre-last iterations in
1460       // hope that we will be able to prove triviality for at least one of
1461       // them. We can stop querying MaxExitCount for this case once SCEV
1462       // understands that (MaxExitCount - 1) will not overflow here.
1463       if (OptimizeCond(false, false) || OptimizeCond(true, false))
1464         Changed = true;
1465       else if (SkipLastIter)
1466         if (OptimizeCond(false, true) || OptimizeCond(true, true))
1467           Changed = true;
1468       continue;
1469     }
1470 
1471     if (MaxExitCount == ExitCount)
1472       // If the loop has more than 1 iteration, all further checks will be
1473       // executed 1 iteration less.
1474       SkipLastIter = true;
1475 
1476     // If we know we'd exit on the first iteration, rewrite the exit to
1477     // reflect this.  This does not imply the loop must exit through this
1478     // exit; there may be an earlier one taken on the first iteration.
1479     // TODO: Given we know the backedge can't be taken, we should go ahead
1480     // and break it.  Or at least, kill all the header phis and simplify.
1481     if (ExitCount->isZero()) {
1482       FoldExit(ExitingBB, true);
1483       Changed = true;
1484       continue;
1485     }
1486 
1487     // If we end up with a pointer exit count, bail.  Note that we can end up
1488     // with a pointer exit count for one exiting block, and not for another in
1489     // the same loop.
1490     if (!ExitCount->getType()->isIntegerTy() ||
1491         !MaxExitCount->getType()->isIntegerTy())
1492       continue;
1493 
1494     Type *WiderType =
1495       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
1496     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
1497     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
1498     assert(MaxExitCount->getType() == ExitCount->getType());
1499 
1500     // Can we prove that some other exit must be taken strictly before this
1501     // one?
1502     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
1503                                      MaxExitCount, ExitCount)) {
1504       FoldExit(ExitingBB, false);
1505       Changed = true;
1506       continue;
1507     }
1508 
1509     // As we run, keep track of which exit counts we've encountered.  If we
1510     // find a duplicate, we've found an exit which would have exited on the
1511     // exiting iteration, but (from the visit order) strictly follows another
1512     // which does the same and is thus dead.
1513     if (!DominatingExitCounts.insert(ExitCount).second) {
1514       FoldExit(ExitingBB, false);
1515       Changed = true;
1516       continue;
1517     }
1518 
1519     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1520     // here.  If we kept track of the min of dominanting exits so far, we could
1521     // discharge exits with EC >= MDEC. This is less powerful than the existing
1522     // transform (since later exits aren't considered), but potentially more
1523     // powerful for any case where SCEV can prove a >=u b, but neither a == b
1524     // or a >u b.  Such a case is not currently known.
1525   }
1526   return Changed;
1527 }
1528 
/// Try to turn loop-varying exit conditions into loop-invariant tests by
/// comparing each exit's exit count against the loop's exact backedge-taken
/// count.  This only fires on loops proven free of side effects (and throws),
/// so changing *which* exit is taken on the exiting iteration is unobservable.
/// Returns true if any exit branch condition was rewritten.
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration.  Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) ||
      !SE->isLoopInvariant(ExactBTC, L) ||
      !isSafeToExpand(ExactBTC, *SE))
    return false;

  // If we end up with a pointer exit count, bail.  It may be unsized.
  if (!ExactBTC->getType()->isIntegerTy())
    return false;

  // Returns true when this particular exit cannot be rewritten; such an exit
  // also disqualifies every exit it might precede (see the sort below).
  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
    BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
    if (!SE->isLoopInvariant(ExitCount, L) ||
        !isSafeToExpand(ExitCount, *SE))
      return true;

    // If we end up with a pointer exit count, bail.  It may be unsized.
    if (!ExitCount->getType()->isIntegerTy())
      return true;

    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).  Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known inequal.
  llvm::sort(ExitingBlocks,
            [&](BasicBlock *A, BasicBlock *B) {
              // std::sort sorts in ascending order, so we want the inverse of
              // the normal dominance relation, plus a tie breaker for blocks
              // unordered by dominance.
              // NOTE(review): the name tie-break is only obviously a strict
              // weak ordering when all blocks are pairwise dominance-related;
              // the total-order check below rejects other cases, but verify
              // the comparator itself is safe to run on them.
              if (DT->properlyDominates(A, B)) return true;
              if (DT->properlyDominates(B, A)) return false;
              return A->getName() < B->getName();
            });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO:isGuaranteedToTransfer
      if (I.mayHaveSideEffects() || I.mayThrow())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      // This exit *is* the exact BTC: the branch always exits the loop on the
      // last iteration, so fold the condition to a constant.
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      // Otherwise, exit here exactly when this exit's count equals the exact
      // BTC; widen both sides (zext) to a common type before comparing.
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    // Queue the replaced condition for cleanup if nothing else uses it.
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}
1699 
1700 //===----------------------------------------------------------------------===//
1701 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1702 //===----------------------------------------------------------------------===//
1703 
/// Run all induction-variable simplification subpasses over loop \p L in a
/// carefully chosen order: float->int IV rewriting, IV-user simplification,
/// exit-value rewriting, congruent-IV elimination, exit optimization and
/// predication, LFTR, dead-code cleanup, and invariant sinking.  Returns true
/// if any of them changed the IR.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only
  // Note: Since the result returned by ScalarEvolution may depend on the order
  // in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change following SCEV queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter))  {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();
    BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeaderBR))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR, and any other change have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    // Compare the counts at a common width: truncate the wider one (a no-op
    // extension would change the value being compared).
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}
1894 
1895 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
1896                                           LoopStandardAnalysisResults &AR,
1897                                           LPMUpdater &) {
1898   Function *F = L.getHeader()->getParent();
1899   const DataLayout &DL = F->getParent()->getDataLayout();
1900 
1901   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA);
1902   if (!IVS.run(&L))
1903     return PreservedAnalyses::all();
1904 
1905   auto PA = getLoopPassPreservedAnalyses();
1906   PA.preserveSet<CFGAnalyses>();
1907   if (AR.MSSA)
1908     PA.preserve<MemorySSAAnalysis>();
1909   return PA;
1910 }
1911 
1912 namespace {
1913 
1914 struct IndVarSimplifyLegacyPass : public LoopPass {
1915   static char ID; // Pass identification, replacement for typeid
1916 
1917   IndVarSimplifyLegacyPass() : LoopPass(ID) {
1918     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
1919   }
1920 
1921   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1922     if (skipLoop(L))
1923       return false;
1924 
1925     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1926     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1927     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1928     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
1929     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
1930     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
1931     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
1932     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1933     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
1934     MemorySSA *MSSA = nullptr;
1935     if (MSSAAnalysis)
1936       MSSA = &MSSAAnalysis->getMSSA();
1937 
1938     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA);
1939     return IVS.run(L);
1940   }
1941 
1942   void getAnalysisUsage(AnalysisUsage &AU) const override {
1943     AU.setPreservesCFG();
1944     AU.addPreserved<MemorySSAWrapperPass>();
1945     getLoopAnalysisUsage(AU);
1946   }
1947 };
1948 
1949 } // end anonymous namespace
1950 
// Out-of-line definition of the pass's identity anchor; its address (not its
// value) identifies the pass.
char IndVarSimplifyLegacyPass::ID = 0;

// Register the legacy pass under the "indvars" command-line name, declaring
// its dependency on the loop-pass infrastructure.
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)
1958 
1959 Pass *llvm::createIndVarSimplifyPass() {
1960   return new IndVarSimplifyLegacyPass();
1961 }
1962