xref: /llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision 7b014a0732249ebc08b0364f65a9dc7b7d9c762b)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/iterator_range.h"
35 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/Analysis/MemorySSA.h"
38 #include "llvm/Analysis/MemorySSAUpdater.h"
39 #include "llvm/Analysis/ScalarEvolution.h"
40 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
41 #include "llvm/Analysis/TargetLibraryInfo.h"
42 #include "llvm/Analysis/TargetTransformInfo.h"
43 #include "llvm/Analysis/ValueTracking.h"
44 #include "llvm/IR/BasicBlock.h"
45 #include "llvm/IR/Constant.h"
46 #include "llvm/IR/ConstantRange.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DataLayout.h"
49 #include "llvm/IR/DerivedTypes.h"
50 #include "llvm/IR/Dominators.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/IRBuilder.h"
53 #include "llvm/IR/InstrTypes.h"
54 #include "llvm/IR/Instruction.h"
55 #include "llvm/IR/Instructions.h"
56 #include "llvm/IR/IntrinsicInst.h"
57 #include "llvm/IR/Intrinsics.h"
58 #include "llvm/IR/Module.h"
59 #include "llvm/IR/Operator.h"
60 #include "llvm/IR/PassManager.h"
61 #include "llvm/IR/PatternMatch.h"
62 #include "llvm/IR/Type.h"
63 #include "llvm/IR/Use.h"
64 #include "llvm/IR/User.h"
65 #include "llvm/IR/Value.h"
66 #include "llvm/IR/ValueHandle.h"
67 #include "llvm/InitializePasses.h"
68 #include "llvm/Pass.h"
69 #include "llvm/Support/Casting.h"
70 #include "llvm/Support/CommandLine.h"
71 #include "llvm/Support/Compiler.h"
72 #include "llvm/Support/Debug.h"
73 #include "llvm/Support/MathExtras.h"
74 #include "llvm/Support/raw_ostream.h"
75 #include "llvm/Transforms/Scalar.h"
76 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
77 #include "llvm/Transforms/Utils/Local.h"
78 #include "llvm/Transforms/Utils/LoopUtils.h"
79 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
80 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
81 #include <cassert>
82 #include <cstdint>
83 #include <utility>
84 
85 using namespace llvm;
86 using namespace PatternMatch;
87 
88 #define DEBUG_TYPE "indvars"
89 
// Pass-wide statistics, reported under -stats with the "indvars" debug type.
STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Controls how aggressively exit values of loop-defined values are rewritten
// to loop-invariant computations (see ReplaceExitVal enum declared elsewhere).
static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

// Forwarded to createWideIV in simplifyAndExtend (below).
static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

// Escape hatch to turn off Linear Function Test Replace entirely.
static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

// Gates the predicateLoopExits transform declared on IndVarSimplify.
static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

// Gates IV widening (the WidenIndVars flag passed into the pass may further
// restrict it).
static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));
namespace {

/// Driver object for the induction-variable simplification pass.  Holds the
/// analyses the transforms consult and accumulates dead instructions for
/// later cleanup.  One instance is created per loop (see run()).
class IndVarSimplify {
  // Required analyses; not owned by this class.
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  // Lazily created in the constructor only when MemorySSA is available.
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  // Instructions the transforms below have made dead; tracked weakly because
  // they may be deleted (or resurrected) while other rewrites proceed.
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  // Whether this pass instance is allowed to widen IVs at all (in addition
  // to the AllowIVWidening command-line gate).
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  /// Run all the sub-transforms on loop \p L; returns true if anything
  /// changed.
  bool run(Loop *L);
};

} // end anonymous namespace
179 
180 //===----------------------------------------------------------------------===//
181 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
182 //===----------------------------------------------------------------------===//
183 
184 /// Convert APF to an integer, if possible.
185 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
186   bool isExact = false;
187   // See if we can convert this to an int64_t
188   uint64_t UIntVal;
189   if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
190                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
191       !isExact)
192     return false;
193   IntVal = UIntVal;
194   return true;
195 }
196 
/// If the loop has floating induction variable then insert corresponding
/// integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  // A header phi has two incoming edges.  contains() is 1 exactly when
  // incoming block 0 is inside the loop (the backedge), so IncomingEdge ends
  // up as the index of the preheader edge and BackEdge as its complement.
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if increment operation is not
  // an add or increment value can not be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.  Exactly two users are required.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.  Ordered and unordered FP
  // variants map to the same signed integer predicate.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable: phi, add, and icmp mirroring the
  // original fp phi / fadd / fcmp.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}
398 
399 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
400   // First step.  Check to see if there are any floating-point recurrences.
401   // If there are, change them into integer recurrences, permitting analysis by
402   // the SCEV routines.
403   BasicBlock *Header = L->getHeader();
404 
405   SmallVector<WeakTrackingVH, 8> PHIs;
406   for (PHINode &PN : Header->phis())
407     PHIs.push_back(&PN);
408 
409   bool Changed = false;
410   for (WeakTrackingVH &PHI : PHIs)
411     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
412       Changed |= handleFloatingPointIV(L, PN);
413 
414   // If the loop previously had floating-point IV, ScalarEvolution
415   // may not have been able to compute a trip count. Now that we've done some
416   // re-writing, the trip count may be computable.
417   if (Changed)
418     SE->forgetLoop(L);
419   return Changed;
420 }
421 
422 //===---------------------------------------------------------------------===//
423 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
424 // they will exit at the first iteration.
425 //===---------------------------------------------------------------------===//
426 
/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      // Examine each incoming (exiting) edge of this exit phi separately.
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        // The exit decision must not depend on any loop iteration, otherwise
        // the exit could be taken on a later iteration.
        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          // Substitute the first-iteration (preheader) value directly into
          // the exit phi, then invalidate SCEV's knowledge of the phi.
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}
497 
498 //===----------------------------------------------------------------------===//
499 //  IV Widening - Extend the width of an IV to cover its widest uses.
500 //===----------------------------------------------------------------------===//
501 
502 /// Update information about the induction variable that is extended by this
503 /// sign or zero extend operation. This is used to determine the final width of
504 /// the IV before actually widening it.
505 static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
506                         ScalarEvolution *SE,
507                         const TargetTransformInfo *TTI) {
508   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
509   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
510     return;
511 
512   Type *Ty = Cast->getType();
513   uint64_t Width = SE->getTypeSizeInBits(Ty);
514   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
515     return;
516 
517   // Check that `Cast` actually extends the induction variable (we rely on this
518   // later).  This takes care of cases where `Cast` is extending a truncation of
519   // the narrow induction variable, and thus can end up being narrower than the
520   // "narrow" induction variable.
521   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
522   if (NarrowIVWidth >= Width)
523     return;
524 
525   // Cast is either an sext or zext up to this point.
526   // We should not widen an indvar if arithmetics on the wider indvar are more
527   // expensive than those on the narrower indvar. We check only the cost of ADD
528   // because at least an ADD is required to increment the induction variable. We
529   // could compute more comprehensively the cost of all instructions on the
530   // induction variable when necessary.
531   if (TTI &&
532       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
533           TTI->getArithmeticInstrCost(Instruction::Add,
534                                       Cast->getOperand(0)->getType())) {
535     return;
536   }
537 
538   if (!WI.WidestNativeType ||
539       Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
540     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
541     WI.IsSigned = IsSigned;
542     return;
543   }
544 
545   // We extend the IV to satisfy the sign of its user(s), or 'signed'
546   // if there are multiple users with both sign- and zero extensions,
547   // in order not to introduce nondeterministic behaviour based on the
548   // unspecified order of a PHI nodes' users-iterator.
549   WI.IsSigned |= IsSigned;
550 }
551 
552 //===----------------------------------------------------------------------===//
553 //  Live IV Reduction - Minimize IVs live across the loop.
554 //===----------------------------------------------------------------------===//
555 
556 //===----------------------------------------------------------------------===//
557 //  Simplification of IV users based on SCEV evaluation.
558 //===----------------------------------------------------------------------===//
559 
namespace {

/// IVVisitor implementation handed to simplifyUsersOfIV; it accumulates
/// sign/zero-extension info about the visited IV's casts into WI so the
/// caller can decide whether (and how) to widen the IV.
class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  // Widening info collected across all visited casts of IVPhi.
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
    // DT is a member of the IVVisitor base — presumably consumed by
    // simplifyUsersOfIV; verify against the base class if touching this.
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace
583 
/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  // Whether the module contains (used) experimental_guard intrinsics; this
  // is forwarded to createWideIV below.
  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
          Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  // Seed the worklist with all header phis (IV candidates).
  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      // Remember this IV as a widening candidate if the visitor found a
      // qualifying sext/zext user.
      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while(!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      // On success, the new wide phi re-enters the worklist for another
      // simplification round.
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}
647 
648 //===----------------------------------------------------------------------===//
649 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
650 //===----------------------------------------------------------------------===//
651 
652 /// Given an Value which is hoped to be part of an add recurance in the given
653 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
654 /// that this is less general than SCEVs AddRec checking.
655 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
656   Instruction *IncI = dyn_cast<Instruction>(IncV);
657   if (!IncI)
658     return nullptr;
659 
660   switch (IncI->getOpcode()) {
661   case Instruction::Add:
662   case Instruction::Sub:
663     break;
664   case Instruction::GetElementPtr:
665     // An IV counter must preserve its type.
666     if (IncI->getNumOperands() == 2)
667       break;
668     [[fallthrough]];
669   default:
670     return nullptr;
671   }
672 
673   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
674   if (Phi && Phi->getParent() == L->getHeader()) {
675     if (L->isLoopInvariant(IncI->getOperand(1)))
676       return Phi;
677     return nullptr;
678   }
679   if (IncI->getOpcode() == Instruction::GetElementPtr)
680     return nullptr;
681 
682   // Allow add/sub to be commuted.
683   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
684   if (Phi && Phi->getParent() == L->getHeader()) {
685     if (L->isLoopInvariant(IncI->getOperand(0)))
686       return Phi;
687   }
688   return nullptr;
689 }
690 
691 /// Whether the current loop exit test is based on this value.  Currently this
692 /// is limited to a direct use in the loop condition.
693 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
694   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
695   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
696   // TODO: Allow non-icmp loop test.
697   if (!ICmp)
698     return false;
699 
700   // TODO: Allow indirect use.
701   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
702 }
703 
704 /// linearFunctionTestReplace policy. Return true unless we can show that the
705 /// current exit test is already sufficiently canonical.
706 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
707   assert(L->getLoopLatch() && "Must be in simplified form");
708 
709   // Avoid converting a constant or loop invariant test back to a runtime
710   // test.  This is critical for when SCEV's cached ExitCount is less precise
711   // than the current IR (such as after we've proven a particular exit is
712   // actually dead and thus the BE count never reaches our ExitCount.)
713   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
714   if (L->isLoopInvariant(BI->getCondition()))
715     return false;
716 
717   // Do LFTR to simplify the exit condition to an ICMP.
718   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
719   if (!Cond)
720     return true;
721 
722   // Do LFTR to simplify the exit ICMP to EQ/NE
723   ICmpInst::Predicate Pred = Cond->getPredicate();
724   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
725     return true;
726 
727   // Look for a loop invariant RHS
728   Value *LHS = Cond->getOperand(0);
729   Value *RHS = Cond->getOperand(1);
730   if (!L->isLoopInvariant(RHS)) {
731     if (!L->isLoopInvariant(LHS))
732       return true;
733     std::swap(LHS, RHS);
734   }
735   // Look for a simple IV counter LHS
736   PHINode *Phi = dyn_cast<PHINode>(LHS);
737   if (!Phi)
738     Phi = getLoopPhiForCounter(LHS, L);
739 
740   if (!Phi)
741     return true;
742 
743   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
744   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
745   if (Idx < 0)
746     return true;
747 
748   // Do LFTR if the exit condition's IV is *not* a simple counter.
749   Value *IncV = Phi->getIncomingValue(Idx);
750   return Phi != getLoopPhiForCounter(IncV, L);
751 }
752 
753 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
754 /// down to checking that all operands are constant and listing instructions
755 /// that may hide undef.
756 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
757                                unsigned Depth) {
758   if (isa<Constant>(V))
759     return !isa<UndefValue>(V);
760 
761   if (Depth >= 6)
762     return false;
763 
764   // Conservatively handle non-constant non-instructions. For example, Arguments
765   // may be undef.
766   Instruction *I = dyn_cast<Instruction>(V);
767   if (!I)
768     return false;
769 
770   // Load and return values may be undef.
771   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
772     return false;
773 
774   // Optimistically handle other instructions.
775   for (Value *Op : I->operands()) {
776     if (!Visited.insert(Op).second)
777       continue;
778     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
779       return false;
780   }
781   return true;
782 }
783 
784 /// Return true if the given value is concrete. We must prove that undef can
785 /// never reach it.
786 ///
787 /// TODO: If we decide that this is a good approach to checking for undef, we
788 /// may factor it into a common location.
789 static bool hasConcreteDef(Value *V) {
790   SmallPtrSet<Value*, 8> Visited;
791   Visited.insert(V);
792   return hasConcreteDefImpl(V, Visited, 0);
793 }
794 
795 /// Return true if the given phi is a "counter" in L.  A counter is an
796 /// add recurance (of integer or pointer type) with an arbitrary start, and a
797 /// step of 1.  Note that L must have exactly one latch.
798 static bool isLoopCounter(PHINode* Phi, Loop *L,
799                           ScalarEvolution *SE) {
800   assert(Phi->getParent() == L->getHeader());
801   assert(L->getLoopLatch());
802 
803   if (!SE->isSCEVable(Phi->getType()))
804     return false;
805 
806   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
807   if (!AR || AR->getLoop() != L || !AR->isAffine())
808     return false;
809 
810   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
811   if (!Step || !Step->isOne())
812     return false;
813 
814   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
815   Value *IncV = Phi->getIncomingValue(LatchIdx);
816   return (getLoopPhiForCounter(IncV, L) == Phi &&
817           isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
818 }
819 
/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  // Exit condition of the block we are rewriting; used to detect IVs whose
  // only remaining use is this very test (see isAlmostDeadIV below).
  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    // Only consider unit-stride add recurrences (see isLoopCounter).
    if (!isLoopCounter(Phi, L, SE))
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    // Heuristic tie-breaking: only applied while the current best is live.
    if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (isAlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
903 
904 /// Insert an IR expression which computes the value held by the IV IndVar
905 /// (which must be an loop counter w/unit stride) after the backedge of loop L
906 /// is taken ExitCount times.
907 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
908                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
909                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
910   assert(isLoopCounter(IndVar, L, SE));
911   assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
912   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
913   assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
914 
915   // For integer IVs, truncate the IV before computing the limit unless we
916   // know apriori that the limit must be a constant when evaluated in the
917   // bitwidth of the IV.  We prefer (potentially) keeping a truncate of the
918   // IV in the loop over a (potentially) expensive expansion of the widened
919   // exit count add(zext(add)) expression.
920   if (IndVar->getType()->isIntegerTy() &&
921       SE->getTypeSizeInBits(AR->getType()) >
922       SE->getTypeSizeInBits(ExitCount->getType())) {
923     const SCEV *IVInit = AR->getStart();
924     if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
925       AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
926   }
927 
928   const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
929   const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
930   assert(SE->isLoopInvariant(IVLimit, L) &&
931          "Computed iteration count is not loop invariant!");
932   return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
933                                 ExitingBB->getTerminator());
934 }
935 
/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
///
/// \p ExitingBB is the block whose conditional exit branch is rewritten.
/// \p IndVar must be a unit-stride counter (asserted via isLoopCounter).
/// Always performs the rewrite and returns true.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  // The post-increment value of the IV, flowing in from the latch.
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  // Materialize the loop-invariant limit the IV is compared against.
  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  // The predicate depends on which successor stays in the loop: "!=" keeps
  // looping while unequal, "==" exits when equal.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within in.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    // If zext(trunc(IV)) == IV the IV fits in the narrow type unsigned-wise,
    // so a zext of the limit is equivalent; likewise for sext below.
    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
1077 
1078 //===----------------------------------------------------------------------===//
1079 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1080 //===----------------------------------------------------------------------===//
1081 
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
///
/// Returns true if any instruction was moved.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  // We need a unique exit block to sink into and a preheader to sink from.
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  // Walk the preheader backwards from its terminator so that sunk
  // instructions keep their relative order (defs before uses).
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // A use feeding a phi is effectively a use in the predecessor block
      // the value flows in from, not in the phi's own block.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    // Advance the cursor past ToMove (and any debug/pseudo instructions)
    // *before* moving it, so the backward walk stays valid.  If nothing but
    // debug/pseudo instructions remain above, we are done after this move.
    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    SE->forgetValue(ToMove);
    if (Done) break;
    // Later (earlier-in-block) instructions sink above the one just moved.
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}
1171 
1172 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1173                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1174   auto *OldCond = BI->getCondition();
1175   LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1176                     << " with " << *NewCond << "\n");
1177   BI->setCondition(NewCond);
1178   if (OldCond->use_empty())
1179     DeadInsts.emplace_back(OldCond);
1180 }
1181 
1182 static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1183                                       bool IsTaken) {
1184   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1185   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1186   auto *OldCond = BI->getCondition();
1187   return ConstantInt::get(OldCond->getType(),
1188                           IsTaken ? ExitIfTrue : !ExitIfTrue);
1189 }
1190 
1191 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1192                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1193   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1194   auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1195   replaceExitCond(BI, NewCond, DeadInsts);
1196 }
1197 
1198 static void replaceLoopPHINodesWithPreheaderValues(
1199     LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1200     ScalarEvolution &SE) {
1201   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1202   auto *LoopPreheader = L->getLoopPreheader();
1203   auto *LoopHeader = L->getHeader();
1204   SmallVector<Instruction *> Worklist;
1205   for (auto &PN : LoopHeader->phis()) {
1206     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1207     for (User *U : PN.users())
1208       Worklist.push_back(cast<Instruction>(U));
1209     SE.forgetValue(&PN);
1210     PN.replaceAllUsesWith(PreheaderIncoming);
1211     DeadInsts.emplace_back(&PN);
1212   }
1213 
1214   // Replacing with the preheader value will often allow IV users to simplify
1215   // (especially if the preheader value is a constant).
1216   SmallPtrSet<Instruction *, 16> Visited;
1217   while (!Worklist.empty()) {
1218     auto *I = cast<Instruction>(Worklist.pop_back_val());
1219     if (!Visited.insert(I).second)
1220       continue;
1221 
1222     // Don't simplify instructions outside the loop.
1223     if (!L->contains(I))
1224       continue;
1225 
1226     Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
1227     if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1228       for (User *U : I->users())
1229         Worklist.push_back(cast<Instruction>(U));
1230       I->replaceAllUsesWith(Res);
1231       DeadInsts.emplace_back(I);
1232     }
1233   }
1234 }
1235 
1236 static Value *
1237 createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
1238                     const ScalarEvolution::LoopInvariantPredicate &LIP,
1239                     SCEVExpander &Rewriter) {
1240   ICmpInst::Predicate InvariantPred = LIP.Pred;
1241   BasicBlock *Preheader = L->getLoopPreheader();
1242   assert(Preheader && "Preheader doesn't exist");
1243   Rewriter.setInsertPoint(Preheader->getTerminator());
1244   auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1245   auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1246   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1247   if (ExitIfTrue)
1248     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1249   IRBuilder<> Builder(Preheader->getTerminator());
1250   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1251   return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1252                             BI->getCondition()->getName());
1253 }
1254 
/// Try to compute a loop-invariant replacement for the exit test \p ICmp of
/// \p ExitingBB, given that the loop runs at most \p MaxIter iterations.
/// \p Inverted means the branch stays in the loop on the false edge, so the
/// predicate is inverted to mean "stay in loop".  \p SkipLastIter means the
/// condition's value on the final iteration is irrelevant, so one fewer
/// iteration may be assumed.  Returns a constant when the test folds, a new
/// invariant icmp when an equivalent invariant predicate exists, or
/// std::nullopt when no replacement was found.
static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  ICmpInst::Predicate Pred = ICmp->getPredicate();
  Value *LHS = ICmp->getOperand(0);
  Value *RHS = ICmp->getOperand(1);

  // 'LHS pred RHS' should now mean that we stay in loop.
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true or false?
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    // Truncating MaxIter is only safe when it provably fits in the narrower
    // type (it is <= the zero-extended all-ones value of ARTy).
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    // Semantically skip last iter is "subtract 1, do not bother about unsigned
    // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
    // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
    // So we manually construct umin(a - 1, b - 1).
    SmallVector<const SCEV *, 4> Elements;
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
      for (auto *Op : UMin->operands())
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
    } else
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return std::nullopt;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
  else
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
}
1312 
/// Try to simplify the exit test of \p ExitingBB even though its exact exit
/// count is unknown.  The condition is decomposed through one-use logical
/// and/or into leaf icmps, and each leaf is independently replaced via
/// createReplacement when an invariant equivalent (valid for the first
/// \p MaxIter iterations) exists.  Returns true if any leaf was replaced.
static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
      "Not a loop exit!");

  // For branch that stays in loop by TRUE condition, go through AND. For branch
  // that stays in loop by FALSE condition, go through OR. Both gives the
  // similar logic: "stay in loop iff all conditions are true(false)".
  bool Inverted = L->contains(BI->getSuccessor(1));
  SmallVector<ICmpInst *, 4> LeafConditions;
  SmallVector<Value *, 4> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  Value *OldCond = BI->getCondition();
  Visited.insert(OldCond);
  Worklist.push_back(OldCond);

  // Descend one level of logical and/or, queueing both operands.  Returns
  // false if V is not the matching logical operation.
  auto GoThrough = [&](Value *V) {
    Value *LHS = nullptr, *RHS = nullptr;
    if (Inverted) {
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
        return false;
    } else {
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
        return false;
    }
    if (Visited.insert(LHS).second)
      Worklist.push_back(LHS);
    if (Visited.insert(RHS).second)
      Worklist.push_back(RHS);
    return true;
  };

  do {
    Value *Curr = Worklist.pop_back_val();
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
    // those with one use, to avoid instruction duplication.
    if (Curr->hasOneUse())
      if (!GoThrough(Curr))
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
          LeafConditions.push_back(ICmp);
  } while (!Worklist.empty());

  // If the current basic block has the same exit count as the whole loop, and
  // it consists of multiple icmp's, try to collect all icmp's that give exact
  // same exit count. For all other icmp's, we could use one less iteration,
  // because their value on the last iteration doesn't really matter.
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
  if (!SkipLastIter && LeafConditions.size() > 1 &&
      SE->getExitCount(L, ExitingBB,
                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
          MaxIter)
    for (auto *ICmp : LeafConditions) {
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
                                             /*ControlsExit*/ false);
      auto *ExitMax = EL.SymbolicMaxNotTaken;
      if (isa<SCEVCouldNotCompute>(ExitMax))
        continue;
      // They could be of different types (specifically this happens after
      // IV widening).
      auto *WiderType =
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
      auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
      auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on last iter, so this one doesn't
    // really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
    }
    if (auto Replaced =
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
                              OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(NewCond);
      DeadInsts.push_back(OldCond);
      // Make sure we no longer consider this condition as failing on last
      // iteration.
      ICmpsFailingOnLastIter.erase(OldCond);
    }
  }
  return Changed;
}
1416 
// Canonicalize loop exit comparisons in two phases:
//  1) Convert signed compares of a zext'd value against a loop-invariant RHS
//     into their unsigned equivalents when the RHS provably fits in the
//     zero-extended range of the narrow type.
//  2) For unsigned compares of a zext'd value against a loop-invariant RHS,
//     rotate the extend out of the loop by truncating the RHS in the
//     preheader, turning loop-varying work into loop-invariant work.
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags.  The alternative to this would be to extending the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers.  The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    // Only rewrite a compare whose sole use is this exit branch; otherwise
    // changing the predicate would affect other users.
    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results.  For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    // FullCR is the zero-extension of the narrow type's full range, i.e.
    // [0, 2^InnerBitWidth) at the outer width.
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // Both sides of the compare are known to lie in [0, 2^InnerBitWidth):
      // the LHS because it's a zext of the narrow type, the RHS by the range
      // check above.  With the sign bit clear on both operands, signed and
      // unsigned comparison agree, so switch to the unsigned predicate.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed.  We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions.  The exception is when we have an zext(add-rec).  The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant.  This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      // Materialize trunc(RHS) in the preheader so the loop body compares at
      // the narrow width.
      auto *NewRHS =
        CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
                         L->getLoopPreheader()->getTerminator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };


    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    // If RHS's unsigned range fits in the narrow type, zext(trunc(RHS)) ==
    // RHS, so the compare can safely be done at the narrow width.
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}
1550 
1551 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1552   SmallVector<BasicBlock*, 16> ExitingBlocks;
1553   L->getExitingBlocks(ExitingBlocks);
1554 
1555   // Remove all exits which aren't both rewriteable and execute on every
1556   // iteration.
1557   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1558     // If our exitting block exits multiple loops, we can only rewrite the
1559     // innermost one.  Otherwise, we're changing how many times the innermost
1560     // loop runs before it exits.
1561     if (LI->getLoopFor(ExitingBB) != L)
1562       return true;
1563 
1564     // Can't rewrite non-branch yet.
1565     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1566     if (!BI)
1567       return true;
1568 
1569     // Likewise, the loop latch must be dominated by the exiting BB.
1570     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1571       return true;
1572 
1573     if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1574       // If already constant, nothing to do. However, if this is an
1575       // unconditional exit, we can still replace header phis with their
1576       // preheader value.
1577       if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1578         replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1579       return true;
1580     }
1581 
1582     return false;
1583   });
1584 
1585   if (ExitingBlocks.empty())
1586     return false;
1587 
1588   // Get a symbolic upper bound on the loop backedge taken count.
1589   const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1590   if (isa<SCEVCouldNotCompute>(MaxBECount))
1591     return false;
1592 
1593   // Visit our exit blocks in order of dominance. We know from the fact that
1594   // all exits must dominate the latch, so there is a total dominance order
1595   // between them.
1596   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1597                // std::sort sorts in ascending order, so we want the inverse of
1598                // the normal dominance relation.
1599                if (A == B) return false;
1600                if (DT->properlyDominates(A, B))
1601                  return true;
1602                else {
1603                  assert(DT->properlyDominates(B, A) &&
1604                         "expected total dominance order!");
1605                  return false;
1606                }
1607   });
1608 #ifdef ASSERT
1609   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1610     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1611   }
1612 #endif
1613 
1614   bool Changed = false;
1615   bool SkipLastIter = false;
1616   const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1617   auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1618     if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1619       return;
1620     if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1621       CurrMaxExit = MaxExitCount;
1622     else
1623       CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1624     // If the loop has more than 1 iteration, all further checks will be
1625     // executed 1 iteration less.
1626     if (CurrMaxExit == MaxBECount)
1627       SkipLastIter = true;
1628   };
1629   SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1630   for (BasicBlock *ExitingBB : ExitingBlocks) {
1631     const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1632     const SCEV *MaxExitCount = SE->getExitCount(
1633         L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1634     if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1635       // Okay, we do not know the exit count here. Can we at least prove that it
1636       // will remain the same within iteration space?
1637       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1638       auto OptimizeCond = [&](bool SkipLastIter) {
1639         return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1640                                                     MaxBECount, SkipLastIter,
1641                                                     SE, Rewriter, DeadInsts);
1642       };
1643 
1644       // TODO: We might have proved that we can skip the last iteration for
1645       // this check. In this case, we only want to check the condition on the
1646       // pre-last iteration (MaxBECount - 1). However, there is a nasty
1647       // corner case:
1648       //
1649       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1650       //
1651       // If we could not prove that len != 0, then we also could not prove that
1652       // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1653       // OptimizeCond will likely not prove anything for it, even if it could
1654       // prove the same fact for len.
1655       //
1656       // As a temporary solution, we query both last and pre-last iterations in
1657       // hope that we will be able to prove triviality for at least one of
1658       // them. We can stop querying MaxBECount for this case once SCEV
1659       // understands that (MaxBECount - 1) will not overflow here.
1660       if (OptimizeCond(false))
1661         Changed = true;
1662       else if (SkipLastIter && OptimizeCond(true))
1663         Changed = true;
1664       UpdateSkipLastIter(MaxExitCount);
1665       continue;
1666     }
1667 
1668     UpdateSkipLastIter(ExactExitCount);
1669 
1670     // If we know we'd exit on the first iteration, rewrite the exit to
1671     // reflect this.  This does not imply the loop must exit through this
1672     // exit; there may be an earlier one taken on the first iteration.
1673     // We know that the backedge can't be taken, so we replace all
1674     // the header PHIs with values coming from the preheader.
1675     if (ExactExitCount->isZero()) {
1676       foldExit(L, ExitingBB, true, DeadInsts);
1677       replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1678       Changed = true;
1679       continue;
1680     }
1681 
1682     assert(ExactExitCount->getType()->isIntegerTy() &&
1683            MaxBECount->getType()->isIntegerTy() &&
1684            "Exit counts must be integers");
1685 
1686     Type *WiderType =
1687         SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1688     ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1689     MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1690     assert(MaxBECount->getType() == ExactExitCount->getType());
1691 
1692     // Can we prove that some other exit must be taken strictly before this
1693     // one?
1694     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1695                                      ExactExitCount)) {
1696       foldExit(L, ExitingBB, false, DeadInsts);
1697       Changed = true;
1698       continue;
1699     }
1700 
1701     // As we run, keep track of which exit counts we've encountered.  If we
1702     // find a duplicate, we've found an exit which would have exited on the
1703     // exiting iteration, but (from the visit order) strictly follows another
1704     // which does the same and is thus dead.
1705     if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1706       foldExit(L, ExitingBB, false, DeadInsts);
1707       Changed = true;
1708       continue;
1709     }
1710 
1711     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1712     // here.  If we kept track of the min of dominanting exits so far, we could
1713     // discharge exits with EC >= MDEC. This is less powerful than the existing
1714     // transform (since later exits aren't considered), but potentially more
1715     // powerful for any case where SCEV can prove a >=u b, but neither a == b
1716     // or a >u b.  Such a case is not currently known.
1717   }
1718   return Changed;
1719 }
1720 
// For a read-only loop whose exact backedge-taken count is computable, rewrite
// each predicatable exit branch into a loop-invariant comparison of its exit
// count against the backedge-taken count.  This can make the loop exit on the
// first iteration when none of its computed values are needed.  Returns true
// if any branch condition was rewritten.
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration.  Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  // Returns true if this exit cannot be predicated (wrong loop, non-branch
  // terminator, already-constant condition, phis in the exit block, or an
  // unexpandable exit count).
  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
    BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, than we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).  Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known inequal.
  llvm::sort(ExitingBlocks,
            [&](BasicBlock *A, BasicBlock *B) {
              // std::sort sorts in ascending order, so we want the inverse of
              // the normal dominance relation, plus a tie breaker for blocks
              // unordered by dominance.
              if (DT->properlyDominates(A, B)) return true;
              if (DT->properlyDominates(B, A)) return false;
              return A->getName() < B->getName();
            });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such j > i.  Truncate the list at the first exit we
  // cannot predicate; everything before it is safe.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO:isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      // The exit count equals the BTC: this exit is never taken before the
      // final iteration, so the branch condition folds to "stay in loop".
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      // Compare the exit count against the BTC, widening to a common type
      // first if their widths differ.
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}
1883 
1884 //===----------------------------------------------------------------------===//
1885 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1886 //===----------------------------------------------------------------------===//
1887 
// Main driver: runs the IV-simplification sub-passes on loop L in a fixed
// order (float IV rewriting, simplify/extend, exit-value rewriting, congruent
// IV elimination, exit canonicalization/optimization/predication, LFTR, dead
// code cleanup, invariant sinking).  Returns true if the loop was changed.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop.  Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter))  {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}
2058 
2059 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2060                                           LoopStandardAnalysisResults &AR,
2061                                           LPMUpdater &) {
2062   Function *F = L.getHeader()->getParent();
2063   const DataLayout &DL = F->getParent()->getDataLayout();
2064 
2065   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2066                      WidenIndVars && AllowIVWidening);
2067   if (!IVS.run(&L))
2068     return PreservedAnalyses::all();
2069 
2070   auto PA = getLoopPassPreservedAnalyses();
2071   PA.preserveSet<CFGAnalyses>();
2072   if (AR.MSSA)
2073     PA.preserve<MemorySSAAnalysis>();
2074   return PA;
2075 }
2076