xref: /llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision 0f82469314f34a086669dfcd190a9f89260fbee5)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/iterator_range.h"
35 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/Analysis/MemorySSA.h"
38 #include "llvm/Analysis/MemorySSAUpdater.h"
39 #include "llvm/Analysis/ScalarEvolution.h"
40 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
41 #include "llvm/Analysis/TargetLibraryInfo.h"
42 #include "llvm/Analysis/TargetTransformInfo.h"
43 #include "llvm/Analysis/ValueTracking.h"
44 #include "llvm/IR/BasicBlock.h"
45 #include "llvm/IR/Constant.h"
46 #include "llvm/IR/ConstantRange.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DataLayout.h"
49 #include "llvm/IR/DerivedTypes.h"
50 #include "llvm/IR/Dominators.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/IRBuilder.h"
53 #include "llvm/IR/InstrTypes.h"
54 #include "llvm/IR/Instruction.h"
55 #include "llvm/IR/Instructions.h"
56 #include "llvm/IR/IntrinsicInst.h"
57 #include "llvm/IR/Intrinsics.h"
58 #include "llvm/IR/Module.h"
59 #include "llvm/IR/Operator.h"
60 #include "llvm/IR/PassManager.h"
61 #include "llvm/IR/PatternMatch.h"
62 #include "llvm/IR/Type.h"
63 #include "llvm/IR/Use.h"
64 #include "llvm/IR/User.h"
65 #include "llvm/IR/Value.h"
66 #include "llvm/IR/ValueHandle.h"
67 #include "llvm/Support/Casting.h"
68 #include "llvm/Support/CommandLine.h"
69 #include "llvm/Support/Compiler.h"
70 #include "llvm/Support/Debug.h"
71 #include "llvm/Support/MathExtras.h"
72 #include "llvm/Support/raw_ostream.h"
73 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
74 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
75 #include "llvm/Transforms/Utils/Local.h"
76 #include "llvm/Transforms/Utils/LoopUtils.h"
77 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
78 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
79 #include <cassert>
80 #include <cstdint>
81 #include <utility>
82 
using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

// Pass statistics, reported with -stats.
STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Strategy for rewriting uses of an IV's exit value outside the loop
// (see ReplaceExitVal). Default: only when the replacement is cheap.
static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

// Whether IV widening may use control-dependent (post-increment) ranges.
static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

// Escape hatch to turn off the LFTR transformation entirely.
static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

// Enables forming loop-invariant exit predicates for read-only loops.
static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

// Master switch for widening narrow IVs to eliminate sext/zext users.
static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));
namespace {

/// Driver object for the IndVarSimplify transformation on a single loop.
/// Holds the analyses the pass consumes and accumulates dead instructions
/// for deferred deletion.
class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  // Non-null only when the caller provided MemorySSA to keep updated.
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  // Instructions made dead during the run; tracked weakly since they may be
  // deleted out from under us.
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  // Whether IV widening is permitted for this run.
  bool WidenIndVars;

  // Set when a transformation creates opportunities for loop unswitching;
  // queried by the pass wrapper via runUnswitching().
  bool RunUnswitching = false;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzeable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  /// Run the transformation on loop \p L; returns true if IR was changed.
  bool run(Loop *L);

  bool runUnswitching() const { return RunUnswitching; }
};

} // end anonymous namespace
181 
182 //===----------------------------------------------------------------------===//
183 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
184 //===----------------------------------------------------------------------===//
185 
186 /// Convert APF to an integer, if possible.
187 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
188   bool isExact = false;
189   // See if we can convert this to an int64_t
190   uint64_t UIntVal;
191   if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
192                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
193       !isExact)
194     return false;
195   IntVal = UIntVal;
196   return true;
197 }
198 
199 /// If the loop has floating induction variable then insert corresponding
200 /// integer induction variable if possible.
201 /// For example,
202 /// for(double i = 0; i < 10000; ++i)
203 ///   bar(i)
204 /// is converted into
205 /// for(int i = 0; i < 10000; ++i)
206 ///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  // A header phi has two incoming edges. L->contains(...) is 1 when edge 0
  // comes from inside the loop, so IncomingEdge indexes the preheader edge
  // and BackEdge the latch edge.
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if increment operation is not
  // an add or increment value can not be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator. Requiring exactly two users keeps
  // the rewrite below simple and safe.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison. Ordered and unordered FP
  // predicates map to the same signed integer predicate; operands are known
  // integral constants plus the IV, so NaN-sensitivity is not a concern here.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      // Inclusive bound: the IV must reach one past Exit, so widen the range.
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      // Inclusive bound: the IV must reach one past Exit, so widen the range.
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI =
      PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName() + ".int", Incr->getIterator());
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare =
      new ICmpInst(TheBr->getIterator(), NewPred, NewAdd,
                   ConstantInt::get(Int32Ty, ExitValue), Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}
401 
402 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
403   // First step.  Check to see if there are any floating-point recurrences.
404   // If there are, change them into integer recurrences, permitting analysis by
405   // the SCEV routines.
406   BasicBlock *Header = L->getHeader();
407 
408   SmallVector<WeakTrackingVH, 8> PHIs;
409   for (PHINode &PN : Header->phis())
410     PHIs.push_back(&PN);
411 
412   bool Changed = false;
413   for (WeakTrackingVH &PHI : PHIs)
414     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
415       Changed |= handleFloatingPointIV(L, PN);
416 
417   // If the loop previously had floating-point IV, ScalarEvolution
418   // may not have been able to compute a trip count. Now that we've done some
419   // re-writing, the trip count may be computable.
420   if (Changed)
421     SE->forgetLoop(L);
422   return Changed;
423 }
424 
425 //===---------------------------------------------------------------------===//
426 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
427 // they will exit at the first iteration.
428 //===---------------------------------------------------------------------===//
429 
430 /// Check to see if this loop has loop invariant conditions which lead to loop
431 /// exits. If so, we know that if the exit path is taken, it is at the first
432 /// loop iteration. This lets us predict exit values of PHI nodes that live in
433 /// loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      // Examine each incoming (exiting) edge of the LCSSA phi separately.
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        // The exit decision must not depend on the iteration, otherwise the
        // exit could be taken on a later iteration.
        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          // Forward the pre-loop value straight through to the exit phi.
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          // Invalidate SCEV's cached information for the rewritten phi.
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}
500 
501 //===----------------------------------------------------------------------===//
502 //  IV Widening - Extend the width of an IV to cover its widest uses.
503 //===----------------------------------------------------------------------===//
504 
505 /// Update information about the induction variable that is extended by this
506 /// sign or zero extend operation. This is used to determine the final width of
507 /// the IV before actually widening it.
508 static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
509                         ScalarEvolution *SE,
510                         const TargetTransformInfo *TTI) {
511   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
512   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
513     return;
514 
515   Type *Ty = Cast->getType();
516   uint64_t Width = SE->getTypeSizeInBits(Ty);
517   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
518     return;
519 
520   // Check that `Cast` actually extends the induction variable (we rely on this
521   // later).  This takes care of cases where `Cast` is extending a truncation of
522   // the narrow induction variable, and thus can end up being narrower than the
523   // "narrow" induction variable.
524   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
525   if (NarrowIVWidth >= Width)
526     return;
527 
528   // Cast is either an sext or zext up to this point.
529   // We should not widen an indvar if arithmetics on the wider indvar are more
530   // expensive than those on the narrower indvar. We check only the cost of ADD
531   // because at least an ADD is required to increment the induction variable. We
532   // could compute more comprehensively the cost of all instructions on the
533   // induction variable when necessary.
534   if (TTI &&
535       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
536           TTI->getArithmeticInstrCost(Instruction::Add,
537                                       Cast->getOperand(0)->getType())) {
538     return;
539   }
540 
541   if (!WI.WidestNativeType ||
542       Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
543     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
544     WI.IsSigned = IsSigned;
545     return;
546   }
547 
548   // We extend the IV to satisfy the sign of its user(s), or 'signed'
549   // if there are multiple users with both sign- and zero extensions,
550   // in order not to introduce nondeterministic behaviour based on the
551   // unspecified order of a PHI nodes' users-iterator.
552   WI.IsSigned |= IsSigned;
553 }
554 
555 //===----------------------------------------------------------------------===//
556 //  Live IV Reduction - Minimize IVs live across the loop.
557 //===----------------------------------------------------------------------===//
558 
559 //===----------------------------------------------------------------------===//
560 //  Simplification of IV users based on SCEV evaluation.
561 //===----------------------------------------------------------------------===//
562 
namespace {

/// IVVisitor used with simplifyUsersOfIV to collect sign/zero-extension
/// information (WideIVInfo) about a single induction phi while its users are
/// being simplified.
class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  // Accumulated widening info for IVPhi; read by the caller after the visit.
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
    // DT is inherited from the IVVisitor base class.
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace
586 
587 /// Iteratively perform simplification on a worklist of IV users. Each
588 /// successive simplification may push more users which may themselves be
589 /// candidates for simplification.
590 ///
591 /// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  // Widening is more conservative when experimental guards are present;
  // detect whether the module declares (and uses) the guard intrinsic.
  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
          Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
                                             Rewriter, &Visitor);

      Changed |= C;
      RunUnswitching |= U;
      // Queue this IV for widening only if an extension of it was seen.
      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while(!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        // The new wide phi gets its own round of simplification.
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}
652 
653 //===----------------------------------------------------------------------===//
654 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
655 //===----------------------------------------------------------------------===//
656 
657 /// Given an Value which is hoped to be part of an add recurance in the given
658 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
659 /// that this is less general than SCEVs AddRec checking.
660 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
661   Instruction *IncI = dyn_cast<Instruction>(IncV);
662   if (!IncI)
663     return nullptr;
664 
665   switch (IncI->getOpcode()) {
666   case Instruction::Add:
667   case Instruction::Sub:
668     break;
669   case Instruction::GetElementPtr:
670     // An IV counter must preserve its type.
671     if (IncI->getNumOperands() == 2)
672       break;
673     [[fallthrough]];
674   default:
675     return nullptr;
676   }
677 
678   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
679   if (Phi && Phi->getParent() == L->getHeader()) {
680     if (L->isLoopInvariant(IncI->getOperand(1)))
681       return Phi;
682     return nullptr;
683   }
684   if (IncI->getOpcode() == Instruction::GetElementPtr)
685     return nullptr;
686 
687   // Allow add/sub to be commuted.
688   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
689   if (Phi && Phi->getParent() == L->getHeader()) {
690     if (L->isLoopInvariant(IncI->getOperand(0)))
691       return Phi;
692   }
693   return nullptr;
694 }
695 
696 /// Whether the current loop exit test is based on this value.  Currently this
697 /// is limited to a direct use in the loop condition.
698 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
699   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
700   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
701   // TODO: Allow non-icmp loop test.
702   if (!ICmp)
703     return false;
704 
705   // TODO: Allow indirect use.
706   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
707 }
708 
709 /// linearFunctionTestReplace policy. Return true unless we can show that the
710 /// current exit test is already sufficiently canonical.
711 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
712   assert(L->getLoopLatch() && "Must be in simplified form");
713 
714   // Avoid converting a constant or loop invariant test back to a runtime
715   // test.  This is critical for when SCEV's cached ExitCount is less precise
716   // than the current IR (such as after we've proven a particular exit is
717   // actually dead and thus the BE count never reaches our ExitCount.)
718   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
719   if (L->isLoopInvariant(BI->getCondition()))
720     return false;
721 
722   // Do LFTR to simplify the exit condition to an ICMP.
723   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
724   if (!Cond)
725     return true;
726 
727   // Do LFTR to simplify the exit ICMP to EQ/NE
728   ICmpInst::Predicate Pred = Cond->getPredicate();
729   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
730     return true;
731 
732   // Look for a loop invariant RHS
733   Value *LHS = Cond->getOperand(0);
734   Value *RHS = Cond->getOperand(1);
735   if (!L->isLoopInvariant(RHS)) {
736     if (!L->isLoopInvariant(LHS))
737       return true;
738     std::swap(LHS, RHS);
739   }
740   // Look for a simple IV counter LHS
741   PHINode *Phi = dyn_cast<PHINode>(LHS);
742   if (!Phi)
743     Phi = getLoopPhiForCounter(LHS, L);
744 
745   if (!Phi)
746     return true;
747 
748   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
749   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
750   if (Idx < 0)
751     return true;
752 
753   // Do LFTR if the exit condition's IV is *not* a simple counter.
754   Value *IncV = Phi->getIncomingValue(Idx);
755   return Phi != getLoopPhiForCounter(IncV, L);
756 }
757 
758 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
759 /// down to checking that all operands are constant and listing instructions
760 /// that may hide undef.
761 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
762                                unsigned Depth) {
763   if (isa<Constant>(V))
764     return !isa<UndefValue>(V);
765 
766   if (Depth >= 6)
767     return false;
768 
769   // Conservatively handle non-constant non-instructions. For example, Arguments
770   // may be undef.
771   Instruction *I = dyn_cast<Instruction>(V);
772   if (!I)
773     return false;
774 
775   // Load and return values may be undef.
776   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
777     return false;
778 
779   // Optimistically handle other instructions.
780   for (Value *Op : I->operands()) {
781     if (!Visited.insert(Op).second)
782       continue;
783     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
784       return false;
785   }
786   return true;
787 }
788 
789 /// Return true if the given value is concrete. We must prove that undef can
790 /// never reach it.
791 ///
792 /// TODO: If we decide that this is a good approach to checking for undef, we
793 /// may factor it into a common location.
794 static bool hasConcreteDef(Value *V) {
795   SmallPtrSet<Value*, 8> Visited;
796   Visited.insert(V);
797   return hasConcreteDefImpl(V, Visited, 0);
798 }
799 
800 /// Return true if the given phi is a "counter" in L.  A counter is an
801 /// add recurance (of integer or pointer type) with an arbitrary start, and a
802 /// step of 1.  Note that L must have exactly one latch.
803 static bool isLoopCounter(PHINode* Phi, Loop *L,
804                           ScalarEvolution *SE) {
805   assert(Phi->getParent() == L->getHeader());
806   assert(L->getLoopLatch());
807 
808   if (!SE->isSCEVable(Phi->getType()))
809     return false;
810 
811   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
812   if (!AR || AR->getLoop() != L || !AR->isAffine())
813     return false;
814 
815   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
816   if (!Step || !Step->isOne())
817     return false;
818 
819   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
820   Value *IncV = Phi->getIncomingValue(LatchIdx);
821   return (getLoopPhiForCounter(IncV, L) == Phi &&
822           isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
823 }
824 
/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
///
/// Returns the chosen counter phi, or null if no suitable counter exists.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  // Width of the backedge-taken count; candidate IVs must be at least as wide.
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  // Current exit condition, used by the almost-dead-IV heuristic below.
  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (isAlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
908 
909 /// Insert an IR expression which computes the value held by the IV IndVar
910 /// (which must be an loop counter w/unit stride) after the backedge of loop L
911 /// is taken ExitCount times.
912 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
913                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
914                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
915   assert(isLoopCounter(IndVar, L, SE));
916   assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
917   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
918   assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
919 
920   // For integer IVs, truncate the IV before computing the limit unless we
921   // know apriori that the limit must be a constant when evaluated in the
922   // bitwidth of the IV.  We prefer (potentially) keeping a truncate of the
923   // IV in the loop over a (potentially) expensive expansion of the widened
924   // exit count add(zext(add)) expression.
925   if (IndVar->getType()->isIntegerTy() &&
926       SE->getTypeSizeInBits(AR->getType()) >
927       SE->getTypeSizeInBits(ExitCount->getType())) {
928     const SCEV *IVInit = AR->getStart();
929     if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
930       AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
931   }
932 
933   const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
934   const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
935   assert(SE->isLoopInvariant(IVLimit, L) &&
936          "Computed iteration count is not loop invariant!");
937   return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
938                                 ExitingBB->getTerminator());
939 }
940 
/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
///
/// \p IndVar must be a counter recognized by isLoopCounter; \p ExitCount is
/// the number of times the backedge is taken before \p ExitingBB exits.
/// Returns true (the rewrite is always performed).
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  // The value the counter takes after the latch increment (post-inc form).
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  // Materialize the loop-invariant limit value the IV is compared against.
  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within in.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    // zext(trunc(IV)) == IV means comparing in the wide type after a zext of
    // the limit is equivalent; try sext analogously if that fails.
    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
1082 
1083 //===----------------------------------------------------------------------===//
1084 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1085 //===----------------------------------------------------------------------===//
1086 
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
///
/// Returns true if any instruction was moved.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  // Walk the preheader backwards from its terminator so later definitions are
  // considered (and sunk) before the earlier ones they may depend on.
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // A use through a phi happens in the phi's incoming block, not in the
      // block containing the phi itself.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    // Advance I to the next candidate *before* moving ToMove, since the move
    // invalidates ToMove's position in the preheader.
    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      // If only debug/pseudo instructions remain above us, we're finished
      // after this move.
      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    // Invalidate any cached SCEV for the moved instruction.
    SE->forgetValue(ToMove);
    if (Done) break;
    // Insert subsequently-sunk instructions before this one, preserving the
    // original relative order in the exit block.
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}
1176 
1177 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1178                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1179   auto *OldCond = BI->getCondition();
1180   LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1181                     << " with " << *NewCond << "\n");
1182   BI->setCondition(NewCond);
1183   if (OldCond->use_empty())
1184     DeadInsts.emplace_back(OldCond);
1185 }
1186 
1187 static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1188                                       bool IsTaken) {
1189   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1190   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1191   auto *OldCond = BI->getCondition();
1192   return ConstantInt::get(OldCond->getType(),
1193                           IsTaken ? ExitIfTrue : !ExitIfTrue);
1194 }
1195 
1196 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1197                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1198   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1199   auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1200   replaceExitCond(BI, NewCond, DeadInsts);
1201 }
1202 
1203 static void replaceLoopPHINodesWithPreheaderValues(
1204     LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1205     ScalarEvolution &SE) {
1206   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1207   auto *LoopPreheader = L->getLoopPreheader();
1208   auto *LoopHeader = L->getHeader();
1209   SmallVector<Instruction *> Worklist;
1210   for (auto &PN : LoopHeader->phis()) {
1211     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1212     for (User *U : PN.users())
1213       Worklist.push_back(cast<Instruction>(U));
1214     SE.forgetValue(&PN);
1215     PN.replaceAllUsesWith(PreheaderIncoming);
1216     DeadInsts.emplace_back(&PN);
1217   }
1218 
1219   // Replacing with the preheader value will often allow IV users to simplify
1220   // (especially if the preheader value is a constant).
1221   SmallPtrSet<Instruction *, 16> Visited;
1222   while (!Worklist.empty()) {
1223     auto *I = cast<Instruction>(Worklist.pop_back_val());
1224     if (!Visited.insert(I).second)
1225       continue;
1226 
1227     // Don't simplify instructions outside the loop.
1228     if (!L->contains(I))
1229       continue;
1230 
1231     Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
1232     if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1233       for (User *U : I->users())
1234         Worklist.push_back(cast<Instruction>(U));
1235       I->replaceAllUsesWith(Res);
1236       DeadInsts.emplace_back(I);
1237     }
1238   }
1239 }
1240 
1241 static Value *
1242 createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
1243                     const ScalarEvolution::LoopInvariantPredicate &LIP,
1244                     SCEVExpander &Rewriter) {
1245   ICmpInst::Predicate InvariantPred = LIP.Pred;
1246   BasicBlock *Preheader = L->getLoopPreheader();
1247   assert(Preheader && "Preheader doesn't exist");
1248   Rewriter.setInsertPoint(Preheader->getTerminator());
1249   auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1250   auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1251   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1252   if (ExitIfTrue)
1253     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1254   IRBuilder<> Builder(Preheader->getTerminator());
1255   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1256   return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1257                             BI->getCondition()->getName());
1258 }
1259 
/// Try to compute a replacement for the exit condition \p ICmp: either a
/// constant (the exit is provably always/never taken) or a loop-invariant
/// condition evaluated in the preheader.  \p MaxIter bounds how many
/// iterations the condition is actually evaluated for; \p Inverted is true
/// if the branch stays in the loop on the false edge.  Returns std::nullopt
/// if no loop-invariant equivalent could be found.
static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  ICmpInst::Predicate Pred = ICmp->getPredicate();
  Value *LHS = ICmp->getOperand(0);
  Value *RHS = ICmp->getOperand(1);

  // 'LHS pred RHS' should now mean that we stay in loop.
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true or false?
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types so MaxIter matches the compared value's width:
  // zero-extend a narrower MaxIter, or truncate a wider one when we can show
  // it fits in the narrower type.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    // Semantically skip last iter is "subtract 1, do not bother about unsigned
    // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
    // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
    // So we manually construct umin(a - 1, b - 1).
    SmallVector<const SCEV *, 4> Elements;
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
      for (auto *Op : UMin->operands())
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
    } else
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return std::nullopt;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
  else
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
}
1317 
/// For the exiting branch \p BI whose exit count is unknown, walk through the
/// and/or structure of its condition, and try to replace each leaf icmp with
/// a constant or a loop-invariant condition (via createReplacement).
/// \p MaxIter bounds the number of iterations on which the condition runs;
/// \p SkipLastIter indicates the condition's value on the last iteration is
/// irrelevant.  Returns true if any leaf condition was replaced.
static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
      "Not a loop exit!");

  // For branch that stays in loop by TRUE condition, go through AND. For branch
  // that stays in loop by FALSE condition, go through OR. Both gives the
  // similar logic: "stay in loop iff all conditions are true(false)".
  bool Inverted = L->contains(BI->getSuccessor(1));
  SmallVector<ICmpInst *, 4> LeafConditions;
  SmallVector<Value *, 4> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  Value *OldCond = BI->getCondition();
  Visited.insert(OldCond);
  Worklist.push_back(OldCond);

  // Decompose one level of logical and/or, queueing both operands.  Returns
  // false if V is not the expected logical operation.
  auto GoThrough = [&](Value *V) {
    Value *LHS = nullptr, *RHS = nullptr;
    if (Inverted) {
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
        return false;
    } else {
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
        return false;
    }
    if (Visited.insert(LHS).second)
      Worklist.push_back(LHS);
    if (Visited.insert(RHS).second)
      Worklist.push_back(RHS);
    return true;
  };

  do {
    Value *Curr = Worklist.pop_back_val();
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
    // those with one use, to avoid instruction duplication.
    if (Curr->hasOneUse())
      if (!GoThrough(Curr))
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
          LeafConditions.push_back(ICmp);
  } while (!Worklist.empty());

  // If the current basic block has the same exit count as the whole loop, and
  // it consists of multiple icmp's, try to collect all icmp's that give exact
  // same exit count. For all other icmp's, we could use one less iteration,
  // because their value on the last iteration doesn't really matter.
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
  if (!SkipLastIter && LeafConditions.size() > 1 &&
      SE->getExitCount(L, ExitingBB,
                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
          MaxIter)
    for (auto *ICmp : LeafConditions) {
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
                                             /*ControlsExit*/ false);
      auto *ExitMax = EL.SymbolicMaxNotTaken;
      if (isa<SCEVCouldNotCompute>(ExitMax))
        continue;
      // They could be of different types (specifically this happens after
      // IV widening).
      auto *WiderType =
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
      auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
      auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on last iter, so this one doesn't
    // really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
    }
    if (auto Replaced =
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
                              OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(NewCond);
      DeadInsts.push_back(OldCond);
      // Make sure we no longer consider this condition as failing on last
      // iteration.
      ICmpsFailingOnLastIter.erase(OldCond);
    }
  }
  return Changed;
}
1421 
/// Canonicalize loop exit conditions of the form (icmp signed-pred zext(X),
/// RHS): first convert the signed predicate to its unsigned equivalent when
/// the range of RHS proves sign is irrelevant, then try to "rotate" the zext
/// out of the loop by comparing X against trunc(RHS) instead.  Returns true
/// if any IR was changed.
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags.  The alternative to this would be to extend the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers.  The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    // Only handle compares whose sole use is the exit branch; otherwise
    // changing the predicate would affect other users.
    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results.  For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    // If every value RHS can take lies within zext's image of the narrow
    // type (i.e. fits in InnerBitWidth bits when viewed unsigned), the sign
    // bit of both operands is known zero and signed/unsigned predicates
    // agree.
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed.  We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions.  The exception is when we have a zext(add-rec).  The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant.  This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      // Materialize trunc(RHS) in the preheader so it is computed once,
      // outside the loop.
      auto *NewRHS = CastInst::Create(
          Instruction::Trunc, RHS, LHSOp->getType(), "",
          L->getLoopPreheader()->getTerminator()->getIterator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };


    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    // Same range check as above: rotation is only sound if RHS round-trips
    // through trunc/zext, i.e. zext(trunc(RHS)) == RHS.
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}
1555 
1556 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1557   SmallVector<BasicBlock*, 16> ExitingBlocks;
1558   L->getExitingBlocks(ExitingBlocks);
1559 
1560   // Remove all exits which aren't both rewriteable and execute on every
1561   // iteration.
1562   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1563     // If our exitting block exits multiple loops, we can only rewrite the
1564     // innermost one.  Otherwise, we're changing how many times the innermost
1565     // loop runs before it exits.
1566     if (LI->getLoopFor(ExitingBB) != L)
1567       return true;
1568 
1569     // Can't rewrite non-branch yet.
1570     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1571     if (!BI)
1572       return true;
1573 
1574     // Likewise, the loop latch must be dominated by the exiting BB.
1575     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1576       return true;
1577 
1578     if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1579       // If already constant, nothing to do. However, if this is an
1580       // unconditional exit, we can still replace header phis with their
1581       // preheader value.
1582       if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1583         replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1584       return true;
1585     }
1586 
1587     return false;
1588   });
1589 
1590   if (ExitingBlocks.empty())
1591     return false;
1592 
1593   // Get a symbolic upper bound on the loop backedge taken count.
1594   const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1595   if (isa<SCEVCouldNotCompute>(MaxBECount))
1596     return false;
1597 
1598   // Visit our exit blocks in order of dominance. We know from the fact that
1599   // all exits must dominate the latch, so there is a total dominance order
1600   // between them.
1601   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1602                // std::sort sorts in ascending order, so we want the inverse of
1603                // the normal dominance relation.
1604                if (A == B) return false;
1605                if (DT->properlyDominates(A, B))
1606                  return true;
1607                else {
1608                  assert(DT->properlyDominates(B, A) &&
1609                         "expected total dominance order!");
1610                  return false;
1611                }
1612   });
1613 #ifdef ASSERT
1614   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1615     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1616   }
1617 #endif
1618 
1619   bool Changed = false;
1620   bool SkipLastIter = false;
1621   const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1622   auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1623     if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1624       return;
1625     if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1626       CurrMaxExit = MaxExitCount;
1627     else
1628       CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1629     // If the loop has more than 1 iteration, all further checks will be
1630     // executed 1 iteration less.
1631     if (CurrMaxExit == MaxBECount)
1632       SkipLastIter = true;
1633   };
1634   SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1635   for (BasicBlock *ExitingBB : ExitingBlocks) {
1636     const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1637     const SCEV *MaxExitCount = SE->getExitCount(
1638         L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1639     if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1640       // Okay, we do not know the exit count here. Can we at least prove that it
1641       // will remain the same within iteration space?
1642       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1643       auto OptimizeCond = [&](bool SkipLastIter) {
1644         return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1645                                                     MaxBECount, SkipLastIter,
1646                                                     SE, Rewriter, DeadInsts);
1647       };
1648 
1649       // TODO: We might have proved that we can skip the last iteration for
1650       // this check. In this case, we only want to check the condition on the
1651       // pre-last iteration (MaxBECount - 1). However, there is a nasty
1652       // corner case:
1653       //
1654       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1655       //
1656       // If we could not prove that len != 0, then we also could not prove that
1657       // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1658       // OptimizeCond will likely not prove anything for it, even if it could
1659       // prove the same fact for len.
1660       //
1661       // As a temporary solution, we query both last and pre-last iterations in
1662       // hope that we will be able to prove triviality for at least one of
1663       // them. We can stop querying MaxBECount for this case once SCEV
1664       // understands that (MaxBECount - 1) will not overflow here.
1665       if (OptimizeCond(false))
1666         Changed = true;
1667       else if (SkipLastIter && OptimizeCond(true))
1668         Changed = true;
1669       UpdateSkipLastIter(MaxExitCount);
1670       continue;
1671     }
1672 
1673     UpdateSkipLastIter(ExactExitCount);
1674 
1675     // If we know we'd exit on the first iteration, rewrite the exit to
1676     // reflect this.  This does not imply the loop must exit through this
1677     // exit; there may be an earlier one taken on the first iteration.
1678     // We know that the backedge can't be taken, so we replace all
1679     // the header PHIs with values coming from the preheader.
1680     if (ExactExitCount->isZero()) {
1681       foldExit(L, ExitingBB, true, DeadInsts);
1682       replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1683       Changed = true;
1684       continue;
1685     }
1686 
1687     assert(ExactExitCount->getType()->isIntegerTy() &&
1688            MaxBECount->getType()->isIntegerTy() &&
1689            "Exit counts must be integers");
1690 
1691     Type *WiderType =
1692         SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1693     ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1694     MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1695     assert(MaxBECount->getType() == ExactExitCount->getType());
1696 
1697     // Can we prove that some other exit must be taken strictly before this
1698     // one?
1699     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1700                                      ExactExitCount)) {
1701       foldExit(L, ExitingBB, false, DeadInsts);
1702       Changed = true;
1703       continue;
1704     }
1705 
1706     // As we run, keep track of which exit counts we've encountered.  If we
1707     // find a duplicate, we've found an exit which would have exited on the
1708     // exiting iteration, but (from the visit order) strictly follows another
1709     // which does the same and is thus dead.
1710     if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1711       foldExit(L, ExitingBB, false, DeadInsts);
1712       Changed = true;
1713       continue;
1714     }
1715 
1716     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1717     // here.  If we kept track of the min of dominanting exits so far, we could
1718     // discharge exits with EC >= MDEC. This is less powerful than the existing
1719     // transform (since later exits aren't considered), but potentially more
1720     // powerful for any case where SCEV can prove a >=u b, but neither a == b
1721     // or a >u b.  Such a case is not currently known.
1722   }
1723   return Changed;
1724 }
1725 
/// For a read-only loop whose exact backedge-taken count is computable,
/// rewrite each predicatable exit condition into a loop-invariant compare of
/// that exit's count against the backedge-taken count.  Returns true if the
/// IR changed.  Gated by the LoopPredication option.
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration.  Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  // Returns true when this exit cannot be rewritten into an invariant
  // compare (wrong loop, non-branch terminator, already constant, exit phis,
  // or no expandable exit count).
  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
    BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).  Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known inequal.
  llvm::sort(ExitingBlocks,
            [&](BasicBlock *A, BasicBlock *B) {
              // std::sort sorts in ascending order, so we want the inverse of
              // the normal dominance relation, plus a tie breaker for blocks
              // unordered by dominance.
              if (DT->properlyDominates(A, B)) return true;
              if (DT->properlyDominates(B, A)) return false;
              return A->getName() < B->getName();
            });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.  Truncate the list at the first bad
  // exit; everything before it is safe to predicate.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes..
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO:isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      // This exit fires exactly when the backedge count is exhausted, so the
      // condition folds to "always exit" (polarity depends on which successor
      // stays in the loop).
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      // Otherwise compare this exit's count against the exact BTC, widening
      // to a common type if needed.
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
    RunUnswitching = true;
  }

  return Changed;
}
1889 
1890 //===----------------------------------------------------------------------===//
1891 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1892 //===----------------------------------------------------------------------===//
1893 
/// Top-level driver: runs the induction-variable simplification subpasses on
/// \p L in a deliberate order (FP IV rewrite, simplify/extend, exit-value
/// rewriting, congruent-IV elimination, exit-condition canonicalization and
/// optimization, loop predication, LFTR, then dead-code cleanup).  Returns
/// true if any IR was changed.  Requires LCSSA and LoopSimplify form.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop.  Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter))  {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      if (!Rewriter.isSafeToExpand(ExitCount))
        continue;

      Changed |= linearFunctionTestReplace(L, ExitingBB,
                                           ExitCount, IndVar,
                                           Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}
2056 
2057 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2058                                           LoopStandardAnalysisResults &AR,
2059                                           LPMUpdater &) {
2060   Function *F = L.getHeader()->getParent();
2061   const DataLayout &DL = F->getParent()->getDataLayout();
2062 
2063   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2064                      WidenIndVars && AllowIVWidening);
2065   if (!IVS.run(&L))
2066     return PreservedAnalyses::all();
2067 
2068   auto PA = getLoopPassPreservedAnalyses();
2069   PA.preserveSet<CFGAnalyses>();
2070   if (IVS.runUnswitching()) {
2071     AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR);
2072     PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2073   }
2074 
2075   if (AR.MSSA)
2076     PA.preserve<MemorySSAAnalysis>();
2077   return PA;
2078 }
2079