//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
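//      For instance (illustrative, not a specific test case):
//        'for (i = 0; i != n; ++i) {} use(i)' can become 'use(n)', leaving
//        a dead loop for later passes to delete.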
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool RunUnswitching = false;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzeable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);

  bool runUnswitching() const { return RunUnswitching; }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add or the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }
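  // Illustrative instance of the equality-predicate check above: with
  // InitValue = 0, IncValue = 3, ExitValue = 10 and an EQ/NE test, Range = 10
  // and Leftover = 1, so an integer IV stepping 0,3,6,9,12,... would miss the
  // exit value and wrap where the FP IV would not; we bail out.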

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI =
      PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
  NewPHI->addIncoming(ConstantInt::getSigned(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));
  NewPHI->setDebugLoc(PN->getDebugLoc());

  Instruction *NewAdd = BinaryOperator::CreateAdd(
      NewPHI, ConstantInt::getSigned(Int32Ty, IncValue),
      Incr->getName() + ".int", Incr->getIterator());
  NewAdd->setDebugLoc(Incr->getDebugLoc());
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(
      TheBr->getIterator(), NewPred, NewAdd,
      ConstantInt::getSigned(Int32Ty, ExitValue), Compare->getName());
  NewCompare->setDebugLoc(Compare->getDebugLoc());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                       PN->getParent()->getFirstInsertionPt());
    Conv->setDebugLoc(PN->getDebugLoc());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (WeakTrackingVH &PHI : PHIs)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
      Changed |= handleFloatingPointIV(L, PN);
  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop-invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute the cost of all instructions on the induction variable
  // more comprehensively when necessary.
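  // E.g. (illustrative): if the target reports a higher cost for an i64 add
  // than for an i32 add, we keep the i32 IV rather than widening it to i64.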
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed'
  // if there are multiple users with both sign- and zero-extensions,
  // in order not to introduce nondeterministic behaviour based on the
  // unspecified order of a PHI node's users-iterator.
  WI.IsSigned |= IsSigned;
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = Intrinsic::getDeclarationIfExists(
      L->getBlocks()[0]->getModule(), Intrinsic::experimental_guard);
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // sign/zero extend evaluation until needed, and avoid running other
    // SCEV-based analyses prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
                                             Rewriter, &Visitor);

      Changed |= C;
      RunUnswitching |= U;
      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated PHI node if so.  Otherwise, return null.  Note
/// that this is less general than SCEV's AddRec checking.
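///
/// An IR shape this recognizes (sketch):
///   %iv = phi [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add %iv, %step   ; %step loop-invariant; add/sub may commute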
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    [[fallthrough]];
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value.  Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test.  This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example, Arguments
  // may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if the given phi is a "counter" in L.  A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1.  Note that L must have exactly one latch.
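///
/// In SCEV terms, the recognized form is roughly {Start,+,1}<L> for an
/// arbitrary Start, with the latch increment itself also an add recurrence.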
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec with a step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (isAlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter with unit stride) after the backedge of loop
/// L is taken ExitCount times.
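///
/// Conceptually (sketch): IVLimit = Start + ExitCount * Step with Step == 1,
/// or Start + (ExitCount + 1) * Step when UsePostInc is set; see the
/// evaluateAtIteration call below.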
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // For integer IVs, truncate the IV before computing the limit unless we
  // know a priori that the limit must be a constant when evaluated in the
  // bitwidth of the IV.  We prefer (potentially) keeping a truncate of the
  // IV in the loop over a (potentially) expensive expansion of the widened
  // exit count add(zext(add)) expression.
  if (IndVar->getType()->isIntegerTy() &&
      SE->getTypeSizeInBits(AR->getType()) >
      SE->getTypeSizeInBits(ExitCount->getType())) {
    const SCEV *IVInit = AR->getStart();
    if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
      AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
  }

  const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
  const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
  assert(SE->isLoopInvariant(IVLimit, L) &&
         "Computed iteration count is not loop invariant!");
  return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
                                ExitingBB->getTerminator());
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
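///
/// For example (illustrative): a latch test 'icmp slt i64 %iv, %n' with a
/// computable trip count can become 'icmp ne i64 %iv.next, %limit', where
/// %limit is the expanded value of the IV after the backedge is taken
/// ExitCount times.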
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
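  // E.g. (illustrative): an 'add nuw nsw' increment keeps only whichever of
  // nuw/nsw SCEV proved for the post-inc addrec; flags are only ever dropped
  // here, never added.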
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth; we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
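///
/// For example (illustrative): a preheader computation 'x = a + b' whose only
/// users are in the exit block can be moved there, so x does not tie up a
/// register across the loop body.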
1091 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1092   BasicBlock *ExitBlock = L->getExitBlock();
1093   if (!ExitBlock) return false;
1094 
1095   BasicBlock *Preheader = L->getLoopPreheader();
1096   if (!Preheader) return false;
1097 
1098   bool MadeAnyChanges = false;
1099   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1100   BasicBlock::iterator I(Preheader->getTerminator());
1101   while (I != Preheader->begin()) {
1102     --I;
1103     // New instructions were inserted at the end of the preheader.
1104     if (isa<PHINode>(I))
1105       break;
1106 
1107     // Don't move instructions which might have side effects, since the side
1108     // effects need to complete before instructions inside the loop.  Also don't
1109     // move instructions which might read memory, since the loop may modify
1110     // memory. Note that it's okay if the instruction might have undefined
1111     // behavior: LoopSimplify guarantees that the preheader dominates the exit
1112     // block.
1113     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1114       continue;
1115 
1116     // Skip debug info intrinsics.
1117     if (isa<DbgInfoIntrinsic>(I))
1118       continue;
1119 
1120     // Skip eh pad instructions.
1121     if (I->isEHPad())
1122       continue;
1123 
1124     // Don't sink alloca: we never want to sink static alloca's out of the
1125     // entry block, and correctly sinking dynamic alloca's requires
1126     // checks for stacksave/stackrestore intrinsics.
1127     // FIXME: Refactor this check somehow?
1128     if (isa<AllocaInst>(I))
1129       continue;
1130 
1131     // Determine if there is a use in or before the loop (direct or
1132     // otherwise).
1133     bool UsedInLoop = false;
1134     for (Use &U : I->uses()) {
1135       Instruction *User = cast<Instruction>(U.getUser());
1136       BasicBlock *UseBB = User->getParent();
1137       if (PHINode *P = dyn_cast<PHINode>(User)) {
1138         unsigned i =
1139           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1140         UseBB = P->getIncomingBlock(i);
1141       }
1142       if (UseBB == Preheader || L->contains(UseBB)) {
1143         UsedInLoop = true;
1144         break;
1145       }
1146     }
1147 
1148     // If there is, the def must remain in the preheader.
1149     if (UsedInLoop)
1150       continue;
1151 
1152     // Otherwise, sink it to the exit block.
1153     Instruction *ToMove = &*I;
1154     bool Done = false;
1155 
1156     if (I != Preheader->begin()) {
1157       // Skip debug info intrinsics.
1158       do {
1159         --I;
1160       } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1161 
1162       if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1163         Done = true;
1164     } else {
1165       Done = true;
1166     }
1167 
1168     MadeAnyChanges = true;
1169     ToMove->moveBefore(*ExitBlock, InsertPt);
1170     SE->forgetValue(ToMove);
1171     if (Done) break;
1172     InsertPt = ToMove->getIterator();
1173   }
1174 
1175   return MadeAnyChanges;
1176 }
1177 
1178 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1179                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1180   auto *OldCond = BI->getCondition();
1181   LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1182                     << " with " << *NewCond << "\n");
1183   BI->setCondition(NewCond);
1184   if (OldCond->use_empty())
1185     DeadInsts.emplace_back(OldCond);
1186 }
1187 
1188 static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1189                                       bool IsTaken) {
1190   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1191   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1192   auto *OldCond = BI->getCondition();
1193   return ConstantInt::get(OldCond->getType(),
1194                           IsTaken ? ExitIfTrue : !ExitIfTrue);
1195 }
1196 
1197 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1198                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1199   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1200   auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1201   replaceExitCond(BI, NewCond, DeadInsts);
1202 }
1203 
1204 static void replaceLoopPHINodesWithPreheaderValues(
1205     LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1206     ScalarEvolution &SE) {
1207   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1208   auto *LoopPreheader = L->getLoopPreheader();
1209   auto *LoopHeader = L->getHeader();
1210   SmallVector<Instruction *> Worklist;
1211   for (auto &PN : LoopHeader->phis()) {
1212     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1213     for (User *U : PN.users())
1214       Worklist.push_back(cast<Instruction>(U));
1215     SE.forgetValue(&PN);
1216     PN.replaceAllUsesWith(PreheaderIncoming);
1217     DeadInsts.emplace_back(&PN);
1218   }
1219 
1220   // Replacing with the preheader value will often allow IV users to simplify
1221   // (especially if the preheader value is a constant).
1222   SmallPtrSet<Instruction *, 16> Visited;
1223   while (!Worklist.empty()) {
1224     auto *I = cast<Instruction>(Worklist.pop_back_val());
1225     if (!Visited.insert(I).second)
1226       continue;
1227 
1228     // Don't simplify instructions outside the loop.
1229     if (!L->contains(I))
1230       continue;
1231 
1232     Value *Res = simplifyInstruction(I, I->getDataLayout());
1233     if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1234       for (User *U : I->users())
1235         Worklist.push_back(cast<Instruction>(U));
1236       I->replaceAllUsesWith(Res);
1237       DeadInsts.emplace_back(I);
1238     }
1239   }
1240 }
1241 
1242 static Value *
1243 createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
1244                     const ScalarEvolution::LoopInvariantPredicate &LIP,
1245                     SCEVExpander &Rewriter) {
1246   ICmpInst::Predicate InvariantPred = LIP.Pred;
1247   BasicBlock *Preheader = L->getLoopPreheader();
1248   assert(Preheader && "Preheader doesn't exist");
1249   Rewriter.setInsertPoint(Preheader->getTerminator());
1250   auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1251   auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1252   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1253   if (ExitIfTrue)
1254     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1255   IRBuilder<> Builder(Preheader->getTerminator());
1256   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1257   return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1258                             BI->getCondition()->getName());
1259 }
1260 
1261 static std::optional<Value *>
1262 createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1263                   const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1264                   ScalarEvolution *SE, SCEVExpander &Rewriter) {
1265   ICmpInst::Predicate Pred = ICmp->getPredicate();
1266   Value *LHS = ICmp->getOperand(0);
1267   Value *RHS = ICmp->getOperand(1);
1268 
1269   // 'LHS pred RHS' should now mean that we stay in loop.
1270   auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1271   if (Inverted)
1272     Pred = CmpInst::getInversePredicate(Pred);
1273 
1274   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1275   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1276   // Can we prove it to be trivially true or false?
1277   if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
1278     return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1279 
1280   auto *ARTy = LHSS->getType();
1281   auto *MaxIterTy = MaxIter->getType();
1282   // If possible, adjust types.
1283   if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1284     MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1285   else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1286     const SCEV *MinusOne = SE->getMinusOne(ARTy);
1287     const SCEV *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1288     if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1289       MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1290   }
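  // E.g. (hypothetical widths): with an i32 addrec and an i64 MaxIter, MaxIter
  // may only be truncated to i32 if it is provably <= UINT32_MAX; conversely,
  // an i64 addrec with an i32 MaxIter simply zero-extends MaxIter to i64.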
1291 
1292   if (SkipLastIter) {
1293     // Semantically, skipping the last iteration means "subtract 1, do not
1294     // bother about unsigned wrap". getLoopInvariantExitCondDuringFirstIterations
1295     // knows how to deal with umin in a smart way, but umin(a, b) - 1 will
1296     // likely not simplify. So we manually construct umin(a - 1, b - 1).
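    // E.g. (hypothetical operands): MaxIter = umin(8, %n) becomes
    // umin(7, %n - 1) rather than the opaque umin(8, %n) - 1.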
1297     SmallVector<const SCEV *, 4> Elements;
1298     if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
1299       for (const SCEV *Op : UMin->operands())
1300         Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
1301       MaxIter = SE->getUMinFromMismatchedTypes(Elements);
1302     } else
1303       MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
1304   }
1305 
1306   // Check if there is a loop-invariant predicate equivalent to our check.
1307   auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1308                                                                L, BI, MaxIter);
1309   if (!LIP)
1310     return std::nullopt;
1311 
1312   // Can we prove it to be trivially true?
1313   if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1314     return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1315   else
1316     return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
1317 }
1318 
1319 static bool optimizeLoopExitWithUnknownExitCount(
1320     const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1321     bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1322     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1323   assert(
1324       (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1325       "Not a loop exit!");
1326 
1327   // For a branch that stays in the loop on a TRUE condition, go through AND.
1328   // For a branch that stays in the loop on a FALSE condition, go through OR.
1329   // Both give the same logic: "stay in loop iff all conditions are true (false)".
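  // Illustrative sketch (hypothetical IR): 'br i1 (and %c1, %c2), %loop, %exit'
  // stays in the loop only while both %c1 and %c2 are true, whereas
  // 'br i1 (or %c1, %c2), %exit, %loop' stays only while both are false.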
1330   bool Inverted = L->contains(BI->getSuccessor(1));
1331   SmallVector<ICmpInst *, 4> LeafConditions;
1332   SmallVector<Value *, 4> Worklist;
1333   SmallPtrSet<Value *, 4> Visited;
1334   Value *OldCond = BI->getCondition();
1335   Visited.insert(OldCond);
1336   Worklist.push_back(OldCond);
1337 
1338   auto GoThrough = [&](Value *V) {
1339     Value *LHS = nullptr, *RHS = nullptr;
1340     if (Inverted) {
1341       if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1342         return false;
1343     } else {
1344       if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1345         return false;
1346     }
1347     if (Visited.insert(LHS).second)
1348       Worklist.push_back(LHS);
1349     if (Visited.insert(RHS).second)
1350       Worklist.push_back(RHS);
1351     return true;
1352   };
1353 
1354   do {
1355     Value *Curr = Worklist.pop_back_val();
1356     // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1357     // those with one use, to avoid instruction duplication.
1358     if (Curr->hasOneUse())
1359       if (!GoThrough(Curr))
1360         if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
1361           LeafConditions.push_back(ICmp);
1362   } while (!Worklist.empty());
1363 
1364   // If the current basic block has the same exit count as the whole loop, and
1365   // it consists of multiple icmp's, try to collect all icmp's that give the
1366   // exact same exit count. For all other icmp's, we could use one less iteration,
1367   // because their value on the last iteration doesn't really matter.
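  // E.g. (hypothetical conditions): if the block exits on (%c1 || %c2) and only
  // %c2's symbolic exit count matches the loop's, then %c1 never decides the
  // last iteration and can be treated as if that iteration were skipped.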
1368   SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1369   if (!SkipLastIter && LeafConditions.size() > 1 &&
1370       SE->getExitCount(L, ExitingBB,
1371                        ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1372           MaxIter)
1373     for (auto *ICmp : LeafConditions) {
1374       auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
1375                                              /*ControlsExit*/ false);
1376       const SCEV *ExitMax = EL.SymbolicMaxNotTaken;
1377       if (isa<SCEVCouldNotCompute>(ExitMax))
1378         continue;
1379       // They could be of different types (specifically this happens after
1380       // IV widening).
1381       auto *WiderType =
1382           SE->getWiderType(ExitMax->getType(), MaxIter->getType());
1383       const SCEV *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
1384       const SCEV *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
1385       if (WideExitMax == WideMaxIter)
1386         ICmpsFailingOnLastIter.insert(ICmp);
1387     }
1388 
1389   bool Changed = false;
1390   for (auto *OldCond : LeafConditions) {
1391     // Skip last iteration for this icmp under one of two conditions:
1392     // - We do it for all conditions;
1393     // - There is another ICmp that would fail on last iter, so this one doesn't
1394     // really matter.
1395     bool OptimisticSkipLastIter = SkipLastIter;
1396     if (!OptimisticSkipLastIter) {
1397       if (ICmpsFailingOnLastIter.size() > 1)
1398         OptimisticSkipLastIter = true;
1399       else if (ICmpsFailingOnLastIter.size() == 1)
1400         OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
1401     }
1402     if (auto Replaced =
1403             createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
1404                               OptimisticSkipLastIter, SE, Rewriter)) {
1405       Changed = true;
1406       auto *NewCond = *Replaced;
1407       if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
1408         NCI->setName(OldCond->getName() + ".first_iter");
1409       }
1410       LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1411                         << " with " << *NewCond << "\n");
1412       assert(OldCond->hasOneUse() && "Must be!");
1413       OldCond->replaceAllUsesWith(NewCond);
1414       DeadInsts.push_back(OldCond);
1415       // Make sure we no longer consider this condition as failing on last
1416       // iteration.
1417       ICmpsFailingOnLastIter.erase(OldCond);
1418     }
1419   }
1420   return Changed;
1421 }
1422 
1423 bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1424   // Note: This duplicates a particular part of SimplifyIndVars' reasoning.
1425   // We need to duplicate it because, given icmp zext(small-iv), C, IVUsers
1426   // never reaches the icmp since the zext doesn't fold to an AddRec unless
1427   // it already has flags.  The alternative would be to extend the set of
1428   // "interesting" IV users to include the icmp, but doing that regresses
1429   // results in practice: it queries SCEVs before the trip counts that rely
1430   // on them, which leads SCEV to cache sub-optimal answers.  That concern
1431   // about caching sub-optimal results is why we only query SCEVs of the
1432   // loop invariant RHS here.
1433   SmallVector<BasicBlock*, 16> ExitingBlocks;
1434   L->getExitingBlocks(ExitingBlocks);
1435   bool Changed = false;
1436   for (auto *ExitingBB : ExitingBlocks) {
1437     auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1438     if (!BI)
1439       continue;
1440     assert(BI->isConditional() && "exit branch must be conditional");
1441 
1442     auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1443     if (!ICmp || !ICmp->hasOneUse())
1444       continue;
1445 
1446     auto *LHS = ICmp->getOperand(0);
1447     auto *RHS = ICmp->getOperand(1);
1448     // For the range reasoning, avoid computing SCEVs in the loop to avoid
1449     // poisoning the cache with sub-optimal results.  For the must-execute case,
1450     // this is a necessary precondition for correctness.
1451     if (!L->isLoopInvariant(RHS)) {
1452       if (!L->isLoopInvariant(LHS))
1453         continue;
1454       // Same logic applies for the inverse case
1455       std::swap(LHS, RHS);
1456     }
1457 
1458     // Match (icmp signed-cond zext, RHS)
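    // Illustrative sketch (hypothetical IR): for
    //   %cmp = icmp slt i32 (zext i8 %x to i32), %n
    // where %n is known to lie in [0, 256), both operands have a clear sign
    // bit, so the unsigned predicate 'ult' computes the same result.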
1459     Value *LHSOp = nullptr;
1460     if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1461       continue;
1462 
1463     const DataLayout &DL = ExitingBB->getDataLayout();
1464     const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1465     const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1466     auto FullCR = ConstantRange::getFull(InnerBitWidth);
1467     FullCR = FullCR.zeroExtend(OuterBitWidth);
1468     auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1469     if (FullCR.contains(RHSCR)) {
1470       // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1471       // replace the signed condition with the unsigned version.
1472       ICmp->setPredicate(ICmp->getUnsignedPredicate());
1473       Changed = true;
1474       // Note: No SCEV invalidation needed.  We've changed the predicate, but
1475       // have not changed exit counts, or the values produced by the compare.
1476       continue;
1477     }
1478   }
1479 
1480   // Now that we've canonicalized the condition to match the extend,
1481   // see if we can rotate the extend out of the loop.
1482   for (auto *ExitingBB : ExitingBlocks) {
1483     auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1484     if (!BI)
1485       continue;
1486     assert(BI->isConditional() && "exit branch must be conditional");
1487 
1488     auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1489     if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1490       continue;
1491 
1492     bool Swapped = false;
1493     auto *LHS = ICmp->getOperand(0);
1494     auto *RHS = ICmp->getOperand(1);
1495     if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
1496       // Nothing to rotate
1497       continue;
1498     if (L->isLoopInvariant(LHS)) {
1499       // Same logic applies for the inverse case until we actually pick
1500       // which operand of the compare to update.
1501       Swapped = true;
1502       std::swap(LHS, RHS);
1503     }
1504     assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1505 
1506     // Match (icmp unsigned-cond zext, RHS)
1507     // TODO: Extend to handle corresponding sext/signed-cmp case
1508     // TODO: Extend to other invertible functions
1509     Value *LHSOp = nullptr;
1510     if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1511       continue;
1512 
1513     // In general, we only rotate if we can do so without increasing the number
1514     // of instructions.  The exception is when we have a zext(add-rec).  The
1515     // reason for allowing this exception is that we know we need to get rid
1516     // of the zext for SCEV to be able to compute a trip count for said loops;
1517     // we consider the new trip count valuable enough to increase instruction
1518     // count by one.
1519     if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1520       continue;
1521 
1522     // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS,
1523     // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1524     // when the zext is loop varying and RHS is loop invariant.  This converts
1525     // loop-varying work to loop-invariant work.
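    // Illustrative sketch (hypothetical IR):
    //   %cmp = icmp ult i64 (zext i32 %iv to i64), %len
    // becomes
    //   %len.trunc = trunc i64 %len to i32   ; hoisted into the preheader
    //   %cmp = icmp ult i32 %iv, %len.trunc
    // moving the per-iteration extension out of the loop.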
1526     auto doRotateTransform = [&]() {
1527       assert(ICmp->isUnsigned() && "must have proven unsigned already");
1528       auto *NewRHS = CastInst::Create(
1529           Instruction::Trunc, RHS, LHSOp->getType(), "",
1530           L->getLoopPreheader()->getTerminator()->getIterator());
1531       ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
1532       ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
1533       // Samesign flag cannot be preserved after narrowing the compare.
1534       ICmp->setSameSign(false);
1535       if (LHS->use_empty())
1536         DeadInsts.push_back(LHS);
1537     };
1538
1540     const DataLayout &DL = ExitingBB->getDataLayout();
1541     const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1542     const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1543     auto FullCR = ConstantRange::getFull(InnerBitWidth);
1544     FullCR = FullCR.zeroExtend(OuterBitWidth);
1545     auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1546     if (FullCR.contains(RHSCR)) {
1547       doRotateTransform();
1548       Changed = true;
1549       // Note: we are leaving SCEV in an unfortunately imprecise state here,
1550       // as rotation tends to reveal information about trip counts that was
1551       // not previously visible.
1552       continue;
1553     }
1554   }
1555 
1556   return Changed;
1557 }
1558 
1559 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1560   SmallVector<BasicBlock*, 16> ExitingBlocks;
1561   L->getExitingBlocks(ExitingBlocks);
1562 
1563   // Remove all exits which aren't both rewriteable and guaranteed to
1564   // execute on every iteration.
1565   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1566     // If our exiting block exits multiple loops, we can only rewrite the
1567     // innermost one.  Otherwise, we're changing how many times the innermost
1568     // loop runs before it exits.
1569     if (LI->getLoopFor(ExitingBB) != L)
1570       return true;
1571 
1572     // Can't rewrite non-branch yet.
1573     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1574     if (!BI)
1575       return true;
1576 
1577     // Likewise, the loop latch must be dominated by the exiting BB.
1578     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1579       return true;
1580 
1581     if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1582       // If already constant, nothing to do. However, if this is an
1583       // unconditional exit, we can still replace header phis with their
1584       // preheader value.
1585       if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1586         replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1587       return true;
1588     }
1589 
1590     return false;
1591   });
1592 
1593   if (ExitingBlocks.empty())
1594     return false;
1595 
1596   // Get a symbolic upper bound on the loop backedge taken count.
1597   const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1598   if (isa<SCEVCouldNotCompute>(MaxBECount))
1599     return false;
1600 
1601   // Visit our exit blocks in order of dominance.  Since all exits must
1602   // dominate the latch, we know there is a total dominance order
1603   // between them.
1604   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1605                // llvm::sort sorts in ascending order, so we want the inverse of
1606                // the normal dominance relation.
1607                if (A == B) return false;
1608                if (DT->properlyDominates(A, B))
1609                  return true;
1610                else {
1611                  assert(DT->properlyDominates(B, A) &&
1612                         "expected total dominance order!");
1613                  return false;
1614                }
1615   });
1616 #ifndef NDEBUG
1617   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1618     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1619   }
1620 #endif
1621 
1622   bool Changed = false;
1623   bool SkipLastIter = false;
1624   const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1625   auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1626     if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1627       return;
1628     if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1629       CurrMaxExit = MaxExitCount;
1630     else
1631       CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1632     // Once the exits seen so far jointly cover the maximum backedge-taken
1633     // count, all further checks are effectively executed one iteration less.
1634     if (CurrMaxExit == MaxBECount)
1635       SkipLastIter = true;
1636   };
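  // E.g. (hypothetical counts): if MaxBECount = umin(%a, %b) and the exits
  // processed so far have counts %a and %b, their umin equals MaxBECount, so
  // every remaining check can assume the last iteration is never reached.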
1637   SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1638   for (BasicBlock *ExitingBB : ExitingBlocks) {
1639     const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1640     const SCEV *MaxExitCount = SE->getExitCount(
1641         L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1642     if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1643       // Okay, we do not know the exit count here. Can we at least prove that it
1644       // will remain the same within iteration space?
1645       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1646       auto OptimizeCond = [&](bool SkipLastIter) {
1647         return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1648                                                     MaxBECount, SkipLastIter,
1649                                                     SE, Rewriter, DeadInsts);
1650       };
1651 
1652       // TODO: We might have proved that we can skip the last iteration for
1653       // this check. In this case, we only want to check the condition on the
1654       // pre-last iteration (MaxBECount - 1). However, there is a nasty
1655       // corner case:
1656       //
1657       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1658       //
1659       // If we could not prove that len != 0, then we also could not prove that
1660     // (len - 1) is not UINT_MAX. If we simply query (len - 1), then
1661       // OptimizeCond will likely not prove anything for it, even if it could
1662       // prove the same fact for len.
1663       //
1664       // As a temporary solution, we query both last and pre-last iterations in
1665       // hope that we will be able to prove triviality for at least one of
1666       // them. We can stop querying MaxBECount for this case once SCEV
1667       // understands that (MaxBECount - 1) will not overflow here.
1668       if (OptimizeCond(false))
1669         Changed = true;
1670       else if (SkipLastIter && OptimizeCond(true))
1671         Changed = true;
1672       UpdateSkipLastIter(MaxExitCount);
1673       continue;
1674     }
1675 
1676     UpdateSkipLastIter(ExactExitCount);
1677 
1678     // If we know we'd exit on the first iteration, rewrite the exit to
1679     // reflect this.  This does not imply the loop must exit through this
1680     // exit; there may be an earlier one taken on the first iteration.
1681     // We know that the backedge can't be taken, so we replace all
1682     // the header PHIs with values coming from the preheader.
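    // E.g. (hypothetical source): 'for (i = 0; i != 0; ++i)' has an exit count
    // of zero, so the exit branch folds to always-taken and each header phi is
    // rewritten to its incoming preheader value.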
1683     if (ExactExitCount->isZero()) {
1684       foldExit(L, ExitingBB, true, DeadInsts);
1685       replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1686       Changed = true;
1687       continue;
1688     }
1689 
1690     assert(ExactExitCount->getType()->isIntegerTy() &&
1691            MaxBECount->getType()->isIntegerTy() &&
1692            "Exit counts must be integers");
1693 
1694     Type *WiderType =
1695         SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1696     ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1697     MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1698     assert(MaxBECount->getType() == ExactExitCount->getType());
1699 
1700     // Can we prove that some other exit must be taken strictly before this
1701     // one?
1702     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1703                                      ExactExitCount)) {
1704       foldExit(L, ExitingBB, false, DeadInsts);
1705       Changed = true;
1706       continue;
1707     }
1708 
1709     // As we run, keep track of which exit counts we've encountered.  If we
1710     // find a duplicate, we've found an exit which would exit on the same
1711     // iteration as an earlier one, but (from the visit order) strictly
1712     // follows it, and is thus dead.
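    // E.g. (hypothetical IR): given two exits both guarded by
    // 'icmp eq i32 %iv, %n', the dominated one can never fire first; the
    // dominating exit leaves the loop on that same iteration, so the second
    // test is dead and can be folded away.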
1713     if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1714       foldExit(L, ExitingBB, false, DeadInsts);
1715       Changed = true;
1716       continue;
1717     }
1718 
1719     // TODO: There might be another opportunity to leverage SCEV's reasoning
1720     // here.  If we kept track of the min of dominating exits so far, we could
1721     // discharge exits with EC >= MDEC. This is less powerful than the existing
1722     // transform (since later exits aren't considered), but potentially more
1723     // powerful for any case where SCEV can prove a >=u b, but neither a == b
1724     // nor a >u b.  Such a case is not currently known.
1725   }
1726   return Changed;
1727 }
1728 
1729 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1730   SmallVector<BasicBlock*, 16> ExitingBlocks;
1731   L->getExitingBlocks(ExitingBlocks);
1732 
1733   // Finally, see if we can rewrite our exit conditions into a loop invariant
1734   // form. If we have a read-only loop, and we can tell that we must exit down
1735   // a path which does not need any of the values computed within the loop, we
1736   // can rewrite the loop to exit on the first iteration.  Note that this
1737   // neither a) tells us the loop exits on the first iteration (unless
1738   // *all* exits are predicateable) nor b) tells us *which* exit might be taken.
1739   // This transformation looks a lot like a form of dead loop elimination, but
1740   // restricted to read-only loops and without necessarily needing to kill the
1741   // loop entirely.
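  // Illustrative sketch (hypothetical shape): a read-only loop with two
  // IV-based exits whose counts are computable can have each exit test
  // rewritten to the loop-invariant question "is this the exit whose count
  // equals the exact BTC?", so the taken exit fires on its first evaluation.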
1742   if (!LoopPredication)
1743     return false;
1744 
1745   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1746   // through *explicit* control flow.  We have to eliminate the possibility of
1747   // implicit exits (see below) before we know it's truly exact.
1748   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1749   if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1750     return false;
1751 
1752   assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1753   assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1754 
1755   auto BadExit = [&](BasicBlock *ExitingBB) {
1756     // If our exiting block exits multiple loops, we can only rewrite the
1757     // innermost one.  Otherwise, we're changing how many times the innermost
1758     // loop runs before it exits.
1759     if (LI->getLoopFor(ExitingBB) != L)
1760       return true;
1761 
1762     // Can't rewrite non-branch yet.
1763     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1764     if (!BI)
1765       return true;
1766 
1767     // If already constant, nothing to do.
1768     if (isa<Constant>(BI->getCondition()))
1769       return true;
1770 
1771     // If the exit block has phis, we need to be able to compute the values
1772     // within the loop which contains them.  This assumes trivially LCSSA phis
1773     // have already been removed; TODO: generalize.
1774     BasicBlock *ExitBlock =
1775         BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1776     if (!ExitBlock->phis().empty())
1777       return true;
1778 
1779     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1780     if (isa<SCEVCouldNotCompute>(ExitCount) ||
1781         !Rewriter.isSafeToExpand(ExitCount))
1782       return true;
1783 
1784     assert(SE->isLoopInvariant(ExitCount, L) &&
1785            "Exit count must be loop invariant");
1786     assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1787     return false;
1788   };
1789 
1790   // Make sure all exits dominate the latch. This means there is a linear chain
1791   // of exits. We check this before sorting so we have a total order.
1792   BasicBlock *Latch = L->getLoopLatch();
1793   for (BasicBlock *ExitingBB : ExitingBlocks)
1794     if (!DT->dominates(ExitingBB, Latch))
1795       return false;
1796 
1797   // If we have any exits which can't be predicated themselves, then we can't
1798   // predicate any exit which isn't guaranteed to execute before it.  Consider
1799   // two exits (a) and (b) which would both exit on the same iteration.  If we
1800   // can predicate (b), but not (a), and (a) precedes (b) along some path, then
1801   // we could convert a loop from exiting through (a) to one exiting through
1802   // (b).  Note that this problem exists only for exits with the same exit
1803   // count, and we could be more aggressive when exit counts are known unequal.
1804   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1805     // llvm::sort sorts in ascending order, so we want the inverse of
1806     // the normal dominance relation.
1807     if (A == B)
1808       return false;
1809     if (DT->properlyDominates(A, B))
1810       return true;
1811     if (DT->properlyDominates(B, A))
1812       return false;
1813     llvm_unreachable("Should have total dominance order");
1814   });
1815 
1816   // Make sure our exit blocks are really a total order (i.e. a linear chain of
1817   // exits before the backedge).
1818   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1819     assert(DT->dominates(ExitingBlocks[i - 1], ExitingBlocks[i]) &&
1820            "Not sorted by dominance");
1821 
1822   // Given our sorted total order, we know that exit[j] must be evaluated
1823   // after all exit[i] such that j > i.
1824   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1825     if (BadExit(ExitingBlocks[i])) {
1826       ExitingBlocks.resize(i);
1827       break;
1828     }
1829 
1830   if (ExitingBlocks.empty())
1831     return false;
1832 
1833   // At this point, ExitingBlocks consists of only those blocks which are
1834   // predicatable.  Given that, we know we have at least one exit we can
1835   // predicate if the loop doesn't have side effects and doesn't have any
1836   // implicit exits (because then our exact BTC isn't actually exact).
1837   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
1838   // suggestions on how to improve this?  I can obviously bail out for outer
1839   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
1840   // is that enough for *all* side effects?
1841   for (BasicBlock *BB : L->blocks())
1842     for (auto &I : *BB)
1843       // TODO: isGuaranteedToTransfer
1844       if (I.mayHaveSideEffects())
1845         return false;
1846 
1847   bool Changed = false;
1848   // Finally, do the actual predication for all predicatable blocks.  A couple
1849   // of notes here:
1850   // 1) We don't bother to constant fold dominated exits with identical exit
1851   //    counts; that's simply a form of CSE/equality propagation and we leave
1852   //    it for dedicated passes.
1853   // 2) We insert the comparison at the branch.  Hoisting introduces additional
1854   //    legality constraints and we leave that to dedicated logic.  We want to
1855   //    predicate even if we can't insert a loop invariant expression as
1856   //    peeling or unrolling will likely reduce the cost of the otherwise loop
1857   //    varying check.
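  // Illustrative sketch (hypothetical values): an exit with count %ec whose
  // in-loop successor is successor(0) gets
  //   %newcond = icmp ne iN %ec, %btc   ; stay in loop while counts differ
  // i.e. the branch becomes loop-invariant: it is taken on its first visit
  // iff this exit's count equals the exact backedge-taken count.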
1858   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1859   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1860   Value *ExactBTCV = nullptr; // Lazily generated if needed.
1861   for (BasicBlock *ExitingBB : ExitingBlocks) {
1862     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1863 
1864     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1865     Value *NewCond;
1866     if (ExitCount == ExactBTC) {
1867       NewCond = L->contains(BI->getSuccessor(0)) ?
1868         B.getFalse() : B.getTrue();
1869     } else {
1870       Value *ECV = Rewriter.expandCodeFor(ExitCount);
1871       if (!ExactBTCV)
1872         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1873       Value *RHS = ExactBTCV;
1874       if (ECV->getType() != RHS->getType()) {
1875         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1876         ECV = B.CreateZExt(ECV, WiderTy);
1877         RHS = B.CreateZExt(RHS, WiderTy);
1878       }
1879       auto Pred = L->contains(BI->getSuccessor(0)) ?
1880         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1881       NewCond = B.CreateICmp(Pred, ECV, RHS);
1882     }
1883     Value *OldCond = BI->getCondition();
1884     BI->setCondition(NewCond);
1885     if (OldCond->use_empty())
1886       DeadInsts.emplace_back(OldCond);
1887     Changed = true;
1888     RunUnswitching = true;
1889   }
1890 
1891   return Changed;
1892 }
1893 
1894 //===----------------------------------------------------------------------===//
1895 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1896 //===----------------------------------------------------------------------===//
1897 
1898 bool IndVarSimplify::run(Loop *L) {
1899   // We need (and expect!) the incoming loop to be in LCSSA.
1900   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1901          "LCSSA required to run indvars!");
1902 
1903   // If LoopSimplify form is not available, stay out of trouble. Some notes:
1904   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
1905   //    canonicalization can be a pessimization without LSR to "clean up"
1906   //    afterwards.
1907   //  - We depend on having a preheader; in particular,
1908   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
1909   //    and we're in trouble if we can't find the induction variable even when
1910   //    we've manually inserted one.
1911   //  - LFTR relies on having a single backedge.
1912   if (!L->isLoopSimplifyForm())
1913     return false;
1914 
1915   bool Changed = false;
1916   // If there are any floating-point recurrences, attempt to
1917   // transform them to use integer recurrences.
1918   Changed |= rewriteNonIntegerIVs(L);
1919 
1920   // Create a rewriter object which we'll use to transform the code with.
1921   SCEVExpander Rewriter(*SE, DL, "indvars");
1922 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
1923   Rewriter.setDebugType(DEBUG_TYPE);
1924 #endif
1925 
1926   // Eliminate redundant IV users.
1927   //
1928   // Simplification works best when run before other consumers of SCEV. We
1929   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1930   // other expressions involving loop IVs have been evaluated. This helps SCEV
1931   // set no-wrap flags before normalizing sign/zero extension.
1932   Rewriter.disableCanonicalMode();
1933   Changed |= simplifyAndExtend(L, Rewriter, LI);
1934 
1935   // Check to see if we can compute the final value of any expressions
1936   // that are recurrent in the loop, and substitute the exit values from the
1937   // loop into any instructions outside of the loop that use the final values
1938   // of the current expressions.
1939   if (ReplaceExitValue != NeverRepl) {
1940     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1941                                              ReplaceExitValue, DeadInsts)) {
1942       NumReplaced += Rewrites;
1943       Changed = true;
1944     }
1945   }
1946 
1947   // Eliminate redundant IV cycles.
1948   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1949 
1950   // Try to convert exit conditions to unsigned and rotate computation
1951   // out of the loop.  Note: Handles invalidation internally if needed.
1952   Changed |= canonicalizeExitCondition(L);
1953 
1954   // Try to eliminate loop exits based on analyzable exit counts.
1955   if (optimizeLoopExits(L, Rewriter))  {
1956     Changed = true;
1957     // Given we've changed exit counts, notify SCEV
1958     // Some nested loops may share the same folded exit basic block,
1959     // so we need to notify the topmost loop.
1960     SE->forgetTopmostLoop(L);
1961   }
1962 
1963   // Try to form loop invariant tests for loop exits by changing how many
1964   // iterations of the loop run when that is unobservable.
1965   if (predicateLoopExits(L, Rewriter)) {
1966     Changed = true;
1967     // Given we've changed exit counts, notify SCEV
1968     SE->forgetLoop(L);
1969   }
1970 
1971   // If we have a trip count expression, rewrite the loop's exit condition
1972   // using it.
1973   if (!DisableLFTR) {
1974     BasicBlock *PreHeader = L->getLoopPreheader();
1975 
1976     SmallVector<BasicBlock*, 16> ExitingBlocks;
1977     L->getExitingBlocks(ExitingBlocks);
1978     for (BasicBlock *ExitingBB : ExitingBlocks) {
1979       // Can't rewrite non-branch yet.
1980       if (!isa<BranchInst>(ExitingBB->getTerminator()))
1981         continue;
1982 
1983       // If our exiting block exits multiple loops, we can only rewrite the
1984       // innermost one.  Otherwise, we're changing how many times the innermost
1985       // loop runs before it exits.
1986       if (LI->getLoopFor(ExitingBB) != L)
1987         continue;
1988 
1989       if (!needsLFTR(L, ExitingBB))
1990         continue;
1991 
1992       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1993       if (isa<SCEVCouldNotCompute>(ExitCount))
1994         continue;
1995 
1996       // This was handled above, but as we form SCEVs, we can sometimes refine
1997       // existing ones; this allows exit counts to be folded to zero which
1998       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
1999       // until stable to handle cases like this better.
2000       if (ExitCount->isZero())
2001         continue;
2002 
2003       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2004       if (!IndVar)
2005         continue;
2006 
2007       // Avoid high cost expansions.  Note: This heuristic is questionable in
2008       // that our definition of "high cost" is not exactly principled.
2009       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2010                                        TTI, PreHeader->getTerminator()))
2011         continue;
2012 
2013       if (!Rewriter.isSafeToExpand(ExitCount))
2014         continue;
2015 
2016       Changed |= linearFunctionTestReplace(L, ExitingBB,
2017                                            ExitCount, IndVar,
2018                                            Rewriter);
2019     }
2020   }
2021   // Clear the rewriter cache, because values that are in the rewriter's cache
2022   // can be deleted in the loop below, causing the AssertingVH in the cache to
2023   // trigger.
2024   Rewriter.clear();
2025 
2026   // Now that we're done iterating through lists, clean up any instructions
2027   // which are now dead.
2028   while (!DeadInsts.empty()) {
2029     Value *V = DeadInsts.pop_back_val();
2030 
2031     if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2032       Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2033     else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2034       Changed |=
2035           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2036   }
2037 
2038   // The Rewriter may not be used from this point on.
2039 
2040   // Loop-invariant instructions in the preheader that aren't used in the
2041   // loop may be sunk below the loop to reduce register pressure.
2042   Changed |= sinkUnusedInvariants(L);
2043 
2044   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2045   // trip count and therefore can further simplify exit values in addition to
2046   // rewriteLoopExitValues.
2047   Changed |= rewriteFirstIterationLoopExitValues(L);
2048 
2049   // Clean up dead instructions.
2050   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2051 
2052   // Check a post-condition.
2053   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2054          "Indvars did not preserve LCSSA!");
2055   if (VerifyMemorySSA && MSSAU)
2056     MSSAU->getMemorySSA()->verifyMemorySSA();
2057 
2058   return Changed;
2059 }
2060 
2061 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2062                                           LoopStandardAnalysisResults &AR,
2063                                           LPMUpdater &) {
2064   Function *F = L.getHeader()->getParent();
2065   const DataLayout &DL = F->getDataLayout();
2066 
2067   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2068                      WidenIndVars && AllowIVWidening);
2069   if (!IVS.run(&L))
2070     return PreservedAnalyses::all();
2071 
2072   auto PA = getLoopPassPreservedAnalyses();
2073   PA.preserveSet<CFGAnalyses>();
2074   if (IVS.runUnswitching()) {
2075     AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR);
2076     PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2077   }
2078 
2079   if (AR.MSSA)
2080     PA.preserve<MemorySSAAnalysis>();
2081   return PA;
2082 }
2083