xref: /llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision 2fe81edef6f0b35abffbbc59b649b30ea9c15a62)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/iterator_range.h"
35 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/Analysis/MemorySSA.h"
38 #include "llvm/Analysis/MemorySSAUpdater.h"
39 #include "llvm/Analysis/ScalarEvolution.h"
40 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
41 #include "llvm/Analysis/TargetLibraryInfo.h"
42 #include "llvm/Analysis/TargetTransformInfo.h"
43 #include "llvm/Analysis/ValueTracking.h"
44 #include "llvm/IR/BasicBlock.h"
45 #include "llvm/IR/Constant.h"
46 #include "llvm/IR/ConstantRange.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DataLayout.h"
49 #include "llvm/IR/DerivedTypes.h"
50 #include "llvm/IR/Dominators.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/IRBuilder.h"
53 #include "llvm/IR/InstrTypes.h"
54 #include "llvm/IR/Instruction.h"
55 #include "llvm/IR/Instructions.h"
56 #include "llvm/IR/IntrinsicInst.h"
57 #include "llvm/IR/Intrinsics.h"
58 #include "llvm/IR/Module.h"
59 #include "llvm/IR/Operator.h"
60 #include "llvm/IR/PassManager.h"
61 #include "llvm/IR/PatternMatch.h"
62 #include "llvm/IR/Type.h"
63 #include "llvm/IR/Use.h"
64 #include "llvm/IR/User.h"
65 #include "llvm/IR/Value.h"
66 #include "llvm/IR/ValueHandle.h"
67 #include "llvm/Support/Casting.h"
68 #include "llvm/Support/CommandLine.h"
69 #include "llvm/Support/Compiler.h"
70 #include "llvm/Support/Debug.h"
71 #include "llvm/Support/MathExtras.h"
72 #include "llvm/Support/raw_ostream.h"
73 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
74 #include "llvm/Transforms/Utils/Local.h"
75 #include "llvm/Transforms/Utils/LoopUtils.h"
76 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
77 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
78 #include <cassert>
79 #include <cstdint>
80 #include <utility>
81 
82 using namespace llvm;
83 using namespace PatternMatch;
84 
85 #define DEBUG_TYPE "indvars"
86 
87 STATISTIC(NumWidened     , "Number of indvars widened");
88 STATISTIC(NumReplaced    , "Number of exit values replaced");
89 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
90 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
91 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
92 
93 static cl::opt<ReplaceExitVal> ReplaceExitValue(
94     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
95     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
96     cl::values(
97         clEnumValN(NeverRepl, "never", "never replace exit value"),
98         clEnumValN(OnlyCheapRepl, "cheap",
99                    "only replace exit value when the cost is cheap"),
100         clEnumValN(
101             UnusedIndVarInLoop, "unusedindvarinloop",
102             "only replace exit value when it is an unused "
103             "induction variable in the loop and has cheap replacement cost"),
104         clEnumValN(NoHardUse, "noharduse",
105                    "only replace exit values when loop def likely dead"),
106         clEnumValN(AlwaysRepl, "always",
107                    "always replace exit value whenever possible")));
108 
109 static cl::opt<bool> UsePostIncrementRanges(
110   "indvars-post-increment-ranges", cl::Hidden,
111   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
112   cl::init(true));
113 
114 static cl::opt<bool>
115 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
116             cl::desc("Disable Linear Function Test Replace optimization"));
117 
118 static cl::opt<bool>
119 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
120                 cl::desc("Predicate conditions in read only loops"));
121 
122 static cl::opt<bool>
123 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
124                 cl::desc("Allow widening of indvars to eliminate s/zext"));
125 
126 namespace {
127 
128 class IndVarSimplify {
129   LoopInfo *LI;
130   ScalarEvolution *SE;
131   DominatorTree *DT;
132   const DataLayout &DL;
133   TargetLibraryInfo *TLI;
134   const TargetTransformInfo *TTI;
135   std::unique_ptr<MemorySSAUpdater> MSSAU;
136 
137   SmallVector<WeakTrackingVH, 16> DeadInsts;
138   bool WidenIndVars;
139 
140   bool handleFloatingPointIV(Loop *L, PHINode *PH);
141   bool rewriteNonIntegerIVs(Loop *L);
142 
143   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
144   /// Try to improve our exit conditions by converting condition from signed
145   /// to unsigned or rotating computation out of the loop.
146   /// (See inline comment about why this is duplicated from simplifyAndExtend)
147   bool canonicalizeExitCondition(Loop *L);
148   /// Try to eliminate loop exits based on analyzeable exit counts
149   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
150   /// Try to form loop invariant tests for loop exits by changing how many
151   /// iterations of the loop run when that is unobservable.
152   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
153 
154   bool rewriteFirstIterationLoopExitValues(Loop *L);
155 
156   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
157                                  const SCEV *ExitCount,
158                                  PHINode *IndVar, SCEVExpander &Rewriter);
159 
160   bool sinkUnusedInvariants(Loop *L);
161 
162 public:
163   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
164                  const DataLayout &DL, TargetLibraryInfo *TLI,
165                  TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
166       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
167         WidenIndVars(WidenIndVars) {
168     if (MSSA)
169       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
170   }
171 
172   bool run(Loop *L);
173 };
174 
175 } // end anonymous namespace
176 
177 //===----------------------------------------------------------------------===//
178 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
179 //===----------------------------------------------------------------------===//
180 
181 /// Convert APF to an integer, if possible.
182 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
183   bool isExact = false;
184   // See if we can convert this to an int64_t
185   uint64_t UIntVal;
186   if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
187                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
188       !isExact)
189     return false;
190   IntVal = UIntVal;
191   return true;
192 }
193 
194 /// If the loop has floating induction variable then insert corresponding
195 /// integer induction variable if possible.
196 /// For example,
197 /// for(double i = 0; i < 10000; ++i)
198 ///   bar(i)
199 /// is converted into
200 /// for(int i = 0; i < 10000; ++i)
201 ///   bar((double)i);
202 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
203   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
204   unsigned BackEdge     = IncomingEdge^1;
205 
206   // Check incoming value.
207   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
208 
209   int64_t InitValue;
210   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
211     return false;
212 
213   // Check IV increment. Reject this PN if increment operation is not
214   // an add or increment value can not be represented by an integer.
215   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
216   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
217 
218   // If this is not an add of the PHI with a constantfp, or if the constant fp
219   // is not an integer, bail out.
220   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
221   int64_t IncValue;
222   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
223       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
224     return false;
225 
226   // Check Incr uses. One user is PN and the other user is an exit condition
227   // used by the conditional terminator.
228   Value::user_iterator IncrUse = Incr->user_begin();
229   Instruction *U1 = cast<Instruction>(*IncrUse++);
230   if (IncrUse == Incr->user_end()) return false;
231   Instruction *U2 = cast<Instruction>(*IncrUse++);
232   if (IncrUse != Incr->user_end()) return false;
233 
234   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
235   // only used by a branch, we can't transform it.
236   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
237   if (!Compare)
238     Compare = dyn_cast<FCmpInst>(U2);
239   if (!Compare || !Compare->hasOneUse() ||
240       !isa<BranchInst>(Compare->user_back()))
241     return false;
242 
243   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
244 
245   // We need to verify that the branch actually controls the iteration count
246   // of the loop.  If not, the new IV can overflow and no one will notice.
247   // The branch block must be in the loop and one of the successors must be out
248   // of the loop.
249   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
250   if (!L->contains(TheBr->getParent()) ||
251       (L->contains(TheBr->getSuccessor(0)) &&
252        L->contains(TheBr->getSuccessor(1))))
253     return false;
254 
255   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
256   // transform it.
257   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
258   int64_t ExitValue;
259   if (ExitValueVal == nullptr ||
260       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
261     return false;
262 
263   // Find new predicate for integer comparison.
264   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
265   switch (Compare->getPredicate()) {
266   default: return false;  // Unknown comparison.
267   case CmpInst::FCMP_OEQ:
268   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
269   case CmpInst::FCMP_ONE:
270   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
271   case CmpInst::FCMP_OGT:
272   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
273   case CmpInst::FCMP_OGE:
274   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
275   case CmpInst::FCMP_OLT:
276   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
277   case CmpInst::FCMP_OLE:
278   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
279   }
280 
281   // We convert the floating point induction variable to a signed i32 value if
282   // we can.  This is only safe if the comparison will not overflow in a way
283   // that won't be trapped by the integer equivalent operations.  Check for this
284   // now.
285   // TODO: We could use i64 if it is native and the range requires it.
286 
287   // The start/stride/exit values must all fit in signed i32.
288   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
289     return false;
290 
291   // If not actually striding (add x, 0.0), avoid touching the code.
292   if (IncValue == 0)
293     return false;
294 
295   // Positive and negative strides have different safety conditions.
296   if (IncValue > 0) {
297     // If we have a positive stride, we require the init to be less than the
298     // exit value.
299     if (InitValue >= ExitValue)
300       return false;
301 
302     uint32_t Range = uint32_t(ExitValue-InitValue);
303     // Check for infinite loop, either:
304     // while (i <= Exit) or until (i > Exit)
305     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
306       if (++Range == 0) return false;  // Range overflows.
307     }
308 
309     unsigned Leftover = Range % uint32_t(IncValue);
310 
311     // If this is an equality comparison, we require that the strided value
312     // exactly land on the exit value, otherwise the IV condition will wrap
313     // around and do things the fp IV wouldn't.
314     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
315         Leftover != 0)
316       return false;
317 
318     // If the stride would wrap around the i32 before exiting, we can't
319     // transform the IV.
320     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
321       return false;
322   } else {
323     // If we have a negative stride, we require the init to be greater than the
324     // exit value.
325     if (InitValue <= ExitValue)
326       return false;
327 
328     uint32_t Range = uint32_t(InitValue-ExitValue);
329     // Check for infinite loop, either:
330     // while (i >= Exit) or until (i < Exit)
331     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
332       if (++Range == 0) return false;  // Range overflows.
333     }
334 
335     unsigned Leftover = Range % uint32_t(-IncValue);
336 
337     // If this is an equality comparison, we require that the strided value
338     // exactly land on the exit value, otherwise the IV condition will wrap
339     // around and do things the fp IV wouldn't.
340     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
341         Leftover != 0)
342       return false;
343 
344     // If the stride would wrap around the i32 before exiting, we can't
345     // transform the IV.
346     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
347       return false;
348   }
349 
350   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
351 
352   // Insert new integer induction variable.
353   PHINode *NewPHI =
354       PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
355   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
356                       PN->getIncomingBlock(IncomingEdge));
357 
358   Value *NewAdd =
359       BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
360                                 Incr->getName() + ".int", Incr->getIterator());
361   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
362 
363   ICmpInst *NewCompare =
364       new ICmpInst(TheBr->getIterator(), NewPred, NewAdd,
365                    ConstantInt::get(Int32Ty, ExitValue), Compare->getName());
366 
367   // In the following deletions, PN may become dead and may be deleted.
368   // Use a WeakTrackingVH to observe whether this happens.
369   WeakTrackingVH WeakPH = PN;
370 
371   // Delete the old floating point exit comparison.  The branch starts using the
372   // new comparison.
373   NewCompare->takeName(Compare);
374   Compare->replaceAllUsesWith(NewCompare);
375   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
376 
377   // Delete the old floating point increment.
378   Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
379   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
380 
381   // If the FP induction variable still has uses, this is because something else
382   // in the loop uses its value.  In order to canonicalize the induction
383   // variable, we chose to eliminate the IV and rewrite it in terms of an
384   // int->fp cast.
385   //
386   // We give preference to sitofp over uitofp because it is faster on most
387   // platforms.
388   if (WeakPH) {
389     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
390                                  PN->getParent()->getFirstInsertionPt());
391     PN->replaceAllUsesWith(Conv);
392     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
393   }
394   return true;
395 }
396 
397 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
398   // First step.  Check to see if there are any floating-point recurrences.
399   // If there are, change them into integer recurrences, permitting analysis by
400   // the SCEV routines.
401   BasicBlock *Header = L->getHeader();
402 
403   SmallVector<WeakTrackingVH, 8> PHIs;
404   for (PHINode &PN : Header->phis())
405     PHIs.push_back(&PN);
406 
407   bool Changed = false;
408   for (WeakTrackingVH &PHI : PHIs)
409     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
410       Changed |= handleFloatingPointIV(L, PN);
411 
412   // If the loop previously had floating-point IV, ScalarEvolution
413   // may not have been able to compute a trip count. Now that we've done some
414   // re-writing, the trip count may be computable.
415   if (Changed)
416     SE->forgetLoop(L);
417   return Changed;
418 }
419 
420 //===---------------------------------------------------------------------===//
421 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
422 // they will exit at the first iteration.
423 //===---------------------------------------------------------------------===//
424 
425 /// Check to see if this loop has loop invariant conditions which lead to loop
426 /// exits. If so, we know that if the exit path is taken, it is at the first
427 /// loop iteration. This lets us predict exit values of PHI nodes that live in
428 /// loop header.
429 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
430   // Verify the input to the pass is already in LCSSA form.
431   assert(L->isLCSSAForm(*DT));
432 
433   SmallVector<BasicBlock *, 8> ExitBlocks;
434   L->getUniqueExitBlocks(ExitBlocks);
435 
436   bool MadeAnyChanges = false;
437   for (auto *ExitBB : ExitBlocks) {
438     // If there are no more PHI nodes in this exit block, then no more
439     // values defined inside the loop are used on this path.
440     for (PHINode &PN : ExitBB->phis()) {
441       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
442            IncomingValIdx != E; ++IncomingValIdx) {
443         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
444 
445         // Can we prove that the exit must run on the first iteration if it
446         // runs at all?  (i.e. early exits are fine for our purposes, but
447         // traces which lead to this exit being taken on the 2nd iteration
448         // aren't.)  Note that this is about whether the exit branch is
449         // executed, not about whether it is taken.
450         if (!L->getLoopLatch() ||
451             !DT->dominates(IncomingBB, L->getLoopLatch()))
452           continue;
453 
454         // Get condition that leads to the exit path.
455         auto *TermInst = IncomingBB->getTerminator();
456 
457         Value *Cond = nullptr;
458         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
459           // Must be a conditional branch, otherwise the block
460           // should not be in the loop.
461           Cond = BI->getCondition();
462         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
463           Cond = SI->getCondition();
464         else
465           continue;
466 
467         if (!L->isLoopInvariant(Cond))
468           continue;
469 
470         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
471 
472         // Only deal with PHIs in the loop header.
473         if (!ExitVal || ExitVal->getParent() != L->getHeader())
474           continue;
475 
476         // If ExitVal is a PHI on the loop header, then we know its
477         // value along this exit because the exit can only be taken
478         // on the first iteration.
479         auto *LoopPreheader = L->getLoopPreheader();
480         assert(LoopPreheader && "Invalid loop");
481         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
482         if (PreheaderIdx != -1) {
483           assert(ExitVal->getParent() == L->getHeader() &&
484                  "ExitVal must be in loop header");
485           MadeAnyChanges = true;
486           PN.setIncomingValue(IncomingValIdx,
487                               ExitVal->getIncomingValue(PreheaderIdx));
488           SE->forgetValue(&PN);
489         }
490       }
491     }
492   }
493   return MadeAnyChanges;
494 }
495 
496 //===----------------------------------------------------------------------===//
497 //  IV Widening - Extend the width of an IV to cover its widest uses.
498 //===----------------------------------------------------------------------===//
499 
500 /// Update information about the induction variable that is extended by this
501 /// sign or zero extend operation. This is used to determine the final width of
502 /// the IV before actually widening it.
503 static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
504                         ScalarEvolution *SE,
505                         const TargetTransformInfo *TTI) {
506   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
507   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
508     return;
509 
510   Type *Ty = Cast->getType();
511   uint64_t Width = SE->getTypeSizeInBits(Ty);
512   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
513     return;
514 
515   // Check that `Cast` actually extends the induction variable (we rely on this
516   // later).  This takes care of cases where `Cast` is extending a truncation of
517   // the narrow induction variable, and thus can end up being narrower than the
518   // "narrow" induction variable.
519   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
520   if (NarrowIVWidth >= Width)
521     return;
522 
523   // Cast is either an sext or zext up to this point.
524   // We should not widen an indvar if arithmetics on the wider indvar are more
525   // expensive than those on the narrower indvar. We check only the cost of ADD
526   // because at least an ADD is required to increment the induction variable. We
527   // could compute more comprehensively the cost of all instructions on the
528   // induction variable when necessary.
529   if (TTI &&
530       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
531           TTI->getArithmeticInstrCost(Instruction::Add,
532                                       Cast->getOperand(0)->getType())) {
533     return;
534   }
535 
536   if (!WI.WidestNativeType ||
537       Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
538     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
539     WI.IsSigned = IsSigned;
540     return;
541   }
542 
543   // We extend the IV to satisfy the sign of its user(s), or 'signed'
544   // if there are multiple users with both sign- and zero extensions,
545   // in order not to introduce nondeterministic behaviour based on the
546   // unspecified order of a PHI nodes' users-iterator.
547   WI.IsSigned |= IsSigned;
548 }
549 
550 //===----------------------------------------------------------------------===//
551 //  Live IV Reduction - Minimize IVs live across the loop.
552 //===----------------------------------------------------------------------===//
553 
554 //===----------------------------------------------------------------------===//
555 //  Simplification of IV users based on SCEV evaluation.
556 //===----------------------------------------------------------------------===//
557 
558 namespace {
559 
560 class IndVarSimplifyVisitor : public IVVisitor {
561   ScalarEvolution *SE;
562   const TargetTransformInfo *TTI;
563   PHINode *IVPhi;
564 
565 public:
566   WideIVInfo WI;
567 
568   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
569                         const TargetTransformInfo *TTI,
570                         const DominatorTree *DTree)
571     : SE(SCEV), TTI(TTI), IVPhi(IV) {
572     DT = DTree;
573     WI.NarrowIV = IVPhi;
574   }
575 
576   // Implement the interface used by simplifyUsersOfIV.
577   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
578 };
579 
580 } // end anonymous namespace
581 
582 /// Iteratively perform simplification on a worklist of IV users. Each
583 /// successive simplification may push more users which may themselves be
584 /// candidates for simplification.
585 ///
586 /// Sign/Zero extend elimination is interleaved with IV simplification.
587 bool IndVarSimplify::simplifyAndExtend(Loop *L,
588                                        SCEVExpander &Rewriter,
589                                        LoopInfo *LI) {
590   SmallVector<WideIVInfo, 8> WideIVs;
591 
592   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
593           Intrinsic::getName(Intrinsic::experimental_guard));
594   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
595 
596   SmallVector<PHINode *, 8> LoopPhis;
597   for (PHINode &PN : L->getHeader()->phis())
598     LoopPhis.push_back(&PN);
599 
600   // Each round of simplification iterates through the SimplifyIVUsers worklist
601   // for all current phis, then determines whether any IVs can be
602   // widened. Widening adds new phis to LoopPhis, inducing another round of
603   // simplification on the wide IVs.
604   bool Changed = false;
605   while (!LoopPhis.empty()) {
606     // Evaluate as many IV expressions as possible before widening any IVs. This
607     // forces SCEV to set no-wrap flags before evaluating sign/zero
608     // extension. The first time SCEV attempts to normalize sign/zero extension,
609     // the result becomes final. So for the most predictable results, we delay
610     // evaluation of sign/zero extend evaluation until needed, and avoid running
611     // other SCEV based analysis prior to simplifyAndExtend.
612     do {
613       PHINode *CurrIV = LoopPhis.pop_back_val();
614 
615       // Information about sign/zero extensions of CurrIV.
616       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
617 
618       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
619                                    &Visitor);
620 
621       if (Visitor.WI.WidestNativeType) {
622         WideIVs.push_back(Visitor.WI);
623       }
624     } while(!LoopPhis.empty());
625 
626     // Continue if we disallowed widening.
627     if (!WidenIndVars)
628       continue;
629 
630     for (; !WideIVs.empty(); WideIVs.pop_back()) {
631       unsigned ElimExt;
632       unsigned Widened;
633       if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
634                                           DT, DeadInsts, ElimExt, Widened,
635                                           HasGuards, UsePostIncrementRanges)) {
636         NumElimExt += ElimExt;
637         NumWidened += Widened;
638         Changed = true;
639         LoopPhis.push_back(WidePhi);
640       }
641     }
642   }
643   return Changed;
644 }
645 
646 //===----------------------------------------------------------------------===//
647 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
648 //===----------------------------------------------------------------------===//
649 
650 /// Given an Value which is hoped to be part of an add recurance in the given
651 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
652 /// that this is less general than SCEVs AddRec checking.
653 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
654   Instruction *IncI = dyn_cast<Instruction>(IncV);
655   if (!IncI)
656     return nullptr;
657 
658   switch (IncI->getOpcode()) {
659   case Instruction::Add:
660   case Instruction::Sub:
661     break;
662   case Instruction::GetElementPtr:
663     // An IV counter must preserve its type.
664     if (IncI->getNumOperands() == 2)
665       break;
666     [[fallthrough]];
667   default:
668     return nullptr;
669   }
670 
671   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
672   if (Phi && Phi->getParent() == L->getHeader()) {
673     if (L->isLoopInvariant(IncI->getOperand(1)))
674       return Phi;
675     return nullptr;
676   }
677   if (IncI->getOpcode() == Instruction::GetElementPtr)
678     return nullptr;
679 
680   // Allow add/sub to be commuted.
681   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
682   if (Phi && Phi->getParent() == L->getHeader()) {
683     if (L->isLoopInvariant(IncI->getOperand(0)))
684       return Phi;
685   }
686   return nullptr;
687 }
688 
689 /// Whether the current loop exit test is based on this value.  Currently this
690 /// is limited to a direct use in the loop condition.
691 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
692   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
693   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
694   // TODO: Allow non-icmp loop test.
695   if (!ICmp)
696     return false;
697 
698   // TODO: Allow indirect use.
699   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
700 }
701 
702 /// linearFunctionTestReplace policy. Return true unless we can show that the
703 /// current exit test is already sufficiently canonical.
704 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
705   assert(L->getLoopLatch() && "Must be in simplified form");
706 
707   // Avoid converting a constant or loop invariant test back to a runtime
708   // test.  This is critical for when SCEV's cached ExitCount is less precise
709   // than the current IR (such as after we've proven a particular exit is
710   // actually dead and thus the BE count never reaches our ExitCount.)
711   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
712   if (L->isLoopInvariant(BI->getCondition()))
713     return false;
714 
715   // Do LFTR to simplify the exit condition to an ICMP.
716   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
717   if (!Cond)
718     return true;
719 
720   // Do LFTR to simplify the exit ICMP to EQ/NE
721   ICmpInst::Predicate Pred = Cond->getPredicate();
722   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
723     return true;
724 
725   // Look for a loop invariant RHS
726   Value *LHS = Cond->getOperand(0);
727   Value *RHS = Cond->getOperand(1);
728   if (!L->isLoopInvariant(RHS)) {
729     if (!L->isLoopInvariant(LHS))
730       return true;
731     std::swap(LHS, RHS);
732   }
733   // Look for a simple IV counter LHS
734   PHINode *Phi = dyn_cast<PHINode>(LHS);
735   if (!Phi)
736     Phi = getLoopPhiForCounter(LHS, L);
737 
738   if (!Phi)
739     return true;
740 
741   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
742   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
743   if (Idx < 0)
744     return true;
745 
746   // Do LFTR if the exit condition's IV is *not* a simple counter.
747   Value *IncV = Phi->getIncomingValue(Idx);
748   return Phi != getLoopPhiForCounter(IncV, L);
749 }
750 
751 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
752 /// down to checking that all operands are constant and listing instructions
753 /// that may hide undef.
754 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
755                                unsigned Depth) {
756   if (isa<Constant>(V))
757     return !isa<UndefValue>(V);
758 
759   if (Depth >= 6)
760     return false;
761 
762   // Conservatively handle non-constant non-instructions. For example, Arguments
763   // may be undef.
764   Instruction *I = dyn_cast<Instruction>(V);
765   if (!I)
766     return false;
767 
768   // Load and return values may be undef.
769   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
770     return false;
771 
772   // Optimistically handle other instructions.
773   for (Value *Op : I->operands()) {
774     if (!Visited.insert(Op).second)
775       continue;
776     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
777       return false;
778   }
779   return true;
780 }
781 
782 /// Return true if the given value is concrete. We must prove that undef can
783 /// never reach it.
784 ///
785 /// TODO: If we decide that this is a good approach to checking for undef, we
786 /// may factor it into a common location.
787 static bool hasConcreteDef(Value *V) {
788   SmallPtrSet<Value*, 8> Visited;
789   Visited.insert(V);
790   return hasConcreteDefImpl(V, Visited, 0);
791 }
792 
793 /// Return true if the given phi is a "counter" in L.  A counter is an
794 /// add recurance (of integer or pointer type) with an arbitrary start, and a
795 /// step of 1.  Note that L must have exactly one latch.
796 static bool isLoopCounter(PHINode* Phi, Loop *L,
797                           ScalarEvolution *SE) {
798   assert(Phi->getParent() == L->getHeader());
799   assert(L->getLoopLatch());
800 
801   if (!SE->isSCEVable(Phi->getType()))
802     return false;
803 
804   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
805   if (!AR || AR->getLoop() != L || !AR->isAffine())
806     return false;
807 
808   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
809   if (!Step || !Step->isOne())
810     return false;
811 
812   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
813   Value *IncV = Phi->getIncomingValue(LatchIdx);
814   return (getLoopPhiForCounter(IncV, L) == Phi &&
815           isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
816 }
817 
818 /// Search the loop header for a loop counter (anadd rec w/step of one)
819 /// suitable for use by LFTR.  If multiple counters are available, select the
820 /// "best" one based profitable heuristics.
821 ///
822 /// BECount may be an i8* pointer type. The pointer difference is already
823 /// valid count without scaling the address stride, so it remains a pointer
824 /// expression as far as SCEV is concerned.
825 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
826                                 const SCEV *BECount,
827                                 ScalarEvolution *SE, DominatorTree *DT) {
828   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
829 
830   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
831 
832   // Loop over all of the PHI nodes, looking for a simple counter.
833   PHINode *BestPhi = nullptr;
834   const SCEV *BestInit = nullptr;
835   BasicBlock *LatchBlock = L->getLoopLatch();
836   assert(LatchBlock && "Must be in simplified form");
837   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
838 
839   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
840     PHINode *Phi = cast<PHINode>(I);
841     if (!isLoopCounter(Phi, L, SE))
842       continue;
843 
844     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
845 
846     // AR may be a pointer type, while BECount is an integer type.
847     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
848     // AR may not be a narrower type, or we may never exit.
849     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
850     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
851       continue;
852 
853     // Avoid reusing a potentially undef value to compute other values that may
854     // have originally had a concrete definition.
855     if (!hasConcreteDef(Phi)) {
856       // We explicitly allow unknown phis as long as they are already used by
857       // the loop exit test.  This is legal since performing LFTR could not
858       // increase the number of undef users.
859       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
860       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
861           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
862         continue;
863     }
864 
865     // Avoid introducing undefined behavior due to poison which didn't exist in
866     // the original program.  (Annoyingly, the rules for poison and undef
867     // propagation are distinct, so this does NOT cover the undef case above.)
868     // We have to ensure that we don't introduce UB by introducing a use on an
869     // iteration where said IV produces poison.  Our strategy here differs for
870     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
871     // see code in linearFunctionTestReplace.  For pointers, we restrict
872     // transforms as there is no good way to reinfer inbounds once lost.
873     if (!Phi->getType()->isIntegerTy() &&
874         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
875       continue;
876 
877     const SCEV *Init = AR->getStart();
878 
879     if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
880       // Don't force a live loop counter if another IV can be used.
881       if (isAlmostDeadIV(Phi, LatchBlock, Cond))
882         continue;
883 
884       // Prefer to count-from-zero. This is a more "canonical" counter form. It
885       // also prefers integer to pointer IVs.
886       if (BestInit->isZero() != Init->isZero()) {
887         if (BestInit->isZero())
888           continue;
889       }
890       // If two IVs both count from zero or both count from nonzero then the
891       // narrower is likely a dead phi that has been widened. Use the wider phi
892       // to allow the other to be eliminated.
893       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
894         continue;
895     }
896     BestPhi = Phi;
897     BestInit = Init;
898   }
899   return BestPhi;
900 }
901 
902 /// Insert an IR expression which computes the value held by the IV IndVar
903 /// (which must be an loop counter w/unit stride) after the backedge of loop L
904 /// is taken ExitCount times.
905 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
906                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
907                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
908   assert(isLoopCounter(IndVar, L, SE));
909   assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
910   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
911   assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
912 
913   // For integer IVs, truncate the IV before computing the limit unless we
914   // know apriori that the limit must be a constant when evaluated in the
915   // bitwidth of the IV.  We prefer (potentially) keeping a truncate of the
916   // IV in the loop over a (potentially) expensive expansion of the widened
917   // exit count add(zext(add)) expression.
918   if (IndVar->getType()->isIntegerTy() &&
919       SE->getTypeSizeInBits(AR->getType()) >
920       SE->getTypeSizeInBits(ExitCount->getType())) {
921     const SCEV *IVInit = AR->getStart();
922     if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
923       AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
924   }
925 
926   const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
927   const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
928   assert(SE->isLoopInvariant(IVLimit, L) &&
929          "Computed iteration count is not loop invariant!");
930   return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
931                                 ExitingBB->getTerminator());
932 }
933 
934 /// This method rewrites the exit condition of the loop to be a canonical !=
935 /// comparison against the incremented loop induction variable.  This pass is
936 /// able to rewrite the exit tests of any loop where the SCEV analysis can
937 /// determine a loop-invariant trip count of the loop, which is actually a much
938 /// broader range than just linear tests.
939 bool IndVarSimplify::
940 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
941                           const SCEV *ExitCount,
942                           PHINode *IndVar, SCEVExpander &Rewriter) {
943   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
944   assert(isLoopCounter(IndVar, L, SE));
945   Instruction * const IncVar =
946     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
947 
948   // Initialize CmpIndVar to the preincremented IV.
949   Value *CmpIndVar = IndVar;
950   bool UsePostInc = false;
951 
952   // If the exiting block is the same as the backedge block, we prefer to
953   // compare against the post-incremented value, otherwise we must compare
954   // against the preincremented value.
955   if (ExitingBB == L->getLoopLatch()) {
956     // For pointer IVs, we chose to not strip inbounds which requires us not
957     // to add a potentially UB introducing use.  We need to either a) show
958     // the loop test we're modifying is already in post-inc form, or b) show
959     // that adding a use must not introduce UB.
960     bool SafeToPostInc =
961         IndVar->getType()->isIntegerTy() ||
962         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
963         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
964     if (SafeToPostInc) {
965       UsePostInc = true;
966       CmpIndVar = IncVar;
967     }
968   }
969 
970   // It may be necessary to drop nowrap flags on the incrementing instruction
971   // if either LFTR moves from a pre-inc check to a post-inc check (in which
972   // case the increment might have previously been poison on the last iteration
973   // only) or if LFTR switches to a different IV that was previously dynamically
974   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
975   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
976   // check), because the pre-inc addrec flags may be adopted from the original
977   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
978   // TODO: This handling is inaccurate for one case: If we switch to a
979   // dynamically dead IV that wraps on the first loop iteration only, which is
980   // not covered by the post-inc addrec. (If the new IV was not dynamically
981   // dead, it could not be poison on the first iteration in the first place.)
982   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
983     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
984     if (BO->hasNoUnsignedWrap())
985       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
986     if (BO->hasNoSignedWrap())
987       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
988   }
989 
990   Value *ExitCnt = genLoopLimit(
991       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
992   assert(ExitCnt->getType()->isPointerTy() ==
993              IndVar->getType()->isPointerTy() &&
994          "genLoopLimit missed a cast");
995 
996   // Insert a new icmp_ne or icmp_eq instruction before the branch.
997   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
998   ICmpInst::Predicate P;
999   if (L->contains(BI->getSuccessor(0)))
1000     P = ICmpInst::ICMP_NE;
1001   else
1002     P = ICmpInst::ICMP_EQ;
1003 
1004   IRBuilder<> Builder(BI);
1005 
1006   // The new loop exit condition should reuse the debug location of the
1007   // original loop exit condition.
1008   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1009     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1010 
1011   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1012   // avoid the expensive expansion of the limit expression in the wider type,
1013   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
1014   // since we know (from the exit count bitwidth), that we can't self-wrap in
1015   // the narrower type.
1016   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1017   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1018   if (CmpIndVarSize > ExitCntSize) {
1019     assert(!CmpIndVar->getType()->isPointerTy() &&
1020            !ExitCnt->getType()->isPointerTy());
1021 
1022     // Before resorting to actually inserting the truncate, use the same
1023     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1024     // the other side of the comparison instead.  We still evaluate the limit
1025     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1026     // a truncate within in.
1027     bool Extended = false;
1028     const SCEV *IV = SE->getSCEV(CmpIndVar);
1029     const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
1030     const SCEV *ZExtTrunc =
1031       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1032 
1033     if (ZExtTrunc == IV) {
1034       Extended = true;
1035       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1036                                    "wide.trip.count");
1037     } else {
1038       const SCEV *SExtTrunc =
1039         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1040       if (SExtTrunc == IV) {
1041         Extended = true;
1042         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1043                                      "wide.trip.count");
1044       }
1045     }
1046 
1047     if (Extended) {
1048       bool Discard;
1049       L->makeLoopInvariant(ExitCnt, Discard);
1050     } else
1051       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1052                                       "lftr.wideiv");
1053   }
1054   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1055                     << "      LHS:" << *CmpIndVar << '\n'
1056                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1057                     << "\n"
1058                     << "      RHS:\t" << *ExitCnt << "\n"
1059                     << "ExitCount:\t" << *ExitCount << "\n"
1060                     << "  was: " << *BI->getCondition() << "\n");
1061 
1062   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1063   Value *OrigCond = BI->getCondition();
1064   // It's tempting to use replaceAllUsesWith here to fully replace the old
1065   // comparison, but that's not immediately safe, since users of the old
1066   // comparison may not be dominated by the new comparison. Instead, just
1067   // update the branch to use the new comparison; in the common case this
1068   // will make old comparison dead.
1069   BI->setCondition(Cond);
1070   DeadInsts.emplace_back(OrigCond);
1071 
1072   ++NumLFTR;
1073   return true;
1074 }
1075 
1076 //===----------------------------------------------------------------------===//
1077 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1078 //===----------------------------------------------------------------------===//
1079 
1080 /// If there's a single exit block, sink any loop-invariant values that
1081 /// were defined in the preheader but not used inside the loop into the
1082 /// exit block to reduce register pressure in the loop.
1083 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1084   BasicBlock *ExitBlock = L->getExitBlock();
1085   if (!ExitBlock) return false;
1086 
1087   BasicBlock *Preheader = L->getLoopPreheader();
1088   if (!Preheader) return false;
1089 
1090   bool MadeAnyChanges = false;
1091   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1092   BasicBlock::iterator I(Preheader->getTerminator());
1093   while (I != Preheader->begin()) {
1094     --I;
1095     // New instructions were inserted at the end of the preheader.
1096     if (isa<PHINode>(I))
1097       break;
1098 
1099     // Don't move instructions which might have side effects, since the side
1100     // effects need to complete before instructions inside the loop.  Also don't
1101     // move instructions which might read memory, since the loop may modify
1102     // memory. Note that it's okay if the instruction might have undefined
1103     // behavior: LoopSimplify guarantees that the preheader dominates the exit
1104     // block.
1105     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1106       continue;
1107 
1108     // Skip debug info intrinsics.
1109     if (isa<DbgInfoIntrinsic>(I))
1110       continue;
1111 
1112     // Skip eh pad instructions.
1113     if (I->isEHPad())
1114       continue;
1115 
1116     // Don't sink alloca: we never want to sink static alloca's out of the
1117     // entry block, and correctly sinking dynamic alloca's requires
1118     // checks for stacksave/stackrestore intrinsics.
1119     // FIXME: Refactor this check somehow?
1120     if (isa<AllocaInst>(I))
1121       continue;
1122 
1123     // Determine if there is a use in or before the loop (direct or
1124     // otherwise).
1125     bool UsedInLoop = false;
1126     for (Use &U : I->uses()) {
1127       Instruction *User = cast<Instruction>(U.getUser());
1128       BasicBlock *UseBB = User->getParent();
1129       if (PHINode *P = dyn_cast<PHINode>(User)) {
1130         unsigned i =
1131           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1132         UseBB = P->getIncomingBlock(i);
1133       }
1134       if (UseBB == Preheader || L->contains(UseBB)) {
1135         UsedInLoop = true;
1136         break;
1137       }
1138     }
1139 
1140     // If there is, the def must remain in the preheader.
1141     if (UsedInLoop)
1142       continue;
1143 
1144     // Otherwise, sink it to the exit block.
1145     Instruction *ToMove = &*I;
1146     bool Done = false;
1147 
1148     if (I != Preheader->begin()) {
1149       // Skip debug info intrinsics.
1150       do {
1151         --I;
1152       } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1153 
1154       if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1155         Done = true;
1156     } else {
1157       Done = true;
1158     }
1159 
1160     MadeAnyChanges = true;
1161     ToMove->moveBefore(*ExitBlock, InsertPt);
1162     SE->forgetValue(ToMove);
1163     if (Done) break;
1164     InsertPt = ToMove->getIterator();
1165   }
1166 
1167   return MadeAnyChanges;
1168 }
1169 
1170 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1171                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1172   auto *OldCond = BI->getCondition();
1173   LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1174                     << " with " << *NewCond << "\n");
1175   BI->setCondition(NewCond);
1176   if (OldCond->use_empty())
1177     DeadInsts.emplace_back(OldCond);
1178 }
1179 
1180 static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1181                                       bool IsTaken) {
1182   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1183   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1184   auto *OldCond = BI->getCondition();
1185   return ConstantInt::get(OldCond->getType(),
1186                           IsTaken ? ExitIfTrue : !ExitIfTrue);
1187 }
1188 
1189 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1190                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1191   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1192   auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1193   replaceExitCond(BI, NewCond, DeadInsts);
1194 }
1195 
1196 static void replaceLoopPHINodesWithPreheaderValues(
1197     LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1198     ScalarEvolution &SE) {
1199   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1200   auto *LoopPreheader = L->getLoopPreheader();
1201   auto *LoopHeader = L->getHeader();
1202   SmallVector<Instruction *> Worklist;
1203   for (auto &PN : LoopHeader->phis()) {
1204     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1205     for (User *U : PN.users())
1206       Worklist.push_back(cast<Instruction>(U));
1207     SE.forgetValue(&PN);
1208     PN.replaceAllUsesWith(PreheaderIncoming);
1209     DeadInsts.emplace_back(&PN);
1210   }
1211 
1212   // Replacing with the preheader value will often allow IV users to simplify
1213   // (especially if the preheader value is a constant).
1214   SmallPtrSet<Instruction *, 16> Visited;
1215   while (!Worklist.empty()) {
1216     auto *I = cast<Instruction>(Worklist.pop_back_val());
1217     if (!Visited.insert(I).second)
1218       continue;
1219 
1220     // Don't simplify instructions outside the loop.
1221     if (!L->contains(I))
1222       continue;
1223 
1224     Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
1225     if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1226       for (User *U : I->users())
1227         Worklist.push_back(cast<Instruction>(U));
1228       I->replaceAllUsesWith(Res);
1229       DeadInsts.emplace_back(I);
1230     }
1231   }
1232 }
1233 
1234 static Value *
1235 createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
1236                     const ScalarEvolution::LoopInvariantPredicate &LIP,
1237                     SCEVExpander &Rewriter) {
1238   ICmpInst::Predicate InvariantPred = LIP.Pred;
1239   BasicBlock *Preheader = L->getLoopPreheader();
1240   assert(Preheader && "Preheader doesn't exist");
1241   Rewriter.setInsertPoint(Preheader->getTerminator());
1242   auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1243   auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1244   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1245   if (ExitIfTrue)
1246     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1247   IRBuilder<> Builder(Preheader->getTerminator());
1248   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1249   return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1250                             BI->getCondition()->getName());
1251 }
1252 
1253 static std::optional<Value *>
1254 createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1255                   const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1256                   ScalarEvolution *SE, SCEVExpander &Rewriter) {
1257   ICmpInst::Predicate Pred = ICmp->getPredicate();
1258   Value *LHS = ICmp->getOperand(0);
1259   Value *RHS = ICmp->getOperand(1);
1260 
1261   // 'LHS pred RHS' should now mean that we stay in loop.
1262   auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1263   if (Inverted)
1264     Pred = CmpInst::getInversePredicate(Pred);
1265 
1266   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1267   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1268   // Can we prove it to be trivially true or false?
1269   if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
1270     return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1271 
1272   auto *ARTy = LHSS->getType();
1273   auto *MaxIterTy = MaxIter->getType();
1274   // If possible, adjust types.
1275   if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1276     MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1277   else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1278     const SCEV *MinusOne = SE->getMinusOne(ARTy);
1279     auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1280     if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1281       MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1282   }
1283 
1284   if (SkipLastIter) {
1285     // Semantically skip last iter is "subtract 1, do not bother about unsigned
1286     // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1287     // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1288     // So we manually construct umin(a - 1, b - 1).
1289     SmallVector<const SCEV *, 4> Elements;
1290     if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
1291       for (auto *Op : UMin->operands())
1292         Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
1293       MaxIter = SE->getUMinFromMismatchedTypes(Elements);
1294     } else
1295       MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
1296   }
1297 
1298   // Check if there is a loop-invariant predicate equivalent to our check.
1299   auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1300                                                                L, BI, MaxIter);
1301   if (!LIP)
1302     return std::nullopt;
1303 
1304   // Can we prove it to be trivially true?
1305   if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1306     return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1307   else
1308     return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
1309 }
1310 
1311 static bool optimizeLoopExitWithUnknownExitCount(
1312     const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1313     bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1314     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1315   assert(
1316       (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1317       "Not a loop exit!");
1318 
1319   // For branch that stays in loop by TRUE condition, go through AND. For branch
1320   // that stays in loop by FALSE condition, go through OR. Both gives the
1321   // similar logic: "stay in loop iff all conditions are true(false)".
1322   bool Inverted = L->contains(BI->getSuccessor(1));
1323   SmallVector<ICmpInst *, 4> LeafConditions;
1324   SmallVector<Value *, 4> Worklist;
1325   SmallPtrSet<Value *, 4> Visited;
1326   Value *OldCond = BI->getCondition();
1327   Visited.insert(OldCond);
1328   Worklist.push_back(OldCond);
1329 
1330   auto GoThrough = [&](Value *V) {
1331     Value *LHS = nullptr, *RHS = nullptr;
1332     if (Inverted) {
1333       if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1334         return false;
1335     } else {
1336       if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1337         return false;
1338     }
1339     if (Visited.insert(LHS).second)
1340       Worklist.push_back(LHS);
1341     if (Visited.insert(RHS).second)
1342       Worklist.push_back(RHS);
1343     return true;
1344   };
1345 
1346   do {
1347     Value *Curr = Worklist.pop_back_val();
1348     // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1349     // those with one use, to avoid instruction duplication.
1350     if (Curr->hasOneUse())
1351       if (!GoThrough(Curr))
1352         if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
1353           LeafConditions.push_back(ICmp);
1354   } while (!Worklist.empty());
1355 
1356   // If the current basic block has the same exit count as the whole loop, and
1357   // it consists of multiple icmp's, try to collect all icmp's that give exact
1358   // same exit count. For all other icmp's, we could use one less iteration,
1359   // because their value on the last iteration doesn't really matter.
1360   SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1361   if (!SkipLastIter && LeafConditions.size() > 1 &&
1362       SE->getExitCount(L, ExitingBB,
1363                        ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1364           MaxIter)
1365     for (auto *ICmp : LeafConditions) {
1366       auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
1367                                              /*ControlsExit*/ false);
1368       auto *ExitMax = EL.SymbolicMaxNotTaken;
1369       if (isa<SCEVCouldNotCompute>(ExitMax))
1370         continue;
1371       // They could be of different types (specifically this happens after
1372       // IV widening).
1373       auto *WiderType =
1374           SE->getWiderType(ExitMax->getType(), MaxIter->getType());
1375       auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
1376       auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
1377       if (WideExitMax == WideMaxIter)
1378         ICmpsFailingOnLastIter.insert(ICmp);
1379     }
1380 
1381   bool Changed = false;
1382   for (auto *OldCond : LeafConditions) {
1383     // Skip last iteration for this icmp under one of two conditions:
1384     // - We do it for all conditions;
1385     // - There is another ICmp that would fail on last iter, so this one doesn't
1386     // really matter.
1387     bool OptimisticSkipLastIter = SkipLastIter;
1388     if (!OptimisticSkipLastIter) {
1389       if (ICmpsFailingOnLastIter.size() > 1)
1390         OptimisticSkipLastIter = true;
1391       else if (ICmpsFailingOnLastIter.size() == 1)
1392         OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
1393     }
1394     if (auto Replaced =
1395             createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
1396                               OptimisticSkipLastIter, SE, Rewriter)) {
1397       Changed = true;
1398       auto *NewCond = *Replaced;
1399       if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
1400         NCI->setName(OldCond->getName() + ".first_iter");
1401       }
1402       LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1403                         << " with " << *NewCond << "\n");
1404       assert(OldCond->hasOneUse() && "Must be!");
1405       OldCond->replaceAllUsesWith(NewCond);
1406       DeadInsts.push_back(OldCond);
1407       // Make sure we no longer consider this condition as failing on last
1408       // iteration.
1409       ICmpsFailingOnLastIter.erase(OldCond);
1410     }
1411   }
1412   return Changed;
1413 }
1414 
1415 bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1416   // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1417   // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1418   // never reaches the icmp since the zext doesn't fold to an AddRec unless
1419   // it already has flags.  The alternative to this would be to extending the
1420   // set of "interesting" IV users to include the icmp, but doing that
1421   // regresses results in practice by querying SCEVs before trip counts which
1422   // rely on them which results in SCEV caching sub-optimal answers.  The
1423   // concern about caching sub-optimal results is why we only query SCEVs of
1424   // the loop invariant RHS here.
1425   SmallVector<BasicBlock*, 16> ExitingBlocks;
1426   L->getExitingBlocks(ExitingBlocks);
1427   bool Changed = false;
1428   for (auto *ExitingBB : ExitingBlocks) {
1429     auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1430     if (!BI)
1431       continue;
1432     assert(BI->isConditional() && "exit branch must be conditional");
1433 
1434     auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1435     if (!ICmp || !ICmp->hasOneUse())
1436       continue;
1437 
1438     auto *LHS = ICmp->getOperand(0);
1439     auto *RHS = ICmp->getOperand(1);
1440     // For the range reasoning, avoid computing SCEVs in the loop to avoid
1441     // poisoning cache with sub-optimal results.  For the must-execute case,
1442     // this is a neccessary precondition for correctness.
1443     if (!L->isLoopInvariant(RHS)) {
1444       if (!L->isLoopInvariant(LHS))
1445         continue;
1446       // Same logic applies for the inverse case
1447       std::swap(LHS, RHS);
1448     }
1449 
1450     // Match (icmp signed-cond zext, RHS)
1451     Value *LHSOp = nullptr;
1452     if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1453       continue;
1454 
1455     const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1456     const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1457     const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1458     auto FullCR = ConstantRange::getFull(InnerBitWidth);
1459     FullCR = FullCR.zeroExtend(OuterBitWidth);
1460     auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1461     if (FullCR.contains(RHSCR)) {
1462       // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1463       // replace the signed condition with the unsigned version.
1464       ICmp->setPredicate(ICmp->getUnsignedPredicate());
1465       Changed = true;
1466       // Note: No SCEV invalidation needed.  We've changed the predicate, but
1467       // have not changed exit counts, or the values produced by the compare.
1468       continue;
1469     }
1470   }
1471 
1472   // Now that we've canonicalized the condition to match the extend,
1473   // see if we can rotate the extend out of the loop.
1474   for (auto *ExitingBB : ExitingBlocks) {
1475     auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1476     if (!BI)
1477       continue;
1478     assert(BI->isConditional() && "exit branch must be conditional");
1479 
1480     auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1481     if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1482       continue;
1483 
1484     bool Swapped = false;
1485     auto *LHS = ICmp->getOperand(0);
1486     auto *RHS = ICmp->getOperand(1);
1487     if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
1488       // Nothing to rotate
1489       continue;
1490     if (L->isLoopInvariant(LHS)) {
1491       // Same logic applies for the inverse case until we actually pick
1492       // which operand of the compare to update.
1493       Swapped = true;
1494       std::swap(LHS, RHS);
1495     }
1496     assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1497 
1498     // Match (icmp unsigned-cond zext, RHS)
1499     // TODO: Extend to handle corresponding sext/signed-cmp case
1500     // TODO: Extend to other invertible functions
1501     Value *LHSOp = nullptr;
1502     if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1503       continue;
1504 
1505     // In general, we only rotate if we can do so without increasing the number
1506     // of instructions.  The exception is when we have an zext(add-rec).  The
1507     // reason for allowing this exception is that we know we need to get rid
1508     // of the zext for SCEV to be able to compute a trip count for said loops;
1509     // we consider the new trip count valuable enough to increase instruction
1510     // count by one.
1511     if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1512       continue;
1513 
1514     // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1515     // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1516     // when zext is loop varying and RHS is loop invariant.  This converts
1517     // loop varying work to loop-invariant work.
1518     auto doRotateTransform = [&]() {
1519       assert(ICmp->isUnsigned() && "must have proven unsigned already");
1520       auto *NewRHS = CastInst::Create(
1521           Instruction::Trunc, RHS, LHSOp->getType(), "",
1522           L->getLoopPreheader()->getTerminator()->getIterator());
1523       ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
1524       ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
1525       if (LHS->use_empty())
1526         DeadInsts.push_back(LHS);
1527     };
1528 
1529 
1530     const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1531     const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1532     const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1533     auto FullCR = ConstantRange::getFull(InnerBitWidth);
1534     FullCR = FullCR.zeroExtend(OuterBitWidth);
1535     auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1536     if (FullCR.contains(RHSCR)) {
1537       doRotateTransform();
1538       Changed = true;
1539       // Note, we are leaving SCEV in an unfortunately imprecise case here
1540       // as rotation tends to reveal information about trip counts not
1541       // previously visible.
1542       continue;
1543     }
1544   }
1545 
1546   return Changed;
1547 }
1548 
1549 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1550   SmallVector<BasicBlock*, 16> ExitingBlocks;
1551   L->getExitingBlocks(ExitingBlocks);
1552 
1553   // Remove all exits which aren't both rewriteable and execute on every
1554   // iteration.
1555   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1556     // If our exitting block exits multiple loops, we can only rewrite the
1557     // innermost one.  Otherwise, we're changing how many times the innermost
1558     // loop runs before it exits.
1559     if (LI->getLoopFor(ExitingBB) != L)
1560       return true;
1561 
1562     // Can't rewrite non-branch yet.
1563     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1564     if (!BI)
1565       return true;
1566 
1567     // Likewise, the loop latch must be dominated by the exiting BB.
1568     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1569       return true;
1570 
1571     if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1572       // If already constant, nothing to do. However, if this is an
1573       // unconditional exit, we can still replace header phis with their
1574       // preheader value.
1575       if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1576         replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1577       return true;
1578     }
1579 
1580     return false;
1581   });
1582 
1583   if (ExitingBlocks.empty())
1584     return false;
1585 
1586   // Get a symbolic upper bound on the loop backedge taken count.
1587   const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1588   if (isa<SCEVCouldNotCompute>(MaxBECount))
1589     return false;
1590 
1591   // Visit our exit blocks in order of dominance. We know from the fact that
1592   // all exits must dominate the latch, so there is a total dominance order
1593   // between them.
1594   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1595                // std::sort sorts in ascending order, so we want the inverse of
1596                // the normal dominance relation.
1597                if (A == B) return false;
1598                if (DT->properlyDominates(A, B))
1599                  return true;
1600                else {
1601                  assert(DT->properlyDominates(B, A) &&
1602                         "expected total dominance order!");
1603                  return false;
1604                }
1605   });
1606 #ifdef ASSERT
1607   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1608     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1609   }
1610 #endif
1611 
1612   bool Changed = false;
1613   bool SkipLastIter = false;
1614   const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1615   auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1616     if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1617       return;
1618     if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1619       CurrMaxExit = MaxExitCount;
1620     else
1621       CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1622     // If the loop has more than 1 iteration, all further checks will be
1623     // executed 1 iteration less.
1624     if (CurrMaxExit == MaxBECount)
1625       SkipLastIter = true;
1626   };
1627   SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1628   for (BasicBlock *ExitingBB : ExitingBlocks) {
1629     const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1630     const SCEV *MaxExitCount = SE->getExitCount(
1631         L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1632     if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1633       // Okay, we do not know the exit count here. Can we at least prove that it
1634       // will remain the same within iteration space?
1635       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1636       auto OptimizeCond = [&](bool SkipLastIter) {
1637         return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1638                                                     MaxBECount, SkipLastIter,
1639                                                     SE, Rewriter, DeadInsts);
1640       };
1641 
1642       // TODO: We might have proved that we can skip the last iteration for
1643       // this check. In this case, we only want to check the condition on the
1644       // pre-last iteration (MaxBECount - 1). However, there is a nasty
1645       // corner case:
1646       //
1647       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1648       //
1649       // If we could not prove that len != 0, then we also could not prove that
1650       // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1651       // OptimizeCond will likely not prove anything for it, even if it could
1652       // prove the same fact for len.
1653       //
1654       // As a temporary solution, we query both last and pre-last iterations in
1655       // hope that we will be able to prove triviality for at least one of
1656       // them. We can stop querying MaxBECount for this case once SCEV
1657       // understands that (MaxBECount - 1) will not overflow here.
1658       if (OptimizeCond(false))
1659         Changed = true;
1660       else if (SkipLastIter && OptimizeCond(true))
1661         Changed = true;
1662       UpdateSkipLastIter(MaxExitCount);
1663       continue;
1664     }
1665 
1666     UpdateSkipLastIter(ExactExitCount);
1667 
1668     // If we know we'd exit on the first iteration, rewrite the exit to
1669     // reflect this.  This does not imply the loop must exit through this
1670     // exit; there may be an earlier one taken on the first iteration.
1671     // We know that the backedge can't be taken, so we replace all
1672     // the header PHIs with values coming from the preheader.
1673     if (ExactExitCount->isZero()) {
1674       foldExit(L, ExitingBB, true, DeadInsts);
1675       replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1676       Changed = true;
1677       continue;
1678     }
1679 
1680     assert(ExactExitCount->getType()->isIntegerTy() &&
1681            MaxBECount->getType()->isIntegerTy() &&
1682            "Exit counts must be integers");
1683 
1684     Type *WiderType =
1685         SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1686     ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1687     MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1688     assert(MaxBECount->getType() == ExactExitCount->getType());
1689 
1690     // Can we prove that some other exit must be taken strictly before this
1691     // one?
1692     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1693                                      ExactExitCount)) {
1694       foldExit(L, ExitingBB, false, DeadInsts);
1695       Changed = true;
1696       continue;
1697     }
1698 
1699     // As we run, keep track of which exit counts we've encountered.  If we
1700     // find a duplicate, we've found an exit which would have exited on the
1701     // exiting iteration, but (from the visit order) strictly follows another
1702     // which does the same and is thus dead.
1703     if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1704       foldExit(L, ExitingBB, false, DeadInsts);
1705       Changed = true;
1706       continue;
1707     }
1708 
1709     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1710     // here.  If we kept track of the min of dominanting exits so far, we could
1711     // discharge exits with EC >= MDEC. This is less powerful than the existing
1712     // transform (since later exits aren't considered), but potentially more
1713     // powerful for any case where SCEV can prove a >=u b, but neither a == b
1714     // or a >u b.  Such a case is not currently known.
1715   }
1716   return Changed;
1717 }
1718 
1719 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1720   SmallVector<BasicBlock*, 16> ExitingBlocks;
1721   L->getExitingBlocks(ExitingBlocks);
1722 
1723   // Finally, see if we can rewrite our exit conditions into a loop invariant
1724   // form. If we have a read-only loop, and we can tell that we must exit down
1725   // a path which does not need any of the values computed within the loop, we
1726   // can rewrite the loop to exit on the first iteration.  Note that this
1727   // doesn't either a) tell us the loop exits on the first iteration (unless
1728   // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1729   // This transformation looks a lot like a restricted form of dead loop
1730   // elimination, but restricted to read-only loops and without neccesssarily
1731   // needing to kill the loop entirely.
1732   if (!LoopPredication)
1733     return false;
1734 
1735   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1736   // through *explicit* control flow.  We have to eliminate the possibility of
1737   // implicit exits (see below) before we know it's truly exact.
1738   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1739   if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1740     return false;
1741 
1742   assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1743   assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1744 
1745   auto BadExit = [&](BasicBlock *ExitingBB) {
1746     // If our exiting block exits multiple loops, we can only rewrite the
1747     // innermost one.  Otherwise, we're changing how many times the innermost
1748     // loop runs before it exits.
1749     if (LI->getLoopFor(ExitingBB) != L)
1750       return true;
1751 
1752     // Can't rewrite non-branch yet.
1753     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1754     if (!BI)
1755       return true;
1756 
1757     // If already constant, nothing to do.
1758     if (isa<Constant>(BI->getCondition()))
1759       return true;
1760 
1761     // If the exit block has phis, we need to be able to compute the values
1762     // within the loop which contains them.  This assumes trivially lcssa phis
1763     // have already been removed; TODO: generalize
1764     BasicBlock *ExitBlock =
1765     BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1766     if (!ExitBlock->phis().empty())
1767       return true;
1768 
1769     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1770     if (isa<SCEVCouldNotCompute>(ExitCount) ||
1771         !Rewriter.isSafeToExpand(ExitCount))
1772       return true;
1773 
1774     assert(SE->isLoopInvariant(ExitCount, L) &&
1775            "Exit count must be loop invariant");
1776     assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1777     return false;
1778   };
1779 
1780   // If we have any exits which can't be predicated themselves, than we can't
1781   // predicate any exit which isn't guaranteed to execute before it.  Consider
1782   // two exits (a) and (b) which would both exit on the same iteration.  If we
1783   // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1784   // we could convert a loop from exiting through (a) to one exiting through
1785   // (b).  Note that this problem exists only for exits with the same exit
1786   // count, and we could be more aggressive when exit counts are known inequal.
1787   llvm::sort(ExitingBlocks,
1788             [&](BasicBlock *A, BasicBlock *B) {
1789               // std::sort sorts in ascending order, so we want the inverse of
1790               // the normal dominance relation, plus a tie breaker for blocks
1791               // unordered by dominance.
1792               if (DT->properlyDominates(A, B)) return true;
1793               if (DT->properlyDominates(B, A)) return false;
1794               return A->getName() < B->getName();
1795             });
1796   // Check to see if our exit blocks are a total order (i.e. a linear chain of
1797   // exits before the backedge).  If they aren't, reasoning about reachability
1798   // is complicated and we choose not to for now.
1799   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1800     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1801       return false;
1802 
1803   // Given our sorted total order, we know that exit[j] must be evaluated
1804   // after all exit[i] such j > i.
1805   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1806     if (BadExit(ExitingBlocks[i])) {
1807       ExitingBlocks.resize(i);
1808       break;
1809     }
1810 
1811   if (ExitingBlocks.empty())
1812     return false;
1813 
1814   // We rely on not being able to reach an exiting block on a later iteration
1815   // then it's statically compute exit count.  The implementaton of
1816   // getExitCount currently has this invariant, but assert it here so that
1817   // breakage is obvious if this ever changes..
1818   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1819         return DT->dominates(ExitingBB, L->getLoopLatch());
1820       }));
1821 
1822   // At this point, ExitingBlocks consists of only those blocks which are
1823   // predicatable.  Given that, we know we have at least one exit we can
1824   // predicate if the loop is doesn't have side effects and doesn't have any
1825   // implicit exits (because then our exact BTC isn't actually exact).
1826   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
1827   // suggestions on how to improve this?  I can obviously bail out for outer
1828   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
1829   // is that enough for *all* side effects?
1830   for (BasicBlock *BB : L->blocks())
1831     for (auto &I : *BB)
1832       // TODO:isGuaranteedToTransfer
1833       if (I.mayHaveSideEffects())
1834         return false;
1835 
1836   bool Changed = false;
1837   // Finally, do the actual predication for all predicatable blocks.  A couple
1838   // of notes here:
1839   // 1) We don't bother to constant fold dominated exits with identical exit
1840   //    counts; that's simply a form of CSE/equality propagation and we leave
1841   //    it for dedicated passes.
1842   // 2) We insert the comparison at the branch.  Hoisting introduces additional
1843   //    legality constraints and we leave that to dedicated logic.  We want to
1844   //    predicate even if we can't insert a loop invariant expression as
1845   //    peeling or unrolling will likely reduce the cost of the otherwise loop
1846   //    varying check.
1847   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1848   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1849   Value *ExactBTCV = nullptr; // Lazily generated if needed.
1850   for (BasicBlock *ExitingBB : ExitingBlocks) {
1851     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1852 
1853     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1854     Value *NewCond;
1855     if (ExitCount == ExactBTC) {
1856       NewCond = L->contains(BI->getSuccessor(0)) ?
1857         B.getFalse() : B.getTrue();
1858     } else {
1859       Value *ECV = Rewriter.expandCodeFor(ExitCount);
1860       if (!ExactBTCV)
1861         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1862       Value *RHS = ExactBTCV;
1863       if (ECV->getType() != RHS->getType()) {
1864         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1865         ECV = B.CreateZExt(ECV, WiderTy);
1866         RHS = B.CreateZExt(RHS, WiderTy);
1867       }
1868       auto Pred = L->contains(BI->getSuccessor(0)) ?
1869         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1870       NewCond = B.CreateICmp(Pred, ECV, RHS);
1871     }
1872     Value *OldCond = BI->getCondition();
1873     BI->setCondition(NewCond);
1874     if (OldCond->use_empty())
1875       DeadInsts.emplace_back(OldCond);
1876     Changed = true;
1877   }
1878 
1879   return Changed;
1880 }
1881 
1882 //===----------------------------------------------------------------------===//
1883 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1884 //===----------------------------------------------------------------------===//
1885 
1886 bool IndVarSimplify::run(Loop *L) {
1887   // We need (and expect!) the incoming loop to be in LCSSA.
1888   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1889          "LCSSA required to run indvars!");
1890 
1891   // If LoopSimplify form is not available, stay out of trouble. Some notes:
1892   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
1893   //    canonicalization can be a pessimization without LSR to "clean up"
1894   //    afterwards.
1895   //  - We depend on having a preheader; in particular,
1896   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
1897   //    and we're in trouble if we can't find the induction variable even when
1898   //    we've manually inserted one.
1899   //  - LFTR relies on having a single backedge.
1900   if (!L->isLoopSimplifyForm())
1901     return false;
1902 
1903   bool Changed = false;
1904   // If there are any floating-point recurrences, attempt to
1905   // transform them to use integer recurrences.
1906   Changed |= rewriteNonIntegerIVs(L);
1907 
1908   // Create a rewriter object which we'll use to transform the code with.
1909   SCEVExpander Rewriter(*SE, DL, "indvars");
1910 #ifndef NDEBUG
1911   Rewriter.setDebugType(DEBUG_TYPE);
1912 #endif
1913 
1914   // Eliminate redundant IV users.
1915   //
1916   // Simplification works best when run before other consumers of SCEV. We
1917   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1918   // other expressions involving loop IVs have been evaluated. This helps SCEV
1919   // set no-wrap flags before normalizing sign/zero extension.
1920   Rewriter.disableCanonicalMode();
1921   Changed |= simplifyAndExtend(L, Rewriter, LI);
1922 
1923   // Check to see if we can compute the final value of any expressions
1924   // that are recurrent in the loop, and substitute the exit values from the
1925   // loop into any instructions outside of the loop that use the final values
1926   // of the current expressions.
1927   if (ReplaceExitValue != NeverRepl) {
1928     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1929                                              ReplaceExitValue, DeadInsts)) {
1930       NumReplaced += Rewrites;
1931       Changed = true;
1932     }
1933   }
1934 
1935   // Eliminate redundant IV cycles.
1936   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1937 
1938   // Try to convert exit conditions to unsigned and rotate computation
1939   // out of the loop.  Note: Handles invalidation internally if needed.
1940   Changed |= canonicalizeExitCondition(L);
1941 
1942   // Try to eliminate loop exits based on analyzeable exit counts
1943   if (optimizeLoopExits(L, Rewriter))  {
1944     Changed = true;
1945     // Given we've changed exit counts, notify SCEV
1946     // Some nested loops may share same folded exit basic block,
1947     // thus we need to notify top most loop.
1948     SE->forgetTopmostLoop(L);
1949   }
1950 
1951   // Try to form loop invariant tests for loop exits by changing how many
1952   // iterations of the loop run when that is unobservable.
1953   if (predicateLoopExits(L, Rewriter)) {
1954     Changed = true;
1955     // Given we've changed exit counts, notify SCEV
1956     SE->forgetLoop(L);
1957   }
1958 
1959   // If we have a trip count expression, rewrite the loop's exit condition
1960   // using it.
1961   if (!DisableLFTR) {
1962     BasicBlock *PreHeader = L->getLoopPreheader();
1963 
1964     SmallVector<BasicBlock*, 16> ExitingBlocks;
1965     L->getExitingBlocks(ExitingBlocks);
1966     for (BasicBlock *ExitingBB : ExitingBlocks) {
1967       // Can't rewrite non-branch yet.
1968       if (!isa<BranchInst>(ExitingBB->getTerminator()))
1969         continue;
1970 
1971       // If our exitting block exits multiple loops, we can only rewrite the
1972       // innermost one.  Otherwise, we're changing how many times the innermost
1973       // loop runs before it exits.
1974       if (LI->getLoopFor(ExitingBB) != L)
1975         continue;
1976 
1977       if (!needsLFTR(L, ExitingBB))
1978         continue;
1979 
1980       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1981       if (isa<SCEVCouldNotCompute>(ExitCount))
1982         continue;
1983 
1984       // This was handled above, but as we form SCEVs, we can sometimes refine
1985       // existing ones; this allows exit counts to be folded to zero which
1986       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
1987       // until stable to handle cases like this better.
1988       if (ExitCount->isZero())
1989         continue;
1990 
1991       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
1992       if (!IndVar)
1993         continue;
1994 
1995       // Avoid high cost expansions.  Note: This heuristic is questionable in
1996       // that our definition of "high cost" is not exactly principled.
1997       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
1998                                        TTI, PreHeader->getTerminator()))
1999         continue;
2000 
2001       if (!Rewriter.isSafeToExpand(ExitCount))
2002         continue;
2003 
2004       Changed |= linearFunctionTestReplace(L, ExitingBB,
2005                                            ExitCount, IndVar,
2006                                            Rewriter);
2007     }
2008   }
2009   // Clear the rewriter cache, because values that are in the rewriter's cache
2010   // can be deleted in the loop below, causing the AssertingVH in the cache to
2011   // trigger.
2012   Rewriter.clear();
2013 
2014   // Now that we're done iterating through lists, clean up any instructions
2015   // which are now dead.
2016   while (!DeadInsts.empty()) {
2017     Value *V = DeadInsts.pop_back_val();
2018 
2019     if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2020       Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2021     else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2022       Changed |=
2023           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2024   }
2025 
2026   // The Rewriter may not be used from this point on.
2027 
2028   // Loop-invariant instructions in the preheader that aren't used in the
2029   // loop may be sunk below the loop to reduce register pressure.
2030   Changed |= sinkUnusedInvariants(L);
2031 
2032   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2033   // trip count and therefore can further simplify exit values in addition to
2034   // rewriteLoopExitValues.
2035   Changed |= rewriteFirstIterationLoopExitValues(L);
2036 
2037   // Clean up dead instructions.
2038   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2039 
2040   // Check a post-condition.
2041   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2042          "Indvars did not preserve LCSSA!");
2043   if (VerifyMemorySSA && MSSAU)
2044     MSSAU->getMemorySSA()->verifyMemorySSA();
2045 
2046   return Changed;
2047 }
2048 
2049 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2050                                           LoopStandardAnalysisResults &AR,
2051                                           LPMUpdater &) {
2052   Function *F = L.getHeader()->getParent();
2053   const DataLayout &DL = F->getParent()->getDataLayout();
2054 
2055   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2056                      WidenIndVars && AllowIVWidening);
2057   if (!IVS.run(&L))
2058     return PreservedAnalyses::all();
2059 
2060   auto PA = getLoopPassPreservedAnalyses();
2061   PA.preserveSet<CFGAnalyses>();
2062   if (AR.MSSA)
2063     PA.preserve<MemorySSAAnalysis>();
2064   return PA;
2065 }
2066