xref: /llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision 24caf0196d03858bd9fe90d14133fb69c8cea444)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/iterator_range.h"
35 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/Analysis/MemorySSA.h"
38 #include "llvm/Analysis/MemorySSAUpdater.h"
39 #include "llvm/Analysis/ScalarEvolution.h"
40 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
41 #include "llvm/Analysis/TargetLibraryInfo.h"
42 #include "llvm/Analysis/TargetTransformInfo.h"
43 #include "llvm/Analysis/ValueTracking.h"
44 #include "llvm/IR/BasicBlock.h"
45 #include "llvm/IR/Constant.h"
46 #include "llvm/IR/ConstantRange.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DataLayout.h"
49 #include "llvm/IR/DerivedTypes.h"
50 #include "llvm/IR/Dominators.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/IRBuilder.h"
53 #include "llvm/IR/InstrTypes.h"
54 #include "llvm/IR/Instruction.h"
55 #include "llvm/IR/Instructions.h"
56 #include "llvm/IR/IntrinsicInst.h"
57 #include "llvm/IR/Intrinsics.h"
58 #include "llvm/IR/Module.h"
59 #include "llvm/IR/Operator.h"
60 #include "llvm/IR/PassManager.h"
61 #include "llvm/IR/PatternMatch.h"
62 #include "llvm/IR/Type.h"
63 #include "llvm/IR/Use.h"
64 #include "llvm/IR/User.h"
65 #include "llvm/IR/Value.h"
66 #include "llvm/IR/ValueHandle.h"
67 #include "llvm/InitializePasses.h"
68 #include "llvm/Pass.h"
69 #include "llvm/Support/Casting.h"
70 #include "llvm/Support/CommandLine.h"
71 #include "llvm/Support/Compiler.h"
72 #include "llvm/Support/Debug.h"
73 #include "llvm/Support/MathExtras.h"
74 #include "llvm/Support/raw_ostream.h"
75 #include "llvm/Transforms/Scalar.h"
76 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
77 #include "llvm/Transforms/Utils/Local.h"
78 #include "llvm/Transforms/Utils/LoopUtils.h"
79 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
80 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
81 #include <cassert>
82 #include <cstdint>
83 #include <utility>
84 
85 using namespace llvm;
86 using namespace PatternMatch;
87 
88 #define DEBUG_TYPE "indvars"
89 
// Pass statistics, reported with -stats.
STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Strategy for rewriting a loop-closed use of an IV into a computation of the
// exit value outside the loop (see ReplaceExitVal in LoopUtils).
static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

// Passed through to createWideIV in simplifyAndExtend below.
static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));
129 namespace {
130 
/// Driver for the indvars transformation on a single loop.  Holds the analysis
/// results the transformation consults and the list of instructions queued for
/// deletion while rewriting.
class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  // Non-null only when the caller provided MemorySSA; kept up to date when
  // deleting instructions.
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  // Instructions made dead during rewriting; owned weakly so deletions
  // elsewhere are observed.
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  // Whether IV widening is permitted for this run (see AllowIVWidening flag).
  bool WidenIndVars;

  /// Replace a floating-point IV rooted at PH with an equivalent integer IV
  /// when the init/step/exit values are all integral.
  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  /// Run handleFloatingPointIV over all header phis of L.
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzeable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  /// Rewrite exit-block phi uses of header phis when the exit can only be
  /// taken on the first iteration.
  bool rewriteFirstIterationLoopExitValues(Loop *L);

  /// Canonicalize the test controlling ExitingBB to an eq/ne compare of a
  /// simple counter IV against its exit value.
  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  /// Sink loop-invariant instructions out of the loop when unused inside it.
  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  /// Entry point: run all indvars subtransforms on L; returns true if the IR
  /// changed.
  bool run(Loop *L);
};
177 
178 } // end anonymous namespace
179 
180 //===----------------------------------------------------------------------===//
181 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
182 //===----------------------------------------------------------------------===//
183 
184 /// Convert APF to an integer, if possible.
185 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
186   bool isExact = false;
187   // See if we can convert this to an int64_t
188   uint64_t UIntVal;
189   if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
190                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
191       !isExact)
192     return false;
193   IntVal = UIntVal;
194   return true;
195 }
196 
/// If the loop has floating induction variable then insert corresponding
/// integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
///
/// Returns true if the IR changed.  Requires PN to be a header phi of L with
/// exactly two incoming edges (preheader and backedge).
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  // L->contains(...) yields 0 or 1, so this picks the index of the edge coming
  // from outside the loop; XOR with 1 selects the other (backedge) index.
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if increment operation is not
  // an add or increment value can not be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.  Require exactly two users so no other
  // fp computation depends on the increment.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.  Ordered and unordered fp
  // predicates map to the same signed integer predicate because the operands
  // are known integral constants here (never NaN).
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable, placed immediately before the old
  // fp phi so it lives in the loop header.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}
398 
399 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
400   // First step.  Check to see if there are any floating-point recurrences.
401   // If there are, change them into integer recurrences, permitting analysis by
402   // the SCEV routines.
403   BasicBlock *Header = L->getHeader();
404 
405   SmallVector<WeakTrackingVH, 8> PHIs;
406   for (PHINode &PN : Header->phis())
407     PHIs.push_back(&PN);
408 
409   bool Changed = false;
410   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
411     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
412       Changed |= handleFloatingPointIV(L, PN);
413 
414   // If the loop previously had floating-point IV, ScalarEvolution
415   // may not have been able to compute a trip count. Now that we've done some
416   // re-writing, the trip count may be computable.
417   if (Changed)
418     SE->forgetLoop(L);
419   return Changed;
420 }
421 
422 //===---------------------------------------------------------------------===//
423 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
424 // they will exit at the first iteration.
425 //===---------------------------------------------------------------------===//
426 
427 /// Check to see if this loop has loop invariant conditions which lead to loop
428 /// exits. If so, we know that if the exit path is taken, it is at the first
429 /// loop iteration. This lets us predict exit values of PHI nodes that live in
430 /// loop header.
431 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
432   // Verify the input to the pass is already in LCSSA form.
433   assert(L->isLCSSAForm(*DT));
434 
435   SmallVector<BasicBlock *, 8> ExitBlocks;
436   L->getUniqueExitBlocks(ExitBlocks);
437 
438   bool MadeAnyChanges = false;
439   for (auto *ExitBB : ExitBlocks) {
440     // If there are no more PHI nodes in this exit block, then no more
441     // values defined inside the loop are used on this path.
442     for (PHINode &PN : ExitBB->phis()) {
443       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
444            IncomingValIdx != E; ++IncomingValIdx) {
445         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
446 
447         // Can we prove that the exit must run on the first iteration if it
448         // runs at all?  (i.e. early exits are fine for our purposes, but
449         // traces which lead to this exit being taken on the 2nd iteration
450         // aren't.)  Note that this is about whether the exit branch is
451         // executed, not about whether it is taken.
452         if (!L->getLoopLatch() ||
453             !DT->dominates(IncomingBB, L->getLoopLatch()))
454           continue;
455 
456         // Get condition that leads to the exit path.
457         auto *TermInst = IncomingBB->getTerminator();
458 
459         Value *Cond = nullptr;
460         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
461           // Must be a conditional branch, otherwise the block
462           // should not be in the loop.
463           Cond = BI->getCondition();
464         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
465           Cond = SI->getCondition();
466         else
467           continue;
468 
469         if (!L->isLoopInvariant(Cond))
470           continue;
471 
472         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
473 
474         // Only deal with PHIs in the loop header.
475         if (!ExitVal || ExitVal->getParent() != L->getHeader())
476           continue;
477 
478         // If ExitVal is a PHI on the loop header, then we know its
479         // value along this exit because the exit can only be taken
480         // on the first iteration.
481         auto *LoopPreheader = L->getLoopPreheader();
482         assert(LoopPreheader && "Invalid loop");
483         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
484         if (PreheaderIdx != -1) {
485           assert(ExitVal->getParent() == L->getHeader() &&
486                  "ExitVal must be in loop header");
487           MadeAnyChanges = true;
488           PN.setIncomingValue(IncomingValIdx,
489                               ExitVal->getIncomingValue(PreheaderIdx));
490           SE->forgetValue(&PN);
491         }
492       }
493     }
494   }
495   return MadeAnyChanges;
496 }
497 
498 //===----------------------------------------------------------------------===//
499 //  IV Widening - Extend the width of an IV to cover its widest uses.
500 //===----------------------------------------------------------------------===//
501 
502 /// Update information about the induction variable that is extended by this
503 /// sign or zero extend operation. This is used to determine the final width of
504 /// the IV before actually widening it.
505 static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
506                         ScalarEvolution *SE,
507                         const TargetTransformInfo *TTI) {
508   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
509   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
510     return;
511 
512   Type *Ty = Cast->getType();
513   uint64_t Width = SE->getTypeSizeInBits(Ty);
514   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
515     return;
516 
517   // Check that `Cast` actually extends the induction variable (we rely on this
518   // later).  This takes care of cases where `Cast` is extending a truncation of
519   // the narrow induction variable, and thus can end up being narrower than the
520   // "narrow" induction variable.
521   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
522   if (NarrowIVWidth >= Width)
523     return;
524 
525   // Cast is either an sext or zext up to this point.
526   // We should not widen an indvar if arithmetics on the wider indvar are more
527   // expensive than those on the narrower indvar. We check only the cost of ADD
528   // because at least an ADD is required to increment the induction variable. We
529   // could compute more comprehensively the cost of all instructions on the
530   // induction variable when necessary.
531   if (TTI &&
532       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
533           TTI->getArithmeticInstrCost(Instruction::Add,
534                                       Cast->getOperand(0)->getType())) {
535     return;
536   }
537 
538   if (!WI.WidestNativeType ||
539       Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
540     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
541     WI.IsSigned = IsSigned;
542     return;
543   }
544 
545   // We extend the IV to satisfy the sign of its user(s), or 'signed'
546   // if there are multiple users with both sign- and zero extensions,
547   // in order not to introduce nondeterministic behaviour based on the
548   // unspecified order of a PHI nodes' users-iterator.
549   WI.IsSigned |= IsSigned;
550 }
551 
552 //===----------------------------------------------------------------------===//
553 //  Live IV Reduction - Minimize IVs live across the loop.
554 //===----------------------------------------------------------------------===//
555 
556 //===----------------------------------------------------------------------===//
557 //  Simplification of IV users based on SCEV evaluation.
558 //===----------------------------------------------------------------------===//
559 
560 namespace {
561 
/// IVVisitor used by simplifyUsersOfIV to collect sign/zero extension
/// information about a narrow IV phi while its users are being simplified.
/// The result is accumulated into WI for a later widening decision.
class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  // The narrow IV phi whose extensions are being analyzed.
  PHINode *IVPhi;

public:
  // Aggregated widening info; NarrowIV is set in the constructor and
  // WidestNativeType/IsSigned are filled in by visitCast.
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
    // DT is a member of the IVVisitor base class.
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};
581 
582 } // end anonymous namespace
583 
/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
///
/// Returns true if any simplification or widening changed the IR.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  // Detect whether the function uses guard intrinsics; this is forwarded to
  // createWideIV below.
  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
          Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  // Seed the worklist with all header phis of the loop.
  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      // Record this IV as a widening candidate if the visitor saw a
      // qualifying sext/zext of it.
      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while(!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    // Widen each candidate; a successful widening yields a new wide phi that
    // is queued for another round of simplification.
    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}
647 
648 //===----------------------------------------------------------------------===//
649 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
650 //===----------------------------------------------------------------------===//
651 
652 /// Given an Value which is hoped to be part of an add recurance in the given
653 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
654 /// that this is less general than SCEVs AddRec checking.
655 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
656   Instruction *IncI = dyn_cast<Instruction>(IncV);
657   if (!IncI)
658     return nullptr;
659 
660   switch (IncI->getOpcode()) {
661   case Instruction::Add:
662   case Instruction::Sub:
663     break;
664   case Instruction::GetElementPtr:
665     // An IV counter must preserve its type.
666     if (IncI->getNumOperands() == 2)
667       break;
668     [[fallthrough]];
669   default:
670     return nullptr;
671   }
672 
673   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
674   if (Phi && Phi->getParent() == L->getHeader()) {
675     if (L->isLoopInvariant(IncI->getOperand(1)))
676       return Phi;
677     return nullptr;
678   }
679   if (IncI->getOpcode() == Instruction::GetElementPtr)
680     return nullptr;
681 
682   // Allow add/sub to be commuted.
683   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
684   if (Phi && Phi->getParent() == L->getHeader()) {
685     if (L->isLoopInvariant(IncI->getOperand(0)))
686       return Phi;
687   }
688   return nullptr;
689 }
690 
691 /// Whether the current loop exit test is based on this value.  Currently this
692 /// is limited to a direct use in the loop condition.
693 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
694   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
695   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
696   // TODO: Allow non-icmp loop test.
697   if (!ICmp)
698     return false;
699 
700   // TODO: Allow indirect use.
701   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
702 }
703 
704 /// linearFunctionTestReplace policy. Return true unless we can show that the
705 /// current exit test is already sufficiently canonical.
706 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
707   assert(L->getLoopLatch() && "Must be in simplified form");
708 
709   // Avoid converting a constant or loop invariant test back to a runtime
710   // test.  This is critical for when SCEV's cached ExitCount is less precise
711   // than the current IR (such as after we've proven a particular exit is
712   // actually dead and thus the BE count never reaches our ExitCount.)
713   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
714   if (L->isLoopInvariant(BI->getCondition()))
715     return false;
716 
717   // Do LFTR to simplify the exit condition to an ICMP.
718   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
719   if (!Cond)
720     return true;
721 
722   // Do LFTR to simplify the exit ICMP to EQ/NE
723   ICmpInst::Predicate Pred = Cond->getPredicate();
724   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
725     return true;
726 
727   // Look for a loop invariant RHS
728   Value *LHS = Cond->getOperand(0);
729   Value *RHS = Cond->getOperand(1);
730   if (!L->isLoopInvariant(RHS)) {
731     if (!L->isLoopInvariant(LHS))
732       return true;
733     std::swap(LHS, RHS);
734   }
735   // Look for a simple IV counter LHS
736   PHINode *Phi = dyn_cast<PHINode>(LHS);
737   if (!Phi)
738     Phi = getLoopPhiForCounter(LHS, L);
739 
740   if (!Phi)
741     return true;
742 
743   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
744   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
745   if (Idx < 0)
746     return true;
747 
748   // Do LFTR if the exit condition's IV is *not* a simple counter.
749   Value *IncV = Phi->getIncomingValue(Idx);
750   return Phi != getLoopPhiForCounter(IncV, L);
751 }
752 
753 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
754 /// down to checking that all operands are constant and listing instructions
755 /// that may hide undef.
756 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
757                                unsigned Depth) {
758   if (isa<Constant>(V))
759     return !isa<UndefValue>(V);
760 
761   if (Depth >= 6)
762     return false;
763 
764   // Conservatively handle non-constant non-instructions. For example, Arguments
765   // may be undef.
766   Instruction *I = dyn_cast<Instruction>(V);
767   if (!I)
768     return false;
769 
770   // Load and return values may be undef.
771   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
772     return false;
773 
774   // Optimistically handle other instructions.
775   for (Value *Op : I->operands()) {
776     if (!Visited.insert(Op).second)
777       continue;
778     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
779       return false;
780   }
781   return true;
782 }
783 
784 /// Return true if the given value is concrete. We must prove that undef can
785 /// never reach it.
786 ///
787 /// TODO: If we decide that this is a good approach to checking for undef, we
788 /// may factor it into a common location.
789 static bool hasConcreteDef(Value *V) {
790   SmallPtrSet<Value*, 8> Visited;
791   Visited.insert(V);
792   return hasConcreteDefImpl(V, Visited, 0);
793 }
794 
795 /// Return true if the given phi is a "counter" in L.  A counter is an
796 /// add recurance (of integer or pointer type) with an arbitrary start, and a
797 /// step of 1.  Note that L must have exactly one latch.
798 static bool isLoopCounter(PHINode* Phi, Loop *L,
799                           ScalarEvolution *SE) {
800   assert(Phi->getParent() == L->getHeader());
801   assert(L->getLoopLatch());
802 
803   if (!SE->isSCEVable(Phi->getType()))
804     return false;
805 
806   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
807   if (!AR || AR->getLoop() != L || !AR->isAffine())
808     return false;
809 
810   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
811   if (!Step || !Step->isOne())
812     return false;
813 
814   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
815   Value *IncV = Phi->getIncomingValue(LatchIdx);
816   return (getLoopPhiForCounter(IncV, L) == Phi &&
817           isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
818 }
819 
/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  // Exit condition; used below to check whether a candidate IV is already
  // (almost) dead outside of this test.
  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (isAlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
907 
/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be an loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
///
/// \p UsePostInc selects whether the limit is for the pre- or post-increment
/// form of the IV (post-inc adds one to the computed limit).  Returns the
/// expanded limit value, inserted before the exiting branch.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    // Limit = start pointer + offset; expanded immediately before the branch.
    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know apriori that the limit must be a constant when evaluated
    // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}
987 
/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
///
/// \p IndVar must be a loop counter (see isLoopCounter) for \p L; \p ExitCount
/// is the loop-invariant number of times the backedge is taken before the
/// \p ExitingBB exit is reached.  Returns true (the branch condition is always
/// rewritten once we get here).
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  // The post-increment value of the IV, defined in the latch.
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  // Materialize the loop-invariant limit the IV is compared against.
  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within in.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
1129 
1130 //===----------------------------------------------------------------------===//
1131 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1132 //===----------------------------------------------------------------------===//
1133 
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
///
/// Returns true if any instruction was moved.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  // Walk the preheader bottom-up starting from its terminator.
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // A phi use counts as occurring in the block it flows in from, not the
      // block holding the phi.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    // Advance the iterator past ToMove before moving it, so the walk can
    // continue; if only debug/pseudo instructions remain above, we're done.
    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    SE->forgetValue(ToMove);
    if (Done) break;
    // Keep subsequent sunk instructions in original (program) order by
    // inserting them before the one just moved.
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}
1223 
1224 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1225                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1226   auto *OldCond = BI->getCondition();
1227   LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1228                     << " with " << *NewCond << "\n");
1229   BI->setCondition(NewCond);
1230   if (OldCond->use_empty())
1231     DeadInsts.emplace_back(OldCond);
1232 }
1233 
1234 static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1235                                       bool IsTaken) {
1236   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1237   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1238   auto *OldCond = BI->getCondition();
1239   return ConstantInt::get(OldCond->getType(),
1240                           IsTaken ? ExitIfTrue : !ExitIfTrue);
1241 }
1242 
1243 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1244                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1245   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1246   auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1247   replaceExitCond(BI, NewCond, DeadInsts);
1248 }
1249 
1250 static void replaceLoopPHINodesWithPreheaderValues(
1251     LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1252     ScalarEvolution &SE) {
1253   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1254   auto *LoopPreheader = L->getLoopPreheader();
1255   auto *LoopHeader = L->getHeader();
1256   SmallVector<Instruction *> Worklist;
1257   for (auto &PN : LoopHeader->phis()) {
1258     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1259     for (User *U : PN.users())
1260       Worklist.push_back(cast<Instruction>(U));
1261     SE.forgetValue(&PN);
1262     PN.replaceAllUsesWith(PreheaderIncoming);
1263     DeadInsts.emplace_back(&PN);
1264   }
1265 
1266   // Replacing with the preheader value will often allow IV users to simplify
1267   // (especially if the preheader value is a constant).
1268   SmallPtrSet<Instruction *, 16> Visited;
1269   while (!Worklist.empty()) {
1270     auto *I = cast<Instruction>(Worklist.pop_back_val());
1271     if (!Visited.insert(I).second)
1272       continue;
1273 
1274     // Don't simplify instructions outside the loop.
1275     if (!L->contains(I))
1276       continue;
1277 
1278     Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
1279     if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1280       for (User *U : I->users())
1281         Worklist.push_back(cast<Instruction>(U));
1282       I->replaceAllUsesWith(Res);
1283       DeadInsts.emplace_back(I);
1284     }
1285   }
1286 }
1287 
1288 static Value *
1289 createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
1290                     const ScalarEvolution::LoopInvariantPredicate &LIP,
1291                     SCEVExpander &Rewriter) {
1292   ICmpInst::Predicate InvariantPred = LIP.Pred;
1293   BasicBlock *Preheader = L->getLoopPreheader();
1294   assert(Preheader && "Preheader doesn't exist");
1295   Rewriter.setInsertPoint(Preheader->getTerminator());
1296   auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1297   auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1298   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1299   if (ExitIfTrue)
1300     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1301   IRBuilder<> Builder(Preheader->getTerminator());
1302   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1303   return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1304                             BI->getCondition()->getName());
1305 }
1306 
/// Try to compute a replacement for the exit test \p ICmp given that the loop
/// runs at most \p MaxIter iterations.  Returns a folded constant condition,
/// a loop-invariant condition materialized in the preheader, or std::nullopt
/// if no replacement could be proven.  \p Inverted means the branch stays in
/// the loop on the false edge; \p SkipLastIter means the test's value on the
/// final iteration is irrelevant.
static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  ICmpInst::Predicate Pred = ICmp->getPredicate();
  Value *LHS = ICmp->getOperand(0);
  Value *RHS = ICmp->getOperand(1);

  // 'LHS pred RHS' should now mean that we stay in loop.
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true or false?
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types so MaxIter and the addrec agree.  Narrowing
  // MaxIter is only legal if it provably fits in the narrower type.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    // Semantically skip last iter is "subtract 1, do not bother about unsigned
    // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
    // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
    // So we manually construct umin(a - 1, b - 1).
    SmallVector<const SCEV *, 4> Elements;
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
      for (auto *Op : UMin->operands())
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
    } else
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return std::nullopt;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
  else
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
}
1364 
/// Try to simplify the exit test of \p BI (whose exact exit count is unknown)
/// using the loop's symbolic maximum trip count \p MaxIter.  The condition is
/// decomposed through one-use logical and/or into leaf icmps, and each leaf is
/// independently replaced via createReplacement when possible.  Returns true
/// if any leaf condition was replaced.
static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
      "Not a loop exit!");

  // For branch that stays in loop by TRUE condition, go through AND. For branch
  // that stays in loop by FALSE condition, go through OR. Both gives the
  // similar logic: "stay in loop iff all conditions are true(false)".
  bool Inverted = L->contains(BI->getSuccessor(1));
  SmallVector<ICmpInst *, 4> LeafConditions;
  SmallVector<Value *, 4> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  Value *OldCond = BI->getCondition();
  Visited.insert(OldCond);
  Worklist.push_back(OldCond);

  // Decompose one level of logical and/or, pushing both operands onto the
  // worklist; returns false if V is not such a combinator.
  auto GoThrough = [&](Value *V) {
    Value *LHS = nullptr, *RHS = nullptr;
    if (Inverted) {
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
        return false;
    } else {
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
        return false;
    }
    if (Visited.insert(LHS).second)
      Worklist.push_back(LHS);
    if (Visited.insert(RHS).second)
      Worklist.push_back(RHS);
    return true;
  };

  do {
    Value *Curr = Worklist.pop_back_val();
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
    // those with one use, to avoid instruction duplication.
    if (Curr->hasOneUse())
      if (!GoThrough(Curr))
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
          LeafConditions.push_back(ICmp);
  } while (!Worklist.empty());

  // If the current basic block has the same exit count as the whole loop, and
  // it consists of multiple icmp's, try to collect all icmp's that give exact
  // same exit count. For all other icmp's, we could use one less iteration,
  // because their value on the last iteration doesn't really matter.
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
  if (!SkipLastIter && LeafConditions.size() > 1 &&
      SE->getExitCount(L, ExitingBB,
                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
          MaxIter)
    for (auto *ICmp : LeafConditions) {
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
                                             /*ControlsExit*/ false);
      auto *ExitMax = EL.SymbolicMaxNotTaken;
      if (isa<SCEVCouldNotCompute>(ExitMax))
        continue;
      // They could be of different types (specifically this happens after
      // IV widening).
      auto *WiderType =
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
      auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
      auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on last iter, so this one doesn't
    // really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
    }
    if (auto Replaced =
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
                              OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(NewCond);
      DeadInsts.push_back(OldCond);
      // Make sure we no longer consider this condition as failing on last
      // iteration.
      ICmpsFailingOnLastIter.erase(OldCond);
    }
  }
  return Changed;
}
1468 
/// Canonicalize the loop's exit conditions in two phases:
///  1) Convert "icmp signed-pred zext(X), RHS" into its unsigned equivalent
///     when SCEV's unsigned range for RHS proves both interpretations agree.
///  2) Rotate "icmp unsigned-pred zext(X), RHS" into
///     "icmp unsigned-pred X, trunc(RHS)", replacing loop-varying extension
///     work with a loop-invariant truncate in the preheader.
/// Both forms make the exit condition more amenable to SCEV trip-count
/// computation.  Returns true if any compare was rewritten.
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags.  The alternative to this would be to extending the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers.  The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    // Only rewrite a compare whose sole user is the branch; otherwise we'd
    // change the value observed by the other users.
    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results.  For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    // If every value RHS may take fits inside the zero-extended image of the
    // narrow type, the signed and unsigned readings of the compare agree and
    // we can safely switch to the unsigned predicate.
    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed.  We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions.  The exception is when we have a zext(add-rec).  The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant.  This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS =
        CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
                         L->getLoopPreheader()->getTerminator());
      // Preserve the original operand order of the compare.
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };


    // The rotation is sound only if truncating RHS loses no information,
    // i.e. RHS's unsigned range fits within the narrow type.
    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}
1602 
1603 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1604   SmallVector<BasicBlock*, 16> ExitingBlocks;
1605   L->getExitingBlocks(ExitingBlocks);
1606 
1607   // Remove all exits which aren't both rewriteable and execute on every
1608   // iteration.
1609   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1610     // If our exitting block exits multiple loops, we can only rewrite the
1611     // innermost one.  Otherwise, we're changing how many times the innermost
1612     // loop runs before it exits.
1613     if (LI->getLoopFor(ExitingBB) != L)
1614       return true;
1615 
1616     // Can't rewrite non-branch yet.
1617     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1618     if (!BI)
1619       return true;
1620 
1621     // Likewise, the loop latch must be dominated by the exiting BB.
1622     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1623       return true;
1624 
1625     if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1626       // If already constant, nothing to do. However, if this is an
1627       // unconditional exit, we can still replace header phis with their
1628       // preheader value.
1629       if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1630         replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1631       return true;
1632     }
1633 
1634     return false;
1635   });
1636 
1637   if (ExitingBlocks.empty())
1638     return false;
1639 
1640   // Get a symbolic upper bound on the loop backedge taken count.
1641   const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1642   if (isa<SCEVCouldNotCompute>(MaxBECount))
1643     return false;
1644 
1645   // Visit our exit blocks in order of dominance. We know from the fact that
1646   // all exits must dominate the latch, so there is a total dominance order
1647   // between them.
1648   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1649                // std::sort sorts in ascending order, so we want the inverse of
1650                // the normal dominance relation.
1651                if (A == B) return false;
1652                if (DT->properlyDominates(A, B))
1653                  return true;
1654                else {
1655                  assert(DT->properlyDominates(B, A) &&
1656                         "expected total dominance order!");
1657                  return false;
1658                }
1659   });
1660 #ifdef ASSERT
1661   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1662     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1663   }
1664 #endif
1665 
1666   bool Changed = false;
1667   bool SkipLastIter = false;
1668   const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1669   auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1670     if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1671       return;
1672     if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1673       CurrMaxExit = MaxExitCount;
1674     else
1675       CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1676     // If the loop has more than 1 iteration, all further checks will be
1677     // executed 1 iteration less.
1678     if (CurrMaxExit == MaxBECount)
1679       SkipLastIter = true;
1680   };
1681   SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1682   for (BasicBlock *ExitingBB : ExitingBlocks) {
1683     const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1684     const SCEV *MaxExitCount = SE->getExitCount(
1685         L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1686     if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1687       // Okay, we do not know the exit count here. Can we at least prove that it
1688       // will remain the same within iteration space?
1689       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1690       auto OptimizeCond = [&](bool SkipLastIter) {
1691         return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1692                                                     MaxBECount, SkipLastIter,
1693                                                     SE, Rewriter, DeadInsts);
1694       };
1695 
1696       // TODO: We might have proved that we can skip the last iteration for
1697       // this check. In this case, we only want to check the condition on the
1698       // pre-last iteration (MaxBECount - 1). However, there is a nasty
1699       // corner case:
1700       //
1701       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1702       //
1703       // If we could not prove that len != 0, then we also could not prove that
1704       // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1705       // OptimizeCond will likely not prove anything for it, even if it could
1706       // prove the same fact for len.
1707       //
1708       // As a temporary solution, we query both last and pre-last iterations in
1709       // hope that we will be able to prove triviality for at least one of
1710       // them. We can stop querying MaxBECount for this case once SCEV
1711       // understands that (MaxBECount - 1) will not overflow here.
1712       if (OptimizeCond(false))
1713         Changed = true;
1714       else if (SkipLastIter && OptimizeCond(true))
1715         Changed = true;
1716       UpdateSkipLastIter(MaxExitCount);
1717       continue;
1718     }
1719 
1720     UpdateSkipLastIter(ExactExitCount);
1721 
1722     // If we know we'd exit on the first iteration, rewrite the exit to
1723     // reflect this.  This does not imply the loop must exit through this
1724     // exit; there may be an earlier one taken on the first iteration.
1725     // We know that the backedge can't be taken, so we replace all
1726     // the header PHIs with values coming from the preheader.
1727     if (ExactExitCount->isZero()) {
1728       foldExit(L, ExitingBB, true, DeadInsts);
1729       replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1730       Changed = true;
1731       continue;
1732     }
1733 
1734     assert(ExactExitCount->getType()->isIntegerTy() &&
1735            MaxBECount->getType()->isIntegerTy() &&
1736            "Exit counts must be integers");
1737 
1738     Type *WiderType =
1739         SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1740     ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1741     MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1742     assert(MaxBECount->getType() == ExactExitCount->getType());
1743 
1744     // Can we prove that some other exit must be taken strictly before this
1745     // one?
1746     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1747                                      ExactExitCount)) {
1748       foldExit(L, ExitingBB, false, DeadInsts);
1749       Changed = true;
1750       continue;
1751     }
1752 
1753     // As we run, keep track of which exit counts we've encountered.  If we
1754     // find a duplicate, we've found an exit which would have exited on the
1755     // exiting iteration, but (from the visit order) strictly follows another
1756     // which does the same and is thus dead.
1757     if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1758       foldExit(L, ExitingBB, false, DeadInsts);
1759       Changed = true;
1760       continue;
1761     }
1762 
1763     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1764     // here.  If we kept track of the min of dominanting exits so far, we could
1765     // discharge exits with EC >= MDEC. This is less powerful than the existing
1766     // transform (since later exits aren't considered), but potentially more
1767     // powerful for any case where SCEV can prove a >=u b, but neither a == b
1768     // or a >u b.  Such a case is not currently known.
1769   }
1770   return Changed;
1771 }
1772 
/// For a loop proven free of side effects (and of implicit exits), rewrite
/// each predicatable exit condition into a loop-invariant comparison of that
/// exit's count against the loop's exact backedge-taken count.  This may
/// change *which* exit is taken, but not whether/when the loop exits — which
/// is unobservable for such loops.  Returns true if any branch was rewritten.
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration.  Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  // Returns true if the given exit cannot be predicated (wrong terminator
  // shape, already-constant condition, live exit phis, or an exit count we
  // can't compute/expand).
  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
    BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, than we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).  Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known inequal.
  llvm::sort(ExitingBlocks,
            [&](BasicBlock *A, BasicBlock *B) {
              // std::sort sorts in ascending order, so we want the inverse of
              // the normal dominance relation, plus a tie breaker for blocks
              // unordered by dominance.
              if (DT->properlyDominates(A, B)) return true;
              if (DT->properlyDominates(B, A)) return false;
              return A->getName() < B->getName();
            });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes..
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO:isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      // This exit's count IS the BTC: the exit is taken exactly when the
      // backedge isn't, so the condition folds to a constant.
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      // Otherwise compare this exit's count against the exact BTC, widening
      // to a common type first if needed.
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}
1935 
1936 //===----------------------------------------------------------------------===//
1937 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1938 //===----------------------------------------------------------------------===//
1939 
/// Top-level driver for induction variable simplification on a single loop.
/// Runs the subphases in a deliberate order: float-IV rewriting, IV user
/// simplification/widening, exit-value rewriting, congruent-IV elimination,
/// exit-condition canonicalization, exit optimization/predication, LFTR, and
/// finally cleanup of dead instructions.  Returns true if the loop changed.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop.  Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter))  {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}
2110 
2111 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2112                                           LoopStandardAnalysisResults &AR,
2113                                           LPMUpdater &) {
2114   Function *F = L.getHeader()->getParent();
2115   const DataLayout &DL = F->getParent()->getDataLayout();
2116 
2117   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2118                      WidenIndVars && AllowIVWidening);
2119   if (!IVS.run(&L))
2120     return PreservedAnalyses::all();
2121 
2122   auto PA = getLoopPassPreservedAnalyses();
2123   PA.preserveSet<CFGAnalyses>();
2124   if (AR.MSSA)
2125     PA.preserve<MemorySSAAnalysis>();
2126   return PA;
2127 }
2128