xref: /llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp (revision ad5a84c883354e8bb595ebfd9971fe4a14b770fd)
1 //===-- LoopPredication.cpp - Guard based loop predication pass -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The LoopPredication pass tries to convert loop variant range checks to loop
10 // invariant by widening checks across loop iterations. For example, it will
11 // convert
12 //
13 //   for (i = 0; i < n; i++) {
14 //     guard(i < len);
15 //     ...
16 //   }
17 //
18 // to
19 //
20 //   for (i = 0; i < n; i++) {
21 //     guard(n - 1 < len);
22 //     ...
23 //   }
24 //
25 // After this transformation the condition of the guard is loop invariant, so
26 // loop-unswitch can later unswitch the loop by this condition which basically
27 // predicates the loop by the widened condition:
28 //
29 //   if (n - 1 < len)
30 //     for (i = 0; i < n; i++) {
31 //       ...
32 //     }
33 //   else
34 //     deoptimize
35 //
36 // It's tempting to rely on SCEV here, but it has proven to be problematic.
37 // Generally the facts SCEV provides about the increment step of add
38 // recurrences are true if the backedge of the loop is taken, which implicitly
39 // assumes that the guard doesn't fail. Using these facts to optimize the
40 // guard results in a circular logic where the guard is optimized under the
41 // assumption that it never fails.
42 //
43 // For example, in the loop below the induction variable will be marked as nuw
44 // basing on the guard. Basing on nuw the guard predicate will be considered
45 // monotonic. Given a monotonic condition it's tempting to replace the induction
46 // variable in the condition with its value on the last iteration. But this
47 // transformation is not correct, e.g. e = 4, b = 5 breaks the loop.
48 //
49 //   for (int i = b; i != e; i++)
50 //     guard(i u< len)
51 //
52 // One of the ways to reason about this problem is to use an inductive proof
53 // approach. Given the loop:
54 //
55 //   if (B(0)) {
56 //     do {
57 //       I = PHI(0, I.INC)
58 //       I.INC = I + Step
59 //       guard(G(I));
60 //     } while (B(I));
61 //   }
62 //
63 // where B(x) and G(x) are predicates that map integers to booleans, we want a
// loop invariant expression M such that the following program has the same semantics
65 // as the above:
66 //
67 //   if (B(0)) {
68 //     do {
69 //       I = PHI(0, I.INC)
70 //       I.INC = I + Step
71 //       guard(G(0) && M);
72 //     } while (B(I));
73 //   }
74 //
75 // One solution for M is M = forall X . (G(X) && B(X)) => G(X + Step)
76 //
77 // Informal proof that the transformation above is correct:
78 //
79 //   By the definition of guards we can rewrite the guard condition to:
80 //     G(I) && G(0) && M
81 //
82 //   Let's prove that for each iteration of the loop:
83 //     G(0) && M => G(I)
84 //   And the condition above can be simplified to G(Start) && M.
85 //
86 //   Induction base.
87 //     G(0) && M => G(0)
88 //
//   Induction step. Assuming G(0) && M => G(I) holds on the current
//   iteration, we prove it holds on the subsequent iteration:
91 //
92 //     B(I) is true because it's the backedge condition.
93 //     G(I) is true because the backedge is guarded by this condition.
94 //
95 //   So M = forall X . (G(X) && B(X)) => G(X + Step) implies G(I + Step).
96 //
97 // Note that we can use anything stronger than M, i.e. any condition which
98 // implies M.
99 //
100 // When S = 1 (i.e. forward iterating loop), the transformation is supported
101 // when:
102 //   * The loop has a single latch with the condition of the form:
103 //     B(X) = latchStart + X <pred> latchLimit,
104 //     where <pred> is u<, u<=, s<, or s<=.
105 //   * The guard condition is of the form
106 //     G(X) = guardStart + X u< guardLimit
107 //
108 //   For the ult latch comparison case M is:
109 //     forall X . guardStart + X u< guardLimit && latchStart + X <u latchLimit =>
110 //        guardStart + X + 1 u< guardLimit
111 //
112 //   The only way the antecedent can be true and the consequent can be false is
113 //   if
114 //     X == guardLimit - 1 - guardStart
115 //   (and guardLimit is non-zero, but we won't use this latter fact).
116 //   If X == guardLimit - 1 - guardStart then the second half of the antecedent is
117 //     latchStart + guardLimit - 1 - guardStart u< latchLimit
118 //   and its negation is
119 //     latchStart + guardLimit - 1 - guardStart u>= latchLimit
120 //
121 //   In other words, if
122 //     latchLimit u<= latchStart + guardLimit - 1 - guardStart
123 //   then:
124 //   (the ranges below are written in ConstantRange notation, where [A, B) is the
125 //   set for (I = A; I != B; I++ /*maywrap*/) yield(I);)
126 //
127 //      forall X . guardStart + X u< guardLimit &&
128 //                 latchStart + X u< latchLimit =>
129 //        guardStart + X + 1 u< guardLimit
130 //   == forall X . guardStart + X u< guardLimit &&
131 //                 latchStart + X u< latchStart + guardLimit - 1 - guardStart =>
132 //        guardStart + X + 1 u< guardLimit
133 //   == forall X . (guardStart + X) in [0, guardLimit) &&
134 //                 (latchStart + X) in [0, latchStart + guardLimit - 1 - guardStart) =>
135 //        (guardStart + X + 1) in [0, guardLimit)
136 //   == forall X . X in [-guardStart, guardLimit - guardStart) &&
137 //                 X in [-latchStart, guardLimit - 1 - guardStart) =>
138 //         X in [-guardStart - 1, guardLimit - guardStart - 1)
139 //   == true
140 //
141 //   So the widened condition is:
142 //     guardStart u< guardLimit &&
143 //     latchStart + guardLimit - 1 - guardStart u>= latchLimit
144 //   Similarly for ule condition the widened condition is:
145 //     guardStart u< guardLimit &&
146 //     latchStart + guardLimit - 1 - guardStart u> latchLimit
147 //   For slt condition the widened condition is:
148 //     guardStart u< guardLimit &&
149 //     latchStart + guardLimit - 1 - guardStart s>= latchLimit
150 //   For sle condition the widened condition is:
151 //     guardStart u< guardLimit &&
152 //     latchStart + guardLimit - 1 - guardStart s> latchLimit
153 //
154 // When S = -1 (i.e. reverse iterating loop), the transformation is supported
155 // when:
156 //   * The loop has a single latch with the condition of the form:
157 //     B(X) = X <pred> latchLimit, where <pred> is u>, u>=, s>, or s>=.
158 //   * The guard condition is of the form
159 //     G(X) = X - 1 u< guardLimit
160 //
161 //   For the ugt latch comparison case M is:
162 //     forall X. X-1 u< guardLimit and X u> latchLimit => X-2 u< guardLimit
163 //
164 //   The only way the antecedent can be true and the consequent can be false is if
165 //     X == 1.
166 //   If X == 1 then the second half of the antecedent is
167 //     1 u> latchLimit, and its negation is latchLimit u>= 1.
168 //
169 //   So the widened condition is:
170 //     guardStart u< guardLimit && latchLimit u>= 1.
171 //   Similarly for sgt condition the widened condition is:
172 //     guardStart u< guardLimit && latchLimit s>= 1.
173 //   For uge condition the widened condition is:
174 //     guardStart u< guardLimit && latchLimit u> 1.
175 //   For sge condition the widened condition is:
176 //     guardStart u< guardLimit && latchLimit s> 1.
177 //===----------------------------------------------------------------------===//
178 
179 #include "llvm/Transforms/Scalar/LoopPredication.h"
180 #include "llvm/ADT/Statistic.h"
181 #include "llvm/Analysis/AliasAnalysis.h"
182 #include "llvm/Analysis/BranchProbabilityInfo.h"
183 #include "llvm/Analysis/GuardUtils.h"
184 #include "llvm/Analysis/LoopInfo.h"
185 #include "llvm/Analysis/LoopPass.h"
186 #include "llvm/Analysis/ScalarEvolution.h"
187 #include "llvm/Analysis/ScalarEvolutionExpander.h"
188 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
189 #include "llvm/IR/Function.h"
190 #include "llvm/IR/GlobalValue.h"
191 #include "llvm/IR/IntrinsicInst.h"
192 #include "llvm/IR/Module.h"
193 #include "llvm/IR/PatternMatch.h"
194 #include "llvm/InitializePasses.h"
195 #include "llvm/Pass.h"
196 #include "llvm/Support/CommandLine.h"
197 #include "llvm/Support/Debug.h"
198 #include "llvm/Transforms/Scalar.h"
199 #include "llvm/Transforms/Utils/Local.h"
200 #include "llvm/Transforms/Utils/LoopUtils.h"
201 
#define DEBUG_TYPE "loop-predication"

STATISTIC(TotalConsidered, "Number of guards considered");
STATISTIC(TotalWidened, "Number of checks widened");

using namespace llvm;

// Allow predication when the latch IV is wider than the range-check operand,
// by truncating the latch IV when that is provably lossless (see
// isSafeToTruncateWideIVType below).
static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation",
                                        cl::Hidden, cl::init(true));

// Allow predication of reverse-iterating (step == -1) loops.
static cl::opt<bool> EnableCountDownLoop("loop-predication-enable-count-down-loop",
                                        cl::Hidden, cl::init(true));

// If set, predicate loops regardless of what the BPI-based profitability
// analysis (isLoopProfitableToPredicate) concludes.
static cl::opt<bool>
    SkipProfitabilityChecks("loop-predication-skip-profitability-checks",
                            cl::Hidden, cl::init(false));

// This is the scale factor for the latch probability. We use this during
// profitability analysis to find other exiting blocks that have a much higher
// probability of exiting the loop instead of loop exiting via latch.
// This value should be greater than 1 for a sane profitability check.
static cl::opt<float> LatchExitProbabilityScale(
    "loop-predication-latch-probability-scale", cl::Hidden, cl::init(2.0),
    cl::desc("scale factor for the latch probability. Value should be greater "
             "than 1. Lower values are ignored"));

// Guards may be expressed either as guard intrinsics or as branches on
// widenable conditions; this controls whether the latter form is handled too.
static cl::opt<bool> PredicateWidenableBranchGuards(
    "loop-predication-predicate-widenable-branches-to-deopt", cl::Hidden,
    cl::desc("Whether or not we should predicate guards "
             "expressed as widenable branches to deoptimize blocks"),
    cl::init(true));
233 
234 namespace {
235 /// Represents an induction variable check:
236 ///   icmp Pred, <induction variable>, <loop invariant limit>
237 struct LoopICmp {
238   ICmpInst::Predicate Pred;
239   const SCEVAddRecExpr *IV;
240   const SCEV *Limit;
241   LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
242            const SCEV *Limit)
243     : Pred(Pred), IV(IV), Limit(Limit) {}
244   LoopICmp() {}
245   void dump() {
246     dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
247            << ", Limit = " << *Limit << "\n";
248   }
249 };
250 
/// Drives the loop-predication transform for a single loop: parses the latch
/// condition into a LoopICmp, then attempts to widen each guard's range
/// checks into loop-invariant checks (see the file header for the theory).
class LoopPredication {
  AliasAnalysis *AA;
  DominatorTree *DT;
  ScalarEvolution *SE;
  LoopInfo *LI;
  BranchProbabilityInfo *BPI;

  Loop *L;
  const DataLayout *DL;
  BasicBlock *Preheader;
  // Canonicalized form of the latch exit condition, filled in by
  // parseLoopLatchICmp before any widening is attempted.
  LoopICmp LatchCheck;

  // True iff Step is +1, or -1 when count-down loops are enabled.
  bool isSupportedStep(const SCEV* Step);
  // Parse ICI into the canonical "add-rec <pred> loop-invariant limit" form.
  Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI);
  Optional<LoopICmp> parseLoopLatchICmp();

  /// Return an insertion point suitable for inserting a safe to speculate
  /// instruction whose only user will be 'User' which has operands 'Ops'.  A
  /// trivial result would be the at the User itself, but we try to return a
  /// loop invariant location if possible.
  Instruction *findInsertPt(Instruction *User, ArrayRef<Value*> Ops);
  /// Same as above, *except* that this uses the SCEV definition of invariant
  /// which is that an expression *can be made* invariant via SCEVExpander.
  /// Thus, this version is only suitable for finding an insert point to be
  /// passed to SCEVExpander!
  Instruction *findInsertPt(Instruction *User, ArrayRef<const SCEV*> Ops);

  /// Return true if the value is known to produce a single fixed value across
  /// all iterations on which it executes.  Note that this does not imply
  /// speculation safety.  That must be established separately.
  bool isLoopInvariantValue(const SCEV* S);

  // Expand "LHS Pred RHS" at (or above) Guard, folding to a constant when
  // SCEV already knows the outcome from the loop entry guard.
  Value *expandCheck(SCEVExpander &Expander, Instruction *Guard,
                     ICmpInst::Predicate Pred, const SCEV *LHS,
                     const SCEV *RHS);

  // Try to replace the range check ICI with a loop-invariant form; returns
  // the new condition, or None if the check cannot be widened.
  Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
                                        Instruction *Guard);
  // Specialization of the above for step == +1 loops.
  Optional<Value *> widenICmpRangeCheckIncrementingLoop(LoopICmp LatchCheck,
                                                        LoopICmp RangeCheck,
                                                        SCEVExpander &Expander,
                                                        Instruction *Guard);
  // Specialization of the above for step == -1 loops.
  Optional<Value *> widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck,
                                                        LoopICmp RangeCheck,
                                                        SCEVExpander &Expander,
                                                        Instruction *Guard);
  // Split Condition into its conjuncts, widening each where possible; returns
  // the number of checks actually widened.
  unsigned collectChecks(SmallVectorImpl<Value *> &Checks, Value *Condition,
                         SCEVExpander &Expander, Instruction *Guard);
  bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
  bool widenWidenableBranchGuardConditions(BranchInst *Guard, SCEVExpander &Expander);
  // If the loop always exits through another block in the loop, we should not
  // predicate based on the latch check. For example, the latch check can be a
  // very coarse grained check and there can be more fine grained exit checks
  // within the loop. We identify such unprofitable loops through BPI.
  bool isLoopProfitableToPredicate();

  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

public:
  // BPI may be null (e.g. when the new-PM entry point has no cached result);
  // only the profitability heuristic depends on it.
  LoopPredication(AliasAnalysis *AA, DominatorTree *DT,
                  ScalarEvolution *SE, LoopInfo *LI,
                  BranchProbabilityInfo *BPI)
    : AA(AA), DT(DT), SE(SE), LI(LI), BPI(BPI) {};
  bool runOnLoop(Loop *L);
};
316 
317 class LoopPredicationLegacyPass : public LoopPass {
318 public:
319   static char ID;
320   LoopPredicationLegacyPass() : LoopPass(ID) {
321     initializeLoopPredicationLegacyPassPass(*PassRegistry::getPassRegistry());
322   }
323 
324   void getAnalysisUsage(AnalysisUsage &AU) const override {
325     AU.addRequired<BranchProbabilityInfoWrapperPass>();
326     getLoopAnalysisUsage(AU);
327   }
328 
329   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
330     if (skipLoop(L))
331       return false;
332     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
333     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
334     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
335     BranchProbabilityInfo &BPI =
336         getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
337     auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
338     LoopPredication LP(AA, DT, SE, LI, &BPI);
339     return LP.runOnLoop(L);
340   }
341 };
342 
343 char LoopPredicationLegacyPass::ID = 0;
} // end anonymous namespace
345 
// Register the legacy pass and its analysis dependencies with the pass
// registry.
INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication",
                      "Loop predication", false, false)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(LoopPredicationLegacyPass, "loop-predication",
                    "Loop predication", false, false)
352 
/// Factory used by the legacy pass manager to create the pass object.
Pass *llvm::createLoopPredicationPass() {
  return new LoopPredicationLegacyPass();
}
356 
357 PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
358                                            LoopStandardAnalysisResults &AR,
359                                            LPMUpdater &U) {
360   const auto &FAM =
361       AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
362   Function *F = L.getHeader()->getParent();
363   auto *BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(*F);
364   LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, BPI);
365   if (!LP.runOnLoop(&L))
366     return PreservedAnalyses::all();
367 
368   return getLoopPassPreservedAnalyses();
369 }
370 
371 Optional<LoopICmp>
372 LoopPredication::parseLoopICmp(ICmpInst *ICI) {
373   auto Pred = ICI->getPredicate();
374   auto *LHS = ICI->getOperand(0);
375   auto *RHS = ICI->getOperand(1);
376 
377   const SCEV *LHSS = SE->getSCEV(LHS);
378   if (isa<SCEVCouldNotCompute>(LHSS))
379     return None;
380   const SCEV *RHSS = SE->getSCEV(RHS);
381   if (isa<SCEVCouldNotCompute>(RHSS))
382     return None;
383 
384   // Canonicalize RHS to be loop invariant bound, LHS - a loop computable IV
385   if (SE->isLoopInvariant(LHSS, L)) {
386     std::swap(LHS, RHS);
387     std::swap(LHSS, RHSS);
388     Pred = ICmpInst::getSwappedPredicate(Pred);
389   }
390 
391   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHSS);
392   if (!AR || AR->getLoop() != L)
393     return None;
394 
395   return LoopICmp(Pred, AR, RHSS);
396 }
397 
398 Value *LoopPredication::expandCheck(SCEVExpander &Expander,
399                                     Instruction *Guard,
400                                     ICmpInst::Predicate Pred, const SCEV *LHS,
401                                     const SCEV *RHS) {
402   Type *Ty = LHS->getType();
403   assert(Ty == RHS->getType() && "expandCheck operands have different types?");
404 
405   if (SE->isLoopInvariant(LHS, L) && SE->isLoopInvariant(RHS, L)) {
406     IRBuilder<> Builder(Guard);
407     if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))
408       return Builder.getTrue();
409     if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred),
410                                      LHS, RHS))
411       return Builder.getFalse();
412   }
413 
414   Value *LHSV = Expander.expandCodeFor(LHS, Ty, findInsertPt(Guard, {LHS}));
415   Value *RHSV = Expander.expandCodeFor(RHS, Ty, findInsertPt(Guard, {RHS}));
416   IRBuilder<> Builder(findInsertPt(Guard, {LHSV, RHSV}));
417   return Builder.CreateICmp(Pred, LHSV, RHSV);
418 }
419 
420 
421 // Returns true if its safe to truncate the IV to RangeCheckType.
422 // When the IV type is wider than the range operand type, we can still do loop
423 // predication, by generating SCEVs for the range and latch that are of the
424 // same type. We achieve this by generating a SCEV truncate expression for the
425 // latch IV. This is done iff truncation of the IV is a safe operation,
426 // without loss of information.
427 // Another way to achieve this is by generating a wider type SCEV for the
428 // range check operand, however, this needs a more involved check that
429 // operands do not overflow. This can lead to loss of information when the
430 // range operand is of the form: add i32 %offset, %iv. We need to prove that
431 // sext(x + y) is same as sext(x) + sext(y).
432 // This function returns true if we can safely represent the IV type in
433 // the RangeCheckType without loss of information.
434 static bool isSafeToTruncateWideIVType(const DataLayout &DL,
435                                        ScalarEvolution &SE,
436                                        const LoopICmp LatchCheck,
437                                        Type *RangeCheckType) {
438   if (!EnableIVTruncation)
439     return false;
440   assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()) >
441              DL.getTypeSizeInBits(RangeCheckType) &&
442          "Expected latch check IV type to be larger than range check operand "
443          "type!");
444   // The start and end values of the IV should be known. This is to guarantee
445   // that truncating the wide type will not lose information.
446   auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);
447   auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());
448   if (!Limit || !Start)
449     return false;
450   // This check makes sure that the IV does not change sign during loop
451   // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,
452   // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
453   // IV wraps around, and the truncation of the IV would lose the range of
454   // iterations between 2^32 and 2^64.
455   bool Increasing;
456   if (!SE.isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing))
457     return false;
458   // The active bits should be less than the bits in the RangeCheckType. This
459   // guarantees that truncating the latch check to RangeCheckType is a safe
460   // operation.
461   auto RangeCheckTypeBitSize = DL.getTypeSizeInBits(RangeCheckType);
462   return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
463          Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
464 }
465 
466 
// Return a LoopICmp describing a latch check equivalent to LatchCheck but with
// the requested type if safe to do so.  May involve the use of a new IV.
static Optional<LoopICmp> generateLoopLatchCheck(const DataLayout &DL,
                                                 ScalarEvolution &SE,
                                                 const LoopICmp LatchCheck,
                                                 Type *RangeCheckType) {

  auto *LatchType = LatchCheck.IV->getType();
  if (RangeCheckType == LatchType)
    return LatchCheck;
  // For now, bail out if latch type is narrower than range type.
  if (DL.getTypeSizeInBits(LatchType) < DL.getTypeSizeInBits(RangeCheckType))
    return None;
  if (!isSafeToTruncateWideIVType(DL, SE, LatchCheck, RangeCheckType))
    return None;
  // We can now safely identify the truncated version of the IV and limit for
  // RangeCheckType.
  LoopICmp NewLatchCheck;
  NewLatchCheck.Pred = LatchCheck.Pred;
  // Truncating an add-rec does not necessarily yield an add-rec; bail if the
  // truncation folded to something else.
  NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>(
      SE.getTruncateExpr(LatchCheck.IV, RangeCheckType));
  if (!NewLatchCheck.IV)
    return None;
  NewLatchCheck.Limit = SE.getTruncateExpr(LatchCheck.Limit, RangeCheckType);
  LLVM_DEBUG(dbgs() << "IV of type: " << *LatchType
                    << "can be represented as range check type:"
                    << *RangeCheckType << "\n");
  LLVM_DEBUG(dbgs() << "LatchCheck.IV: " << *NewLatchCheck.IV << "\n");
  LLVM_DEBUG(dbgs() << "LatchCheck.Limit: " << *NewLatchCheck.Limit << "\n");
  return NewLatchCheck;
}
498 
499 bool LoopPredication::isSupportedStep(const SCEV* Step) {
500   return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop);
501 }
502 
503 Instruction *LoopPredication::findInsertPt(Instruction *Use,
504                                            ArrayRef<Value*> Ops) {
505   for (Value *Op : Ops)
506     if (!L->isLoopInvariant(Op))
507       return Use;
508   return Preheader->getTerminator();
509 }
510 
511 Instruction *LoopPredication::findInsertPt(Instruction *Use,
512                                            ArrayRef<const SCEV*> Ops) {
513   // Subtlety: SCEV considers things to be invariant if the value produced is
514   // the same across iterations.  This is not the same as being able to
515   // evaluate outside the loop, which is what we actually need here.
516   for (const SCEV *Op : Ops)
517     if (!SE->isLoopInvariant(Op, L) ||
518         !isSafeToExpandAt(Op, Preheader->getTerminator(), *SE))
519       return Use;
520   return Preheader->getTerminator();
521 }
522 
523 bool LoopPredication::isLoopInvariantValue(const SCEV* S) {
524   // Handling expressions which produce invariant results, but *haven't* yet
525   // been removed from the loop serves two important purposes.
526   // 1) Most importantly, it resolves a pass ordering cycle which would
527   // otherwise need us to iteration licm, loop-predication, and either
528   // loop-unswitch or loop-peeling to make progress on examples with lots of
529   // predicable range checks in a row.  (Since, in the general case,  we can't
530   // hoist the length checks until the dominating checks have been discharged
531   // as we can't prove doing so is safe.)
532   // 2) As a nice side effect, this exposes the value of peeling or unswitching
533   // much more obviously in the IR.  Otherwise, the cost modeling for other
534   // transforms would end up needing to duplicate all of this logic to model a
535   // check which becomes predictable based on a modeled peel or unswitch.
536   //
537   // The cost of doing so in the worst case is an extra fill from the stack  in
538   // the loop to materialize the loop invariant test value instead of checking
539   // against the original IV which is presumable in a register inside the loop.
540   // Such cases are presumably rare, and hint at missing oppurtunities for
541   // other passes.
542 
543   if (SE->isLoopInvariant(S, L))
544     // Note: This the SCEV variant, so the original Value* may be within the
545     // loop even though SCEV has proven it is loop invariant.
546     return true;
547 
548   // Handle a particular important case which SCEV doesn't yet know about which
549   // shows up in range checks on arrays with immutable lengths.
550   // TODO: This should be sunk inside SCEV.
551   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
552     if (const auto *LI = dyn_cast<LoadInst>(U->getValue()))
553       if (LI->isUnordered() && L->hasLoopInvariantOperands(LI))
554         if (AA->pointsToConstantMemory(LI->getOperand(0)) ||
555             LI->hasMetadata(LLVMContext::MD_invariant_load))
556           return true;
557   return false;
558 }
559 
/// Widen RangeCheck for a step == +1 loop.  Returns the new loop-invariant
/// condition, or None when the needed values cannot be proven invariant or
/// safely expanded at the guard.
Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
    LoopICmp LatchCheck, LoopICmp RangeCheck,
    SCEVExpander &Expander, Instruction *Guard) {
  auto *Ty = RangeCheck.IV->getType();
  // Generate the widened condition for the forward loop:
  //   guardStart u< guardLimit &&
  //   latchLimit <pred> guardLimit - 1 - guardStart + latchStart
  // where <pred> depends on the latch condition predicate. See the file
  // header comment for the reasoning.
  // guardLimit - guardStart + latchStart - 1
  const SCEV *GuardStart = RangeCheck.IV->getStart();
  const SCEV *GuardLimit = RangeCheck.Limit;
  const SCEV *LatchStart = LatchCheck.IV->getStart();
  const SCEV *LatchLimit = LatchCheck.Limit;
  // Subtlety: We need all the values to be *invariant* across all iterations,
  // but we only need to check expansion safety for those which *aren't*
  // already guaranteed to dominate the guard.
  if (!isLoopInvariantValue(GuardStart) ||
      !isLoopInvariantValue(GuardLimit) ||
      !isLoopInvariantValue(LatchStart) ||
      !isLoopInvariantValue(LatchLimit)) {
    LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
    return None;
  }
  if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
      !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
    LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
    return None;
  }

  // guardLimit - guardStart + latchStart - 1
  const SCEV *RHS =
      SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart),
                     SE->getMinusSCEV(LatchStart, SE->getOne(Ty)));
  // Flipping strictness (e.g. u< -> u<=) matches the per-predicate widened
  // conditions derived in the file header.
  auto LimitCheckPred =
      ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred);

  LLVM_DEBUG(dbgs() << "LHS: " << *LatchLimit << "\n");
  LLVM_DEBUG(dbgs() << "RHS: " << *RHS << "\n");
  LLVM_DEBUG(dbgs() << "Pred: " << LimitCheckPred << "\n");

  // Emit both conjuncts and AND them at the most invariant point available.
  auto *LimitCheck =
      expandCheck(Expander, Guard, LimitCheckPred, LatchLimit, RHS);
  auto *FirstIterationCheck = expandCheck(Expander, Guard, RangeCheck.Pred,
                                          GuardStart, GuardLimit);
  IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
  return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
}
608 
/// Widen RangeCheck for a step == -1 loop.  Returns the new loop-invariant
/// condition, or None when the needed values cannot be proven invariant,
/// safely expanded, or the range check is not the post-decrement of the
/// latch IV.
Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
    LoopICmp LatchCheck, LoopICmp RangeCheck,
    SCEVExpander &Expander, Instruction *Guard) {
  auto *Ty = RangeCheck.IV->getType();
  const SCEV *GuardStart = RangeCheck.IV->getStart();
  const SCEV *GuardLimit = RangeCheck.Limit;
  const SCEV *LatchStart = LatchCheck.IV->getStart();
  const SCEV *LatchLimit = LatchCheck.Limit;
  // Subtlety: We need all the values to be *invariant* across all iterations,
  // but we only need to check expansion safety for those which *aren't*
  // already guaranteed to dominate the guard.
  if (!isLoopInvariantValue(GuardStart) ||
      !isLoopInvariantValue(GuardLimit) ||
      !isLoopInvariantValue(LatchStart) ||
      !isLoopInvariantValue(LatchLimit)) {
    LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
    return None;
  }
  if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
      !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
    LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
    return None;
  }
  // The decrement of the latch check IV should be the same as the
  // rangeCheckIV.
  auto *PostDecLatchCheckIV = LatchCheck.IV->getPostIncExpr(*SE);
  if (RangeCheck.IV != PostDecLatchCheckIV) {
    LLVM_DEBUG(dbgs() << "Not the same. PostDecLatchCheckIV: "
                      << *PostDecLatchCheckIV
                      << "  and RangeCheckIV: " << *RangeCheck.IV << "\n");
    return None;
  }

  // Generate the widened condition for CountDownLoop:
  // guardStart u< guardLimit &&
  // latchLimit <pred> 1.
  // See the header comment for reasoning of the checks.
  auto LimitCheckPred =
      ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred);
  auto *FirstIterationCheck = expandCheck(Expander, Guard,
                                          ICmpInst::ICMP_ULT,
                                          GuardStart, GuardLimit);
  auto *LimitCheck = expandCheck(Expander, Guard, LimitCheckPred, LatchLimit,
                                 SE->getOne(Ty));
  IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
  return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
}
656 
657 static void normalizePredicate(ScalarEvolution *SE, Loop *L,
658                                LoopICmp& RC) {
659   // LFTR canonicalizes checks to the ICMP_NE/EQ form; normalize back to the
660   // ULT/UGE form for ease of handling by our caller.
661   if (ICmpInst::isEquality(RC.Pred) &&
662       RC.IV->getStepRecurrence(*SE)->isOne() &&
663       SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit))
664     RC.Pred = RC.Pred == ICmpInst::ICMP_NE ?
665       ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;
666 }
667 
668 
/// Try to widen the range check \p ICI into a loop-invariant condition.
/// On success, emits the loop-invariant condition (via \p Expander) at a
/// point dominating \p Guard and returns it; otherwise returns None and
/// leaves the IR untouched.
Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
                                                       SCEVExpander &Expander,
                                                       Instruction *Guard) {
  LLVM_DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
  LLVM_DEBUG(ICI->dump());

  // parseLoopStructure guarantees that the latch condition is:
  //   ++i <pred> latchLimit, where <pred> is u<, u<=, s<, or s<=.
  // We are looking for the range checks of the form:
  //   i u< guardLimit
  auto RangeCheck = parseLoopICmp(ICI);
  if (!RangeCheck) {
    LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
    return None;
  }
  LLVM_DEBUG(dbgs() << "Guard check:\n");
  LLVM_DEBUG(RangeCheck->dump());
  // Only the canonical unsigned-bound form "i u< guardLimit" is supported;
  // give up on anything else.
  if (RangeCheck->Pred != ICmpInst::ICMP_ULT) {
    LLVM_DEBUG(dbgs() << "Unsupported range check predicate("
                      << RangeCheck->Pred << ")!\n");
    return None;
  }
  auto *RangeCheckIV = RangeCheck->IV;
  // Widening reasons about the IV's start and step, which requires an affine
  // (degree-1) add recurrence.
  if (!RangeCheckIV->isAffine()) {
    LLVM_DEBUG(dbgs() << "Range check IV is not affine!\n");
    return None;
  }
  auto *Step = RangeCheckIV->getStepRecurrence(*SE);
  // We cannot just compare with latch IV step because the latch and range IVs
  // may have different types.
  if (!isSupportedStep(Step)) {
    LLVM_DEBUG(dbgs() << "Range check and latch have IVs different steps!\n");
    return None;
  }
  auto *Ty = RangeCheckIV->getType();
  // Re-express the latch check in the range-check IV's type so the two checks
  // can be combined.
  auto CurrLatchCheckOpt = generateLoopLatchCheck(*DL, *SE, LatchCheck, Ty);
  if (!CurrLatchCheckOpt) {
    LLVM_DEBUG(dbgs() << "Failed to generate a loop latch check "
                         "corresponding to range type: "
                      << *Ty << "\n");
    return None;
  }

  LoopICmp CurrLatchCheck = *CurrLatchCheckOpt;
  // At this point, the range and latch step should have the same type, but need
  // not have the same value (we support both 1 and -1 steps).
  assert(Step->getType() ==
             CurrLatchCheck.IV->getStepRecurrence(*SE)->getType() &&
         "Range and latch steps should be of same type!");
  if (Step != CurrLatchCheck.IV->getStepRecurrence(*SE)) {
    LLVM_DEBUG(dbgs() << "Range and latch have different step values!\n");
    return None;
  }

  // Dispatch on the (shared) step direction: +1 or -1 -- isSupportedStep
  // above admits nothing else.
  if (Step->isOne())
    return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck,
                                               Expander, Guard);
  else {
    assert(Step->isAllOnesValue() && "Step should be -1!");
    return widenICmpRangeCheckDecrementingLoop(CurrLatchCheck, *RangeCheck,
                                               Expander, Guard);
  }
}
735 
736 unsigned LoopPredication::collectChecks(SmallVectorImpl<Value *> &Checks,
737                                         Value *Condition,
738                                         SCEVExpander &Expander,
739                                         Instruction *Guard) {
740   unsigned NumWidened = 0;
741   // The guard condition is expected to be in form of:
742   //   cond1 && cond2 && cond3 ...
743   // Iterate over subconditions looking for icmp conditions which can be
744   // widened across loop iterations. Widening these conditions remember the
745   // resulting list of subconditions in Checks vector.
746   SmallVector<Value *, 4> Worklist(1, Condition);
747   SmallPtrSet<Value *, 4> Visited;
748   Value *WideableCond = nullptr;
749   do {
750     Value *Condition = Worklist.pop_back_val();
751     if (!Visited.insert(Condition).second)
752       continue;
753 
754     Value *LHS, *RHS;
755     using namespace llvm::PatternMatch;
756     if (match(Condition, m_And(m_Value(LHS), m_Value(RHS)))) {
757       Worklist.push_back(LHS);
758       Worklist.push_back(RHS);
759       continue;
760     }
761 
762     if (match(Condition,
763               m_Intrinsic<Intrinsic::experimental_widenable_condition>())) {
764       // Pick any, we don't care which
765       WideableCond = Condition;
766       continue;
767     }
768 
769     if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
770       if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander,
771                                                    Guard)) {
772         Checks.push_back(NewRangeCheck.getValue());
773         NumWidened++;
774         continue;
775       }
776     }
777 
778     // Save the condition as is if we can't widen it
779     Checks.push_back(Condition);
780   } while (!Worklist.empty());
781   // At the moment, our matching logic for wideable conditions implicitly
782   // assumes we preserve the form: (br (and Cond, WC())).  FIXME
783   // Note that if there were multiple calls to wideable condition in the
784   // traversal, we only need to keep one, and which one is arbitrary.
785   if (WideableCond)
786     Checks.push_back(WideableCond);
787   return NumWidened;
788 }
789 
790 bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
791                                            SCEVExpander &Expander) {
792   LLVM_DEBUG(dbgs() << "Processing guard:\n");
793   LLVM_DEBUG(Guard->dump());
794 
795   TotalConsidered++;
796   SmallVector<Value *, 4> Checks;
797   unsigned NumWidened = collectChecks(Checks, Guard->getOperand(0), Expander,
798                                       Guard);
799   if (NumWidened == 0)
800     return false;
801 
802   TotalWidened += NumWidened;
803 
804   // Emit the new guard condition
805   IRBuilder<> Builder(findInsertPt(Guard, Checks));
806   Value *AllChecks = Builder.CreateAnd(Checks);
807   auto *OldCond = Guard->getOperand(0);
808   Guard->setOperand(0, AllChecks);
809   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
810 
811   LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
812   return true;
813 }
814 
815 bool LoopPredication::widenWidenableBranchGuardConditions(
816     BranchInst *BI, SCEVExpander &Expander) {
817   assert(isGuardAsWidenableBranch(BI) && "Must be!");
818   LLVM_DEBUG(dbgs() << "Processing guard:\n");
819   LLVM_DEBUG(BI->dump());
820 
821   TotalConsidered++;
822   SmallVector<Value *, 4> Checks;
823   unsigned NumWidened = collectChecks(Checks, BI->getCondition(),
824                                       Expander, BI);
825   if (NumWidened == 0)
826     return false;
827 
828   TotalWidened += NumWidened;
829 
830   // Emit the new guard condition
831   IRBuilder<> Builder(findInsertPt(BI, Checks));
832   Value *AllChecks = Builder.CreateAnd(Checks);
833   auto *OldCond = BI->getCondition();
834   BI->setCondition(AllChecks);
835   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
836   assert(isGuardAsWidenableBranch(BI) &&
837          "Stopped being a guard after transform?");
838 
839   LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
840   return true;
841 }
842 
/// Parse the latch terminator of the current loop L into a LoopICmp
/// describing the latch exit condition.  Returns None if the latch shape is
/// anything other than a conditional branch on an icmp over an affine IV
/// with a +1/-1 step and a supported inequality predicate.
Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
  using namespace PatternMatch;

  BasicBlock *LoopLatch = L->getLoopLatch();
  if (!LoopLatch) {
    LLVM_DEBUG(dbgs() << "The loop doesn't have a single latch!\n");
    return None;
  }

  auto *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
  if (!BI || !BI->isConditional()) {
    LLVM_DEBUG(dbgs() << "Failed to match the latch terminator!\n");
    return None;
  }
  BasicBlock *TrueDest = BI->getSuccessor(0);
  assert(
      (TrueDest == L->getHeader() || BI->getSuccessor(1) == L->getHeader()) &&
      "One of the latch's destinations must be the header");

  auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
  if (!ICI) {
    LLVM_DEBUG(dbgs() << "Failed to match the latch condition!\n");
    return None;
  }
  auto Result = parseLoopICmp(ICI);
  if (!Result) {
    LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
    return None;
  }

  // Canonicalize the predicate so that "true" always means "take the
  // backedge": if the true successor exits the loop, flip the comparison.
  if (TrueDest != L->getHeader())
    Result->Pred = ICmpInst::getInversePredicate(Result->Pred);

  // Check affine first, so if it's not we don't try to compute the step
  // recurrence.
  if (!Result->IV->isAffine()) {
    LLVM_DEBUG(dbgs() << "The induction variable is not affine!\n");
    return None;
  }

  auto *Step = Result->IV->getStepRecurrence(*SE);
  if (!isSupportedStep(Step)) {
    LLVM_DEBUG(dbgs() << "Unsupported loop stride(" << *Step << ")!\n");
    return None;
  }

  // The predicate must point "with" the step direction: less-than flavors
  // for a +1 step, greater-than flavors for a -1 step.
  auto IsUnsupportedPredicate = [](const SCEV *Step, ICmpInst::Predicate Pred) {
    if (Step->isOne()) {
      return Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_SLT &&
             Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE;
    } else {
      assert(Step->isAllOnesValue() && "Step should be -1!");
      return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT &&
             Pred != ICmpInst::ICMP_UGE && Pred != ICmpInst::ICMP_SGE;
    }
  };

  // Rewrite EQ/NE latch checks (as produced by LFTR) into ULT/UGE before
  // classifying the predicate.
  normalizePredicate(SE, L, *Result);
  if (IsUnsupportedPredicate(Step, Result->Pred)) {
    LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred
                      << ")!\n");
    return None;
  }

  return Result;
}
909 
910 
/// Profitability heuristic: predication pays off only when the latch is the
/// dominant way out of the loop.  Returns true when branch-profile data (BPI)
/// is unavailable, when profitability checks are disabled, or when no other
/// exiting edge is more likely than the (scaled) latch exit probability.
bool LoopPredication::isLoopProfitableToPredicate() {
  if (SkipProfitabilityChecks || !BPI)
    return true;

  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 8> ExitEdges;
  L->getExitEdges(ExitEdges);
  // If there is only one exiting edge in the loop, it is always profitable to
  // predicate the loop.
  if (ExitEdges.size() == 1)
    return true;

  // Calculate the exiting probabilities of all exiting edges from the loop,
  // starting with the LatchExitProbability.
  // Heuristic for profitability: If any of the exiting blocks' probability of
  // exiting the loop is larger than exiting through the latch block, it's not
  // profitable to predicate the loop.
  auto *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Should have a single latch at this point!");
  auto *LatchTerm = LatchBlock->getTerminator();
  assert(LatchTerm->getNumSuccessors() == 2 &&
         "expected to be an exiting block with 2 succs!");
  // The latch exit is whichever successor is NOT the loop header.
  unsigned LatchBrExitIdx =
      LatchTerm->getSuccessor(0) == L->getHeader() ? 1 : 0;
  BranchProbability LatchExitProbability =
      BPI->getEdgeProbability(LatchBlock, LatchBrExitIdx);

  // Protect against degenerate inputs provided by the user. Providing a value
  // less than one, can invert the definition of profitable loop predication.
  float ScaleFactor = LatchExitProbabilityScale;
  if (ScaleFactor < 1) {
    LLVM_DEBUG(
        dbgs()
        << "Ignored user setting for loop-predication-latch-probability-scale: "
        << LatchExitProbabilityScale << "\n");
    LLVM_DEBUG(dbgs() << "The value is set to 1.0\n");
    ScaleFactor = 1.0;
  }
  // Scaling gives the user some slack: a side exit slightly hotter than the
  // latch exit can still be considered profitable.
  const auto LatchProbabilityThreshold =
      LatchExitProbability * ScaleFactor;

  for (const auto &ExitEdge : ExitEdges) {
    BranchProbability ExitingBlockProbability =
        BPI->getEdgeProbability(ExitEdge.first, ExitEdge.second);
    // Some exiting edge has higher probability than the latch exiting edge.
    // No longer profitable to predicate.
    if (ExitingBlockProbability > LatchProbabilityThreshold)
      return false;
  }
  // Using BPI, we have concluded that the most probable way to exit from the
  // loop is through the latch (or there's no profile information and all
  // exits are equally likely).
  return true;
}
964 
965 /// If we can (cheaply) find a widenable branch which controls entry into the
966 /// loop, return it.
967 static BranchInst *FindWidenableTerminatorAboveLoop(Loop *L, LoopInfo &LI) {
968   // Walk back through any unconditional executed blocks and see if we can find
969   // a widenable condition which seems to control execution of this loop.  Note
970   // that we predict that maythrow calls are likely untaken and thus that it's
971   // profitable to widen a branch before a maythrow call with a condition
972   // afterwards even though that may cause the slow path to run in a case where
973   // it wouldn't have otherwise.
974   BasicBlock *BB = L->getLoopPreheader();
975   if (!BB)
976     return nullptr;
977   do {
978     if (BasicBlock *Pred = BB->getSinglePredecessor())
979       if (BB == Pred->getSingleSuccessor()) {
980         BB = Pred;
981         continue;
982       }
983     break;
984   } while (true);
985 
986   if (BasicBlock *Pred = BB->getSinglePredecessor()) {
987     auto *Term = Pred->getTerminator();
988 
989     Value *Cond, *WC;
990     BasicBlock *IfTrueBB, *IfFalseBB;
991     if (parseWidenableBranch(Term, Cond, WC, IfTrueBB, IfFalseBB) &&
992         IfTrueBB == BB)
993       return cast<BranchInst>(Term);
994   }
995   return nullptr;
996 }
997 
998 /// Return the minimum of all analyzeable exit counts.  This is an upper bound
999 /// on the actual exit count.  If there are not at least two analyzeable exits,
1000 /// returns SCEVCouldNotCompute.
1001 static const SCEV *getMinAnalyzeableBackedgeTakenCount(ScalarEvolution &SE,
1002                                                        DominatorTree &DT,
1003                                                        Loop *L) {
1004   SmallVector<BasicBlock *, 16> ExitingBlocks;
1005   L->getExitingBlocks(ExitingBlocks);
1006 
1007   SmallVector<const SCEV *, 4> ExitCounts;
1008   for (BasicBlock *ExitingBB : ExitingBlocks) {
1009     const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
1010     if (isa<SCEVCouldNotCompute>(ExitCount))
1011       continue;
1012     assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
1013            "We should only have known counts for exiting blocks that "
1014            "dominate latch!");
1015     ExitCounts.push_back(ExitCount);
1016   }
1017   if (ExitCounts.size() < 2)
1018     return SE.getCouldNotCompute();
1019   return SE.getUMinFromMismatchedTypes(ExitCounts);
1020 }
1021 
/// This implements an analogous, but entirely distinct transform from the main
/// loop predication transform.  This one is phrased in terms of using a
/// widenable branch *outside* the loop to allow us to simplify loop exits in a
/// following loop.  This is close in spirit to the IndVarSimplify transform
/// of the same name, but is materially different in that widening loosens
/// legality sharply.
bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  // The transformation performed here aims to widen a widenable condition
  // above the loop such that all analyzeable exit leading to deopt are dead.
  // It assumes that the latch is the dominant exit for profitability and that
  // exits branching to deoptimizing blocks are rarely taken. It relies on the
  // semantics of widenable expressions for legality. (i.e. being able to fall
  // down the widenable path spuriously allows us to ignore exit order,
  // unanalyzeable exits, side effects, exceptional exits, and other challenges
  // which restrict the applicability of the non-WC based version of this
  // transform in IndVarSimplify.)
  //
  // NOTE ON POISON/UNDEF - We're hoisting an expression above guards which may
  // imply flags on the expression being hoisted and inserting new uses (flags
  // are only correct for current uses).  The result is that we may be
  // inserting a branch on the value which can be either poison or undef.  In
  // this case, the branch can legally go either way; we just need to avoid
  // introducing UB.  This is achieved through the use of the freeze
  // instruction.

  SmallVector<BasicBlock *, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  if (ExitingBlocks.empty())
    return false; // Nothing to do.

  auto *Latch = L->getLoopLatch();
  if (!Latch)
    return false;

  auto *WidenableBR = FindWidenableTerminatorAboveLoop(L, *LI);
  if (!WidenableBR)
    return false;

  const SCEV *LatchEC = SE->getExitCount(L, Latch);
  if (isa<SCEVCouldNotCompute>(LatchEC))
    return false; // profitability - want hot exit in analyzeable set

  // The use of umin(all analyzeable exits) instead of latch is subtle, but
  // important for profitability.  We may have a loop which hasn't been fully
  // canonicalized just yet.  If the exit we chose to widen is provably never
  // taken, we want the widened form to *also* be provably never taken.  We
  // can't guarantee this as a current unanalyzeable exit may later become
  // analyzeable, but we can at least avoid the obvious cases.
  const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L);
  if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() ||
      !SE->isLoopInvariant(MinEC, L) ||
      !isSafeToExpandAt(MinEC, WidenableBR, *SE))
    return false;

  // Subtlety: We need to avoid inserting additional uses of the WC.  We know
  // that it can only have one transitive use at the moment, and thus moving
  // that use to just before the branch and inserting code before it and then
  // modifying the operand is legal.
  auto *IP = cast<Instruction>(WidenableBR->getCondition());
  IP->moveBefore(WidenableBR);
  Rewriter.setInsertPoint(IP);
  IRBuilder<> B(IP);

  bool Changed = false;
  Value *MinECV = nullptr; // lazily generated if needed
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      continue;

    // Can't rewrite non-branch yet.
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      continue;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        ExitCount->getType()->isPointerTy() ||
        !isSafeToExpandAt(ExitCount, WidenableBR, *SE))
      continue;

    const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
    BasicBlock *ExitBB = BI->getSuccessor(ExitIfTrue ? 0 : 1);
    if (!ExitBB->getTerminatingDeoptimizeCall())
      // Profitability: indicator of rarely/never taken exit
      continue;

    // If we found a widenable exit condition, do two things:
    // 1) fold the widened exit test into the widenable condition
    // 2) fold the branch to untaken - avoids infinite looping

    // "This exit is taken" iff its exit count exceeds the minimum over all
    // analyzeable exits; widen that fact into the widenable condition.
    Value *ECV = Rewriter.expandCodeFor(ExitCount);
    if (!MinECV)
      MinECV = Rewriter.expandCodeFor(MinEC);
    Value *RHS = MinECV;
    if (ECV->getType() != RHS->getType()) {
      // Counts may have mismatched widths; zero-extend both to the wider
      // type before comparing.
      Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
      ECV = B.CreateZExt(ECV, WiderTy);
      RHS = B.CreateZExt(RHS, WiderTy);
    }
    assert(!Latch || DT->dominates(ExitingBB, Latch));
    Value *NewCond = B.CreateICmp(ICmpInst::ICMP_UGT, ECV, RHS);
    // Freeze poison or undef to an arbitrary bit pattern to ensure we can
    // branch without introducing UB.  See NOTE ON POISON/UNDEF above for
    // context.
    NewCond = B.CreateFreeze(NewCond);

    Value *Cond, *WC;
    BasicBlock *IfTrueBB, *IfFalseBB;
    bool Success =
        parseWidenableBranch(WidenableBR, Cond, WC, IfTrueBB, IfFalseBB);
    assert(Success && "implied from above");
    (void)Success;
    Instruction *WCAnd = cast<Instruction>(WidenableBR->getCondition());
    WCAnd->setOperand(0, B.CreateAnd(NewCond, Cond));

    // With the exit check folded into the widenable condition, the in-loop
    // branch can be constant-folded to never exit.
    Value *OldCond = BI->getCondition();
    BI->setCondition(ConstantInt::get(OldCond->getType(), !ExitIfTrue));
    Changed = true;
  }

  if (Changed)
    // We just mutated a bunch of loop exits changing their exit counts
    // widely.  We need to force recomputation of the exit counts given these
    // changes.  Note that all of the inserted exits are never taken, and
    // should be removed next time the CFG is modified.
    SE->forgetLoop(L);
  return Changed;
}
1158 
/// Pass driver for a single loop: widen guard conditions (both intrinsic and
/// widenable-branch forms) and then try to predicate the loop's deopt exits.
/// Returns true if any IR was changed.
bool LoopPredication::runOnLoop(Loop *Loop) {
  L = Loop;

  LLVM_DEBUG(dbgs() << "Analyzing ");
  LLVM_DEBUG(L->dump());

  Module *M = L->getHeader()->getModule();

  // There is nothing to do if the module doesn't use guards
  auto *GuardDecl =
      M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty();
  auto *WCDecl = M->getFunction(
      Intrinsic::getName(Intrinsic::experimental_widenable_condition));
  bool HasWidenableConditions =
      PredicateWidenableBranchGuards && WCDecl && !WCDecl->use_empty();
  if (!HasIntrinsicGuards && !HasWidenableConditions)
    return false;

  DL = &M->getDataLayout();

  // A preheader is required as the insertion point for widened conditions.
  Preheader = L->getLoopPreheader();
  if (!Preheader)
    return false;

  // Parse the latch exit test once up front; all widening is phrased
  // relative to it.
  auto LatchCheckOpt = parseLoopLatchICmp();
  if (!LatchCheckOpt)
    return false;
  LatchCheck = *LatchCheckOpt;

  LLVM_DEBUG(dbgs() << "Latch check:\n");
  LLVM_DEBUG(LatchCheck.dump());

  if (!isLoopProfitableToPredicate()) {
    LLVM_DEBUG(dbgs() << "Loop not profitable to predicate!\n");
    return false;
  }
  // Collect all the guards into a vector and process later, so as not
  // to invalidate the instruction iterator.
  SmallVector<IntrinsicInst *, 4> Guards;
  SmallVector<BranchInst *, 4> GuardsAsWidenableBranches;
  for (const auto BB : L->blocks()) {
    for (auto &I : *BB)
      if (isGuard(&I))
        Guards.push_back(cast<IntrinsicInst>(&I));
    if (PredicateWidenableBranchGuards &&
        isGuardAsWidenableBranch(BB->getTerminator()))
      GuardsAsWidenableBranches.push_back(
          cast<BranchInst>(BB->getTerminator()));
  }

  SCEVExpander Expander(*SE, *DL, "loop-predication");
  bool Changed = false;
  for (auto *Guard : Guards)
    Changed |= widenGuardConditions(Guard, Expander);
  for (auto *Guard : GuardsAsWidenableBranches)
    Changed |= widenWidenableBranchGuardConditions(Guard, Expander);
  Changed |= predicateLoopExits(L, Expander);
  return Changed;
}
1219