xref: /llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp (revision aec2fa352f533d230ab50b6c3002a1a664c9d6c2)
1 //===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements a simple loop unroller.  It works best when loops have
11 // been canonicalized by the -indvars pass, allowing it to determine the trip
12 // counts of loops easily.
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/Analysis/AssumptionCache.h"
18 #include "llvm/Analysis/CodeMetrics.h"
19 #include "llvm/Analysis/GlobalsModRef.h"
20 #include "llvm/Analysis/InstructionSimplify.h"
21 #include "llvm/Analysis/LoopPass.h"
22 #include "llvm/Analysis/LoopPassManager.h"
23 #include "llvm/Analysis/LoopUnrollAnalyzer.h"
24 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
25 #include "llvm/Analysis/ScalarEvolution.h"
26 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/InstVisitor.h"
30 #include "llvm/IR/IntrinsicInst.h"
31 #include "llvm/IR/Metadata.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/Transforms/Scalar.h"
36 #include "llvm/Transforms/Utils/LoopUtils.h"
37 #include "llvm/Transforms/Utils/UnrollLoop.h"
38 #include <climits>
39 #include <utility>
40 
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "loop-unroll"
44 
45 static cl::opt<unsigned>
46     UnrollThreshold("unroll-threshold", cl::Hidden,
47                     cl::desc("The baseline cost threshold for loop unrolling"));
48 
49 static cl::opt<unsigned> UnrollPercentDynamicCostSavedThreshold(
50     "unroll-percent-dynamic-cost-saved-threshold", cl::init(50), cl::Hidden,
51     cl::desc("The percentage of estimated dynamic cost which must be saved by "
52              "unrolling to allow unrolling up to the max threshold."));
53 
54 static cl::opt<unsigned> UnrollDynamicCostSavingsDiscount(
55     "unroll-dynamic-cost-savings-discount", cl::init(100), cl::Hidden,
56     cl::desc("This is the amount discounted from the total unroll cost when "
57              "the unrolled form has a high dynamic cost savings (triggered by "
58              "the '-unroll-perecent-dynamic-cost-saved-threshold' flag)."));
59 
60 static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
61     "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
62     cl::desc("Don't allow loop unrolling to simulate more than this number of"
63              "iterations when checking full unroll profitability"));
64 
65 static cl::opt<unsigned> UnrollCount(
66     "unroll-count", cl::Hidden,
67     cl::desc("Use this unroll count for all loops including those with "
68              "unroll_count pragma values, for testing purposes"));
69 
70 static cl::opt<unsigned> UnrollMaxCount(
71     "unroll-max-count", cl::Hidden,
72     cl::desc("Set the max unroll count for partial and runtime unrolling, for"
73              "testing purposes"));
74 
75 static cl::opt<unsigned> UnrollFullMaxCount(
76     "unroll-full-max-count", cl::Hidden,
77     cl::desc(
78         "Set the max unroll count for full unrolling, for testing purposes"));
79 
80 static cl::opt<bool>
81     UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
82                        cl::desc("Allows loops to be partially unrolled until "
83                                 "-unroll-threshold loop size is reached."));
84 
85 static cl::opt<bool> UnrollAllowRemainder(
86     "unroll-allow-remainder", cl::Hidden,
87     cl::desc("Allow generation of a loop remainder (extra iterations) "
88              "when unrolling a loop."));
89 
90 static cl::opt<bool>
91     UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
92                   cl::desc("Unroll loops with run-time trip counts"));
93 
94 static cl::opt<unsigned> UnrollMaxUpperBound(
95     "unroll-max-upperbound", cl::init(8), cl::Hidden,
96     cl::desc(
97         "The max of trip count upper bound that is considered in unrolling"));
98 
99 static cl::opt<unsigned> PragmaUnrollThreshold(
100     "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
101     cl::desc("Unrolled size limit for loops with an unroll(full) or "
102              "unroll_count pragma."));
103 
104 static cl::opt<unsigned> FlatLoopTripCountThreshold(
105     "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
106     cl::desc("If the runtime tripcount for the loop is lower than the "
107              "threshold, the loop is considered as flat and will be less "
108              "aggressively unrolled."));
109 
110 static cl::opt<bool>
111     UnrollAllowPeeling("unroll-allow-peeling", cl::Hidden,
112                        cl::desc("Allows loops to be peeled when the dynamic "
113                                 "trip count is known to be low."));
114 
115 /// A magic value for use with the Threshold parameter to indicate
116 /// that the loop unroll should be performed regardless of how much
117 /// code expansion would result.
118 static const unsigned NoThreshold = UINT_MAX;
119 
120 /// Gather the various unrolling parameters based on the defaults, compiler
121 /// flags, TTI overrides and user specified parameters.
122 static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
123     Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
124     Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
125     Optional<bool> UserRuntime, Optional<bool> UserUpperBound) {
126   TargetTransformInfo::UnrollingPreferences UP;
127 
128   // Set up the defaults
129   UP.Threshold = 150;
130   UP.PercentDynamicCostSavedThreshold = 50;
131   UP.DynamicCostSavingsDiscount = 100;
132   UP.OptSizeThreshold = 0;
133   UP.PartialThreshold = UP.Threshold;
134   UP.PartialOptSizeThreshold = 0;
135   UP.Count = 0;
136   UP.PeelCount = 0;
137   UP.DefaultUnrollRuntimeCount = 8;
138   UP.MaxCount = UINT_MAX;
139   UP.FullUnrollMaxCount = UINT_MAX;
140   UP.BEInsns = 2;
141   UP.Partial = false;
142   UP.Runtime = false;
143   UP.AllowRemainder = true;
144   UP.AllowExpensiveTripCount = false;
145   UP.Force = false;
146   UP.UpperBound = false;
147   UP.AllowPeeling = false;
148 
149   // Override with any target specific settings
150   TTI.getUnrollingPreferences(L, UP);
151 
152   // Apply size attributes
153   if (L->getHeader()->getParent()->optForSize()) {
154     UP.Threshold = UP.OptSizeThreshold;
155     UP.PartialThreshold = UP.PartialOptSizeThreshold;
156   }
157 
158   // Apply any user values specified by cl::opt
159   if (UnrollThreshold.getNumOccurrences() > 0) {
160     UP.Threshold = UnrollThreshold;
161     UP.PartialThreshold = UnrollThreshold;
162   }
163   if (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0)
164     UP.PercentDynamicCostSavedThreshold =
165         UnrollPercentDynamicCostSavedThreshold;
166   if (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0)
167     UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
168   if (UnrollMaxCount.getNumOccurrences() > 0)
169     UP.MaxCount = UnrollMaxCount;
170   if (UnrollFullMaxCount.getNumOccurrences() > 0)
171     UP.FullUnrollMaxCount = UnrollFullMaxCount;
172   if (UnrollAllowPartial.getNumOccurrences() > 0)
173     UP.Partial = UnrollAllowPartial;
174   if (UnrollAllowRemainder.getNumOccurrences() > 0)
175     UP.AllowRemainder = UnrollAllowRemainder;
176   if (UnrollRuntime.getNumOccurrences() > 0)
177     UP.Runtime = UnrollRuntime;
178   if (UnrollMaxUpperBound == 0)
179     UP.UpperBound = false;
180   if (UnrollAllowPeeling.getNumOccurrences() > 0)
181     UP.AllowPeeling = UnrollAllowPeeling;
182 
183   // Apply user values provided by argument
184   if (UserThreshold.hasValue()) {
185     UP.Threshold = *UserThreshold;
186     UP.PartialThreshold = *UserThreshold;
187   }
188   if (UserCount.hasValue())
189     UP.Count = *UserCount;
190   if (UserAllowPartial.hasValue())
191     UP.Partial = *UserAllowPartial;
192   if (UserRuntime.hasValue())
193     UP.Runtime = *UserRuntime;
194   if (UserUpperBound.hasValue())
195     UP.UpperBound = *UserUpperBound;
196 
197   return UP;
198 }
199 
200 namespace {
201 /// A struct to densely store the state of an instruction after unrolling at
202 /// each iteration.
203 ///
204 /// This is designed to work like a tuple of <Instruction *, int> for the
205 /// purposes of hashing and lookup, but to be able to associate two boolean
206 /// states with each key.
207 struct UnrolledInstState {
208   Instruction *I;
209   int Iteration : 30;
210   unsigned IsFree : 1;
211   unsigned IsCounted : 1;
212 };
213 
214 /// Hashing and equality testing for a set of the instruction states.
215 struct UnrolledInstStateKeyInfo {
216   typedef DenseMapInfo<Instruction *> PtrInfo;
217   typedef DenseMapInfo<std::pair<Instruction *, int>> PairInfo;
218   static inline UnrolledInstState getEmptyKey() {
219     return {PtrInfo::getEmptyKey(), 0, 0, 0};
220   }
221   static inline UnrolledInstState getTombstoneKey() {
222     return {PtrInfo::getTombstoneKey(), 0, 0, 0};
223   }
224   static inline unsigned getHashValue(const UnrolledInstState &S) {
225     return PairInfo::getHashValue({S.I, S.Iteration});
226   }
227   static inline bool isEqual(const UnrolledInstState &LHS,
228                              const UnrolledInstState &RHS) {
229     return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
230   }
231 };
232 }
233 
234 namespace {
235 struct EstimatedUnrollCost {
236   /// \brief The estimated cost after unrolling.
237   unsigned UnrolledCost;
238 
239   /// \brief The estimated dynamic cost of executing the instructions in the
240   /// rolled form.
241   unsigned RolledDynamicCost;
242 };
243 }
244 
245 /// \brief Figure out if the loop is worth full unrolling.
246 ///
247 /// Complete loop unrolling can make some loads constant, and we need to know
248 /// if that would expose any further optimization opportunities.  This routine
249 /// estimates this optimization.  It computes cost of unrolled loop
250 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
251 /// dynamic cost we mean that we won't count costs of blocks that are known not
252 /// to be executed (i.e. if we have a branch in the loop and we know that at the
253 /// given iteration its condition would be resolved to true, we won't add up the
254 /// cost of the 'false'-block).
255 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
256 /// the analysis failed (no benefits expected from the unrolling, or the loop is
257 /// too big to analyze), the returned value is None.
258 static Optional<EstimatedUnrollCost>
259 analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
260                       ScalarEvolution &SE, const TargetTransformInfo &TTI,
261                       unsigned MaxUnrolledLoopSize) {
262   // We want to be able to scale offsets by the trip count and add more offsets
263   // to them without checking for overflows, and we already don't want to
264   // analyze *massive* trip counts, so we force the max to be reasonably small.
265   assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
266          "The unroll iterations max is too large!");
267 
268   // Only analyze inner loops. We can't properly estimate cost of nested loops
269   // and we won't visit inner loops again anyway.
270   if (!L->empty())
271     return None;
272 
273   // Don't simulate loops with a big or unknown tripcount
274   if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
275       TripCount > UnrollMaxIterationsCountToAnalyze)
276     return None;
277 
278   SmallSetVector<BasicBlock *, 16> BBWorklist;
279   SmallSetVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitWorklist;
280   DenseMap<Value *, Constant *> SimplifiedValues;
281   SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
282 
283   // The estimated cost of the unrolled form of the loop. We try to estimate
284   // this by simplifying as much as we can while computing the estimate.
285   unsigned UnrolledCost = 0;
286 
287   // We also track the estimated dynamic (that is, actually executed) cost in
288   // the rolled form. This helps identify cases when the savings from unrolling
289   // aren't just exposing dead control flows, but actual reduced dynamic
290   // instructions due to the simplifications which we expect to occur after
291   // unrolling.
292   unsigned RolledDynamicCost = 0;
293 
294   // We track the simplification of each instruction in each iteration. We use
295   // this to recursively merge costs into the unrolled cost on-demand so that
296   // we don't count the cost of any dead code. This is essentially a map from
297   // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
298   DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap;
299 
300   // A small worklist used to accumulate cost of instructions from each
301   // observable and reached root in the loop.
302   SmallVector<Instruction *, 16> CostWorklist;
303 
304   // PHI-used worklist used between iterations while accumulating cost.
305   SmallVector<Instruction *, 4> PHIUsedList;
306 
307   // Helper function to accumulate cost for instructions in the loop.
308   auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
309     assert(Iteration >= 0 && "Cannot have a negative iteration!");
310     assert(CostWorklist.empty() && "Must start with an empty cost list");
311     assert(PHIUsedList.empty() && "Must start with an empty phi used list");
312     CostWorklist.push_back(&RootI);
313     for (;; --Iteration) {
314       do {
315         Instruction *I = CostWorklist.pop_back_val();
316 
317         // InstCostMap only uses I and Iteration as a key, the other two values
318         // don't matter here.
319         auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
320         if (CostIter == InstCostMap.end())
321           // If an input to a PHI node comes from a dead path through the loop
322           // we may have no cost data for it here. What that actually means is
323           // that it is free.
324           continue;
325         auto &Cost = *CostIter;
326         if (Cost.IsCounted)
327           // Already counted this instruction.
328           continue;
329 
330         // Mark that we are counting the cost of this instruction now.
331         Cost.IsCounted = true;
332 
333         // If this is a PHI node in the loop header, just add it to the PHI set.
334         if (auto *PhiI = dyn_cast<PHINode>(I))
335           if (PhiI->getParent() == L->getHeader()) {
336             assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
337                                   "inherently simplify during unrolling.");
338             if (Iteration == 0)
339               continue;
340 
341             // Push the incoming value from the backedge into the PHI used list
342             // if it is an in-loop instruction. We'll use this to populate the
343             // cost worklist for the next iteration (as we count backwards).
344             if (auto *OpI = dyn_cast<Instruction>(
345                     PhiI->getIncomingValueForBlock(L->getLoopLatch())))
346               if (L->contains(OpI))
347                 PHIUsedList.push_back(OpI);
348             continue;
349           }
350 
351         // First accumulate the cost of this instruction.
352         if (!Cost.IsFree) {
353           UnrolledCost += TTI.getUserCost(I);
354           DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration
355                        << "): ");
356           DEBUG(I->dump());
357         }
358 
359         // We must count the cost of every operand which is not free,
360         // recursively. If we reach a loop PHI node, simply add it to the set
361         // to be considered on the next iteration (backwards!).
362         for (Value *Op : I->operands()) {
363           // Check whether this operand is free due to being a constant or
364           // outside the loop.
365           auto *OpI = dyn_cast<Instruction>(Op);
366           if (!OpI || !L->contains(OpI))
367             continue;
368 
369           // Otherwise accumulate its cost.
370           CostWorklist.push_back(OpI);
371         }
372       } while (!CostWorklist.empty());
373 
374       if (PHIUsedList.empty())
375         // We've exhausted the search.
376         break;
377 
378       assert(Iteration > 0 &&
379              "Cannot track PHI-used values past the first iteration!");
380       CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
381       PHIUsedList.clear();
382     }
383   };
384 
385   // Ensure that we don't violate the loop structure invariants relied on by
386   // this analysis.
387   assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
388   assert(L->isLCSSAForm(DT) &&
389          "Must have loops in LCSSA form to track live-out values.");
390 
391   DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
392 
393   // Simulate execution of each iteration of the loop counting instructions,
394   // which would be simplified.
395   // Since the same load will take different values on different iterations,
396   // we literally have to go through all loop's iterations.
397   for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
398     DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
399 
400     // Prepare for the iteration by collecting any simplified entry or backedge
401     // inputs.
402     for (Instruction &I : *L->getHeader()) {
403       auto *PHI = dyn_cast<PHINode>(&I);
404       if (!PHI)
405         break;
406 
407       // The loop header PHI nodes must have exactly two input: one from the
408       // loop preheader and one from the loop latch.
409       assert(
410           PHI->getNumIncomingValues() == 2 &&
411           "Must have an incoming value only for the preheader and the latch.");
412 
413       Value *V = PHI->getIncomingValueForBlock(
414           Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
415       Constant *C = dyn_cast<Constant>(V);
416       if (Iteration != 0 && !C)
417         C = SimplifiedValues.lookup(V);
418       if (C)
419         SimplifiedInputValues.push_back({PHI, C});
420     }
421 
422     // Now clear and re-populate the map for the next iteration.
423     SimplifiedValues.clear();
424     while (!SimplifiedInputValues.empty())
425       SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
426 
427     UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
428 
429     BBWorklist.clear();
430     BBWorklist.insert(L->getHeader());
431     // Note that we *must not* cache the size, this loop grows the worklist.
432     for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
433       BasicBlock *BB = BBWorklist[Idx];
434 
435       // Visit all instructions in the given basic block and try to simplify
436       // it.  We don't change the actual IR, just count optimization
437       // opportunities.
438       for (Instruction &I : *BB) {
439         if (isa<DbgInfoIntrinsic>(I))
440           continue;
441 
442         // Track this instruction's expected baseline cost when executing the
443         // rolled loop form.
444         RolledDynamicCost += TTI.getUserCost(&I);
445 
446         // Visit the instruction to analyze its loop cost after unrolling,
447         // and if the visitor returns true, mark the instruction as free after
448         // unrolling and continue.
449         bool IsFree = Analyzer.visit(I);
450         bool Inserted = InstCostMap.insert({&I, (int)Iteration,
451                                            (unsigned)IsFree,
452                                            /*IsCounted*/ false}).second;
453         (void)Inserted;
454         assert(Inserted && "Cannot have a state for an unvisited instruction!");
455 
456         if (IsFree)
457           continue;
458 
459         // Can't properly model a cost of a call.
460         // FIXME: With a proper cost model we should be able to do it.
461         if(isa<CallInst>(&I))
462           return None;
463 
464         // If the instruction might have a side-effect recursively account for
465         // the cost of it and all the instructions leading up to it.
466         if (I.mayHaveSideEffects())
467           AddCostRecursively(I, Iteration);
468 
469         // If unrolled body turns out to be too big, bail out.
470         if (UnrolledCost > MaxUnrolledLoopSize) {
471           DEBUG(dbgs() << "  Exceeded threshold.. exiting.\n"
472                        << "  UnrolledCost: " << UnrolledCost
473                        << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
474                        << "\n");
475           return None;
476         }
477       }
478 
479       TerminatorInst *TI = BB->getTerminator();
480 
481       // Add in the live successors by first checking whether we have terminator
482       // that may be simplified based on the values simplified by this call.
483       BasicBlock *KnownSucc = nullptr;
484       if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
485         if (BI->isConditional()) {
486           if (Constant *SimpleCond =
487                   SimplifiedValues.lookup(BI->getCondition())) {
488             // Just take the first successor if condition is undef
489             if (isa<UndefValue>(SimpleCond))
490               KnownSucc = BI->getSuccessor(0);
491             else if (ConstantInt *SimpleCondVal =
492                          dyn_cast<ConstantInt>(SimpleCond))
493               KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
494           }
495         }
496       } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
497         if (Constant *SimpleCond =
498                 SimplifiedValues.lookup(SI->getCondition())) {
499           // Just take the first successor if condition is undef
500           if (isa<UndefValue>(SimpleCond))
501             KnownSucc = SI->getSuccessor(0);
502           else if (ConstantInt *SimpleCondVal =
503                        dyn_cast<ConstantInt>(SimpleCond))
504             KnownSucc = SI->findCaseValue(SimpleCondVal).getCaseSuccessor();
505         }
506       }
507       if (KnownSucc) {
508         if (L->contains(KnownSucc))
509           BBWorklist.insert(KnownSucc);
510         else
511           ExitWorklist.insert({BB, KnownSucc});
512         continue;
513       }
514 
515       // Add BB's successors to the worklist.
516       for (BasicBlock *Succ : successors(BB))
517         if (L->contains(Succ))
518           BBWorklist.insert(Succ);
519         else
520           ExitWorklist.insert({BB, Succ});
521       AddCostRecursively(*TI, Iteration);
522     }
523 
524     // If we found no optimization opportunities on the first iteration, we
525     // won't find them on later ones too.
526     if (UnrolledCost == RolledDynamicCost) {
527       DEBUG(dbgs() << "  No opportunities found.. exiting.\n"
528                    << "  UnrolledCost: " << UnrolledCost << "\n");
529       return None;
530     }
531   }
532 
533   while (!ExitWorklist.empty()) {
534     BasicBlock *ExitingBB, *ExitBB;
535     std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
536 
537     for (Instruction &I : *ExitBB) {
538       auto *PN = dyn_cast<PHINode>(&I);
539       if (!PN)
540         break;
541 
542       Value *Op = PN->getIncomingValueForBlock(ExitingBB);
543       if (auto *OpI = dyn_cast<Instruction>(Op))
544         if (L->contains(OpI))
545           AddCostRecursively(*OpI, TripCount - 1);
546     }
547   }
548 
549   DEBUG(dbgs() << "Analysis finished:\n"
550                << "UnrolledCost: " << UnrolledCost << ", "
551                << "RolledDynamicCost: " << RolledDynamicCost << "\n");
552   return {{UnrolledCost, RolledDynamicCost}};
553 }
554 
555 /// ApproximateLoopSize - Approximate the size of the loop.
556 static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
557                                     bool &NotDuplicatable, bool &Convergent,
558                                     const TargetTransformInfo &TTI,
559                                     AssumptionCache *AC, unsigned BEInsns) {
560   SmallPtrSet<const Value *, 32> EphValues;
561   CodeMetrics::collectEphemeralValues(L, AC, EphValues);
562 
563   CodeMetrics Metrics;
564   for (BasicBlock *BB : L->blocks())
565     Metrics.analyzeBasicBlock(BB, TTI, EphValues);
566   NumCalls = Metrics.NumInlineCandidates;
567   NotDuplicatable = Metrics.notDuplicatable;
568   Convergent = Metrics.convergent;
569 
570   unsigned LoopSize = Metrics.NumInsts;
571 
572   // Don't allow an estimate of size zero.  This would allows unrolling of loops
573   // with huge iteration counts, which is a compile time problem even if it's
574   // not a problem for code quality. Also, the code using this size may assume
575   // that each loop has at least three instructions (likely a conditional
576   // branch, a comparison feeding that branch, and some kind of loop increment
577   // feeding that comparison instruction).
578   LoopSize = std::max(LoopSize, BEInsns + 1);
579 
580   return LoopSize;
581 }
582 
583 // Returns the loop hint metadata node with the given name (for example,
584 // "llvm.loop.unroll.count").  If no such metadata node exists, then nullptr is
585 // returned.
586 static MDNode *GetUnrollMetadataForLoop(const Loop *L, StringRef Name) {
587   if (MDNode *LoopID = L->getLoopID())
588     return GetUnrollMetadata(LoopID, Name);
589   return nullptr;
590 }
591 
592 // Returns true if the loop has an unroll(full) pragma.
593 static bool HasUnrollFullPragma(const Loop *L) {
594   return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
595 }
596 
597 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
598 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
599 static bool HasUnrollEnablePragma(const Loop *L) {
600   return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
601 }
602 
603 // Returns true if the loop has an unroll(disable) pragma.
604 static bool HasUnrollDisablePragma(const Loop *L) {
605   return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
606 }
607 
608 // Returns true if the loop has an runtime unroll(disable) pragma.
609 static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
610   return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
611 }
612 
613 // If loop has an unroll_count pragma return the (necessarily
614 // positive) value from the pragma.  Otherwise return 0.
615 static unsigned UnrollCountPragmaValue(const Loop *L) {
616   MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
617   if (MD) {
618     assert(MD->getNumOperands() == 2 &&
619            "Unroll count hint metadata should have two operands.");
620     unsigned Count =
621         mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
622     assert(Count >= 1 && "Unroll count must be positive.");
623     return Count;
624   }
625   return 0;
626 }
627 
628 // Remove existing unroll metadata and add unroll disable metadata to
629 // indicate the loop has already been unrolled.  This prevents a loop
630 // from being unrolled more than is directed by a pragma if the loop
631 // unrolling pass is run more than once (which it generally is).
632 static void SetLoopAlreadyUnrolled(Loop *L) {
633   MDNode *LoopID = L->getLoopID();
634   // First remove any existing loop unrolling metadata.
635   SmallVector<Metadata *, 4> MDs;
636   // Reserve first location for self reference to the LoopID metadata node.
637   MDs.push_back(nullptr);
638 
639   if (LoopID) {
640     for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
641       bool IsUnrollMetadata = false;
642       MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
643       if (MD) {
644         const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
645         IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
646       }
647       if (!IsUnrollMetadata)
648         MDs.push_back(LoopID->getOperand(i));
649     }
650   }
651 
652   // Add unroll(disable) metadata to disable future unrolling.
653   LLVMContext &Context = L->getHeader()->getContext();
654   SmallVector<Metadata *, 1> DisableOperands;
655   DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
656   MDNode *DisableNode = MDNode::get(Context, DisableOperands);
657   MDs.push_back(DisableNode);
658 
659   MDNode *NewLoopID = MDNode::get(Context, MDs);
660   // Set operand 0 to refer to the loop id itself.
661   NewLoopID->replaceOperandWith(0, NewLoopID);
662   L->setLoopID(NewLoopID);
663 }
664 
665 static bool canUnrollCompletely(Loop *L, unsigned Threshold,
666                                 unsigned PercentDynamicCostSavedThreshold,
667                                 unsigned DynamicCostSavingsDiscount,
668                                 uint64_t UnrolledCost,
669                                 uint64_t RolledDynamicCost) {
670   if (Threshold == NoThreshold) {
671     DEBUG(dbgs() << "  Can fully unroll, because no threshold is set.\n");
672     return true;
673   }
674 
675   if (UnrolledCost <= Threshold) {
676     DEBUG(dbgs() << "  Can fully unroll, because unrolled cost: "
677                  << UnrolledCost << "<=" << Threshold << "\n");
678     return true;
679   }
680 
681   assert(UnrolledCost && "UnrolledCost can't be 0 at this point.");
682   assert(RolledDynamicCost >= UnrolledCost &&
683          "Cannot have a higher unrolled cost than a rolled cost!");
684 
685   // Compute the percentage of the dynamic cost in the rolled form that is
686   // saved when unrolled. If unrolling dramatically reduces the estimated
687   // dynamic cost of the loop, we use a higher threshold to allow more
688   // unrolling.
689   unsigned PercentDynamicCostSaved =
690       (uint64_t)(RolledDynamicCost - UnrolledCost) * 100ull / RolledDynamicCost;
691 
692   if (PercentDynamicCostSaved >= PercentDynamicCostSavedThreshold &&
693       (int64_t)UnrolledCost - (int64_t)DynamicCostSavingsDiscount <=
694           (int64_t)Threshold) {
695     DEBUG(dbgs() << "  Can fully unroll, because unrolling will reduce the "
696                     "expected dynamic cost by "
697                  << PercentDynamicCostSaved << "% (threshold: "
698                  << PercentDynamicCostSavedThreshold << "%)\n"
699                  << "  and the unrolled cost (" << UnrolledCost
700                  << ") is less than the max threshold ("
701                  << DynamicCostSavingsDiscount << ").\n");
702     return true;
703   }
704 
705   DEBUG(dbgs() << "  Too large to fully unroll:\n");
706   DEBUG(dbgs() << "    Threshold: " << Threshold << "\n");
707   DEBUG(dbgs() << "    Max threshold: " << DynamicCostSavingsDiscount << "\n");
708   DEBUG(dbgs() << "    Percent cost saved threshold: "
709                << PercentDynamicCostSavedThreshold << "%\n");
710   DEBUG(dbgs() << "    Unrolled cost: " << UnrolledCost << "\n");
711   DEBUG(dbgs() << "    Rolled dynamic cost: " << RolledDynamicCost << "\n");
712   DEBUG(dbgs() << "    Percent cost saved: " << PercentDynamicCostSaved
713                << "\n");
714   return false;
715 }
716 
717 // Returns loop size estimation for unrolled loop.
718 static uint64_t getUnrolledLoopSize(
719     unsigned LoopSize,
720     TargetTransformInfo::UnrollingPreferences &UP) {
721   assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
722   return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
723 }
724 
725 // Returns true if unroll count was set explicitly.
726 // Calculates unroll count and writes it to UP.Count.
727 static bool computeUnrollCount(
728     Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
729     ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount,
730     unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize,
731     TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
732   // Check for explicit Count.
733   // 1st priority is unroll count set by "unroll-count" option.
734   bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
735   if (UserUnrollCount) {
736     UP.Count = UnrollCount;
737     UP.AllowExpensiveTripCount = true;
738     UP.Force = true;
739     if (UP.AllowRemainder && getUnrolledLoopSize(LoopSize, UP) < UP.Threshold)
740       return true;
741   }
742 
743   // 2nd priority is unroll count set by pragma.
744   unsigned PragmaCount = UnrollCountPragmaValue(L);
745   if (PragmaCount > 0) {
746     UP.Count = PragmaCount;
747     UP.Runtime = true;
748     UP.AllowExpensiveTripCount = true;
749     UP.Force = true;
750     if (UP.AllowRemainder &&
751         getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
752       return true;
753   }
754   bool PragmaFullUnroll = HasUnrollFullPragma(L);
755   if (PragmaFullUnroll && TripCount != 0) {
756     UP.Count = TripCount;
757     if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
758       return false;
759   }
760 
761   bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
762   bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
763                         PragmaEnableUnroll || UserUnrollCount;
764 
765   if (ExplicitUnroll && TripCount != 0) {
766     // If the loop has an unrolling pragma, we want to be more aggressive with
767     // unrolling limits. Set thresholds to at least the PragmaThreshold value
768     // which is larger than the default limits.
769     UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
770     UP.PartialThreshold =
771         std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
772   }
773 
774   // 3rd priority is full unroll count.
775   // Full unroll makes sense only when TripCount or its upper bound could be
776   // statically calculated.
777   // Also we need to check if we exceed FullUnrollMaxCount.
778   // If using the upper bound to unroll, TripMultiple should be set to 1 because
779   // we do not know when loop may exit.
780   // MaxTripCount and ExactTripCount cannot both be non zero since we only
781   // compute the former when the latter is zero.
782   unsigned ExactTripCount = TripCount;
783   assert((ExactTripCount == 0 || MaxTripCount == 0) &&
784          "ExtractTripCound and MaxTripCount cannot both be non zero.");
785   unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
786   UP.Count = FullUnrollTripCount;
787   if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
788     // When computing the unrolled size, note that BEInsns are not replicated
789     // like the rest of the loop body.
790     if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount,
791                             getUnrolledLoopSize(LoopSize, UP),
792                             getUnrolledLoopSize(LoopSize, UP))) {
793       UseUpperBound = (MaxTripCount == FullUnrollTripCount);
794       TripCount = FullUnrollTripCount;
795       TripMultiple = UP.UpperBound ? 1 : TripMultiple;
796       return ExplicitUnroll;
797     } else {
798       // The loop isn't that small, but we still can fully unroll it if that
799       // helps to remove a significant number of instructions.
800       // To check that, run additional analysis on the loop.
801       if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
802               L, FullUnrollTripCount, DT, *SE, TTI,
803               UP.Threshold + UP.DynamicCostSavingsDiscount))
804         if (canUnrollCompletely(L, UP.Threshold,
805                                 UP.PercentDynamicCostSavedThreshold,
806                                 UP.DynamicCostSavingsDiscount,
807                                 Cost->UnrolledCost, Cost->RolledDynamicCost)) {
808           UseUpperBound = (MaxTripCount == FullUnrollTripCount);
809           TripCount = FullUnrollTripCount;
810           TripMultiple = UP.UpperBound ? 1 : TripMultiple;
811           return ExplicitUnroll;
812         }
813     }
814   }
815 
816   // 4rd priority is partial unrolling.
817   // Try partial unroll only when TripCount could be staticaly calculated.
818   if (TripCount) {
819     UP.Partial |= ExplicitUnroll;
820     if (!UP.Partial) {
821       DEBUG(dbgs() << "  will not try to unroll partially because "
822                    << "-unroll-allow-partial not given\n");
823       UP.Count = 0;
824       return false;
825     }
826     if (UP.Count == 0)
827       UP.Count = TripCount;
828     if (UP.PartialThreshold != NoThreshold) {
829       // Reduce unroll count to be modulo of TripCount for partial unrolling.
830       if (getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
831         UP.Count =
832             (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
833             (LoopSize - UP.BEInsns);
834       if (UP.Count > UP.MaxCount)
835         UP.Count = UP.MaxCount;
836       while (UP.Count != 0 && TripCount % UP.Count != 0)
837         UP.Count--;
838       if (UP.AllowRemainder && UP.Count <= 1) {
839         // If there is no Count that is modulo of TripCount, set Count to
840         // largest power-of-two factor that satisfies the threshold limit.
841         // As we'll create fixup loop, do the type of unrolling only if
842         // remainder loop is allowed.
843         UP.Count = UP.DefaultUnrollRuntimeCount;
844         while (UP.Count != 0 &&
845                getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
846           UP.Count >>= 1;
847       }
848       if (UP.Count < 2) {
849         if (PragmaEnableUnroll)
850           ORE->emit(
851               OptimizationRemarkMissed(DEBUG_TYPE, "UnrollAsDirectedTooLarge",
852                                        L->getStartLoc(), L->getHeader())
853               << "Unable to unroll loop as directed by unroll(enable) pragma "
854                  "because unrolled size is too large.");
855         UP.Count = 0;
856       }
857     } else {
858       UP.Count = TripCount;
859     }
860     if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
861         UP.Count != TripCount)
862       ORE->emit(
863           OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollAsDirectedTooLarge",
864                                    L->getStartLoc(), L->getHeader())
865           << "Unable to fully unroll loop as directed by unroll pragma because "
866              "unrolled size is too large.");
867     return ExplicitUnroll;
868   }
869   assert(TripCount == 0 &&
870          "All cases when TripCount is constant should be covered here.");
871   if (PragmaFullUnroll)
872     ORE->emit(
873         OptimizationRemarkMissed(DEBUG_TYPE,
874                                  "CantFullUnrollAsDirectedRuntimeTripCount",
875                                  L->getStartLoc(), L->getHeader())
876         << "Unable to fully unroll loop as directed by unroll(full) pragma "
877            "because loop has a runtime trip count.");
878 
879   // 5th priority is loop peeling
880   computePeelCount(L, LoopSize, UP);
881   if (UP.PeelCount) {
882     UP.Runtime = false;
883     UP.Count = 1;
884     return ExplicitUnroll;
885   }
886 
887   // 6th priority is runtime unrolling.
888   // Don't unroll a runtime trip count loop when it is disabled.
889   if (HasRuntimeUnrollDisablePragma(L)) {
890     UP.Count = 0;
891     return false;
892   }
893 
894   // Check if the runtime trip count is too small when profile is available.
895   if (L->getHeader()->getParent()->getEntryCount()) {
896     if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
897       if (*ProfileTripCount < FlatLoopTripCountThreshold)
898         return false;
899       else
900         UP.AllowExpensiveTripCount = true;
901     }
902   }
903 
904   // Reduce count based on the type of unrolling and the threshold values.
905   UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
906   if (!UP.Runtime) {
907     DEBUG(dbgs() << "  will not try to unroll loop with runtime trip count "
908                  << "-unroll-runtime not given\n");
909     UP.Count = 0;
910     return false;
911   }
912   if (UP.Count == 0)
913     UP.Count = UP.DefaultUnrollRuntimeCount;
914 
915   // Reduce unroll count to be the largest power-of-two factor of
916   // the original count which satisfies the threshold limit.
917   while (UP.Count != 0 &&
918          getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
919     UP.Count >>= 1;
920 
921 #ifndef NDEBUG
922   unsigned OrigCount = UP.Count;
923 #endif
924 
925   if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
926     while (UP.Count != 0 && TripMultiple % UP.Count != 0)
927       UP.Count >>= 1;
928     DEBUG(dbgs() << "Remainder loop is restricted (that could architecture "
929                     "specific or because the loop contains a convergent "
930                     "instruction), so unroll count must divide the trip "
931                     "multiple, "
932                  << TripMultiple << ".  Reducing unroll count from "
933                  << OrigCount << " to " << UP.Count << ".\n");
934     using namespace ore;
935     if (PragmaCount > 0 && !UP.AllowRemainder)
936       ORE->emit(
937           OptimizationRemarkMissed(DEBUG_TYPE,
938                                    "DifferentUnrollCountFromDirected",
939                                    L->getStartLoc(), L->getHeader())
940           << "Unable to unroll loop the number of times directed by "
941              "unroll_count pragma because remainder loop is restricted "
942              "(that could architecture specific or because the loop "
943              "contains a convergent instruction) and so must have an unroll "
944              "count that divides the loop trip multiple of "
945           << NV("TripMultiple", TripMultiple) << ".  Unrolling instead "
946           << NV("UnrollCount", UP.Count) << " time(s).");
947   }
948 
949   if (UP.Count > UP.MaxCount)
950     UP.Count = UP.MaxCount;
951   DEBUG(dbgs() << "  partially unrolling with count: " << UP.Count << "\n");
952   if (UP.Count < 2)
953     UP.Count = 0;
954   return ExplicitUnroll;
955 }
956 
957 static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
958                             ScalarEvolution *SE, const TargetTransformInfo &TTI,
959                             AssumptionCache &AC, OptimizationRemarkEmitter &ORE,
960                             bool PreserveLCSSA,
961                             Optional<unsigned> ProvidedCount,
962                             Optional<unsigned> ProvidedThreshold,
963                             Optional<bool> ProvidedAllowPartial,
964                             Optional<bool> ProvidedRuntime,
965                             Optional<bool> ProvidedUpperBound) {
966   DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName()
967                << "] Loop %" << L->getHeader()->getName() << "\n");
968   if (HasUnrollDisablePragma(L))
969     return false;
970   if (!L->isLoopSimplifyForm()) {
971     DEBUG(
972         dbgs() << "  Not unrolling loop which is not in loop-simplify form.\n");
973     return false;
974   }
975 
976   unsigned NumInlineCandidates;
977   bool NotDuplicatable;
978   bool Convergent;
979   TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
980       L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
981       ProvidedRuntime, ProvidedUpperBound);
982   // Exit early if unrolling is disabled.
983   if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
984     return false;
985   unsigned LoopSize = ApproximateLoopSize(
986       L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC, UP.BEInsns);
987   DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
988   if (NotDuplicatable) {
989     DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable"
990                  << " instructions.\n");
991     return false;
992   }
993   if (NumInlineCandidates != 0) {
994     DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
995     return false;
996   }
997 
998   // Find trip count and trip multiple if count is not available
999   unsigned TripCount = 0;
1000   unsigned MaxTripCount = 0;
1001   unsigned TripMultiple = 1;
1002   // If there are multiple exiting blocks but one of them is the latch, use the
1003   // latch for the trip count estimation. Otherwise insist on a single exiting
1004   // block for the trip count estimation.
1005   BasicBlock *ExitingBlock = L->getLoopLatch();
1006   if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1007     ExitingBlock = L->getExitingBlock();
1008   if (ExitingBlock) {
1009     TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
1010     TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
1011   }
1012 
1013   // If the loop contains a convergent operation, the prelude we'd add
1014   // to do the first few instructions before we hit the unrolled loop
1015   // is unsafe -- it adds a control-flow dependency to the convergent
1016   // operation.  Therefore restrict remainder loop (try unrollig without).
1017   //
1018   // TODO: This is quite conservative.  In practice, convergent_op()
1019   // is likely to be called unconditionally in the loop.  In this
1020   // case, the program would be ill-formed (on most architectures)
1021   // unless n were the same on all threads in a thread group.
1022   // Assuming n is the same on all threads, any kind of unrolling is
1023   // safe.  But currently llvm's notion of convergence isn't powerful
1024   // enough to express this.
1025   if (Convergent)
1026     UP.AllowRemainder = false;
1027 
1028   // Try to find the trip count upper bound if we cannot find the exact trip
1029   // count.
1030   bool MaxOrZero = false;
1031   if (!TripCount) {
1032     MaxTripCount = SE->getSmallConstantMaxTripCount(L);
1033     MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
1034     // We can unroll by the upper bound amount if it's generally allowed or if
1035     // we know that the loop is executed either the upper bound or zero times.
1036     // (MaxOrZero unrolling keeps only the first loop test, so the number of
1037     // loop tests remains the same compared to the non-unrolled version, whereas
1038     // the generic upper bound unrolling keeps all but the last loop test so the
1039     // number of loop tests goes up which may end up being worse on targets with
1040     // constriained branch predictor resources so is controlled by an option.)
1041     // In addition we only unroll small upper bounds.
1042     if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
1043       MaxTripCount = 0;
1044     }
1045   }
1046 
1047   // computeUnrollCount() decides whether it is beneficial to use upper bound to
1048   // fully unroll the loop.
1049   bool UseUpperBound = false;
1050   bool IsCountSetExplicitly =
1051       computeUnrollCount(L, TTI, DT, LI, SE, &ORE, TripCount, MaxTripCount,
1052                          TripMultiple, LoopSize, UP, UseUpperBound);
1053   if (!UP.Count)
1054     return false;
1055   // Unroll factor (Count) must be less or equal to TripCount.
1056   if (TripCount && UP.Count > TripCount)
1057     UP.Count = TripCount;
1058 
1059   // Unroll the loop.
1060   if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
1061                   UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero,
1062                   TripMultiple, UP.PeelCount, LI, SE, &DT, &AC, &ORE,
1063                   PreserveLCSSA))
1064     return false;
1065 
1066   // If loop has an unroll count pragma or unrolled by explicitly set count
1067   // mark loop as unrolled to prevent unrolling beyond that requested.
1068   // If the loop was peeled, we already "used up" the profile information
1069   // we had, so we don't want to unroll or peel again.
1070   if (IsCountSetExplicitly || UP.PeelCount)
1071     SetLoopAlreadyUnrolled(L);
1072 
1073   return true;
1074 }
1075 
1076 namespace {
1077 class LoopUnroll : public LoopPass {
1078 public:
1079   static char ID; // Pass ID, replacement for typeid
1080   LoopUnroll(Optional<unsigned> Threshold = None,
1081              Optional<unsigned> Count = None,
1082              Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
1083              Optional<bool> UpperBound = None)
1084       : LoopPass(ID), ProvidedCount(std::move(Count)),
1085         ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1086         ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound) {
1087     initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
1088   }
1089 
1090   Optional<unsigned> ProvidedCount;
1091   Optional<unsigned> ProvidedThreshold;
1092   Optional<bool> ProvidedAllowPartial;
1093   Optional<bool> ProvidedRuntime;
1094   Optional<bool> ProvidedUpperBound;
1095 
1096   bool runOnLoop(Loop *L, LPPassManager &) override {
1097     if (skipLoop(L))
1098       return false;
1099 
1100     Function &F = *L->getHeader()->getParent();
1101 
1102     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1103     LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1104     ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1105     const TargetTransformInfo &TTI =
1106         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1107     auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1108     // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1109     // pass.  Function analyses need to be preserved across loop transformations
1110     // but ORE cannot be preserved (see comment before the pass definition).
1111     OptimizationRemarkEmitter ORE(&F);
1112     bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1113 
1114     return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA,
1115                            ProvidedCount, ProvidedThreshold,
1116                            ProvidedAllowPartial, ProvidedRuntime,
1117                            ProvidedUpperBound);
1118   }
1119 
1120   /// This transformation requires natural loop information & requires that
1121   /// loop preheaders be inserted into the CFG...
1122   ///
1123   void getAnalysisUsage(AnalysisUsage &AU) const override {
1124     AU.addRequired<AssumptionCacheTracker>();
1125     AU.addRequired<TargetTransformInfoWrapperPass>();
1126     // FIXME: Loop passes are required to preserve domtree, and for now we just
1127     // recreate dom info if anything gets unrolled.
1128     getLoopAnalysisUsage(AU);
1129   }
1130 };
1131 }
1132 
1133 char LoopUnroll::ID = 0;
1134 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1135 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1136 INITIALIZE_PASS_DEPENDENCY(LoopPass)
1137 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1138 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1139 
1140 Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
1141                                  int Runtime, int UpperBound) {
1142   // TODO: It would make more sense for this function to take the optionals
1143   // directly, but that's dangerous since it would silently break out of tree
1144   // callers.
1145   return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold),
1146                         Count == -1 ? None : Optional<unsigned>(Count),
1147                         AllowPartial == -1 ? None
1148                                            : Optional<bool>(AllowPartial),
1149                         Runtime == -1 ? None : Optional<bool>(Runtime),
1150                         UpperBound == -1 ? None : Optional<bool>(UpperBound));
1151 }
1152 
1153 Pass *llvm::createSimpleLoopUnrollPass() {
1154   return llvm::createLoopUnrollPass(-1, -1, 0, 0, 0);
1155 }
1156 
1157 PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM) {
1158   const auto &FAM =
1159       AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
1160   Function *F = L.getHeader()->getParent();
1161 
1162 
1163   DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
1164   LoopInfo *LI = FAM.getCachedResult<LoopAnalysis>(*F);
1165   ScalarEvolution *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
1166   auto *TTI = FAM.getCachedResult<TargetIRAnalysis>(*F);
1167   auto *AC = FAM.getCachedResult<AssumptionAnalysis>(*F);
1168   auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
1169   if (!DT)
1170     report_fatal_error(
1171         "LoopUnrollPass: DominatorTreeAnalysis not cached at a higher level");
1172   if (!LI)
1173     report_fatal_error(
1174         "LoopUnrollPass: LoopAnalysis not cached at a higher level");
1175   if (!SE)
1176     report_fatal_error(
1177         "LoopUnrollPass: ScalarEvolutionAnalysis not cached at a higher level");
1178   if (!TTI)
1179     report_fatal_error(
1180         "LoopUnrollPass: TargetIRAnalysis not cached at a higher level");
1181   if (!AC)
1182     report_fatal_error(
1183         "LoopUnrollPass: AssumptionAnalysis not cached at a higher level");
1184   if (!ORE)
1185     report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not "
1186                        "cached at a higher level");
1187 
1188   bool Changed =
1189       tryToUnrollLoop(&L, *DT, LI, SE, *TTI, *AC, *ORE, /*PreserveLCSSA*/ true,
1190                       ProvidedCount, ProvidedThreshold, ProvidedAllowPartial,
1191                       ProvidedRuntime, ProvidedUpperBound);
1192 
1193   if (!Changed)
1194     return PreservedAnalyses::all();
1195   return getLoopPassPreservedAnalyses();
1196 }
1197