xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass implements a simple loop unroller.  It works best when loops have
100b57cec5SDimitry Andric // been canonicalized by the -indvars pass, allowing it to determine the trip
110b57cec5SDimitry Andric // counts of loops easily.
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
150b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
160b57cec5SDimitry Andric #include "llvm/ADT/DenseMapInfo.h"
170b57cec5SDimitry Andric #include "llvm/ADT/DenseSet.h"
180b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
19*0fca6ea1SDimitry Andric #include "llvm/ADT/ScopedHashTable.h"
200b57cec5SDimitry Andric #include "llvm/ADT/SetVector.h"
210b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
220b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
230b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
240b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
250b57cec5SDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h"
260b57cec5SDimitry Andric #include "llvm/Analysis/CodeMetrics.h"
270b57cec5SDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h"
280b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
290b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h"
300b57cec5SDimitry Andric #include "llvm/Analysis/LoopUnrollAnalyzer.h"
31*0fca6ea1SDimitry Andric #include "llvm/Analysis/MemorySSA.h"
320b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
330b57cec5SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
340b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
350b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
360b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
370b57cec5SDimitry Andric #include "llvm/IR/CFG.h"
380b57cec5SDimitry Andric #include "llvm/IR/Constant.h"
390b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
400b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
410b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
420b57cec5SDimitry Andric #include "llvm/IR/Function.h"
430b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
440b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
450b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
460b57cec5SDimitry Andric #include "llvm/IR/Metadata.h"
470b57cec5SDimitry Andric #include "llvm/IR/PassManager.h"
48480093f4SDimitry Andric #include "llvm/InitializePasses.h"
490b57cec5SDimitry Andric #include "llvm/Pass.h"
500b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
510b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
520b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
530b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
540b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
550b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h"
560b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/LoopPassManager.h"
570b57cec5SDimitry Andric #include "llvm/Transforms/Utils.h"
58e8d8bef9SDimitry Andric #include "llvm/Transforms/Utils/LoopPeel.h"
590b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopSimplify.h"
600b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
610b57cec5SDimitry Andric #include "llvm/Transforms/Utils/SizeOpts.h"
620b57cec5SDimitry Andric #include "llvm/Transforms/Utils/UnrollLoop.h"
630b57cec5SDimitry Andric #include <algorithm>
640b57cec5SDimitry Andric #include <cassert>
650b57cec5SDimitry Andric #include <cstdint>
660b57cec5SDimitry Andric #include <limits>
67bdd1243dSDimitry Andric #include <optional>
680b57cec5SDimitry Andric #include <string>
690b57cec5SDimitry Andric #include <tuple>
700b57cec5SDimitry Andric #include <utility>
710b57cec5SDimitry Andric 
720b57cec5SDimitry Andric using namespace llvm;
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric #define DEBUG_TYPE "loop-unroll"
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric cl::opt<bool> llvm::ForgetSCEVInLoopUnroll(
770b57cec5SDimitry Andric     "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
780b57cec5SDimitry Andric     cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
79e8d8bef9SDimitry Andric              " the current top-most loop. This is sometimes preferred to reduce"
800b57cec5SDimitry Andric              " compile time."));
810b57cec5SDimitry Andric 
820b57cec5SDimitry Andric static cl::opt<unsigned>
830b57cec5SDimitry Andric     UnrollThreshold("unroll-threshold", cl::Hidden,
840b57cec5SDimitry Andric                     cl::desc("The cost threshold for loop unrolling"));
850b57cec5SDimitry Andric 
86e8d8bef9SDimitry Andric static cl::opt<unsigned>
87e8d8bef9SDimitry Andric     UnrollOptSizeThreshold(
88e8d8bef9SDimitry Andric       "unroll-optsize-threshold", cl::init(0), cl::Hidden,
89e8d8bef9SDimitry Andric       cl::desc("The cost threshold for loop unrolling when optimizing for "
90e8d8bef9SDimitry Andric                "size"));
91e8d8bef9SDimitry Andric 
920b57cec5SDimitry Andric static cl::opt<unsigned> UnrollPartialThreshold(
930b57cec5SDimitry Andric     "unroll-partial-threshold", cl::Hidden,
940b57cec5SDimitry Andric     cl::desc("The cost threshold for partial loop unrolling"));
950b57cec5SDimitry Andric 
960b57cec5SDimitry Andric static cl::opt<unsigned> UnrollMaxPercentThresholdBoost(
970b57cec5SDimitry Andric     "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
980b57cec5SDimitry Andric     cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
990b57cec5SDimitry Andric              "to the threshold when aggressively unrolling a loop due to the "
1000b57cec5SDimitry Andric              "dynamic cost savings. If completely unrolling a loop will reduce "
1010b57cec5SDimitry Andric              "the total runtime from X to Y, we boost the loop unroll "
1020b57cec5SDimitry Andric              "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
1030b57cec5SDimitry Andric              "X/Y). This limit avoids excessive code bloat."));
1040b57cec5SDimitry Andric 
1050b57cec5SDimitry Andric static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
1060b57cec5SDimitry Andric     "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
1070b57cec5SDimitry Andric     cl::desc("Don't allow loop unrolling to simulate more than this number of"
1080b57cec5SDimitry Andric              "iterations when checking full unroll profitability"));
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric static cl::opt<unsigned> UnrollCount(
1110b57cec5SDimitry Andric     "unroll-count", cl::Hidden,
1120b57cec5SDimitry Andric     cl::desc("Use this unroll count for all loops including those with "
1130b57cec5SDimitry Andric              "unroll_count pragma values, for testing purposes"));
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric static cl::opt<unsigned> UnrollMaxCount(
1160b57cec5SDimitry Andric     "unroll-max-count", cl::Hidden,
1170b57cec5SDimitry Andric     cl::desc("Set the max unroll count for partial and runtime unrolling, for"
1180b57cec5SDimitry Andric              "testing purposes"));
1190b57cec5SDimitry Andric 
1200b57cec5SDimitry Andric static cl::opt<unsigned> UnrollFullMaxCount(
1210b57cec5SDimitry Andric     "unroll-full-max-count", cl::Hidden,
1220b57cec5SDimitry Andric     cl::desc(
1230b57cec5SDimitry Andric         "Set the max unroll count for full unrolling, for testing purposes"));
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric static cl::opt<bool>
1260b57cec5SDimitry Andric     UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
1270b57cec5SDimitry Andric                        cl::desc("Allows loops to be partially unrolled until "
1280b57cec5SDimitry Andric                                 "-unroll-threshold loop size is reached."));
1290b57cec5SDimitry Andric 
1300b57cec5SDimitry Andric static cl::opt<bool> UnrollAllowRemainder(
1310b57cec5SDimitry Andric     "unroll-allow-remainder", cl::Hidden,
1320b57cec5SDimitry Andric     cl::desc("Allow generation of a loop remainder (extra iterations) "
1330b57cec5SDimitry Andric              "when unrolling a loop."));
1340b57cec5SDimitry Andric 
1350b57cec5SDimitry Andric static cl::opt<bool>
13681ad6265SDimitry Andric     UnrollRuntime("unroll-runtime", cl::Hidden,
1370b57cec5SDimitry Andric                   cl::desc("Unroll loops with run-time trip counts"));
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric static cl::opt<unsigned> UnrollMaxUpperBound(
1400b57cec5SDimitry Andric     "unroll-max-upperbound", cl::init(8), cl::Hidden,
1410b57cec5SDimitry Andric     cl::desc(
1420b57cec5SDimitry Andric         "The max of trip count upper bound that is considered in unrolling"));
1430b57cec5SDimitry Andric 
1440b57cec5SDimitry Andric static cl::opt<unsigned> PragmaUnrollThreshold(
1450b57cec5SDimitry Andric     "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
1460b57cec5SDimitry Andric     cl::desc("Unrolled size limit for loops with an unroll(full) or "
1470b57cec5SDimitry Andric              "unroll_count pragma."));
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric static cl::opt<unsigned> FlatLoopTripCountThreshold(
1500b57cec5SDimitry Andric     "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
1510b57cec5SDimitry Andric     cl::desc("If the runtime tripcount for the loop is lower than the "
1520b57cec5SDimitry Andric              "threshold, the loop is considered as flat and will be less "
1530b57cec5SDimitry Andric              "aggressively unrolled."));
1540b57cec5SDimitry Andric 
1550b57cec5SDimitry Andric static cl::opt<bool> UnrollUnrollRemainder(
1560b57cec5SDimitry Andric   "unroll-remainder", cl::Hidden,
1570b57cec5SDimitry Andric   cl::desc("Allow the loop remainder to be unrolled."));
1580b57cec5SDimitry Andric 
1590b57cec5SDimitry Andric // This option isn't ever intended to be enabled, it serves to allow
1600b57cec5SDimitry Andric // experiments to check the assumptions about when this kind of revisit is
1610b57cec5SDimitry Andric // necessary.
1620b57cec5SDimitry Andric static cl::opt<bool> UnrollRevisitChildLoops(
1630b57cec5SDimitry Andric     "unroll-revisit-child-loops", cl::Hidden,
1640b57cec5SDimitry Andric     cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
1650b57cec5SDimitry Andric              "This shouldn't typically be needed as child loops (or their "
1660b57cec5SDimitry Andric              "clones) were already visited."));
1670b57cec5SDimitry Andric 
1685ffd83dbSDimitry Andric static cl::opt<unsigned> UnrollThresholdAggressive(
1695ffd83dbSDimitry Andric     "unroll-threshold-aggressive", cl::init(300), cl::Hidden,
1705ffd83dbSDimitry Andric     cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) "
1715ffd83dbSDimitry Andric              "optimizations"));
1725ffd83dbSDimitry Andric static cl::opt<unsigned>
1735ffd83dbSDimitry Andric     UnrollThresholdDefault("unroll-threshold-default", cl::init(150),
1745ffd83dbSDimitry Andric                            cl::Hidden,
1755ffd83dbSDimitry Andric                            cl::desc("Default threshold (max size of unrolled "
1765ffd83dbSDimitry Andric                                     "loop), used in all but O3 optimizations"));
1775ffd83dbSDimitry Andric 
178*0fca6ea1SDimitry Andric static cl::opt<unsigned> PragmaUnrollFullMaxIterations(
179*0fca6ea1SDimitry Andric     "pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden,
180*0fca6ea1SDimitry Andric     cl::desc("Maximum allowed iterations to unroll under pragma unroll full."));
181*0fca6ea1SDimitry Andric 
1820b57cec5SDimitry Andric /// A magic value for use with the Threshold parameter to indicate
1830b57cec5SDimitry Andric /// that the loop unroll should be performed regardless of how much
1840b57cec5SDimitry Andric /// code expansion would result.
1850b57cec5SDimitry Andric static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
1860b57cec5SDimitry Andric 
1870b57cec5SDimitry Andric /// Gather the various unrolling parameters based on the defaults, compiler
1880b57cec5SDimitry Andric /// flags, TTI overrides and user specified parameters.
1890b57cec5SDimitry Andric TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
1900b57cec5SDimitry Andric     Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
191349cc55cSDimitry Andric     BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
192349cc55cSDimitry Andric     OptimizationRemarkEmitter &ORE, int OptLevel,
193bdd1243dSDimitry Andric     std::optional<unsigned> UserThreshold, std::optional<unsigned> UserCount,
194bdd1243dSDimitry Andric     std::optional<bool> UserAllowPartial, std::optional<bool> UserRuntime,
195bdd1243dSDimitry Andric     std::optional<bool> UserUpperBound,
196bdd1243dSDimitry Andric     std::optional<unsigned> UserFullUnrollMaxCount) {
1970b57cec5SDimitry Andric   TargetTransformInfo::UnrollingPreferences UP;
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric   // Set up the defaults
2005ffd83dbSDimitry Andric   UP.Threshold =
2015ffd83dbSDimitry Andric       OptLevel > 2 ? UnrollThresholdAggressive : UnrollThresholdDefault;
2020b57cec5SDimitry Andric   UP.MaxPercentThresholdBoost = 400;
203e8d8bef9SDimitry Andric   UP.OptSizeThreshold = UnrollOptSizeThreshold;
2040b57cec5SDimitry Andric   UP.PartialThreshold = 150;
205e8d8bef9SDimitry Andric   UP.PartialOptSizeThreshold = UnrollOptSizeThreshold;
2060b57cec5SDimitry Andric   UP.Count = 0;
2070b57cec5SDimitry Andric   UP.DefaultUnrollRuntimeCount = 8;
2080b57cec5SDimitry Andric   UP.MaxCount = std::numeric_limits<unsigned>::max();
209cb14a3feSDimitry Andric   UP.MaxUpperBound = UnrollMaxUpperBound;
2100b57cec5SDimitry Andric   UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
2110b57cec5SDimitry Andric   UP.BEInsns = 2;
2120b57cec5SDimitry Andric   UP.Partial = false;
2130b57cec5SDimitry Andric   UP.Runtime = false;
2140b57cec5SDimitry Andric   UP.AllowRemainder = true;
2150b57cec5SDimitry Andric   UP.UnrollRemainder = false;
2160b57cec5SDimitry Andric   UP.AllowExpensiveTripCount = false;
2170b57cec5SDimitry Andric   UP.Force = false;
2180b57cec5SDimitry Andric   UP.UpperBound = false;
2190b57cec5SDimitry Andric   UP.UnrollAndJam = false;
2200b57cec5SDimitry Andric   UP.UnrollAndJamInnerLoopThreshold = 60;
2215ffd83dbSDimitry Andric   UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric   // Override with any target specific settings
224349cc55cSDimitry Andric   TTI.getUnrollingPreferences(L, SE, UP, &ORE);
2250b57cec5SDimitry Andric 
2260b57cec5SDimitry Andric   // Apply size attributes
2270b57cec5SDimitry Andric   bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
228e8d8bef9SDimitry Andric                     // Let unroll hints / pragmas take precedence over PGSO.
229e8d8bef9SDimitry Andric                     (hasUnrollTransformation(L) != TM_ForcedByUser &&
230480093f4SDimitry Andric                      llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
231e8d8bef9SDimitry Andric                                                  PGSOQueryType::IRPass));
2320b57cec5SDimitry Andric   if (OptForSize) {
2330b57cec5SDimitry Andric     UP.Threshold = UP.OptSizeThreshold;
2340b57cec5SDimitry Andric     UP.PartialThreshold = UP.PartialOptSizeThreshold;
2350b57cec5SDimitry Andric     UP.MaxPercentThresholdBoost = 100;
2360b57cec5SDimitry Andric   }
2370b57cec5SDimitry Andric 
2380b57cec5SDimitry Andric   // Apply any user values specified by cl::opt
2390b57cec5SDimitry Andric   if (UnrollThreshold.getNumOccurrences() > 0)
2400b57cec5SDimitry Andric     UP.Threshold = UnrollThreshold;
2410b57cec5SDimitry Andric   if (UnrollPartialThreshold.getNumOccurrences() > 0)
2420b57cec5SDimitry Andric     UP.PartialThreshold = UnrollPartialThreshold;
2430b57cec5SDimitry Andric   if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
2440b57cec5SDimitry Andric     UP.MaxPercentThresholdBoost = UnrollMaxPercentThresholdBoost;
2450b57cec5SDimitry Andric   if (UnrollMaxCount.getNumOccurrences() > 0)
2460b57cec5SDimitry Andric     UP.MaxCount = UnrollMaxCount;
247cb14a3feSDimitry Andric   if (UnrollMaxUpperBound.getNumOccurrences() > 0)
248cb14a3feSDimitry Andric     UP.MaxUpperBound = UnrollMaxUpperBound;
2490b57cec5SDimitry Andric   if (UnrollFullMaxCount.getNumOccurrences() > 0)
2500b57cec5SDimitry Andric     UP.FullUnrollMaxCount = UnrollFullMaxCount;
2510b57cec5SDimitry Andric   if (UnrollAllowPartial.getNumOccurrences() > 0)
2520b57cec5SDimitry Andric     UP.Partial = UnrollAllowPartial;
2530b57cec5SDimitry Andric   if (UnrollAllowRemainder.getNumOccurrences() > 0)
2540b57cec5SDimitry Andric     UP.AllowRemainder = UnrollAllowRemainder;
2550b57cec5SDimitry Andric   if (UnrollRuntime.getNumOccurrences() > 0)
2560b57cec5SDimitry Andric     UP.Runtime = UnrollRuntime;
2570b57cec5SDimitry Andric   if (UnrollMaxUpperBound == 0)
2580b57cec5SDimitry Andric     UP.UpperBound = false;
2590b57cec5SDimitry Andric   if (UnrollUnrollRemainder.getNumOccurrences() > 0)
2600b57cec5SDimitry Andric     UP.UnrollRemainder = UnrollUnrollRemainder;
2615ffd83dbSDimitry Andric   if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0)
2625ffd83dbSDimitry Andric     UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
2630b57cec5SDimitry Andric 
2640b57cec5SDimitry Andric   // Apply user values provided by argument
26581ad6265SDimitry Andric   if (UserThreshold) {
2660b57cec5SDimitry Andric     UP.Threshold = *UserThreshold;
2670b57cec5SDimitry Andric     UP.PartialThreshold = *UserThreshold;
2680b57cec5SDimitry Andric   }
26981ad6265SDimitry Andric   if (UserCount)
2700b57cec5SDimitry Andric     UP.Count = *UserCount;
27181ad6265SDimitry Andric   if (UserAllowPartial)
2720b57cec5SDimitry Andric     UP.Partial = *UserAllowPartial;
27381ad6265SDimitry Andric   if (UserRuntime)
2740b57cec5SDimitry Andric     UP.Runtime = *UserRuntime;
27581ad6265SDimitry Andric   if (UserUpperBound)
2760b57cec5SDimitry Andric     UP.UpperBound = *UserUpperBound;
27781ad6265SDimitry Andric   if (UserFullUnrollMaxCount)
2788bcb0991SDimitry Andric     UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
2790b57cec5SDimitry Andric 
2800b57cec5SDimitry Andric   return UP;
2810b57cec5SDimitry Andric }
2820b57cec5SDimitry Andric 
2830b57cec5SDimitry Andric namespace {
2840b57cec5SDimitry Andric 
2850b57cec5SDimitry Andric /// A struct to densely store the state of an instruction after unrolling at
2860b57cec5SDimitry Andric /// each iteration.
2870b57cec5SDimitry Andric ///
2880b57cec5SDimitry Andric /// This is designed to work like a tuple of <Instruction *, int> for the
2890b57cec5SDimitry Andric /// purposes of hashing and lookup, but to be able to associate two boolean
2900b57cec5SDimitry Andric /// states with each key.
2910b57cec5SDimitry Andric struct UnrolledInstState {
2920b57cec5SDimitry Andric   Instruction *I;
2930b57cec5SDimitry Andric   int Iteration : 30;
2940b57cec5SDimitry Andric   unsigned IsFree : 1;
2950b57cec5SDimitry Andric   unsigned IsCounted : 1;
2960b57cec5SDimitry Andric };
2970b57cec5SDimitry Andric 
2980b57cec5SDimitry Andric /// Hashing and equality testing for a set of the instruction states.
2990b57cec5SDimitry Andric struct UnrolledInstStateKeyInfo {
3000b57cec5SDimitry Andric   using PtrInfo = DenseMapInfo<Instruction *>;
3010b57cec5SDimitry Andric   using PairInfo = DenseMapInfo<std::pair<Instruction *, int>>;
3020b57cec5SDimitry Andric 
3030b57cec5SDimitry Andric   static inline UnrolledInstState getEmptyKey() {
3040b57cec5SDimitry Andric     return {PtrInfo::getEmptyKey(), 0, 0, 0};
3050b57cec5SDimitry Andric   }
3060b57cec5SDimitry Andric 
3070b57cec5SDimitry Andric   static inline UnrolledInstState getTombstoneKey() {
3080b57cec5SDimitry Andric     return {PtrInfo::getTombstoneKey(), 0, 0, 0};
3090b57cec5SDimitry Andric   }
3100b57cec5SDimitry Andric 
3110b57cec5SDimitry Andric   static inline unsigned getHashValue(const UnrolledInstState &S) {
3120b57cec5SDimitry Andric     return PairInfo::getHashValue({S.I, S.Iteration});
3130b57cec5SDimitry Andric   }
3140b57cec5SDimitry Andric 
3150b57cec5SDimitry Andric   static inline bool isEqual(const UnrolledInstState &LHS,
3160b57cec5SDimitry Andric                              const UnrolledInstState &RHS) {
3170b57cec5SDimitry Andric     return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
3180b57cec5SDimitry Andric   }
3190b57cec5SDimitry Andric };
3200b57cec5SDimitry Andric 
3210b57cec5SDimitry Andric struct EstimatedUnrollCost {
3220b57cec5SDimitry Andric   /// The estimated cost after unrolling.
3230b57cec5SDimitry Andric   unsigned UnrolledCost;
3240b57cec5SDimitry Andric 
3250b57cec5SDimitry Andric   /// The estimated dynamic cost of executing the instructions in the
3260b57cec5SDimitry Andric   /// rolled form.
3270b57cec5SDimitry Andric   unsigned RolledDynamicCost;
3280b57cec5SDimitry Andric };
3290b57cec5SDimitry Andric 
330349cc55cSDimitry Andric struct PragmaInfo {
331349cc55cSDimitry Andric   PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
332349cc55cSDimitry Andric       : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
333349cc55cSDimitry Andric         PragmaEnableUnroll(PEU) {}
334349cc55cSDimitry Andric   const bool UserUnrollCount;
335349cc55cSDimitry Andric   const bool PragmaFullUnroll;
336349cc55cSDimitry Andric   const unsigned PragmaCount;
337349cc55cSDimitry Andric   const bool PragmaEnableUnroll;
338349cc55cSDimitry Andric };
339349cc55cSDimitry Andric 
3400b57cec5SDimitry Andric } // end anonymous namespace
3410b57cec5SDimitry Andric 
3420b57cec5SDimitry Andric /// Figure out if the loop is worth full unrolling.
3430b57cec5SDimitry Andric ///
3440b57cec5SDimitry Andric /// Complete loop unrolling can make some loads constant, and we need to know
3450b57cec5SDimitry Andric /// if that would expose any further optimization opportunities.  This routine
3460b57cec5SDimitry Andric /// estimates this optimization.  It computes cost of unrolled loop
3470b57cec5SDimitry Andric /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
3480b57cec5SDimitry Andric /// dynamic cost we mean that we won't count costs of blocks that are known not
3490b57cec5SDimitry Andric /// to be executed (i.e. if we have a branch in the loop and we know that at the
3500b57cec5SDimitry Andric /// given iteration its condition would be resolved to true, we won't add up the
3510b57cec5SDimitry Andric /// cost of the 'false'-block).
3520b57cec5SDimitry Andric /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
3530b57cec5SDimitry Andric /// the analysis failed (no benefits expected from the unrolling, or the loop is
354bdd1243dSDimitry Andric /// too big to analyze), the returned value is std::nullopt.
355bdd1243dSDimitry Andric static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
3560b57cec5SDimitry Andric     const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
3570b57cec5SDimitry Andric     const SmallPtrSetImpl<const Value *> &EphValues,
3585ffd83dbSDimitry Andric     const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize,
3595ffd83dbSDimitry Andric     unsigned MaxIterationsCountToAnalyze) {
3600b57cec5SDimitry Andric   // We want to be able to scale offsets by the trip count and add more offsets
3610b57cec5SDimitry Andric   // to them without checking for overflows, and we already don't want to
3620b57cec5SDimitry Andric   // analyze *massive* trip counts, so we force the max to be reasonably small.
3635ffd83dbSDimitry Andric   assert(MaxIterationsCountToAnalyze <
3640b57cec5SDimitry Andric              (unsigned)(std::numeric_limits<int>::max() / 2) &&
3650b57cec5SDimitry Andric          "The unroll iterations max is too large!");
3660b57cec5SDimitry Andric 
3670b57cec5SDimitry Andric   // Only analyze inner loops. We can't properly estimate cost of nested loops
3680b57cec5SDimitry Andric   // and we won't visit inner loops again anyway.
369e8d8bef9SDimitry Andric   if (!L->isInnermost())
370bdd1243dSDimitry Andric     return std::nullopt;
3710b57cec5SDimitry Andric 
3720b57cec5SDimitry Andric   // Don't simulate loops with a big or unknown tripcount
3735ffd83dbSDimitry Andric   if (!TripCount || TripCount > MaxIterationsCountToAnalyze)
374bdd1243dSDimitry Andric     return std::nullopt;
3750b57cec5SDimitry Andric 
3760b57cec5SDimitry Andric   SmallSetVector<BasicBlock *, 16> BBWorklist;
3770b57cec5SDimitry Andric   SmallSetVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitWorklist;
378fe6060f1SDimitry Andric   DenseMap<Value *, Value *> SimplifiedValues;
379fe6060f1SDimitry Andric   SmallVector<std::pair<Value *, Value *>, 4> SimplifiedInputValues;
3800b57cec5SDimitry Andric 
3810b57cec5SDimitry Andric   // The estimated cost of the unrolled form of the loop. We try to estimate
3820b57cec5SDimitry Andric   // this by simplifying as much as we can while computing the estimate.
383fe6060f1SDimitry Andric   InstructionCost UnrolledCost = 0;
3840b57cec5SDimitry Andric 
3850b57cec5SDimitry Andric   // We also track the estimated dynamic (that is, actually executed) cost in
3860b57cec5SDimitry Andric   // the rolled form. This helps identify cases when the savings from unrolling
3870b57cec5SDimitry Andric   // aren't just exposing dead control flows, but actual reduced dynamic
3880b57cec5SDimitry Andric   // instructions due to the simplifications which we expect to occur after
3890b57cec5SDimitry Andric   // unrolling.
390fe6060f1SDimitry Andric   InstructionCost RolledDynamicCost = 0;
3910b57cec5SDimitry Andric 
3920b57cec5SDimitry Andric   // We track the simplification of each instruction in each iteration. We use
3930b57cec5SDimitry Andric   // this to recursively merge costs into the unrolled cost on-demand so that
3940b57cec5SDimitry Andric   // we don't count the cost of any dead code. This is essentially a map from
3950b57cec5SDimitry Andric   // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
3960b57cec5SDimitry Andric   DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap;
3970b57cec5SDimitry Andric 
3980b57cec5SDimitry Andric   // A small worklist used to accumulate cost of instructions from each
3990b57cec5SDimitry Andric   // observable and reached root in the loop.
4000b57cec5SDimitry Andric   SmallVector<Instruction *, 16> CostWorklist;
4010b57cec5SDimitry Andric 
4020b57cec5SDimitry Andric   // PHI-used worklist used between iterations while accumulating cost.
4030b57cec5SDimitry Andric   SmallVector<Instruction *, 4> PHIUsedList;
4040b57cec5SDimitry Andric 
4050b57cec5SDimitry Andric   // Helper function to accumulate cost for instructions in the loop.
4060b57cec5SDimitry Andric   auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
4070b57cec5SDimitry Andric     assert(Iteration >= 0 && "Cannot have a negative iteration!");
4080b57cec5SDimitry Andric     assert(CostWorklist.empty() && "Must start with an empty cost list");
4090b57cec5SDimitry Andric     assert(PHIUsedList.empty() && "Must start with an empty phi used list");
4100b57cec5SDimitry Andric     CostWorklist.push_back(&RootI);
411e8d8bef9SDimitry Andric     TargetTransformInfo::TargetCostKind CostKind =
412e8d8bef9SDimitry Andric       RootI.getFunction()->hasMinSize() ?
413e8d8bef9SDimitry Andric       TargetTransformInfo::TCK_CodeSize :
414e8d8bef9SDimitry Andric       TargetTransformInfo::TCK_SizeAndLatency;
4150b57cec5SDimitry Andric     for (;; --Iteration) {
4160b57cec5SDimitry Andric       do {
4170b57cec5SDimitry Andric         Instruction *I = CostWorklist.pop_back_val();
4180b57cec5SDimitry Andric 
4190b57cec5SDimitry Andric         // InstCostMap only uses I and Iteration as a key, the other two values
4200b57cec5SDimitry Andric         // don't matter here.
4210b57cec5SDimitry Andric         auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
4220b57cec5SDimitry Andric         if (CostIter == InstCostMap.end())
4230b57cec5SDimitry Andric           // If an input to a PHI node comes from a dead path through the loop
4240b57cec5SDimitry Andric           // we may have no cost data for it here. What that actually means is
4250b57cec5SDimitry Andric           // that it is free.
4260b57cec5SDimitry Andric           continue;
4270b57cec5SDimitry Andric         auto &Cost = *CostIter;
4280b57cec5SDimitry Andric         if (Cost.IsCounted)
4290b57cec5SDimitry Andric           // Already counted this instruction.
4300b57cec5SDimitry Andric           continue;
4310b57cec5SDimitry Andric 
4320b57cec5SDimitry Andric         // Mark that we are counting the cost of this instruction now.
4330b57cec5SDimitry Andric         Cost.IsCounted = true;
4340b57cec5SDimitry Andric 
4350b57cec5SDimitry Andric         // If this is a PHI node in the loop header, just add it to the PHI set.
4360b57cec5SDimitry Andric         if (auto *PhiI = dyn_cast<PHINode>(I))
4370b57cec5SDimitry Andric           if (PhiI->getParent() == L->getHeader()) {
4380b57cec5SDimitry Andric             assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
4390b57cec5SDimitry Andric                                   "inherently simplify during unrolling.");
4400b57cec5SDimitry Andric             if (Iteration == 0)
4410b57cec5SDimitry Andric               continue;
4420b57cec5SDimitry Andric 
4430b57cec5SDimitry Andric             // Push the incoming value from the backedge into the PHI used list
4440b57cec5SDimitry Andric             // if it is an in-loop instruction. We'll use this to populate the
4450b57cec5SDimitry Andric             // cost worklist for the next iteration (as we count backwards).
4460b57cec5SDimitry Andric             if (auto *OpI = dyn_cast<Instruction>(
4470b57cec5SDimitry Andric                     PhiI->getIncomingValueForBlock(L->getLoopLatch())))
4480b57cec5SDimitry Andric               if (L->contains(OpI))
4490b57cec5SDimitry Andric                 PHIUsedList.push_back(OpI);
4500b57cec5SDimitry Andric             continue;
4510b57cec5SDimitry Andric           }
4520b57cec5SDimitry Andric 
4530b57cec5SDimitry Andric         // First accumulate the cost of this instruction.
4540b57cec5SDimitry Andric         if (!Cost.IsFree) {
455*0fca6ea1SDimitry Andric           // Consider simplified operands in instruction cost.
456*0fca6ea1SDimitry Andric           SmallVector<Value *, 4> Operands;
457*0fca6ea1SDimitry Andric           transform(I->operands(), std::back_inserter(Operands),
458*0fca6ea1SDimitry Andric                     [&](Value *Op) {
459*0fca6ea1SDimitry Andric                       if (auto Res = SimplifiedValues.lookup(Op))
460*0fca6ea1SDimitry Andric                         return Res;
461*0fca6ea1SDimitry Andric                       return Op;
462*0fca6ea1SDimitry Andric                     });
463*0fca6ea1SDimitry Andric           UnrolledCost += TTI.getInstructionCost(I, Operands, CostKind);
4640b57cec5SDimitry Andric           LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
4650b57cec5SDimitry Andric                             << Iteration << "): ");
4660b57cec5SDimitry Andric           LLVM_DEBUG(I->dump());
4670b57cec5SDimitry Andric         }
4680b57cec5SDimitry Andric 
4690b57cec5SDimitry Andric         // We must count the cost of every operand which is not free,
4700b57cec5SDimitry Andric         // recursively. If we reach a loop PHI node, simply add it to the set
4710b57cec5SDimitry Andric         // to be considered on the next iteration (backwards!).
4720b57cec5SDimitry Andric         for (Value *Op : I->operands()) {
4730b57cec5SDimitry Andric           // Check whether this operand is free due to being a constant or
4740b57cec5SDimitry Andric           // outside the loop.
4750b57cec5SDimitry Andric           auto *OpI = dyn_cast<Instruction>(Op);
4760b57cec5SDimitry Andric           if (!OpI || !L->contains(OpI))
4770b57cec5SDimitry Andric             continue;
4780b57cec5SDimitry Andric 
4790b57cec5SDimitry Andric           // Otherwise accumulate its cost.
4800b57cec5SDimitry Andric           CostWorklist.push_back(OpI);
4810b57cec5SDimitry Andric         }
4820b57cec5SDimitry Andric       } while (!CostWorklist.empty());
4830b57cec5SDimitry Andric 
4840b57cec5SDimitry Andric       if (PHIUsedList.empty())
4850b57cec5SDimitry Andric         // We've exhausted the search.
4860b57cec5SDimitry Andric         break;
4870b57cec5SDimitry Andric 
4880b57cec5SDimitry Andric       assert(Iteration > 0 &&
4890b57cec5SDimitry Andric              "Cannot track PHI-used values past the first iteration!");
4900b57cec5SDimitry Andric       CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
4910b57cec5SDimitry Andric       PHIUsedList.clear();
4920b57cec5SDimitry Andric     }
4930b57cec5SDimitry Andric   };
4940b57cec5SDimitry Andric 
4950b57cec5SDimitry Andric   // Ensure that we don't violate the loop structure invariants relied on by
4960b57cec5SDimitry Andric   // this analysis.
4970b57cec5SDimitry Andric   assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
4980b57cec5SDimitry Andric   assert(L->isLCSSAForm(DT) &&
4990b57cec5SDimitry Andric          "Must have loops in LCSSA form to track live-out values.");
5000b57cec5SDimitry Andric 
5010b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
5020b57cec5SDimitry Andric 
503e8d8bef9SDimitry Andric   TargetTransformInfo::TargetCostKind CostKind =
504e8d8bef9SDimitry Andric     L->getHeader()->getParent()->hasMinSize() ?
505e8d8bef9SDimitry Andric     TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_SizeAndLatency;
5060b57cec5SDimitry Andric   // Simulate execution of each iteration of the loop counting instructions,
5070b57cec5SDimitry Andric   // which would be simplified.
5080b57cec5SDimitry Andric   // Since the same load will take different values on different iterations,
5090b57cec5SDimitry Andric   // we literally have to go through all loop's iterations.
5100b57cec5SDimitry Andric   for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
5110b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
5120b57cec5SDimitry Andric 
5130b57cec5SDimitry Andric     // Prepare for the iteration by collecting any simplified entry or backedge
5140b57cec5SDimitry Andric     // inputs.
5150b57cec5SDimitry Andric     for (Instruction &I : *L->getHeader()) {
5160b57cec5SDimitry Andric       auto *PHI = dyn_cast<PHINode>(&I);
5170b57cec5SDimitry Andric       if (!PHI)
5180b57cec5SDimitry Andric         break;
5190b57cec5SDimitry Andric 
5200b57cec5SDimitry Andric       // The loop header PHI nodes must have exactly two input: one from the
5210b57cec5SDimitry Andric       // loop preheader and one from the loop latch.
5220b57cec5SDimitry Andric       assert(
5230b57cec5SDimitry Andric           PHI->getNumIncomingValues() == 2 &&
5240b57cec5SDimitry Andric           "Must have an incoming value only for the preheader and the latch.");
5250b57cec5SDimitry Andric 
5260b57cec5SDimitry Andric       Value *V = PHI->getIncomingValueForBlock(
5270b57cec5SDimitry Andric           Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
528fe6060f1SDimitry Andric       if (Iteration != 0 && SimplifiedValues.count(V))
529fe6060f1SDimitry Andric         V = SimplifiedValues.lookup(V);
530fe6060f1SDimitry Andric       SimplifiedInputValues.push_back({PHI, V});
5310b57cec5SDimitry Andric     }
5320b57cec5SDimitry Andric 
5330b57cec5SDimitry Andric     // Now clear and re-populate the map for the next iteration.
5340b57cec5SDimitry Andric     SimplifiedValues.clear();
5350b57cec5SDimitry Andric     while (!SimplifiedInputValues.empty())
5360b57cec5SDimitry Andric       SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
5370b57cec5SDimitry Andric 
5380b57cec5SDimitry Andric     UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
5390b57cec5SDimitry Andric 
5400b57cec5SDimitry Andric     BBWorklist.clear();
5410b57cec5SDimitry Andric     BBWorklist.insert(L->getHeader());
5420b57cec5SDimitry Andric     // Note that we *must not* cache the size, this loop grows the worklist.
5430b57cec5SDimitry Andric     for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
5440b57cec5SDimitry Andric       BasicBlock *BB = BBWorklist[Idx];
5450b57cec5SDimitry Andric 
5460b57cec5SDimitry Andric       // Visit all instructions in the given basic block and try to simplify
5470b57cec5SDimitry Andric       // it.  We don't change the actual IR, just count optimization
5480b57cec5SDimitry Andric       // opportunities.
5490b57cec5SDimitry Andric       for (Instruction &I : *BB) {
5500b57cec5SDimitry Andric         // These won't get into the final code - don't even try calculating the
5510b57cec5SDimitry Andric         // cost for them.
5520b57cec5SDimitry Andric         if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
5530b57cec5SDimitry Andric           continue;
5540b57cec5SDimitry Andric 
5550b57cec5SDimitry Andric         // Track this instruction's expected baseline cost when executing the
5560b57cec5SDimitry Andric         // rolled loop form.
557bdd1243dSDimitry Andric         RolledDynamicCost += TTI.getInstructionCost(&I, CostKind);
5580b57cec5SDimitry Andric 
5590b57cec5SDimitry Andric         // Visit the instruction to analyze its loop cost after unrolling,
5600b57cec5SDimitry Andric         // and if the visitor returns true, mark the instruction as free after
5610b57cec5SDimitry Andric         // unrolling and continue.
5620b57cec5SDimitry Andric         bool IsFree = Analyzer.visit(I);
5630b57cec5SDimitry Andric         bool Inserted = InstCostMap.insert({&I, (int)Iteration,
5640b57cec5SDimitry Andric                                            (unsigned)IsFree,
5650b57cec5SDimitry Andric                                            /*IsCounted*/ false}).second;
5660b57cec5SDimitry Andric         (void)Inserted;
5670b57cec5SDimitry Andric         assert(Inserted && "Cannot have a state for an unvisited instruction!");
5680b57cec5SDimitry Andric 
5690b57cec5SDimitry Andric         if (IsFree)
5700b57cec5SDimitry Andric           continue;
5710b57cec5SDimitry Andric 
5720b57cec5SDimitry Andric         // Can't properly model a cost of a call.
5730b57cec5SDimitry Andric         // FIXME: With a proper cost model we should be able to do it.
5740b57cec5SDimitry Andric         if (auto *CI = dyn_cast<CallInst>(&I)) {
5750b57cec5SDimitry Andric           const Function *Callee = CI->getCalledFunction();
5760b57cec5SDimitry Andric           if (!Callee || TTI.isLoweredToCall(Callee)) {
5770b57cec5SDimitry Andric             LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
578bdd1243dSDimitry Andric             return std::nullopt;
5790b57cec5SDimitry Andric           }
5800b57cec5SDimitry Andric         }
5810b57cec5SDimitry Andric 
5820b57cec5SDimitry Andric         // If the instruction might have a side-effect recursively account for
5830b57cec5SDimitry Andric         // the cost of it and all the instructions leading up to it.
5840b57cec5SDimitry Andric         if (I.mayHaveSideEffects())
5850b57cec5SDimitry Andric           AddCostRecursively(I, Iteration);
5860b57cec5SDimitry Andric 
5870b57cec5SDimitry Andric         // If unrolled body turns out to be too big, bail out.
5880b57cec5SDimitry Andric         if (UnrolledCost > MaxUnrolledLoopSize) {
5890b57cec5SDimitry Andric           LLVM_DEBUG(dbgs() << "  Exceeded threshold.. exiting.\n"
5900b57cec5SDimitry Andric                             << "  UnrolledCost: " << UnrolledCost
5910b57cec5SDimitry Andric                             << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
5920b57cec5SDimitry Andric                             << "\n");
593bdd1243dSDimitry Andric           return std::nullopt;
5940b57cec5SDimitry Andric         }
5950b57cec5SDimitry Andric       }
5960b57cec5SDimitry Andric 
5970b57cec5SDimitry Andric       Instruction *TI = BB->getTerminator();
5980b57cec5SDimitry Andric 
599fe6060f1SDimitry Andric       auto getSimplifiedConstant = [&](Value *V) -> Constant * {
600fe6060f1SDimitry Andric         if (SimplifiedValues.count(V))
601fe6060f1SDimitry Andric           V = SimplifiedValues.lookup(V);
602fe6060f1SDimitry Andric         return dyn_cast<Constant>(V);
603fe6060f1SDimitry Andric       };
604fe6060f1SDimitry Andric 
6050b57cec5SDimitry Andric       // Add in the live successors by first checking whether we have terminator
6060b57cec5SDimitry Andric       // that may be simplified based on the values simplified by this call.
6070b57cec5SDimitry Andric       BasicBlock *KnownSucc = nullptr;
6080b57cec5SDimitry Andric       if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
6090b57cec5SDimitry Andric         if (BI->isConditional()) {
610fe6060f1SDimitry Andric           if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) {
6110b57cec5SDimitry Andric             // Just take the first successor if condition is undef
6120b57cec5SDimitry Andric             if (isa<UndefValue>(SimpleCond))
6130b57cec5SDimitry Andric               KnownSucc = BI->getSuccessor(0);
6140b57cec5SDimitry Andric             else if (ConstantInt *SimpleCondVal =
6150b57cec5SDimitry Andric                          dyn_cast<ConstantInt>(SimpleCond))
6160b57cec5SDimitry Andric               KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
6170b57cec5SDimitry Andric           }
6180b57cec5SDimitry Andric         }
6190b57cec5SDimitry Andric       } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
620fe6060f1SDimitry Andric         if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) {
6210b57cec5SDimitry Andric           // Just take the first successor if condition is undef
6220b57cec5SDimitry Andric           if (isa<UndefValue>(SimpleCond))
6230b57cec5SDimitry Andric             KnownSucc = SI->getSuccessor(0);
6240b57cec5SDimitry Andric           else if (ConstantInt *SimpleCondVal =
6250b57cec5SDimitry Andric                        dyn_cast<ConstantInt>(SimpleCond))
6260b57cec5SDimitry Andric             KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
6270b57cec5SDimitry Andric         }
6280b57cec5SDimitry Andric       }
6290b57cec5SDimitry Andric       if (KnownSucc) {
6300b57cec5SDimitry Andric         if (L->contains(KnownSucc))
6310b57cec5SDimitry Andric           BBWorklist.insert(KnownSucc);
6320b57cec5SDimitry Andric         else
6330b57cec5SDimitry Andric           ExitWorklist.insert({BB, KnownSucc});
6340b57cec5SDimitry Andric         continue;
6350b57cec5SDimitry Andric       }
6360b57cec5SDimitry Andric 
6370b57cec5SDimitry Andric       // Add BB's successors to the worklist.
6380b57cec5SDimitry Andric       for (BasicBlock *Succ : successors(BB))
6390b57cec5SDimitry Andric         if (L->contains(Succ))
6400b57cec5SDimitry Andric           BBWorklist.insert(Succ);
6410b57cec5SDimitry Andric         else
6420b57cec5SDimitry Andric           ExitWorklist.insert({BB, Succ});
6430b57cec5SDimitry Andric       AddCostRecursively(*TI, Iteration);
6440b57cec5SDimitry Andric     }
6450b57cec5SDimitry Andric 
6460b57cec5SDimitry Andric     // If we found no optimization opportunities on the first iteration, we
6470b57cec5SDimitry Andric     // won't find them on later ones too.
6480b57cec5SDimitry Andric     if (UnrolledCost == RolledDynamicCost) {
6490b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  No opportunities found.. exiting.\n"
6500b57cec5SDimitry Andric                         << "  UnrolledCost: " << UnrolledCost << "\n");
651bdd1243dSDimitry Andric       return std::nullopt;
6520b57cec5SDimitry Andric     }
6530b57cec5SDimitry Andric   }
6540b57cec5SDimitry Andric 
6550b57cec5SDimitry Andric   while (!ExitWorklist.empty()) {
6560b57cec5SDimitry Andric     BasicBlock *ExitingBB, *ExitBB;
6570b57cec5SDimitry Andric     std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
6580b57cec5SDimitry Andric 
6590b57cec5SDimitry Andric     for (Instruction &I : *ExitBB) {
6600b57cec5SDimitry Andric       auto *PN = dyn_cast<PHINode>(&I);
6610b57cec5SDimitry Andric       if (!PN)
6620b57cec5SDimitry Andric         break;
6630b57cec5SDimitry Andric 
6640b57cec5SDimitry Andric       Value *Op = PN->getIncomingValueForBlock(ExitingBB);
6650b57cec5SDimitry Andric       if (auto *OpI = dyn_cast<Instruction>(Op))
6660b57cec5SDimitry Andric         if (L->contains(OpI))
6670b57cec5SDimitry Andric           AddCostRecursively(*OpI, TripCount - 1);
6680b57cec5SDimitry Andric     }
6690b57cec5SDimitry Andric   }
6700b57cec5SDimitry Andric 
671fe6060f1SDimitry Andric   assert(UnrolledCost.isValid() && RolledDynamicCost.isValid() &&
672fe6060f1SDimitry Andric          "All instructions must have a valid cost, whether the "
673fe6060f1SDimitry Andric          "loop is rolled or unrolled.");
674fe6060f1SDimitry Andric 
6750b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Analysis finished:\n"
6760b57cec5SDimitry Andric                     << "UnrolledCost: " << UnrolledCost << ", "
6770b57cec5SDimitry Andric                     << "RolledDynamicCost: " << RolledDynamicCost << "\n");
678fe6060f1SDimitry Andric   return {{unsigned(*UnrolledCost.getValue()),
679fe6060f1SDimitry Andric            unsigned(*RolledDynamicCost.getValue())}};
6800b57cec5SDimitry Andric }
6810b57cec5SDimitry Andric 
6825f757f3fSDimitry Andric UnrollCostEstimator::UnrollCostEstimator(
6835f757f3fSDimitry Andric     const Loop *L, const TargetTransformInfo &TTI,
6840b57cec5SDimitry Andric     const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
6850b57cec5SDimitry Andric   CodeMetrics Metrics;
6860b57cec5SDimitry Andric   for (BasicBlock *BB : L->blocks())
687*0fca6ea1SDimitry Andric     Metrics.analyzeBasicBlock(BB, TTI, EphValues, /* PrepareForLTO= */ false,
688*0fca6ea1SDimitry Andric                               L);
6895f757f3fSDimitry Andric   NumInlineCandidates = Metrics.NumInlineCandidates;
6900b57cec5SDimitry Andric   NotDuplicatable = Metrics.notDuplicatable;
691*0fca6ea1SDimitry Andric   Convergence = Metrics.Convergence;
6925f757f3fSDimitry Andric   LoopSize = Metrics.NumInsts;
693*0fca6ea1SDimitry Andric   ConvergenceAllowsRuntime =
694*0fca6ea1SDimitry Andric       Metrics.Convergence != ConvergenceKind::Uncontrolled &&
695*0fca6ea1SDimitry Andric       !getLoopConvergenceHeart(L);
6960b57cec5SDimitry Andric 
6970b57cec5SDimitry Andric   // Don't allow an estimate of size zero.  This would allows unrolling of loops
6980b57cec5SDimitry Andric   // with huge iteration counts, which is a compile time problem even if it's
6990b57cec5SDimitry Andric   // not a problem for code quality. Also, the code using this size may assume
7000b57cec5SDimitry Andric   // that each loop has at least three instructions (likely a conditional
7010b57cec5SDimitry Andric   // branch, a comparison feeding that branch, and some kind of loop increment
7020b57cec5SDimitry Andric   // feeding that comparison instruction).
703bdd1243dSDimitry Andric   if (LoopSize.isValid() && LoopSize < BEInsns + 1)
70481ad6265SDimitry Andric     // This is an open coded max() on InstructionCost
70581ad6265SDimitry Andric     LoopSize = BEInsns + 1;
7065f757f3fSDimitry Andric }
7070b57cec5SDimitry Andric 
708*0fca6ea1SDimitry Andric bool UnrollCostEstimator::canUnroll() const {
709*0fca6ea1SDimitry Andric   switch (Convergence) {
710*0fca6ea1SDimitry Andric   case ConvergenceKind::ExtendedLoop:
711*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "  Convergence prevents unrolling.\n");
712*0fca6ea1SDimitry Andric     return false;
713*0fca6ea1SDimitry Andric   default:
714*0fca6ea1SDimitry Andric     break;
715*0fca6ea1SDimitry Andric   }
716*0fca6ea1SDimitry Andric   if (!LoopSize.isValid()) {
717*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "  Invalid loop size prevents unrolling.\n");
718*0fca6ea1SDimitry Andric     return false;
719*0fca6ea1SDimitry Andric   }
720*0fca6ea1SDimitry Andric   if (NotDuplicatable) {
721*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "  Non-duplicatable blocks prevent unrolling.\n");
722*0fca6ea1SDimitry Andric     return false;
723*0fca6ea1SDimitry Andric   }
724*0fca6ea1SDimitry Andric   return true;
725*0fca6ea1SDimitry Andric }
726*0fca6ea1SDimitry Andric 
7275f757f3fSDimitry Andric uint64_t UnrollCostEstimator::getUnrolledLoopSize(
7285f757f3fSDimitry Andric     const TargetTransformInfo::UnrollingPreferences &UP,
7295f757f3fSDimitry Andric     unsigned CountOverwrite) const {
7305f757f3fSDimitry Andric   unsigned LS = *LoopSize.getValue();
7315f757f3fSDimitry Andric   assert(LS >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
7325f757f3fSDimitry Andric   if (CountOverwrite)
7335f757f3fSDimitry Andric     return static_cast<uint64_t>(LS - UP.BEInsns) * CountOverwrite + UP.BEInsns;
7345f757f3fSDimitry Andric   else
7355f757f3fSDimitry Andric     return static_cast<uint64_t>(LS - UP.BEInsns) * UP.Count + UP.BEInsns;
7360b57cec5SDimitry Andric }
7370b57cec5SDimitry Andric 
7380b57cec5SDimitry Andric // Returns the loop hint metadata node with the given name (for example,
7390b57cec5SDimitry Andric // "llvm.loop.unroll.count").  If no such metadata node exists, then nullptr is
7400b57cec5SDimitry Andric // returned.
7415ffd83dbSDimitry Andric static MDNode *getUnrollMetadataForLoop(const Loop *L, StringRef Name) {
7420b57cec5SDimitry Andric   if (MDNode *LoopID = L->getLoopID())
7430b57cec5SDimitry Andric     return GetUnrollMetadata(LoopID, Name);
7440b57cec5SDimitry Andric   return nullptr;
7450b57cec5SDimitry Andric }
7460b57cec5SDimitry Andric 
7470b57cec5SDimitry Andric // Returns true if the loop has an unroll(full) pragma.
7485ffd83dbSDimitry Andric static bool hasUnrollFullPragma(const Loop *L) {
7495ffd83dbSDimitry Andric   return getUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
7500b57cec5SDimitry Andric }
7510b57cec5SDimitry Andric 
7520b57cec5SDimitry Andric // Returns true if the loop has an unroll(enable) pragma. This metadata is used
7530b57cec5SDimitry Andric // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
7545ffd83dbSDimitry Andric static bool hasUnrollEnablePragma(const Loop *L) {
7555ffd83dbSDimitry Andric   return getUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
7560b57cec5SDimitry Andric }
7570b57cec5SDimitry Andric 
7580b57cec5SDimitry Andric // Returns true if the loop has an runtime unroll(disable) pragma.
7595ffd83dbSDimitry Andric static bool hasRuntimeUnrollDisablePragma(const Loop *L) {
7605ffd83dbSDimitry Andric   return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
7610b57cec5SDimitry Andric }
7620b57cec5SDimitry Andric 
7630b57cec5SDimitry Andric // If loop has an unroll_count pragma return the (necessarily
7640b57cec5SDimitry Andric // positive) value from the pragma.  Otherwise return 0.
7655ffd83dbSDimitry Andric static unsigned unrollCountPragmaValue(const Loop *L) {
7665ffd83dbSDimitry Andric   MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
7670b57cec5SDimitry Andric   if (MD) {
7680b57cec5SDimitry Andric     assert(MD->getNumOperands() == 2 &&
7690b57cec5SDimitry Andric            "Unroll count hint metadata should have two operands.");
7700b57cec5SDimitry Andric     unsigned Count =
7710b57cec5SDimitry Andric         mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
7720b57cec5SDimitry Andric     assert(Count >= 1 && "Unroll count must be positive.");
7730b57cec5SDimitry Andric     return Count;
7740b57cec5SDimitry Andric   }
7750b57cec5SDimitry Andric   return 0;
7760b57cec5SDimitry Andric }
7770b57cec5SDimitry Andric 
7780b57cec5SDimitry Andric // Computes the boosting factor for complete unrolling.
7790b57cec5SDimitry Andric // If fully unrolling the loop would save a lot of RolledDynamicCost, it would
7800b57cec5SDimitry Andric // be beneficial to fully unroll the loop even if unrolledcost is large. We
7810b57cec5SDimitry Andric // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
7820b57cec5SDimitry Andric // the unroll threshold.
7830b57cec5SDimitry Andric static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
7840b57cec5SDimitry Andric                                             unsigned MaxPercentThresholdBoost) {
7850b57cec5SDimitry Andric   if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
7860b57cec5SDimitry Andric     return 100;
7870b57cec5SDimitry Andric   else if (Cost.UnrolledCost != 0)
7880b57cec5SDimitry Andric     // The boosting factor is RolledDynamicCost / UnrolledCost
7890b57cec5SDimitry Andric     return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
7900b57cec5SDimitry Andric                     MaxPercentThresholdBoost);
7910b57cec5SDimitry Andric   else
7920b57cec5SDimitry Andric     return MaxPercentThresholdBoost;
7930b57cec5SDimitry Andric }
7940b57cec5SDimitry Andric 
795bdd1243dSDimitry Andric static std::optional<unsigned>
796349cc55cSDimitry Andric shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
797349cc55cSDimitry Andric                    const unsigned TripMultiple, const unsigned TripCount,
7985f757f3fSDimitry Andric                    unsigned MaxTripCount, const UnrollCostEstimator UCE,
799349cc55cSDimitry Andric                    const TargetTransformInfo::UnrollingPreferences &UP) {
800349cc55cSDimitry Andric 
801349cc55cSDimitry Andric   // Using unroll pragma
802349cc55cSDimitry Andric   // 1st priority is unroll count set by "unroll-count" option.
803349cc55cSDimitry Andric 
804349cc55cSDimitry Andric   if (PInfo.UserUnrollCount) {
805349cc55cSDimitry Andric     if (UP.AllowRemainder &&
806349cc55cSDimitry Andric         UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
807349cc55cSDimitry Andric       return (unsigned)UnrollCount;
808349cc55cSDimitry Andric   }
809349cc55cSDimitry Andric 
810349cc55cSDimitry Andric   // 2nd priority is unroll count set by pragma.
811349cc55cSDimitry Andric   if (PInfo.PragmaCount > 0) {
81281ad6265SDimitry Andric     if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)))
813349cc55cSDimitry Andric       return PInfo.PragmaCount;
814349cc55cSDimitry Andric   }
815349cc55cSDimitry Andric 
816*0fca6ea1SDimitry Andric   if (PInfo.PragmaFullUnroll && TripCount != 0) {
817*0fca6ea1SDimitry Andric     // Certain cases with UBSAN can cause trip count to be calculated as
818*0fca6ea1SDimitry Andric     // INT_MAX, Block full unrolling at a reasonable limit so that the compiler
819*0fca6ea1SDimitry Andric     // doesn't hang trying to unroll the loop. See PR77842
820*0fca6ea1SDimitry Andric     if (TripCount > PragmaUnrollFullMaxIterations) {
821*0fca6ea1SDimitry Andric       LLVM_DEBUG(dbgs() << "Won't unroll; trip count is too large\n");
822*0fca6ea1SDimitry Andric       return std::nullopt;
823*0fca6ea1SDimitry Andric     }
824*0fca6ea1SDimitry Andric 
825349cc55cSDimitry Andric     return TripCount;
826*0fca6ea1SDimitry Andric   }
82781ad6265SDimitry Andric 
8285f757f3fSDimitry Andric   if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
829cb14a3feSDimitry Andric       MaxTripCount <= UP.MaxUpperBound)
8305f757f3fSDimitry Andric     return MaxTripCount;
8315f757f3fSDimitry Andric 
832349cc55cSDimitry Andric   // if didn't return until here, should continue to other priorties
833bdd1243dSDimitry Andric   return std::nullopt;
834349cc55cSDimitry Andric }
835349cc55cSDimitry Andric 
836bdd1243dSDimitry Andric static std::optional<unsigned> shouldFullUnroll(
837349cc55cSDimitry Andric     Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
838349cc55cSDimitry Andric     ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
839349cc55cSDimitry Andric     const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
840349cc55cSDimitry Andric     const TargetTransformInfo::UnrollingPreferences &UP) {
8414824e7fdSDimitry Andric   assert(FullUnrollTripCount && "should be non-zero!");
842349cc55cSDimitry Andric 
8434824e7fdSDimitry Andric   if (FullUnrollTripCount > UP.FullUnrollMaxCount)
844bdd1243dSDimitry Andric     return std::nullopt;
8454824e7fdSDimitry Andric 
846349cc55cSDimitry Andric   // When computing the unrolled size, note that BEInsns are not replicated
847349cc55cSDimitry Andric   // like the rest of the loop body.
8484824e7fdSDimitry Andric   if (UCE.getUnrolledLoopSize(UP) < UP.Threshold)
849349cc55cSDimitry Andric     return FullUnrollTripCount;
850349cc55cSDimitry Andric 
851349cc55cSDimitry Andric   // The loop isn't that small, but we still can fully unroll it if that
852349cc55cSDimitry Andric   // helps to remove a significant number of instructions.
853349cc55cSDimitry Andric   // To check that, run additional analysis on the loop.
854bdd1243dSDimitry Andric   if (std::optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
855349cc55cSDimitry Andric           L, FullUnrollTripCount, DT, SE, EphValues, TTI,
856349cc55cSDimitry Andric           UP.Threshold * UP.MaxPercentThresholdBoost / 100,
857349cc55cSDimitry Andric           UP.MaxIterationsCountToAnalyze)) {
858349cc55cSDimitry Andric     unsigned Boost =
859349cc55cSDimitry Andric       getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
8604824e7fdSDimitry Andric     if (Cost->UnrolledCost < UP.Threshold * Boost / 100)
861349cc55cSDimitry Andric       return FullUnrollTripCount;
862349cc55cSDimitry Andric   }
863bdd1243dSDimitry Andric   return std::nullopt;
864349cc55cSDimitry Andric }
865349cc55cSDimitry Andric 
866bdd1243dSDimitry Andric static std::optional<unsigned>
867349cc55cSDimitry Andric shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
868349cc55cSDimitry Andric                     const UnrollCostEstimator UCE,
869349cc55cSDimitry Andric                     const TargetTransformInfo::UnrollingPreferences &UP) {
870349cc55cSDimitry Andric 
8714824e7fdSDimitry Andric   if (!TripCount)
872bdd1243dSDimitry Andric     return std::nullopt;
8734824e7fdSDimitry Andric 
874349cc55cSDimitry Andric   if (!UP.Partial) {
875349cc55cSDimitry Andric     LLVM_DEBUG(dbgs() << "  will not try to unroll partially because "
876349cc55cSDimitry Andric                << "-unroll-allow-partial not given\n");
8774824e7fdSDimitry Andric     return 0;
878349cc55cSDimitry Andric   }
8794824e7fdSDimitry Andric   unsigned count = UP.Count;
880349cc55cSDimitry Andric   if (count == 0)
881349cc55cSDimitry Andric     count = TripCount;
882349cc55cSDimitry Andric   if (UP.PartialThreshold != NoThreshold) {
883349cc55cSDimitry Andric     // Reduce unroll count to be modulo of TripCount for partial unrolling.
884349cc55cSDimitry Andric     if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
885349cc55cSDimitry Andric       count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
886349cc55cSDimitry Andric         (LoopSize - UP.BEInsns);
887349cc55cSDimitry Andric     if (count > UP.MaxCount)
888349cc55cSDimitry Andric       count = UP.MaxCount;
889349cc55cSDimitry Andric     while (count != 0 && TripCount % count != 0)
890349cc55cSDimitry Andric       count--;
891349cc55cSDimitry Andric     if (UP.AllowRemainder && count <= 1) {
892349cc55cSDimitry Andric       // If there is no Count that is modulo of TripCount, set Count to
893349cc55cSDimitry Andric       // largest power-of-two factor that satisfies the threshold limit.
894349cc55cSDimitry Andric       // As we'll create fixup loop, do the type of unrolling only if
895349cc55cSDimitry Andric       // remainder loop is allowed.
896349cc55cSDimitry Andric       count = UP.DefaultUnrollRuntimeCount;
897349cc55cSDimitry Andric       while (count != 0 &&
898349cc55cSDimitry Andric              UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
899349cc55cSDimitry Andric         count >>= 1;
900349cc55cSDimitry Andric     }
901349cc55cSDimitry Andric     if (count < 2) {
902349cc55cSDimitry Andric       count = 0;
903349cc55cSDimitry Andric     }
904349cc55cSDimitry Andric   } else {
905349cc55cSDimitry Andric     count = TripCount;
906349cc55cSDimitry Andric   }
907349cc55cSDimitry Andric   if (count > UP.MaxCount)
908349cc55cSDimitry Andric     count = UP.MaxCount;
909349cc55cSDimitry Andric 
910349cc55cSDimitry Andric   LLVM_DEBUG(dbgs() << "  partially unrolling with count: " << count << "\n");
911349cc55cSDimitry Andric 
912349cc55cSDimitry Andric   return count;
913349cc55cSDimitry Andric }
9140b57cec5SDimitry Andric // Returns true if unroll count was set explicitly.
9150b57cec5SDimitry Andric // Calculates unroll count and writes it to UP.Count.
9160b57cec5SDimitry Andric // Unless IgnoreUser is true, will also use metadata and command-line options
9170b57cec5SDimitry Andric // that are specific to to the LoopUnroll pass (which, for instance, are
9180b57cec5SDimitry Andric // irrelevant for the LoopUnrollAndJam pass).
9190b57cec5SDimitry Andric // FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
9200b57cec5SDimitry Andric // many LoopUnroll-specific options. The shared functionality should be
9210b57cec5SDimitry Andric // refactored into it own function.
9220b57cec5SDimitry Andric bool llvm::computeUnrollCount(
9230b57cec5SDimitry Andric     Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
9245f757f3fSDimitry Andric     AssumptionCache *AC, ScalarEvolution &SE,
9255f757f3fSDimitry Andric     const SmallPtrSetImpl<const Value *> &EphValues,
926fe6060f1SDimitry Andric     OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount,
9275f757f3fSDimitry Andric     bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE,
9285ffd83dbSDimitry Andric     TargetTransformInfo::UnrollingPreferences &UP,
9295ffd83dbSDimitry Andric     TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
9300b57cec5SDimitry Andric 
9315f757f3fSDimitry Andric   unsigned LoopSize = UCE.getRolledLoopSize();
932fe6060f1SDimitry Andric 
933349cc55cSDimitry Andric   const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
934349cc55cSDimitry Andric   const bool PragmaFullUnroll = hasUnrollFullPragma(L);
935349cc55cSDimitry Andric   const unsigned PragmaCount = unrollCountPragmaValue(L);
936349cc55cSDimitry Andric   const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
937349cc55cSDimitry Andric 
938349cc55cSDimitry Andric   const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
939349cc55cSDimitry Andric                               PragmaEnableUnroll || UserUnrollCount;
940349cc55cSDimitry Andric 
941349cc55cSDimitry Andric   PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
942349cc55cSDimitry Andric                    PragmaEnableUnroll);
943fe6060f1SDimitry Andric   // Use an explicit peel count that has been specified for testing. In this
944fe6060f1SDimitry Andric   // case it's not permitted to also specify an explicit unroll count.
945fe6060f1SDimitry Andric   if (PP.PeelCount) {
946fe6060f1SDimitry Andric     if (UnrollCount.getNumOccurrences() > 0) {
947fe6060f1SDimitry Andric       report_fatal_error("Cannot specify both explicit peel count and "
94881ad6265SDimitry Andric                          "explicit unroll count", /*GenCrashDiag=*/false);
949fe6060f1SDimitry Andric     }
950fe6060f1SDimitry Andric     UP.Count = 1;
951fe6060f1SDimitry Andric     UP.Runtime = false;
952fe6060f1SDimitry Andric     return true;
953fe6060f1SDimitry Andric   }
9540b57cec5SDimitry Andric   // Check for explicit Count.
9550b57cec5SDimitry Andric   // 1st priority is unroll count set by "unroll-count" option.
9560b57cec5SDimitry Andric   // 2nd priority is unroll count set by pragma.
9574824e7fdSDimitry Andric   if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,
9585f757f3fSDimitry Andric                                              MaxTripCount, UCE, UP)) {
959349cc55cSDimitry Andric     UP.Count = *UnrollFactor;
960349cc55cSDimitry Andric 
961349cc55cSDimitry Andric     if (UserUnrollCount || (PragmaCount > 0)) {
9620b57cec5SDimitry Andric       UP.AllowExpensiveTripCount = true;
9630b57cec5SDimitry Andric       UP.Force = true;
9640b57cec5SDimitry Andric     }
965349cc55cSDimitry Andric     UP.Runtime |= (PragmaCount > 0);
966349cc55cSDimitry Andric     return ExplicitUnroll;
967349cc55cSDimitry Andric   } else {
9680b57cec5SDimitry Andric     if (ExplicitUnroll && TripCount != 0) {
9690b57cec5SDimitry Andric       // If the loop has an unrolling pragma, we want to be more aggressive with
9700b57cec5SDimitry Andric       // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
9710b57cec5SDimitry Andric       // value which is larger than the default limits.
9720b57cec5SDimitry Andric       UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
9730b57cec5SDimitry Andric       UP.PartialThreshold =
9740b57cec5SDimitry Andric           std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
9750b57cec5SDimitry Andric     }
976349cc55cSDimitry Andric   }
9770b57cec5SDimitry Andric 
9784824e7fdSDimitry Andric   // 3rd priority is exact full unrolling.  This will eliminate all copies
9794824e7fdSDimitry Andric   // of some exit test.
9804824e7fdSDimitry Andric   UP.Count = 0;
9814824e7fdSDimitry Andric   if (TripCount) {
9824824e7fdSDimitry Andric     UP.Count = TripCount;
9834824e7fdSDimitry Andric     if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
9844824e7fdSDimitry Andric                                              TripCount, UCE, UP)) {
9854824e7fdSDimitry Andric       UP.Count = *UnrollFactor;
9864824e7fdSDimitry Andric       UseUpperBound = false;
9874824e7fdSDimitry Andric       return ExplicitUnroll;
9884824e7fdSDimitry Andric     }
9894824e7fdSDimitry Andric   }
9908bcb0991SDimitry Andric 
9914824e7fdSDimitry Andric   // 4th priority is bounded unrolling.
9928bcb0991SDimitry Andric   // We can unroll by the upper bound amount if it's generally allowed or if
9938bcb0991SDimitry Andric   // we know that the loop is executed either the upper bound or zero times.
9948bcb0991SDimitry Andric   // (MaxOrZero unrolling keeps only the first loop test, so the number of
9958bcb0991SDimitry Andric   // loop tests remains the same compared to the non-unrolled version, whereas
9968bcb0991SDimitry Andric   // the generic upper bound unrolling keeps all but the last loop test so the
9978bcb0991SDimitry Andric   // number of loop tests goes up which may end up being worse on targets with
9988bcb0991SDimitry Andric   // constrained branch predictor resources so is controlled by an option.)
9998bcb0991SDimitry Andric   // In addition we only unroll small upper bounds.
10004824e7fdSDimitry Andric   // Note that the cost of bounded unrolling is always strictly greater than
10014824e7fdSDimitry Andric   // cost of exact full unrolling.  As such, if we have an exact count and
10024824e7fdSDimitry Andric   // found it unprofitable, we'll never chose to bounded unroll.
10034824e7fdSDimitry Andric   if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
1004cb14a3feSDimitry Andric       MaxTripCount <= UP.MaxUpperBound) {
10054824e7fdSDimitry Andric     UP.Count = MaxTripCount;
10064824e7fdSDimitry Andric     if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
10074824e7fdSDimitry Andric                                              MaxTripCount, UCE, UP)) {
1008349cc55cSDimitry Andric       UP.Count = *UnrollFactor;
10094824e7fdSDimitry Andric       UseUpperBound = true;
10100b57cec5SDimitry Andric       return ExplicitUnroll;
10114824e7fdSDimitry Andric     }
10120b57cec5SDimitry Andric   }
10130b57cec5SDimitry Andric 
10144824e7fdSDimitry Andric   // 5th priority is loop peeling.
1015bdd1243dSDimitry Andric   computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold);
10165ffd83dbSDimitry Andric   if (PP.PeelCount) {
10170b57cec5SDimitry Andric     UP.Runtime = false;
10180b57cec5SDimitry Andric     UP.Count = 1;
10190b57cec5SDimitry Andric     return ExplicitUnroll;
10200b57cec5SDimitry Andric   }
10210b57cec5SDimitry Andric 
1022349cc55cSDimitry Andric   // Before starting partial unrolling, set up.partial to true,
1023349cc55cSDimitry Andric   // if user explicitly asked  for unrolling
1024349cc55cSDimitry Andric   if (TripCount)
1025349cc55cSDimitry Andric     UP.Partial |= ExplicitUnroll;
1026349cc55cSDimitry Andric 
10274824e7fdSDimitry Andric   // 6th priority is partial unrolling.
10280b57cec5SDimitry Andric   // Try partial unroll only when TripCount could be statically calculated.
10294824e7fdSDimitry Andric   if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) {
1030349cc55cSDimitry Andric     UP.Count = *UnrollFactor;
1031349cc55cSDimitry Andric 
10320b57cec5SDimitry Andric     if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
10330b57cec5SDimitry Andric         UP.Count != TripCount)
10340b57cec5SDimitry Andric       ORE->emit([&]() {
10350b57cec5SDimitry Andric         return OptimizationRemarkMissed(DEBUG_TYPE,
10360b57cec5SDimitry Andric                                         "FullUnrollAsDirectedTooLarge",
10370b57cec5SDimitry Andric                                         L->getStartLoc(), L->getHeader())
10380b57cec5SDimitry Andric                << "Unable to fully unroll loop as directed by unroll pragma "
10390b57cec5SDimitry Andric                   "because "
10400b57cec5SDimitry Andric                   "unrolled size is too large.";
10410b57cec5SDimitry Andric       });
1042349cc55cSDimitry Andric 
1043349cc55cSDimitry Andric     if (UP.PartialThreshold != NoThreshold) {
1044349cc55cSDimitry Andric       if (UP.Count == 0) {
1045349cc55cSDimitry Andric         if (PragmaEnableUnroll)
1046349cc55cSDimitry Andric           ORE->emit([&]() {
1047349cc55cSDimitry Andric             return OptimizationRemarkMissed(DEBUG_TYPE,
1048349cc55cSDimitry Andric                                             "UnrollAsDirectedTooLarge",
1049349cc55cSDimitry Andric                                             L->getStartLoc(), L->getHeader())
1050349cc55cSDimitry Andric                    << "Unable to unroll loop as directed by unroll(enable) "
1051349cc55cSDimitry Andric                       "pragma "
1052349cc55cSDimitry Andric                       "because unrolled size is too large.";
1053349cc55cSDimitry Andric           });
1054349cc55cSDimitry Andric       }
1055349cc55cSDimitry Andric     }
10560b57cec5SDimitry Andric     return ExplicitUnroll;
10570b57cec5SDimitry Andric   }
10580b57cec5SDimitry Andric   assert(TripCount == 0 &&
10590b57cec5SDimitry Andric          "All cases when TripCount is constant should be covered here.");
10600b57cec5SDimitry Andric   if (PragmaFullUnroll)
10610b57cec5SDimitry Andric     ORE->emit([&]() {
10620b57cec5SDimitry Andric       return OptimizationRemarkMissed(
10630b57cec5SDimitry Andric                  DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
10640b57cec5SDimitry Andric                  L->getStartLoc(), L->getHeader())
10650b57cec5SDimitry Andric              << "Unable to fully unroll loop as directed by unroll(full) "
10660b57cec5SDimitry Andric                 "pragma "
10670b57cec5SDimitry Andric                 "because loop has a runtime trip count.";
10680b57cec5SDimitry Andric     });
10690b57cec5SDimitry Andric 
10704824e7fdSDimitry Andric   // 7th priority is runtime unrolling.
10710b57cec5SDimitry Andric   // Don't unroll a runtime trip count loop when it is disabled.
10725ffd83dbSDimitry Andric   if (hasRuntimeUnrollDisablePragma(L)) {
10730b57cec5SDimitry Andric     UP.Count = 0;
10740b57cec5SDimitry Andric     return false;
10750b57cec5SDimitry Andric   }
10760b57cec5SDimitry Andric 
10778bcb0991SDimitry Andric   // Don't unroll a small upper bound loop unless user or TTI asked to do so.
1078cb14a3feSDimitry Andric   if (MaxTripCount && !UP.Force && MaxTripCount < UP.MaxUpperBound) {
10798bcb0991SDimitry Andric     UP.Count = 0;
10808bcb0991SDimitry Andric     return false;
10818bcb0991SDimitry Andric   }
10828bcb0991SDimitry Andric 
10830b57cec5SDimitry Andric   // Check if the runtime trip count is too small when profile is available.
10840b57cec5SDimitry Andric   if (L->getHeader()->getParent()->hasProfileData()) {
10850b57cec5SDimitry Andric     if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
10860b57cec5SDimitry Andric       if (*ProfileTripCount < FlatLoopTripCountThreshold)
10870b57cec5SDimitry Andric         return false;
10880b57cec5SDimitry Andric       else
10890b57cec5SDimitry Andric         UP.AllowExpensiveTripCount = true;
10900b57cec5SDimitry Andric     }
10910b57cec5SDimitry Andric   }
10920b57cec5SDimitry Andric   UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
10930b57cec5SDimitry Andric   if (!UP.Runtime) {
10940b57cec5SDimitry Andric     LLVM_DEBUG(
10950b57cec5SDimitry Andric         dbgs() << "  will not try to unroll loop with runtime trip count "
10960b57cec5SDimitry Andric                << "-unroll-runtime not given\n");
10970b57cec5SDimitry Andric     UP.Count = 0;
10980b57cec5SDimitry Andric     return false;
10990b57cec5SDimitry Andric   }
11000b57cec5SDimitry Andric   if (UP.Count == 0)
11010b57cec5SDimitry Andric     UP.Count = UP.DefaultUnrollRuntimeCount;
11020b57cec5SDimitry Andric 
11030b57cec5SDimitry Andric   // Reduce unroll count to be the largest power-of-two factor of
11040b57cec5SDimitry Andric   // the original count which satisfies the threshold limit.
11050b57cec5SDimitry Andric   while (UP.Count != 0 &&
1106fe6060f1SDimitry Andric          UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
11070b57cec5SDimitry Andric     UP.Count >>= 1;
11080b57cec5SDimitry Andric 
11090b57cec5SDimitry Andric #ifndef NDEBUG
11100b57cec5SDimitry Andric   unsigned OrigCount = UP.Count;
11110b57cec5SDimitry Andric #endif
11120b57cec5SDimitry Andric 
11130b57cec5SDimitry Andric   if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
11140b57cec5SDimitry Andric     while (UP.Count != 0 && TripMultiple % UP.Count != 0)
11150b57cec5SDimitry Andric       UP.Count >>= 1;
11160b57cec5SDimitry Andric     LLVM_DEBUG(
11170b57cec5SDimitry Andric         dbgs() << "Remainder loop is restricted (that could architecture "
11180b57cec5SDimitry Andric                   "specific or because the loop contains a convergent "
11190b57cec5SDimitry Andric                   "instruction), so unroll count must divide the trip "
11200b57cec5SDimitry Andric                   "multiple, "
11210b57cec5SDimitry Andric                << TripMultiple << ".  Reducing unroll count from " << OrigCount
11220b57cec5SDimitry Andric                << " to " << UP.Count << ".\n");
11230b57cec5SDimitry Andric 
11240b57cec5SDimitry Andric     using namespace ore;
11250b57cec5SDimitry Andric 
1126349cc55cSDimitry Andric     if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder)
11270b57cec5SDimitry Andric       ORE->emit([&]() {
11280b57cec5SDimitry Andric         return OptimizationRemarkMissed(DEBUG_TYPE,
11290b57cec5SDimitry Andric                                         "DifferentUnrollCountFromDirected",
11300b57cec5SDimitry Andric                                         L->getStartLoc(), L->getHeader())
11310b57cec5SDimitry Andric                << "Unable to unroll loop the number of times directed by "
11320b57cec5SDimitry Andric                   "unroll_count pragma because remainder loop is restricted "
11330b57cec5SDimitry Andric                   "(that could architecture specific or because the loop "
11340b57cec5SDimitry Andric                   "contains a convergent instruction) and so must have an "
11350b57cec5SDimitry Andric                   "unroll "
11360b57cec5SDimitry Andric                   "count that divides the loop trip multiple of "
11370b57cec5SDimitry Andric                << NV("TripMultiple", TripMultiple) << ".  Unrolling instead "
11380b57cec5SDimitry Andric                << NV("UnrollCount", UP.Count) << " time(s).";
11390b57cec5SDimitry Andric       });
11400b57cec5SDimitry Andric   }
11410b57cec5SDimitry Andric 
11420b57cec5SDimitry Andric   if (UP.Count > UP.MaxCount)
11430b57cec5SDimitry Andric     UP.Count = UP.MaxCount;
11448bcb0991SDimitry Andric 
11458bcb0991SDimitry Andric   if (MaxTripCount && UP.Count > MaxTripCount)
11468bcb0991SDimitry Andric     UP.Count = MaxTripCount;
11478bcb0991SDimitry Andric 
11488bcb0991SDimitry Andric   LLVM_DEBUG(dbgs() << "  runtime unrolling with count: " << UP.Count
11490b57cec5SDimitry Andric                     << "\n");
11500b57cec5SDimitry Andric   if (UP.Count < 2)
11510b57cec5SDimitry Andric     UP.Count = 0;
11520b57cec5SDimitry Andric   return ExplicitUnroll;
11530b57cec5SDimitry Andric }
11540b57cec5SDimitry Andric 
1155bdd1243dSDimitry Andric static LoopUnrollResult
1156bdd1243dSDimitry Andric tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
11570b57cec5SDimitry Andric                 const TargetTransformInfo &TTI, AssumptionCache &AC,
11588bcb0991SDimitry Andric                 OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
11598bcb0991SDimitry Andric                 ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
116006c3fb27SDimitry Andric                 bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV,
1161bdd1243dSDimitry Andric                 std::optional<unsigned> ProvidedCount,
1162bdd1243dSDimitry Andric                 std::optional<unsigned> ProvidedThreshold,
1163bdd1243dSDimitry Andric                 std::optional<bool> ProvidedAllowPartial,
1164bdd1243dSDimitry Andric                 std::optional<bool> ProvidedRuntime,
1165bdd1243dSDimitry Andric                 std::optional<bool> ProvidedUpperBound,
1166bdd1243dSDimitry Andric                 std::optional<bool> ProvidedAllowPeeling,
1167bdd1243dSDimitry Andric                 std::optional<bool> ProvidedAllowProfileBasedPeeling,
1168*0fca6ea1SDimitry Andric                 std::optional<unsigned> ProvidedFullUnrollMaxCount,
1169*0fca6ea1SDimitry Andric                 AAResults *AA = nullptr) {
117006c3fb27SDimitry Andric 
11710b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Loop Unroll: F["
11720b57cec5SDimitry Andric                     << L->getHeader()->getParent()->getName() << "] Loop %"
11730b57cec5SDimitry Andric                     << L->getHeader()->getName() << "\n");
11740b57cec5SDimitry Andric   TransformationMode TM = hasUnrollTransformation(L);
11750b57cec5SDimitry Andric   if (TM & TM_Disable)
11760b57cec5SDimitry Andric     return LoopUnrollResult::Unmodified;
11770eae32dcSDimitry Andric 
11780eae32dcSDimitry Andric   // If this loop isn't forced to be unrolled, avoid unrolling it when the
11790eae32dcSDimitry Andric   // parent loop has an explicit unroll-and-jam pragma. This is to prevent
11800eae32dcSDimitry Andric   // automatic unrolling from interfering with the user requested
11810eae32dcSDimitry Andric   // transformation.
11820eae32dcSDimitry Andric   Loop *ParentL = L->getParentLoop();
118304eeddc0SDimitry Andric   if (ParentL != nullptr &&
11840eae32dcSDimitry Andric       hasUnrollAndJamTransformation(ParentL) == TM_ForcedByUser &&
11850eae32dcSDimitry Andric       hasUnrollTransformation(L) != TM_ForcedByUser) {
11860eae32dcSDimitry Andric     LLVM_DEBUG(dbgs() << "Not unrolling loop since parent loop has"
11870eae32dcSDimitry Andric                       << " llvm.loop.unroll_and_jam.\n");
11880eae32dcSDimitry Andric     return LoopUnrollResult::Unmodified;
11890eae32dcSDimitry Andric   }
11900eae32dcSDimitry Andric 
11910eae32dcSDimitry Andric   // If this loop isn't forced to be unrolled, avoid unrolling it when the
11920eae32dcSDimitry Andric   // loop has an explicit unroll-and-jam pragma. This is to prevent automatic
11930eae32dcSDimitry Andric   // unrolling from interfering with the user requested transformation.
11940eae32dcSDimitry Andric   if (hasUnrollAndJamTransformation(L) == TM_ForcedByUser &&
11950eae32dcSDimitry Andric       hasUnrollTransformation(L) != TM_ForcedByUser) {
11960eae32dcSDimitry Andric     LLVM_DEBUG(
11970eae32dcSDimitry Andric         dbgs()
11980eae32dcSDimitry Andric         << "  Not unrolling loop since it has llvm.loop.unroll_and_jam.\n");
11990eae32dcSDimitry Andric     return LoopUnrollResult::Unmodified;
12000eae32dcSDimitry Andric   }
12010eae32dcSDimitry Andric 
12020b57cec5SDimitry Andric   if (!L->isLoopSimplifyForm()) {
12030b57cec5SDimitry Andric     LLVM_DEBUG(
12040b57cec5SDimitry Andric         dbgs() << "  Not unrolling loop which is not in loop-simplify form.\n");
12050b57cec5SDimitry Andric     return LoopUnrollResult::Unmodified;
12060b57cec5SDimitry Andric   }
12070b57cec5SDimitry Andric 
1208e8d8bef9SDimitry Andric   // When automatic unrolling is disabled, do not unroll unless overridden for
12090b57cec5SDimitry Andric   // this loop.
12100b57cec5SDimitry Andric   if (OnlyWhenForced && !(TM & TM_Enable))
12110b57cec5SDimitry Andric     return LoopUnrollResult::Unmodified;
12120b57cec5SDimitry Andric 
12130b57cec5SDimitry Andric   bool OptForSize = L->getHeader()->getParent()->hasOptSize();
12140b57cec5SDimitry Andric   TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
1215349cc55cSDimitry Andric       L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
12160b57cec5SDimitry Andric       ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
12178bcb0991SDimitry Andric       ProvidedFullUnrollMaxCount);
12185ffd83dbSDimitry Andric   TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
1219e8d8bef9SDimitry Andric       L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true);
12200b57cec5SDimitry Andric 
12210b57cec5SDimitry Andric   // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
12220b57cec5SDimitry Andric   // as threshold later on.
12230b57cec5SDimitry Andric   if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
12240b57cec5SDimitry Andric       !OptForSize)
12250b57cec5SDimitry Andric     return LoopUnrollResult::Unmodified;
12260b57cec5SDimitry Andric 
12270b57cec5SDimitry Andric   SmallPtrSet<const Value *, 32> EphValues;
12280b57cec5SDimitry Andric   CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
12290b57cec5SDimitry Andric 
12305f757f3fSDimitry Andric   UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
12315f757f3fSDimitry Andric   if (!UCE.canUnroll()) {
1232*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "  Loop not considered unrollable.\n");
123381ad6265SDimitry Andric     return LoopUnrollResult::Unmodified;
123481ad6265SDimitry Andric   }
123581ad6265SDimitry Andric 
12365f757f3fSDimitry Andric   unsigned LoopSize = UCE.getRolledLoopSize();
12375f757f3fSDimitry Andric   LLVM_DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
12380b57cec5SDimitry Andric 
12398bcb0991SDimitry Andric   // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold
12408bcb0991SDimitry Andric   // later), to (fully) unroll loops, if it does not increase code size.
12410b57cec5SDimitry Andric   if (OptForSize)
12428bcb0991SDimitry Andric     UP.Threshold = std::max(UP.Threshold, LoopSize + 1);
12430b57cec5SDimitry Andric 
12445f757f3fSDimitry Andric   if (UCE.NumInlineCandidates != 0) {
12450b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
12460b57cec5SDimitry Andric     return LoopUnrollResult::Unmodified;
12470b57cec5SDimitry Andric   }
12480b57cec5SDimitry Andric 
1249fe6060f1SDimitry Andric   // Find the smallest exact trip count for any exit. This is an upper bound
1250fe6060f1SDimitry Andric   // on the loop trip count, but an exit at an earlier iteration is still
1251fe6060f1SDimitry Andric   // possible. An unroll by the smallest exact trip count guarantees that all
1252bdd1243dSDimitry Andric   // branches relating to at least one exit can be eliminated. This is unlike
1253fe6060f1SDimitry Andric   // the max trip count, which only guarantees that the backedge can be broken.
12540b57cec5SDimitry Andric   unsigned TripCount = 0;
12550b57cec5SDimitry Andric   unsigned TripMultiple = 1;
1256fe6060f1SDimitry Andric   SmallVector<BasicBlock *, 8> ExitingBlocks;
1257fe6060f1SDimitry Andric   L->getExitingBlocks(ExitingBlocks);
1258fe6060f1SDimitry Andric   for (BasicBlock *ExitingBlock : ExitingBlocks)
1259fe6060f1SDimitry Andric     if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock))
1260fe6060f1SDimitry Andric       if (!TripCount || TC < TripCount)
1261fe6060f1SDimitry Andric         TripCount = TripMultiple = TC;
1262fe6060f1SDimitry Andric 
1263fe6060f1SDimitry Andric   if (!TripCount) {
1264fe6060f1SDimitry Andric     // If no exact trip count is known, determine the trip multiple of either
1265fe6060f1SDimitry Andric     // the loop latch or the single exiting block.
1266fe6060f1SDimitry Andric     // TODO: Relax for multiple exits.
12670b57cec5SDimitry Andric     BasicBlock *ExitingBlock = L->getLoopLatch();
12680b57cec5SDimitry Andric     if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
12690b57cec5SDimitry Andric       ExitingBlock = L->getExitingBlock();
1270fe6060f1SDimitry Andric     if (ExitingBlock)
12710b57cec5SDimitry Andric       TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
12720b57cec5SDimitry Andric   }
12730b57cec5SDimitry Andric 
12740b57cec5SDimitry Andric   // If the loop contains a convergent operation, the prelude we'd add
12750b57cec5SDimitry Andric   // to do the first few instructions before we hit the unrolled loop
12760b57cec5SDimitry Andric   // is unsafe -- it adds a control-flow dependency to the convergent
1277e8d8bef9SDimitry Andric   // operation.  Therefore restrict remainder loop (try unrolling without).
12780b57cec5SDimitry Andric   //
1279*0fca6ea1SDimitry Andric   // TODO: This is somewhat conservative; we could allow the remainder if the
1280*0fca6ea1SDimitry Andric   // trip count is uniform.
1281*0fca6ea1SDimitry Andric   UP.AllowRemainder &= UCE.ConvergenceAllowsRuntime;
12820b57cec5SDimitry Andric 
12830b57cec5SDimitry Andric   // Try to find the trip count upper bound if we cannot find the exact trip
12840b57cec5SDimitry Andric   // count.
12858bcb0991SDimitry Andric   unsigned MaxTripCount = 0;
12860b57cec5SDimitry Andric   bool MaxOrZero = false;
12870b57cec5SDimitry Andric   if (!TripCount) {
12880b57cec5SDimitry Andric     MaxTripCount = SE.getSmallConstantMaxTripCount(L);
12890b57cec5SDimitry Andric     MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
12900b57cec5SDimitry Andric   }
12910b57cec5SDimitry Andric 
12920b57cec5SDimitry Andric   // computeUnrollCount() decides whether it is beneficial to use upper bound to
12930b57cec5SDimitry Andric   // fully unroll the loop.
12940b57cec5SDimitry Andric   bool UseUpperBound = false;
12950b57cec5SDimitry Andric   bool IsCountSetExplicitly = computeUnrollCount(
12965f757f3fSDimitry Andric       L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount,
12975f757f3fSDimitry Andric       MaxOrZero, TripMultiple, UCE, UP, PP, UseUpperBound);
12980b57cec5SDimitry Andric   if (!UP.Count)
12990b57cec5SDimitry Andric     return LoopUnrollResult::Unmodified;
1300fe6060f1SDimitry Andric 
1301*0fca6ea1SDimitry Andric   UP.Runtime &= UCE.ConvergenceAllowsRuntime;
1302*0fca6ea1SDimitry Andric 
1303fe6060f1SDimitry Andric   if (PP.PeelCount) {
1304fe6060f1SDimitry Andric     assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
1305fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
1306fe6060f1SDimitry Andric                       << " with iteration count " << PP.PeelCount << "!\n");
1307fe6060f1SDimitry Andric     ORE.emit([&]() {
1308fe6060f1SDimitry Andric       return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
1309fe6060f1SDimitry Andric                                 L->getHeader())
1310fe6060f1SDimitry Andric              << " peeled loop by " << ore::NV("PeelCount", PP.PeelCount)
1311fe6060f1SDimitry Andric              << " iterations";
1312fe6060f1SDimitry Andric     });
1313fe6060f1SDimitry Andric 
1314bdd1243dSDimitry Andric     ValueToValueMapTy VMap;
1315bdd1243dSDimitry Andric     if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA, VMap)) {
1316*0fca6ea1SDimitry Andric       simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI, nullptr);
1317fe6060f1SDimitry Andric       // If the loop was peeled, we already "used up" the profile information
1318fe6060f1SDimitry Andric       // we had, so we don't want to unroll or peel again.
1319fe6060f1SDimitry Andric       if (PP.PeelProfiledIterations)
1320fe6060f1SDimitry Andric         L->setLoopAlreadyUnrolled();
1321fe6060f1SDimitry Andric       return LoopUnrollResult::PartiallyUnrolled;
1322fe6060f1SDimitry Andric     }
1323fe6060f1SDimitry Andric     return LoopUnrollResult::Unmodified;
1324fe6060f1SDimitry Andric   }
1325fe6060f1SDimitry Andric 
132606c3fb27SDimitry Andric   // Do not attempt partial/runtime unrolling in FullLoopUnrolling
1327*0fca6ea1SDimitry Andric   if (OnlyFullUnroll && (UP.Count < TripCount || UP.Count < MaxTripCount)) {
132806c3fb27SDimitry Andric     LLVM_DEBUG(
132906c3fb27SDimitry Andric         dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");
133006c3fb27SDimitry Andric     return LoopUnrollResult::Unmodified;
133106c3fb27SDimitry Andric   }
133206c3fb27SDimitry Andric 
1333fe6060f1SDimitry Andric   // At this point, UP.Runtime indicates that run-time unrolling is allowed.
1334fe6060f1SDimitry Andric   // However, we only want to actually perform it if we don't know the trip
1335fe6060f1SDimitry Andric   // count and the unroll count doesn't divide the known trip multiple.
1336fe6060f1SDimitry Andric   // TODO: This decision should probably be pushed up into
1337fe6060f1SDimitry Andric   // computeUnrollCount().
1338fe6060f1SDimitry Andric   UP.Runtime &= TripCount == 0 && TripMultiple % UP.Count != 0;
13390b57cec5SDimitry Andric 
13400b57cec5SDimitry Andric   // Save loop properties before it is transformed.
13410b57cec5SDimitry Andric   MDNode *OrigLoopID = L->getLoopID();
13420b57cec5SDimitry Andric 
13430b57cec5SDimitry Andric   // Unroll the loop.
13440b57cec5SDimitry Andric   Loop *RemainderLoop = nullptr;
1345*0fca6ea1SDimitry Andric   UnrollLoopOptions ULO;
1346*0fca6ea1SDimitry Andric   ULO.Count = UP.Count;
1347*0fca6ea1SDimitry Andric   ULO.Force = UP.Force;
1348*0fca6ea1SDimitry Andric   ULO.AllowExpensiveTripCount = UP.AllowExpensiveTripCount;
1349*0fca6ea1SDimitry Andric   ULO.UnrollRemainder = UP.UnrollRemainder;
1350*0fca6ea1SDimitry Andric   ULO.Runtime = UP.Runtime;
1351*0fca6ea1SDimitry Andric   ULO.ForgetAllSCEV = ForgetAllSCEV;
1352*0fca6ea1SDimitry Andric   ULO.Heart = getLoopConvergenceHeart(L);
13530b57cec5SDimitry Andric   LoopUnrollResult UnrollResult = UnrollLoop(
1354*0fca6ea1SDimitry Andric       L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
13550b57cec5SDimitry Andric   if (UnrollResult == LoopUnrollResult::Unmodified)
13560b57cec5SDimitry Andric     return LoopUnrollResult::Unmodified;
13570b57cec5SDimitry Andric 
13580b57cec5SDimitry Andric   if (RemainderLoop) {
1359bdd1243dSDimitry Andric     std::optional<MDNode *> RemainderLoopID =
13600b57cec5SDimitry Andric         makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
13610b57cec5SDimitry Andric                                         LLVMLoopUnrollFollowupRemainder});
136281ad6265SDimitry Andric     if (RemainderLoopID)
1363bdd1243dSDimitry Andric       RemainderLoop->setLoopID(*RemainderLoopID);
13640b57cec5SDimitry Andric   }
13650b57cec5SDimitry Andric 
13660b57cec5SDimitry Andric   if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1367bdd1243dSDimitry Andric     std::optional<MDNode *> NewLoopID =
13680b57cec5SDimitry Andric         makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
13690b57cec5SDimitry Andric                                         LLVMLoopUnrollFollowupUnrolled});
137081ad6265SDimitry Andric     if (NewLoopID) {
1371bdd1243dSDimitry Andric       L->setLoopID(*NewLoopID);
13720b57cec5SDimitry Andric 
13730b57cec5SDimitry Andric       // Do not setLoopAlreadyUnrolled if loop attributes have been specified
13740b57cec5SDimitry Andric       // explicitly.
13750b57cec5SDimitry Andric       return UnrollResult;
13760b57cec5SDimitry Andric     }
13770b57cec5SDimitry Andric   }
13780b57cec5SDimitry Andric 
13790b57cec5SDimitry Andric   // If loop has an unroll count pragma or unrolled by explicitly set count
13800b57cec5SDimitry Andric   // mark loop as unrolled to prevent unrolling beyond that requested.
1381fe6060f1SDimitry Andric   if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
13820b57cec5SDimitry Andric     L->setLoopAlreadyUnrolled();
13830b57cec5SDimitry Andric 
13840b57cec5SDimitry Andric   return UnrollResult;
13850b57cec5SDimitry Andric }
13860b57cec5SDimitry Andric 
13870b57cec5SDimitry Andric namespace {
13880b57cec5SDimitry Andric 
13890b57cec5SDimitry Andric class LoopUnroll : public LoopPass {
13900b57cec5SDimitry Andric public:
13910b57cec5SDimitry Andric   static char ID; // Pass ID, replacement for typeid
13920b57cec5SDimitry Andric 
13930b57cec5SDimitry Andric   int OptLevel;
13940b57cec5SDimitry Andric 
13950b57cec5SDimitry Andric   /// If false, use a cost model to determine whether unrolling of a loop is
13960b57cec5SDimitry Andric   /// profitable. If true, only loops that explicitly request unrolling via
13970b57cec5SDimitry Andric   /// metadata are considered. All other loops are skipped.
13980b57cec5SDimitry Andric   bool OnlyWhenForced;
13990b57cec5SDimitry Andric 
14000b57cec5SDimitry Andric   /// If false, when SCEV is invalidated, only forget everything in the
14010b57cec5SDimitry Andric   /// top-most loop (call forgetTopMostLoop), of the loop being processed.
14020b57cec5SDimitry Andric   /// Otherwise, forgetAllLoops and rebuild when needed next.
14030b57cec5SDimitry Andric   bool ForgetAllSCEV;
14040b57cec5SDimitry Andric 
1405bdd1243dSDimitry Andric   std::optional<unsigned> ProvidedCount;
1406bdd1243dSDimitry Andric   std::optional<unsigned> ProvidedThreshold;
1407bdd1243dSDimitry Andric   std::optional<bool> ProvidedAllowPartial;
1408bdd1243dSDimitry Andric   std::optional<bool> ProvidedRuntime;
1409bdd1243dSDimitry Andric   std::optional<bool> ProvidedUpperBound;
1410bdd1243dSDimitry Andric   std::optional<bool> ProvidedAllowPeeling;
1411bdd1243dSDimitry Andric   std::optional<bool> ProvidedAllowProfileBasedPeeling;
1412bdd1243dSDimitry Andric   std::optional<unsigned> ProvidedFullUnrollMaxCount;
14130b57cec5SDimitry Andric 
14140b57cec5SDimitry Andric   LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1415bdd1243dSDimitry Andric              bool ForgetAllSCEV = false,
1416bdd1243dSDimitry Andric              std::optional<unsigned> Threshold = std::nullopt,
1417bdd1243dSDimitry Andric              std::optional<unsigned> Count = std::nullopt,
1418bdd1243dSDimitry Andric              std::optional<bool> AllowPartial = std::nullopt,
1419bdd1243dSDimitry Andric              std::optional<bool> Runtime = std::nullopt,
1420bdd1243dSDimitry Andric              std::optional<bool> UpperBound = std::nullopt,
1421bdd1243dSDimitry Andric              std::optional<bool> AllowPeeling = std::nullopt,
1422bdd1243dSDimitry Andric              std::optional<bool> AllowProfileBasedPeeling = std::nullopt,
1423bdd1243dSDimitry Andric              std::optional<unsigned> ProvidedFullUnrollMaxCount = std::nullopt)
14240b57cec5SDimitry Andric       : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
14250b57cec5SDimitry Andric         ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
14260b57cec5SDimitry Andric         ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
14270b57cec5SDimitry Andric         ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
14288bcb0991SDimitry Andric         ProvidedAllowPeeling(AllowPeeling),
14298bcb0991SDimitry Andric         ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
14308bcb0991SDimitry Andric         ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
14310b57cec5SDimitry Andric     initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
14320b57cec5SDimitry Andric   }
14330b57cec5SDimitry Andric 
14340b57cec5SDimitry Andric   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
14350b57cec5SDimitry Andric     if (skipLoop(L))
14360b57cec5SDimitry Andric       return false;
14370b57cec5SDimitry Andric 
14380b57cec5SDimitry Andric     Function &F = *L->getHeader()->getParent();
14390b57cec5SDimitry Andric 
14400b57cec5SDimitry Andric     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
14410b57cec5SDimitry Andric     LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
14420b57cec5SDimitry Andric     ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
14430b57cec5SDimitry Andric     const TargetTransformInfo &TTI =
14440b57cec5SDimitry Andric         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
14450b57cec5SDimitry Andric     auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
14460b57cec5SDimitry Andric     // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
14470b57cec5SDimitry Andric     // pass.  Function analyses need to be preserved across loop transformations
14480b57cec5SDimitry Andric     // but ORE cannot be preserved (see comment before the pass definition).
14490b57cec5SDimitry Andric     OptimizationRemarkEmitter ORE(&F);
14500b57cec5SDimitry Andric     bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
14510b57cec5SDimitry Andric 
14520b57cec5SDimitry Andric     LoopUnrollResult Result = tryToUnrollLoop(
14538bcb0991SDimitry Andric         L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
145406c3fb27SDimitry Andric         /*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount,
145506c3fb27SDimitry Andric         ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
145606c3fb27SDimitry Andric         ProvidedUpperBound, ProvidedAllowPeeling,
145706c3fb27SDimitry Andric         ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount);
14580b57cec5SDimitry Andric 
14590b57cec5SDimitry Andric     if (Result == LoopUnrollResult::FullyUnrolled)
14600b57cec5SDimitry Andric       LPM.markLoopAsDeleted(*L);
14610b57cec5SDimitry Andric 
14620b57cec5SDimitry Andric     return Result != LoopUnrollResult::Unmodified;
14630b57cec5SDimitry Andric   }
14640b57cec5SDimitry Andric 
14650b57cec5SDimitry Andric   /// This transformation requires natural loop information & requires that
14660b57cec5SDimitry Andric   /// loop preheaders be inserted into the CFG...
14670b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
14680b57cec5SDimitry Andric     AU.addRequired<AssumptionCacheTracker>();
14690b57cec5SDimitry Andric     AU.addRequired<TargetTransformInfoWrapperPass>();
14700b57cec5SDimitry Andric     // FIXME: Loop passes are required to preserve domtree, and for now we just
14710b57cec5SDimitry Andric     // recreate dom info if anything gets unrolled.
14720b57cec5SDimitry Andric     getLoopAnalysisUsage(AU);
14730b57cec5SDimitry Andric   }
14740b57cec5SDimitry Andric };
14750b57cec5SDimitry Andric 
14760b57cec5SDimitry Andric } // end anonymous namespace
14770b57cec5SDimitry Andric 
14780b57cec5SDimitry Andric char LoopUnroll::ID = 0;
14790b57cec5SDimitry Andric 
14800b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
14810b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
14820b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopPass)
14830b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
14840b57cec5SDimitry Andric INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
14850b57cec5SDimitry Andric 
14860b57cec5SDimitry Andric Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
14870b57cec5SDimitry Andric                                  bool ForgetAllSCEV, int Threshold, int Count,
14880b57cec5SDimitry Andric                                  int AllowPartial, int Runtime, int UpperBound,
14890b57cec5SDimitry Andric                                  int AllowPeeling) {
14900b57cec5SDimitry Andric   // TODO: It would make more sense for this function to take the optionals
14910b57cec5SDimitry Andric   // directly, but that's dangerous since it would silently break out of tree
14920b57cec5SDimitry Andric   // callers.
14930b57cec5SDimitry Andric   return new LoopUnroll(
14940b57cec5SDimitry Andric       OptLevel, OnlyWhenForced, ForgetAllSCEV,
1495bdd1243dSDimitry Andric       Threshold == -1 ? std::nullopt : std::optional<unsigned>(Threshold),
1496bdd1243dSDimitry Andric       Count == -1 ? std::nullopt : std::optional<unsigned>(Count),
1497bdd1243dSDimitry Andric       AllowPartial == -1 ? std::nullopt : std::optional<bool>(AllowPartial),
1498bdd1243dSDimitry Andric       Runtime == -1 ? std::nullopt : std::optional<bool>(Runtime),
1499bdd1243dSDimitry Andric       UpperBound == -1 ? std::nullopt : std::optional<bool>(UpperBound),
1500bdd1243dSDimitry Andric       AllowPeeling == -1 ? std::nullopt : std::optional<bool>(AllowPeeling));
15010b57cec5SDimitry Andric }
15020b57cec5SDimitry Andric 
15030b57cec5SDimitry Andric PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
15040b57cec5SDimitry Andric                                           LoopStandardAnalysisResults &AR,
15050b57cec5SDimitry Andric                                           LPMUpdater &Updater) {
15065ffd83dbSDimitry Andric   // For the new PM, we can't use OptimizationRemarkEmitter as an analysis
15075ffd83dbSDimitry Andric   // pass. Function analyses need to be preserved across loop transformations
15085ffd83dbSDimitry Andric   // but ORE cannot be preserved (see comment before the pass definition).
15095ffd83dbSDimitry Andric   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
15100b57cec5SDimitry Andric 
15110b57cec5SDimitry Andric   // Keep track of the previous loop structure so we can identify new loops
15120b57cec5SDimitry Andric   // created by unrolling.
15130b57cec5SDimitry Andric   Loop *ParentL = L.getParentLoop();
15140b57cec5SDimitry Andric   SmallPtrSet<Loop *, 4> OldLoops;
15150b57cec5SDimitry Andric   if (ParentL)
15160b57cec5SDimitry Andric     OldLoops.insert(ParentL->begin(), ParentL->end());
15170b57cec5SDimitry Andric   else
15180b57cec5SDimitry Andric     OldLoops.insert(AR.LI.begin(), AR.LI.end());
15190b57cec5SDimitry Andric 
15205ffd83dbSDimitry Andric   std::string LoopName = std::string(L.getName());
15210b57cec5SDimitry Andric 
1522bdd1243dSDimitry Andric   bool Changed =
1523bdd1243dSDimitry Andric       tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE,
15240b57cec5SDimitry Andric                       /*BFI*/ nullptr, /*PSI*/ nullptr,
152506c3fb27SDimitry Andric                       /*PreserveLCSSA*/ true, OptLevel, /*OnlyFullUnroll*/ true,
152606c3fb27SDimitry Andric                       OnlyWhenForced, ForgetSCEV, /*Count*/ std::nullopt,
1527bdd1243dSDimitry Andric                       /*Threshold*/ std::nullopt, /*AllowPartial*/ false,
15280b57cec5SDimitry Andric                       /*Runtime*/ false, /*UpperBound*/ false,
1529e8d8bef9SDimitry Andric                       /*AllowPeeling*/ true,
15308bcb0991SDimitry Andric                       /*AllowProfileBasedPeeling*/ false,
1531bdd1243dSDimitry Andric                       /*FullUnrollMaxCount*/ std::nullopt) !=
15328bcb0991SDimitry Andric       LoopUnrollResult::Unmodified;
15330b57cec5SDimitry Andric   if (!Changed)
15340b57cec5SDimitry Andric     return PreservedAnalyses::all();
15350b57cec5SDimitry Andric 
15360b57cec5SDimitry Andric   // The parent must not be damaged by unrolling!
15370b57cec5SDimitry Andric #ifndef NDEBUG
15380b57cec5SDimitry Andric   if (ParentL)
15390b57cec5SDimitry Andric     ParentL->verifyLoop();
15400b57cec5SDimitry Andric #endif
15410b57cec5SDimitry Andric 
15420b57cec5SDimitry Andric   // Unrolling can do several things to introduce new loops into a loop nest:
15430b57cec5SDimitry Andric   // - Full unrolling clones child loops within the current loop but then
15440b57cec5SDimitry Andric   //   removes the current loop making all of the children appear to be new
15450b57cec5SDimitry Andric   //   sibling loops.
15460b57cec5SDimitry Andric   //
15470b57cec5SDimitry Andric   // When a new loop appears as a sibling loop after fully unrolling,
15480b57cec5SDimitry Andric   // its nesting structure has fundamentally changed and we want to revisit
15490b57cec5SDimitry Andric   // it to reflect that.
15500b57cec5SDimitry Andric   //
15510b57cec5SDimitry Andric   // When unrolling has removed the current loop, we need to tell the
15520b57cec5SDimitry Andric   // infrastructure that it is gone.
15530b57cec5SDimitry Andric   //
15540b57cec5SDimitry Andric   // Finally, we support a debugging/testing mode where we revisit child loops
15550b57cec5SDimitry Andric   // as well. These are not expected to require further optimizations as either
15560b57cec5SDimitry Andric   // they or the loop they were cloned from have been directly visited already.
15570b57cec5SDimitry Andric   // But the debugging mode allows us to check this assumption.
15580b57cec5SDimitry Andric   bool IsCurrentLoopValid = false;
15590b57cec5SDimitry Andric   SmallVector<Loop *, 4> SibLoops;
15600b57cec5SDimitry Andric   if (ParentL)
15610b57cec5SDimitry Andric     SibLoops.append(ParentL->begin(), ParentL->end());
15620b57cec5SDimitry Andric   else
15630b57cec5SDimitry Andric     SibLoops.append(AR.LI.begin(), AR.LI.end());
15640b57cec5SDimitry Andric   erase_if(SibLoops, [&](Loop *SibLoop) {
15650b57cec5SDimitry Andric     if (SibLoop == &L) {
15660b57cec5SDimitry Andric       IsCurrentLoopValid = true;
15670b57cec5SDimitry Andric       return true;
15680b57cec5SDimitry Andric     }
15690b57cec5SDimitry Andric 
15700b57cec5SDimitry Andric     // Otherwise erase the loop from the list if it was in the old loops.
1571e8d8bef9SDimitry Andric     return OldLoops.contains(SibLoop);
15720b57cec5SDimitry Andric   });
15730b57cec5SDimitry Andric   Updater.addSiblingLoops(SibLoops);
15740b57cec5SDimitry Andric 
15750b57cec5SDimitry Andric   if (!IsCurrentLoopValid) {
15760b57cec5SDimitry Andric     Updater.markLoopAsDeleted(L, LoopName);
15770b57cec5SDimitry Andric   } else {
15780b57cec5SDimitry Andric     // We can only walk child loops if the current loop remained valid.
15790b57cec5SDimitry Andric     if (UnrollRevisitChildLoops) {
15800b57cec5SDimitry Andric       // Walk *all* of the child loops.
15810b57cec5SDimitry Andric       SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
15820b57cec5SDimitry Andric       Updater.addChildLoops(ChildLoops);
15830b57cec5SDimitry Andric     }
15840b57cec5SDimitry Andric   }
15850b57cec5SDimitry Andric 
15860b57cec5SDimitry Andric   return getLoopPassPreservedAnalyses();
15870b57cec5SDimitry Andric }
15880b57cec5SDimitry Andric 
15890b57cec5SDimitry Andric PreservedAnalyses LoopUnrollPass::run(Function &F,
15900b57cec5SDimitry Andric                                       FunctionAnalysisManager &AM) {
15910b57cec5SDimitry Andric   auto &LI = AM.getResult<LoopAnalysis>(F);
159281ad6265SDimitry Andric   // There are no loops in the function. Return before computing other expensive
159381ad6265SDimitry Andric   // analyses.
159481ad6265SDimitry Andric   if (LI.empty())
159581ad6265SDimitry Andric     return PreservedAnalyses::all();
159681ad6265SDimitry Andric   auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
15970b57cec5SDimitry Andric   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
15980b57cec5SDimitry Andric   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
15990b57cec5SDimitry Andric   auto &AC = AM.getResult<AssumptionAnalysis>(F);
16000b57cec5SDimitry Andric   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1601*0fca6ea1SDimitry Andric   AAResults &AA = AM.getResult<AAManager>(F);
16020b57cec5SDimitry Andric 
16030b57cec5SDimitry Andric   LoopAnalysisManager *LAM = nullptr;
16040b57cec5SDimitry Andric   if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
16050b57cec5SDimitry Andric     LAM = &LAMProxy->getManager();
16060b57cec5SDimitry Andric 
16075ffd83dbSDimitry Andric   auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
16080b57cec5SDimitry Andric   ProfileSummaryInfo *PSI =
16095ffd83dbSDimitry Andric       MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
16100b57cec5SDimitry Andric   auto *BFI = (PSI && PSI->hasProfileSummary()) ?
16110b57cec5SDimitry Andric       &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
16120b57cec5SDimitry Andric 
16130b57cec5SDimitry Andric   bool Changed = false;
16140b57cec5SDimitry Andric 
16150b57cec5SDimitry Andric   // The unroller requires loops to be in simplified form, and also needs LCSSA.
16160b57cec5SDimitry Andric   // Since simplification may add new inner loops, it has to run before the
16170b57cec5SDimitry Andric   // legality and profitability checks. This means running the loop unroller
16180b57cec5SDimitry Andric   // will simplify all loops, regardless of whether anything end up being
16190b57cec5SDimitry Andric   // unrolled.
1620bdd1243dSDimitry Andric   for (const auto &L : LI) {
16210b57cec5SDimitry Andric     Changed |=
16220b57cec5SDimitry Andric         simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
16230b57cec5SDimitry Andric     Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
16240b57cec5SDimitry Andric   }
16250b57cec5SDimitry Andric 
16265ffd83dbSDimitry Andric   // Add the loop nests in the reverse order of LoopInfo. See method
16275ffd83dbSDimitry Andric   // declaration.
16285ffd83dbSDimitry Andric   SmallPriorityWorklist<Loop *, 4> Worklist;
16295ffd83dbSDimitry Andric   appendLoopsToWorklist(LI, Worklist);
16300b57cec5SDimitry Andric 
16310b57cec5SDimitry Andric   while (!Worklist.empty()) {
16320b57cec5SDimitry Andric     // Because the LoopInfo stores the loops in RPO, we walk the worklist
16330b57cec5SDimitry Andric     // from back to front so that we work forward across the CFG, which
16340b57cec5SDimitry Andric     // for unrolling is only needed to get optimization remarks emitted in
16350b57cec5SDimitry Andric     // a forward order.
16360b57cec5SDimitry Andric     Loop &L = *Worklist.pop_back_val();
16370b57cec5SDimitry Andric #ifndef NDEBUG
16380b57cec5SDimitry Andric     Loop *ParentL = L.getParentLoop();
16390b57cec5SDimitry Andric #endif
16400b57cec5SDimitry Andric 
16410b57cec5SDimitry Andric     // Check if the profile summary indicates that the profiled application
16420b57cec5SDimitry Andric     // has a huge working set size, in which case we disable peeling to avoid
16430b57cec5SDimitry Andric     // bloating it further.
1644bdd1243dSDimitry Andric     std::optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
16450b57cec5SDimitry Andric     if (PSI && PSI->hasHugeWorkingSetSize())
16460b57cec5SDimitry Andric       LocalAllowPeeling = false;
16475ffd83dbSDimitry Andric     std::string LoopName = std::string(L.getName());
16480b57cec5SDimitry Andric     // The API here is quite complex to call and we allow to select some
16490b57cec5SDimitry Andric     // flavors of unrolling during construction time (by setting UnrollOpts).
16500b57cec5SDimitry Andric     LoopUnrollResult Result = tryToUnrollLoop(
16510b57cec5SDimitry Andric         &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
165206c3fb27SDimitry Andric         /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*OnlyFullUnroll*/ false,
165306c3fb27SDimitry Andric         UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV,
165406c3fb27SDimitry Andric         /*Count*/ std::nullopt,
1655bdd1243dSDimitry Andric         /*Threshold*/ std::nullopt, UnrollOpts.AllowPartial,
1656bdd1243dSDimitry Andric         UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling,
1657*0fca6ea1SDimitry Andric         UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount,
1658*0fca6ea1SDimitry Andric         &AA);
16590b57cec5SDimitry Andric     Changed |= Result != LoopUnrollResult::Unmodified;
16600b57cec5SDimitry Andric 
16610b57cec5SDimitry Andric     // The parent must not be damaged by unrolling!
16620b57cec5SDimitry Andric #ifndef NDEBUG
16630b57cec5SDimitry Andric     if (Result != LoopUnrollResult::Unmodified && ParentL)
16640b57cec5SDimitry Andric       ParentL->verifyLoop();
16650b57cec5SDimitry Andric #endif
16660b57cec5SDimitry Andric 
16670b57cec5SDimitry Andric     // Clear any cached analysis results for L if we removed it completely.
16680b57cec5SDimitry Andric     if (LAM && Result == LoopUnrollResult::FullyUnrolled)
16690b57cec5SDimitry Andric       LAM->clear(L, LoopName);
16700b57cec5SDimitry Andric   }
16710b57cec5SDimitry Andric 
16720b57cec5SDimitry Andric   if (!Changed)
16730b57cec5SDimitry Andric     return PreservedAnalyses::all();
16740b57cec5SDimitry Andric 
16750b57cec5SDimitry Andric   return getLoopPassPreservedAnalyses();
16760b57cec5SDimitry Andric }
1677349cc55cSDimitry Andric 
1678349cc55cSDimitry Andric void LoopUnrollPass::printPipeline(
1679349cc55cSDimitry Andric     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1680349cc55cSDimitry Andric   static_cast<PassInfoMixin<LoopUnrollPass> *>(this)->printPipeline(
1681349cc55cSDimitry Andric       OS, MapClassName2PassName);
168206c3fb27SDimitry Andric   OS << '<';
1683bdd1243dSDimitry Andric   if (UnrollOpts.AllowPartial != std::nullopt)
1684bdd1243dSDimitry Andric     OS << (*UnrollOpts.AllowPartial ? "" : "no-") << "partial;";
1685bdd1243dSDimitry Andric   if (UnrollOpts.AllowPeeling != std::nullopt)
1686bdd1243dSDimitry Andric     OS << (*UnrollOpts.AllowPeeling ? "" : "no-") << "peeling;";
1687bdd1243dSDimitry Andric   if (UnrollOpts.AllowRuntime != std::nullopt)
1688bdd1243dSDimitry Andric     OS << (*UnrollOpts.AllowRuntime ? "" : "no-") << "runtime;";
1689bdd1243dSDimitry Andric   if (UnrollOpts.AllowUpperBound != std::nullopt)
1690bdd1243dSDimitry Andric     OS << (*UnrollOpts.AllowUpperBound ? "" : "no-") << "upperbound;";
1691bdd1243dSDimitry Andric   if (UnrollOpts.AllowProfileBasedPeeling != std::nullopt)
1692bdd1243dSDimitry Andric     OS << (*UnrollOpts.AllowProfileBasedPeeling ? "" : "no-")
1693349cc55cSDimitry Andric        << "profile-peeling;";
1694bdd1243dSDimitry Andric   if (UnrollOpts.FullUnrollMaxCount != std::nullopt)
169506c3fb27SDimitry Andric     OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ';';
169606c3fb27SDimitry Andric   OS << 'O' << UnrollOpts.OptLevel;
169706c3fb27SDimitry Andric   OS << '>';
1698349cc55cSDimitry Andric }
1699