10b57cec5SDimitry Andric //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This pass implements a simple loop unroller. It works best when loops have 100b57cec5SDimitry Andric // been canonicalized by the -indvars pass, allowing it to determine the trip 110b57cec5SDimitry Andric // counts of loops easily. 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/LoopUnrollPass.h" 150b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 160b57cec5SDimitry Andric #include "llvm/ADT/DenseMapInfo.h" 170b57cec5SDimitry Andric #include "llvm/ADT/DenseSet.h" 180b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 19*0fca6ea1SDimitry Andric #include "llvm/ADT/ScopedHashTable.h" 200b57cec5SDimitry Andric #include "llvm/ADT/SetVector.h" 210b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 220b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 230b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 240b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 250b57cec5SDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h" 260b57cec5SDimitry Andric #include "llvm/Analysis/CodeMetrics.h" 270b57cec5SDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h" 280b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 290b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h" 300b57cec5SDimitry Andric #include "llvm/Analysis/LoopUnrollAnalyzer.h" 31*0fca6ea1SDimitry Andric #include "llvm/Analysis/MemorySSA.h" 320b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 330b57cec5SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h" 340b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 350b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 360b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 370b57cec5SDimitry Andric #include "llvm/IR/CFG.h" 380b57cec5SDimitry Andric #include "llvm/IR/Constant.h" 390b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 400b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 410b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 420b57cec5SDimitry Andric #include "llvm/IR/Function.h" 430b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 440b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 450b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 460b57cec5SDimitry Andric #include "llvm/IR/Metadata.h" 470b57cec5SDimitry Andric #include "llvm/IR/PassManager.h" 48480093f4SDimitry Andric #include "llvm/InitializePasses.h" 490b57cec5SDimitry Andric #include "llvm/Pass.h" 500b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 510b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 520b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 530b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 540b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 550b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h" 560b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/LoopPassManager.h" 570b57cec5SDimitry Andric #include "llvm/Transforms/Utils.h" 58e8d8bef9SDimitry Andric #include "llvm/Transforms/Utils/LoopPeel.h" 590b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopSimplify.h" 600b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 610b57cec5SDimitry Andric #include "llvm/Transforms/Utils/SizeOpts.h" 620b57cec5SDimitry Andric #include "llvm/Transforms/Utils/UnrollLoop.h" 630b57cec5SDimitry Andric #include <algorithm> 640b57cec5SDimitry Andric #include <cassert> 650b57cec5SDimitry Andric #include <cstdint> 660b57cec5SDimitry Andric #include <limits> 67bdd1243dSDimitry Andric #include <optional> 680b57cec5SDimitry Andric #include <string> 690b57cec5SDimitry Andric #include <tuple> 700b57cec5SDimitry Andric #include <utility> 710b57cec5SDimitry Andric 720b57cec5SDimitry Andric using namespace llvm; 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric #define DEBUG_TYPE "loop-unroll" 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric cl::opt<bool> llvm::ForgetSCEVInLoopUnroll( 770b57cec5SDimitry Andric "forget-scev-loop-unroll", cl::init(false), cl::Hidden, 780b57cec5SDimitry Andric cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just" 79e8d8bef9SDimitry Andric " the current top-most loop. This is sometimes preferred to reduce" 800b57cec5SDimitry Andric " compile time.")); 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric static cl::opt<unsigned> 830b57cec5SDimitry Andric UnrollThreshold("unroll-threshold", cl::Hidden, 840b57cec5SDimitry Andric cl::desc("The cost threshold for loop unrolling")); 850b57cec5SDimitry Andric 86e8d8bef9SDimitry Andric static cl::opt<unsigned> 87e8d8bef9SDimitry Andric UnrollOptSizeThreshold( 88e8d8bef9SDimitry Andric "unroll-optsize-threshold", cl::init(0), cl::Hidden, 89e8d8bef9SDimitry Andric cl::desc("The cost threshold for loop unrolling when optimizing for " 90e8d8bef9SDimitry Andric "size")); 91e8d8bef9SDimitry Andric 920b57cec5SDimitry Andric static cl::opt<unsigned> UnrollPartialThreshold( 930b57cec5SDimitry Andric "unroll-partial-threshold", cl::Hidden, 940b57cec5SDimitry Andric cl::desc("The cost threshold for partial loop unrolling")); 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric static cl::opt<unsigned> UnrollMaxPercentThresholdBoost( 970b57cec5SDimitry Andric "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, 980b57cec5SDimitry Andric cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " 990b57cec5SDimitry Andric "to the threshold when aggressively unrolling a loop due to the " 1000b57cec5SDimitry Andric "dynamic cost savings. If completely unrolling a loop will reduce " 1010b57cec5SDimitry Andric "the total runtime from X to Y, we boost the loop unroll " 1020b57cec5SDimitry Andric "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " 1030b57cec5SDimitry Andric "X/Y). This limit avoids excessive code bloat.")); 1040b57cec5SDimitry Andric 1050b57cec5SDimitry Andric static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze( 1060b57cec5SDimitry Andric "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, 1070b57cec5SDimitry Andric cl::desc("Don't allow loop unrolling to simulate more than this number of" 1080b57cec5SDimitry Andric "iterations when checking full unroll profitability")); 1090b57cec5SDimitry Andric 1100b57cec5SDimitry Andric static cl::opt<unsigned> UnrollCount( 1110b57cec5SDimitry Andric "unroll-count", cl::Hidden, 1120b57cec5SDimitry Andric cl::desc("Use this unroll count for all loops including those with " 1130b57cec5SDimitry Andric "unroll_count pragma values, for testing purposes")); 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric static cl::opt<unsigned> UnrollMaxCount( 1160b57cec5SDimitry Andric "unroll-max-count", cl::Hidden, 1170b57cec5SDimitry Andric cl::desc("Set the max unroll count for partial and runtime unrolling, for" 1180b57cec5SDimitry Andric "testing purposes")); 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric static cl::opt<unsigned> UnrollFullMaxCount( 1210b57cec5SDimitry Andric "unroll-full-max-count", cl::Hidden, 1220b57cec5SDimitry Andric cl::desc( 1230b57cec5SDimitry Andric "Set the max unroll count for full unrolling, for testing purposes")); 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric static cl::opt<bool> 1260b57cec5SDimitry Andric UnrollAllowPartial("unroll-allow-partial", cl::Hidden, 1270b57cec5SDimitry Andric cl::desc("Allows loops to be partially unrolled until " 1280b57cec5SDimitry Andric "-unroll-threshold loop size is reached.")); 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric static cl::opt<bool> UnrollAllowRemainder( 1310b57cec5SDimitry Andric "unroll-allow-remainder", cl::Hidden, 1320b57cec5SDimitry Andric cl::desc("Allow generation of a loop remainder (extra iterations) " 1330b57cec5SDimitry Andric "when unrolling a loop.")); 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric static cl::opt<bool> 13681ad6265SDimitry Andric UnrollRuntime("unroll-runtime", cl::Hidden, 1370b57cec5SDimitry Andric cl::desc("Unroll loops with run-time trip counts")); 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric static cl::opt<unsigned> UnrollMaxUpperBound( 1400b57cec5SDimitry Andric "unroll-max-upperbound", cl::init(8), cl::Hidden, 1410b57cec5SDimitry Andric cl::desc( 1420b57cec5SDimitry Andric "The max of trip count upper bound that is considered in unrolling")); 1430b57cec5SDimitry Andric 1440b57cec5SDimitry Andric static cl::opt<unsigned> PragmaUnrollThreshold( 1450b57cec5SDimitry Andric "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden, 1460b57cec5SDimitry Andric cl::desc("Unrolled size limit for loops with an unroll(full) or " 1470b57cec5SDimitry Andric "unroll_count pragma.")); 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric static cl::opt<unsigned> FlatLoopTripCountThreshold( 1500b57cec5SDimitry Andric "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, 1510b57cec5SDimitry Andric cl::desc("If the runtime tripcount for the loop is lower than the " 1520b57cec5SDimitry Andric "threshold, the loop is considered as flat and will be less " 1530b57cec5SDimitry Andric "aggressively unrolled.")); 1540b57cec5SDimitry Andric 1550b57cec5SDimitry Andric static cl::opt<bool> UnrollUnrollRemainder( 1560b57cec5SDimitry Andric "unroll-remainder", cl::Hidden, 1570b57cec5SDimitry Andric cl::desc("Allow the loop remainder to be unrolled.")); 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric // This option isn't ever intended to be enabled, it serves to allow 1600b57cec5SDimitry Andric // experiments to check the assumptions about when this kind of revisit is 1610b57cec5SDimitry Andric // necessary. 1620b57cec5SDimitry Andric static cl::opt<bool> UnrollRevisitChildLoops( 1630b57cec5SDimitry Andric "unroll-revisit-child-loops", cl::Hidden, 1640b57cec5SDimitry Andric cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " 1650b57cec5SDimitry Andric "This shouldn't typically be needed as child loops (or their " 1660b57cec5SDimitry Andric "clones) were already visited.")); 1670b57cec5SDimitry Andric 1685ffd83dbSDimitry Andric static cl::opt<unsigned> UnrollThresholdAggressive( 1695ffd83dbSDimitry Andric "unroll-threshold-aggressive", cl::init(300), cl::Hidden, 1705ffd83dbSDimitry Andric cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) " 1715ffd83dbSDimitry Andric "optimizations")); 1725ffd83dbSDimitry Andric static cl::opt<unsigned> 1735ffd83dbSDimitry Andric UnrollThresholdDefault("unroll-threshold-default", cl::init(150), 1745ffd83dbSDimitry Andric cl::Hidden, 1755ffd83dbSDimitry Andric cl::desc("Default threshold (max size of unrolled " 1765ffd83dbSDimitry Andric "loop), used in all but O3 optimizations")); 1775ffd83dbSDimitry Andric 178*0fca6ea1SDimitry Andric static cl::opt<unsigned> PragmaUnrollFullMaxIterations( 179*0fca6ea1SDimitry Andric "pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden, 180*0fca6ea1SDimitry Andric cl::desc("Maximum allowed iterations to unroll under pragma unroll full.")); 181*0fca6ea1SDimitry Andric 1820b57cec5SDimitry Andric /// A magic value for use with the Threshold parameter to indicate 1830b57cec5SDimitry Andric /// that the loop unroll should be performed regardless of how much 1840b57cec5SDimitry Andric /// code expansion would result. 1850b57cec5SDimitry Andric static const unsigned NoThreshold = std::numeric_limits<unsigned>::max(); 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric /// Gather the various unrolling parameters based on the defaults, compiler 1880b57cec5SDimitry Andric /// flags, TTI overrides and user specified parameters. 1890b57cec5SDimitry Andric TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( 1900b57cec5SDimitry Andric Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, 191349cc55cSDimitry Andric BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, 192349cc55cSDimitry Andric OptimizationRemarkEmitter &ORE, int OptLevel, 193bdd1243dSDimitry Andric std::optional<unsigned> UserThreshold, std::optional<unsigned> UserCount, 194bdd1243dSDimitry Andric std::optional<bool> UserAllowPartial, std::optional<bool> UserRuntime, 195bdd1243dSDimitry Andric std::optional<bool> UserUpperBound, 196bdd1243dSDimitry Andric std::optional<unsigned> UserFullUnrollMaxCount) { 1970b57cec5SDimitry Andric TargetTransformInfo::UnrollingPreferences UP; 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric // Set up the defaults 2005ffd83dbSDimitry Andric UP.Threshold = 2015ffd83dbSDimitry Andric OptLevel > 2 ? UnrollThresholdAggressive : UnrollThresholdDefault; 2020b57cec5SDimitry Andric UP.MaxPercentThresholdBoost = 400; 203e8d8bef9SDimitry Andric UP.OptSizeThreshold = UnrollOptSizeThreshold; 2040b57cec5SDimitry Andric UP.PartialThreshold = 150; 205e8d8bef9SDimitry Andric UP.PartialOptSizeThreshold = UnrollOptSizeThreshold; 2060b57cec5SDimitry Andric UP.Count = 0; 2070b57cec5SDimitry Andric UP.DefaultUnrollRuntimeCount = 8; 2080b57cec5SDimitry Andric UP.MaxCount = std::numeric_limits<unsigned>::max(); 209cb14a3feSDimitry Andric UP.MaxUpperBound = UnrollMaxUpperBound; 2100b57cec5SDimitry Andric UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max(); 2110b57cec5SDimitry Andric UP.BEInsns = 2; 2120b57cec5SDimitry Andric UP.Partial = false; 2130b57cec5SDimitry Andric UP.Runtime = false; 2140b57cec5SDimitry Andric UP.AllowRemainder = true; 2150b57cec5SDimitry Andric UP.UnrollRemainder = false; 2160b57cec5SDimitry Andric UP.AllowExpensiveTripCount = false; 2170b57cec5SDimitry Andric UP.Force = false; 2180b57cec5SDimitry Andric UP.UpperBound = false; 2190b57cec5SDimitry Andric UP.UnrollAndJam = false; 2200b57cec5SDimitry Andric UP.UnrollAndJamInnerLoopThreshold = 60; 2215ffd83dbSDimitry Andric UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze; 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric // Override with any target specific settings 224349cc55cSDimitry Andric TTI.getUnrollingPreferences(L, SE, UP, &ORE); 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric // Apply size attributes 2270b57cec5SDimitry Andric bool OptForSize = L->getHeader()->getParent()->hasOptSize() || 228e8d8bef9SDimitry Andric // Let unroll hints / pragmas take precedence over PGSO. 229e8d8bef9SDimitry Andric (hasUnrollTransformation(L) != TM_ForcedByUser && 230480093f4SDimitry Andric llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, 231e8d8bef9SDimitry Andric PGSOQueryType::IRPass)); 2320b57cec5SDimitry Andric if (OptForSize) { 2330b57cec5SDimitry Andric UP.Threshold = UP.OptSizeThreshold; 2340b57cec5SDimitry Andric UP.PartialThreshold = UP.PartialOptSizeThreshold; 2350b57cec5SDimitry Andric UP.MaxPercentThresholdBoost = 100; 2360b57cec5SDimitry Andric } 2370b57cec5SDimitry Andric 2380b57cec5SDimitry Andric // Apply any user values specified by cl::opt 2390b57cec5SDimitry Andric if (UnrollThreshold.getNumOccurrences() > 0) 2400b57cec5SDimitry Andric UP.Threshold = UnrollThreshold; 2410b57cec5SDimitry Andric if (UnrollPartialThreshold.getNumOccurrences() > 0) 2420b57cec5SDimitry Andric UP.PartialThreshold = UnrollPartialThreshold; 2430b57cec5SDimitry Andric if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0) 2440b57cec5SDimitry Andric UP.MaxPercentThresholdBoost = UnrollMaxPercentThresholdBoost; 2450b57cec5SDimitry Andric if (UnrollMaxCount.getNumOccurrences() > 0) 2460b57cec5SDimitry Andric UP.MaxCount = UnrollMaxCount; 247cb14a3feSDimitry Andric if (UnrollMaxUpperBound.getNumOccurrences() > 0) 248cb14a3feSDimitry Andric UP.MaxUpperBound = UnrollMaxUpperBound; 2490b57cec5SDimitry Andric if (UnrollFullMaxCount.getNumOccurrences() > 0) 2500b57cec5SDimitry Andric UP.FullUnrollMaxCount = UnrollFullMaxCount; 2510b57cec5SDimitry Andric if (UnrollAllowPartial.getNumOccurrences() > 0) 2520b57cec5SDimitry Andric UP.Partial = UnrollAllowPartial; 2530b57cec5SDimitry Andric if (UnrollAllowRemainder.getNumOccurrences() > 0) 2540b57cec5SDimitry Andric UP.AllowRemainder = UnrollAllowRemainder; 2550b57cec5SDimitry Andric if (UnrollRuntime.getNumOccurrences() > 0) 2560b57cec5SDimitry Andric UP.Runtime = UnrollRuntime; 2570b57cec5SDimitry Andric if (UnrollMaxUpperBound == 0) 2580b57cec5SDimitry Andric UP.UpperBound = false; 2590b57cec5SDimitry Andric if (UnrollUnrollRemainder.getNumOccurrences() > 0) 2600b57cec5SDimitry Andric UP.UnrollRemainder = UnrollUnrollRemainder; 2615ffd83dbSDimitry Andric if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0) 2625ffd83dbSDimitry Andric UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze; 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric // Apply user values provided by argument 26581ad6265SDimitry Andric if (UserThreshold) { 2660b57cec5SDimitry Andric UP.Threshold = *UserThreshold; 2670b57cec5SDimitry Andric UP.PartialThreshold = *UserThreshold; 2680b57cec5SDimitry Andric } 26981ad6265SDimitry Andric if (UserCount) 2700b57cec5SDimitry Andric UP.Count = *UserCount; 27181ad6265SDimitry Andric if (UserAllowPartial) 2720b57cec5SDimitry Andric UP.Partial = *UserAllowPartial; 27381ad6265SDimitry Andric if (UserRuntime) 2740b57cec5SDimitry Andric UP.Runtime = *UserRuntime; 27581ad6265SDimitry Andric if (UserUpperBound) 2760b57cec5SDimitry Andric UP.UpperBound = *UserUpperBound; 27781ad6265SDimitry Andric if (UserFullUnrollMaxCount) 2788bcb0991SDimitry Andric UP.FullUnrollMaxCount = *UserFullUnrollMaxCount; 2790b57cec5SDimitry Andric 2800b57cec5SDimitry Andric return UP; 2810b57cec5SDimitry Andric } 2820b57cec5SDimitry Andric 2830b57cec5SDimitry Andric namespace { 2840b57cec5SDimitry Andric 2850b57cec5SDimitry Andric /// A struct to densely store the state of an instruction after unrolling at 2860b57cec5SDimitry Andric /// each iteration. 2870b57cec5SDimitry Andric /// 2880b57cec5SDimitry Andric /// This is designed to work like a tuple of <Instruction *, int> for the 2890b57cec5SDimitry Andric /// purposes of hashing and lookup, but to be able to associate two boolean 2900b57cec5SDimitry Andric /// states with each key. 2910b57cec5SDimitry Andric struct UnrolledInstState { 2920b57cec5SDimitry Andric Instruction *I; 2930b57cec5SDimitry Andric int Iteration : 30; 2940b57cec5SDimitry Andric unsigned IsFree : 1; 2950b57cec5SDimitry Andric unsigned IsCounted : 1; 2960b57cec5SDimitry Andric }; 2970b57cec5SDimitry Andric 2980b57cec5SDimitry Andric /// Hashing and equality testing for a set of the instruction states. 2990b57cec5SDimitry Andric struct UnrolledInstStateKeyInfo { 3000b57cec5SDimitry Andric using PtrInfo = DenseMapInfo<Instruction *>; 3010b57cec5SDimitry Andric using PairInfo = DenseMapInfo<std::pair<Instruction *, int>>; 3020b57cec5SDimitry Andric 3030b57cec5SDimitry Andric static inline UnrolledInstState getEmptyKey() { 3040b57cec5SDimitry Andric return {PtrInfo::getEmptyKey(), 0, 0, 0}; 3050b57cec5SDimitry Andric } 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric static inline UnrolledInstState getTombstoneKey() { 3080b57cec5SDimitry Andric return {PtrInfo::getTombstoneKey(), 0, 0, 0}; 3090b57cec5SDimitry Andric } 3100b57cec5SDimitry Andric 3110b57cec5SDimitry Andric static inline unsigned getHashValue(const UnrolledInstState &S) { 3120b57cec5SDimitry Andric return PairInfo::getHashValue({S.I, S.Iteration}); 3130b57cec5SDimitry Andric } 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric static inline bool isEqual(const UnrolledInstState &LHS, 3160b57cec5SDimitry Andric const UnrolledInstState &RHS) { 3170b57cec5SDimitry Andric return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration}); 3180b57cec5SDimitry Andric } 3190b57cec5SDimitry Andric }; 3200b57cec5SDimitry Andric 3210b57cec5SDimitry Andric struct EstimatedUnrollCost { 3220b57cec5SDimitry Andric /// The estimated cost after unrolling. 3230b57cec5SDimitry Andric unsigned UnrolledCost; 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric /// The estimated dynamic cost of executing the instructions in the 3260b57cec5SDimitry Andric /// rolled form. 3270b57cec5SDimitry Andric unsigned RolledDynamicCost; 3280b57cec5SDimitry Andric }; 3290b57cec5SDimitry Andric 330349cc55cSDimitry Andric struct PragmaInfo { 331349cc55cSDimitry Andric PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU) 332349cc55cSDimitry Andric : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC), 333349cc55cSDimitry Andric PragmaEnableUnroll(PEU) {} 334349cc55cSDimitry Andric const bool UserUnrollCount; 335349cc55cSDimitry Andric const bool PragmaFullUnroll; 336349cc55cSDimitry Andric const unsigned PragmaCount; 337349cc55cSDimitry Andric const bool PragmaEnableUnroll; 338349cc55cSDimitry Andric }; 339349cc55cSDimitry Andric 3400b57cec5SDimitry Andric } // end anonymous namespace 3410b57cec5SDimitry Andric 3420b57cec5SDimitry Andric /// Figure out if the loop is worth full unrolling. 3430b57cec5SDimitry Andric /// 3440b57cec5SDimitry Andric /// Complete loop unrolling can make some loads constant, and we need to know 3450b57cec5SDimitry Andric /// if that would expose any further optimization opportunities. This routine 3460b57cec5SDimitry Andric /// estimates this optimization. It computes cost of unrolled loop 3470b57cec5SDimitry Andric /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By 3480b57cec5SDimitry Andric /// dynamic cost we mean that we won't count costs of blocks that are known not 3490b57cec5SDimitry Andric /// to be executed (i.e. if we have a branch in the loop and we know that at the 3500b57cec5SDimitry Andric /// given iteration its condition would be resolved to true, we won't add up the 3510b57cec5SDimitry Andric /// cost of the 'false'-block). 3520b57cec5SDimitry Andric /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If 3530b57cec5SDimitry Andric /// the analysis failed (no benefits expected from the unrolling, or the loop is 354bdd1243dSDimitry Andric /// too big to analyze), the returned value is std::nullopt. 355bdd1243dSDimitry Andric static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost( 3560b57cec5SDimitry Andric const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, 3570b57cec5SDimitry Andric const SmallPtrSetImpl<const Value *> &EphValues, 3585ffd83dbSDimitry Andric const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize, 3595ffd83dbSDimitry Andric unsigned MaxIterationsCountToAnalyze) { 3600b57cec5SDimitry Andric // We want to be able to scale offsets by the trip count and add more offsets 3610b57cec5SDimitry Andric // to them without checking for overflows, and we already don't want to 3620b57cec5SDimitry Andric // analyze *massive* trip counts, so we force the max to be reasonably small. 3635ffd83dbSDimitry Andric assert(MaxIterationsCountToAnalyze < 3640b57cec5SDimitry Andric (unsigned)(std::numeric_limits<int>::max() / 2) && 3650b57cec5SDimitry Andric "The unroll iterations max is too large!"); 3660b57cec5SDimitry Andric 3670b57cec5SDimitry Andric // Only analyze inner loops. We can't properly estimate cost of nested loops 3680b57cec5SDimitry Andric // and we won't visit inner loops again anyway. 369e8d8bef9SDimitry Andric if (!L->isInnermost()) 370bdd1243dSDimitry Andric return std::nullopt; 3710b57cec5SDimitry Andric 3720b57cec5SDimitry Andric // Don't simulate loops with a big or unknown tripcount 3735ffd83dbSDimitry Andric if (!TripCount || TripCount > MaxIterationsCountToAnalyze) 374bdd1243dSDimitry Andric return std::nullopt; 3750b57cec5SDimitry Andric 3760b57cec5SDimitry Andric SmallSetVector<BasicBlock *, 16> BBWorklist; 3770b57cec5SDimitry Andric SmallSetVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitWorklist; 378fe6060f1SDimitry Andric DenseMap<Value *, Value *> SimplifiedValues; 379fe6060f1SDimitry Andric SmallVector<std::pair<Value *, Value *>, 4> SimplifiedInputValues; 3800b57cec5SDimitry Andric 3810b57cec5SDimitry Andric // The estimated cost of the unrolled form of the loop. We try to estimate 3820b57cec5SDimitry Andric // this by simplifying as much as we can while computing the estimate. 383fe6060f1SDimitry Andric InstructionCost UnrolledCost = 0; 3840b57cec5SDimitry Andric 3850b57cec5SDimitry Andric // We also track the estimated dynamic (that is, actually executed) cost in 3860b57cec5SDimitry Andric // the rolled form. This helps identify cases when the savings from unrolling 3870b57cec5SDimitry Andric // aren't just exposing dead control flows, but actual reduced dynamic 3880b57cec5SDimitry Andric // instructions due to the simplifications which we expect to occur after 3890b57cec5SDimitry Andric // unrolling. 390fe6060f1SDimitry Andric InstructionCost RolledDynamicCost = 0; 3910b57cec5SDimitry Andric 3920b57cec5SDimitry Andric // We track the simplification of each instruction in each iteration. We use 3930b57cec5SDimitry Andric // this to recursively merge costs into the unrolled cost on-demand so that 3940b57cec5SDimitry Andric // we don't count the cost of any dead code. This is essentially a map from 3950b57cec5SDimitry Andric // <instruction, int> to <bool, bool>, but stored as a densely packed struct. 3960b57cec5SDimitry Andric DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap; 3970b57cec5SDimitry Andric 3980b57cec5SDimitry Andric // A small worklist used to accumulate cost of instructions from each 3990b57cec5SDimitry Andric // observable and reached root in the loop. 4000b57cec5SDimitry Andric SmallVector<Instruction *, 16> CostWorklist; 4010b57cec5SDimitry Andric 4020b57cec5SDimitry Andric // PHI-used worklist used between iterations while accumulating cost. 4030b57cec5SDimitry Andric SmallVector<Instruction *, 4> PHIUsedList; 4040b57cec5SDimitry Andric 4050b57cec5SDimitry Andric // Helper function to accumulate cost for instructions in the loop. 4060b57cec5SDimitry Andric auto AddCostRecursively = [&](Instruction &RootI, int Iteration) { 4070b57cec5SDimitry Andric assert(Iteration >= 0 && "Cannot have a negative iteration!"); 4080b57cec5SDimitry Andric assert(CostWorklist.empty() && "Must start with an empty cost list"); 4090b57cec5SDimitry Andric assert(PHIUsedList.empty() && "Must start with an empty phi used list"); 4100b57cec5SDimitry Andric CostWorklist.push_back(&RootI); 411e8d8bef9SDimitry Andric TargetTransformInfo::TargetCostKind CostKind = 412e8d8bef9SDimitry Andric RootI.getFunction()->hasMinSize() ? 413e8d8bef9SDimitry Andric TargetTransformInfo::TCK_CodeSize : 414e8d8bef9SDimitry Andric TargetTransformInfo::TCK_SizeAndLatency; 4150b57cec5SDimitry Andric for (;; --Iteration) { 4160b57cec5SDimitry Andric do { 4170b57cec5SDimitry Andric Instruction *I = CostWorklist.pop_back_val(); 4180b57cec5SDimitry Andric 4190b57cec5SDimitry Andric // InstCostMap only uses I and Iteration as a key, the other two values 4200b57cec5SDimitry Andric // don't matter here. 4210b57cec5SDimitry Andric auto CostIter = InstCostMap.find({I, Iteration, 0, 0}); 4220b57cec5SDimitry Andric if (CostIter == InstCostMap.end()) 4230b57cec5SDimitry Andric // If an input to a PHI node comes from a dead path through the loop 4240b57cec5SDimitry Andric // we may have no cost data for it here. What that actually means is 4250b57cec5SDimitry Andric // that it is free. 4260b57cec5SDimitry Andric continue; 4270b57cec5SDimitry Andric auto &Cost = *CostIter; 4280b57cec5SDimitry Andric if (Cost.IsCounted) 4290b57cec5SDimitry Andric // Already counted this instruction. 4300b57cec5SDimitry Andric continue; 4310b57cec5SDimitry Andric 4320b57cec5SDimitry Andric // Mark that we are counting the cost of this instruction now. 4330b57cec5SDimitry Andric Cost.IsCounted = true; 4340b57cec5SDimitry Andric 4350b57cec5SDimitry Andric // If this is a PHI node in the loop header, just add it to the PHI set. 4360b57cec5SDimitry Andric if (auto *PhiI = dyn_cast<PHINode>(I)) 4370b57cec5SDimitry Andric if (PhiI->getParent() == L->getHeader()) { 4380b57cec5SDimitry Andric assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they " 4390b57cec5SDimitry Andric "inherently simplify during unrolling."); 4400b57cec5SDimitry Andric if (Iteration == 0) 4410b57cec5SDimitry Andric continue; 4420b57cec5SDimitry Andric 4430b57cec5SDimitry Andric // Push the incoming value from the backedge into the PHI used list 4440b57cec5SDimitry Andric // if it is an in-loop instruction. We'll use this to populate the 4450b57cec5SDimitry Andric // cost worklist for the next iteration (as we count backwards). 4460b57cec5SDimitry Andric if (auto *OpI = dyn_cast<Instruction>( 4470b57cec5SDimitry Andric PhiI->getIncomingValueForBlock(L->getLoopLatch()))) 4480b57cec5SDimitry Andric if (L->contains(OpI)) 4490b57cec5SDimitry Andric PHIUsedList.push_back(OpI); 4500b57cec5SDimitry Andric continue; 4510b57cec5SDimitry Andric } 4520b57cec5SDimitry Andric 4530b57cec5SDimitry Andric // First accumulate the cost of this instruction. 4540b57cec5SDimitry Andric if (!Cost.IsFree) { 455*0fca6ea1SDimitry Andric // Consider simplified operands in instruction cost. 456*0fca6ea1SDimitry Andric SmallVector<Value *, 4> Operands; 457*0fca6ea1SDimitry Andric transform(I->operands(), std::back_inserter(Operands), 458*0fca6ea1SDimitry Andric [&](Value *Op) { 459*0fca6ea1SDimitry Andric if (auto Res = SimplifiedValues.lookup(Op)) 460*0fca6ea1SDimitry Andric return Res; 461*0fca6ea1SDimitry Andric return Op; 462*0fca6ea1SDimitry Andric }); 463*0fca6ea1SDimitry Andric UnrolledCost += TTI.getInstructionCost(I, Operands, CostKind); 4640b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration " 4650b57cec5SDimitry Andric << Iteration << "): "); 4660b57cec5SDimitry Andric LLVM_DEBUG(I->dump()); 4670b57cec5SDimitry Andric } 4680b57cec5SDimitry Andric 4690b57cec5SDimitry Andric // We must count the cost of every operand which is not free, 4700b57cec5SDimitry Andric // recursively. If we reach a loop PHI node, simply add it to the set 4710b57cec5SDimitry Andric // to be considered on the next iteration (backwards!). 4720b57cec5SDimitry Andric for (Value *Op : I->operands()) { 4730b57cec5SDimitry Andric // Check whether this operand is free due to being a constant or 4740b57cec5SDimitry Andric // outside the loop. 4750b57cec5SDimitry Andric auto *OpI = dyn_cast<Instruction>(Op); 4760b57cec5SDimitry Andric if (!OpI || !L->contains(OpI)) 4770b57cec5SDimitry Andric continue; 4780b57cec5SDimitry Andric 4790b57cec5SDimitry Andric // Otherwise accumulate its cost. 4800b57cec5SDimitry Andric CostWorklist.push_back(OpI); 4810b57cec5SDimitry Andric } 4820b57cec5SDimitry Andric } while (!CostWorklist.empty()); 4830b57cec5SDimitry Andric 4840b57cec5SDimitry Andric if (PHIUsedList.empty()) 4850b57cec5SDimitry Andric // We've exhausted the search. 4860b57cec5SDimitry Andric break; 4870b57cec5SDimitry Andric 4880b57cec5SDimitry Andric assert(Iteration > 0 && 4890b57cec5SDimitry Andric "Cannot track PHI-used values past the first iteration!"); 4900b57cec5SDimitry Andric CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end()); 4910b57cec5SDimitry Andric PHIUsedList.clear(); 4920b57cec5SDimitry Andric } 4930b57cec5SDimitry Andric }; 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric // Ensure that we don't violate the loop structure invariants relied on by 4960b57cec5SDimitry Andric // this analysis. 4970b57cec5SDimitry Andric assert(L->isLoopSimplifyForm() && "Must put loop into normal form first."); 4980b57cec5SDimitry Andric assert(L->isLCSSAForm(DT) && 4990b57cec5SDimitry Andric "Must have loops in LCSSA form to track live-out values."); 5000b57cec5SDimitry Andric 5010b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n"); 5020b57cec5SDimitry Andric 503e8d8bef9SDimitry Andric TargetTransformInfo::TargetCostKind CostKind = 504e8d8bef9SDimitry Andric L->getHeader()->getParent()->hasMinSize() ? 505e8d8bef9SDimitry Andric TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_SizeAndLatency; 5060b57cec5SDimitry Andric // Simulate execution of each iteration of the loop counting instructions, 5070b57cec5SDimitry Andric // which would be simplified. 5080b57cec5SDimitry Andric // Since the same load will take different values on different iterations, 5090b57cec5SDimitry Andric // we literally have to go through all loop's iterations. 5100b57cec5SDimitry Andric for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { 5110b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n"); 5120b57cec5SDimitry Andric 5130b57cec5SDimitry Andric // Prepare for the iteration by collecting any simplified entry or backedge 5140b57cec5SDimitry Andric // inputs. 5150b57cec5SDimitry Andric for (Instruction &I : *L->getHeader()) { 5160b57cec5SDimitry Andric auto *PHI = dyn_cast<PHINode>(&I); 5170b57cec5SDimitry Andric if (!PHI) 5180b57cec5SDimitry Andric break; 5190b57cec5SDimitry Andric 5200b57cec5SDimitry Andric // The loop header PHI nodes must have exactly two input: one from the 5210b57cec5SDimitry Andric // loop preheader and one from the loop latch. 5220b57cec5SDimitry Andric assert( 5230b57cec5SDimitry Andric PHI->getNumIncomingValues() == 2 && 5240b57cec5SDimitry Andric "Must have an incoming value only for the preheader and the latch."); 5250b57cec5SDimitry Andric 5260b57cec5SDimitry Andric Value *V = PHI->getIncomingValueForBlock( 5270b57cec5SDimitry Andric Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch()); 528fe6060f1SDimitry Andric if (Iteration != 0 && SimplifiedValues.count(V)) 529fe6060f1SDimitry Andric V = SimplifiedValues.lookup(V); 530fe6060f1SDimitry Andric SimplifiedInputValues.push_back({PHI, V}); 5310b57cec5SDimitry Andric } 5320b57cec5SDimitry Andric 5330b57cec5SDimitry Andric // Now clear and re-populate the map for the next iteration. 5340b57cec5SDimitry Andric SimplifiedValues.clear(); 5350b57cec5SDimitry Andric while (!SimplifiedInputValues.empty()) 5360b57cec5SDimitry Andric SimplifiedValues.insert(SimplifiedInputValues.pop_back_val()); 5370b57cec5SDimitry Andric 5380b57cec5SDimitry Andric UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L); 5390b57cec5SDimitry Andric 5400b57cec5SDimitry Andric BBWorklist.clear(); 5410b57cec5SDimitry Andric BBWorklist.insert(L->getHeader()); 5420b57cec5SDimitry Andric // Note that we *must not* cache the size, this loop grows the worklist. 5430b57cec5SDimitry Andric for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { 5440b57cec5SDimitry Andric BasicBlock *BB = BBWorklist[Idx]; 5450b57cec5SDimitry Andric 5460b57cec5SDimitry Andric // Visit all instructions in the given basic block and try to simplify 5470b57cec5SDimitry Andric // it. We don't change the actual IR, just count optimization 5480b57cec5SDimitry Andric // opportunities. 5490b57cec5SDimitry Andric for (Instruction &I : *BB) { 5500b57cec5SDimitry Andric // These won't get into the final code - don't even try calculating the 5510b57cec5SDimitry Andric // cost for them. 5520b57cec5SDimitry Andric if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I)) 5530b57cec5SDimitry Andric continue; 5540b57cec5SDimitry Andric 5550b57cec5SDimitry Andric // Track this instruction's expected baseline cost when executing the 5560b57cec5SDimitry Andric // rolled loop form. 557bdd1243dSDimitry Andric RolledDynamicCost += TTI.getInstructionCost(&I, CostKind); 5580b57cec5SDimitry Andric 5590b57cec5SDimitry Andric // Visit the instruction to analyze its loop cost after unrolling, 5600b57cec5SDimitry Andric // and if the visitor returns true, mark the instruction as free after 5610b57cec5SDimitry Andric // unrolling and continue. 5620b57cec5SDimitry Andric bool IsFree = Analyzer.visit(I); 5630b57cec5SDimitry Andric bool Inserted = InstCostMap.insert({&I, (int)Iteration, 5640b57cec5SDimitry Andric (unsigned)IsFree, 5650b57cec5SDimitry Andric /*IsCounted*/ false}).second; 5660b57cec5SDimitry Andric (void)Inserted; 5670b57cec5SDimitry Andric assert(Inserted && "Cannot have a state for an unvisited instruction!"); 5680b57cec5SDimitry Andric 5690b57cec5SDimitry Andric if (IsFree) 5700b57cec5SDimitry Andric continue; 5710b57cec5SDimitry Andric 5720b57cec5SDimitry Andric // Can't properly model a cost of a call. 5730b57cec5SDimitry Andric // FIXME: With a proper cost model we should be able to do it. 5740b57cec5SDimitry Andric if (auto *CI = dyn_cast<CallInst>(&I)) { 5750b57cec5SDimitry Andric const Function *Callee = CI->getCalledFunction(); 5760b57cec5SDimitry Andric if (!Callee || TTI.isLoweredToCall(Callee)) { 5770b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n"); 578bdd1243dSDimitry Andric return std::nullopt; 5790b57cec5SDimitry Andric } 5800b57cec5SDimitry Andric } 5810b57cec5SDimitry Andric 5820b57cec5SDimitry Andric // If the instruction might have a side-effect recursively account for 5830b57cec5SDimitry Andric // the cost of it and all the instructions leading up to it. 5840b57cec5SDimitry Andric if (I.mayHaveSideEffects()) 5850b57cec5SDimitry Andric AddCostRecursively(I, Iteration); 5860b57cec5SDimitry Andric 5870b57cec5SDimitry Andric // If unrolled body turns out to be too big, bail out. 5880b57cec5SDimitry Andric if (UnrolledCost > MaxUnrolledLoopSize) { 5890b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n" 5900b57cec5SDimitry Andric << " UnrolledCost: " << UnrolledCost 5910b57cec5SDimitry Andric << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize 5920b57cec5SDimitry Andric << "\n"); 593bdd1243dSDimitry Andric return std::nullopt; 5940b57cec5SDimitry Andric } 5950b57cec5SDimitry Andric } 5960b57cec5SDimitry Andric 5970b57cec5SDimitry Andric Instruction *TI = BB->getTerminator(); 5980b57cec5SDimitry Andric 599fe6060f1SDimitry Andric auto getSimplifiedConstant = [&](Value *V) -> Constant * { 600fe6060f1SDimitry Andric if (SimplifiedValues.count(V)) 601fe6060f1SDimitry Andric V = SimplifiedValues.lookup(V); 602fe6060f1SDimitry Andric return dyn_cast<Constant>(V); 603fe6060f1SDimitry Andric }; 604fe6060f1SDimitry Andric 6050b57cec5SDimitry Andric // Add in the live successors by first checking whether we have terminator 6060b57cec5SDimitry Andric // that may be simplified based on the values simplified by this call. 6070b57cec5SDimitry Andric BasicBlock *KnownSucc = nullptr; 6080b57cec5SDimitry Andric if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 6090b57cec5SDimitry Andric if (BI->isConditional()) { 610fe6060f1SDimitry Andric if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) { 6110b57cec5SDimitry Andric // Just take the first successor if condition is undef 6120b57cec5SDimitry Andric if (isa<UndefValue>(SimpleCond)) 6130b57cec5SDimitry Andric KnownSucc = BI->getSuccessor(0); 6140b57cec5SDimitry Andric else if (ConstantInt *SimpleCondVal = 6150b57cec5SDimitry Andric dyn_cast<ConstantInt>(SimpleCond)) 6160b57cec5SDimitry Andric KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0); 6170b57cec5SDimitry Andric } 6180b57cec5SDimitry Andric } 6190b57cec5SDimitry Andric } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { 620fe6060f1SDimitry Andric if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) { 6210b57cec5SDimitry Andric // Just take the first successor if condition is undef 6220b57cec5SDimitry Andric if (isa<UndefValue>(SimpleCond)) 6230b57cec5SDimitry Andric KnownSucc = SI->getSuccessor(0); 6240b57cec5SDimitry Andric else if (ConstantInt *SimpleCondVal = 6250b57cec5SDimitry Andric dyn_cast<ConstantInt>(SimpleCond)) 6260b57cec5SDimitry Andric KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor(); 6270b57cec5SDimitry Andric } 6280b57cec5SDimitry Andric } 6290b57cec5SDimitry Andric if (KnownSucc) { 6300b57cec5SDimitry Andric if (L->contains(KnownSucc)) 6310b57cec5SDimitry Andric BBWorklist.insert(KnownSucc); 6320b57cec5SDimitry Andric else 6330b57cec5SDimitry Andric ExitWorklist.insert({BB, KnownSucc}); 6340b57cec5SDimitry Andric continue; 6350b57cec5SDimitry Andric } 6360b57cec5SDimitry Andric 6370b57cec5SDimitry Andric // Add BB's successors to the worklist. 6380b57cec5SDimitry Andric for (BasicBlock *Succ : successors(BB)) 6390b57cec5SDimitry Andric if (L->contains(Succ)) 6400b57cec5SDimitry Andric BBWorklist.insert(Succ); 6410b57cec5SDimitry Andric else 6420b57cec5SDimitry Andric ExitWorklist.insert({BB, Succ}); 6430b57cec5SDimitry Andric AddCostRecursively(*TI, Iteration); 6440b57cec5SDimitry Andric } 6450b57cec5SDimitry Andric 6460b57cec5SDimitry Andric // If we found no optimization opportunities on the first iteration, we 6470b57cec5SDimitry Andric // won't find them on later ones too. 6480b57cec5SDimitry Andric if (UnrolledCost == RolledDynamicCost) { 6490b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n" 6500b57cec5SDimitry Andric << " UnrolledCost: " << UnrolledCost << "\n"); 651bdd1243dSDimitry Andric return std::nullopt; 6520b57cec5SDimitry Andric } 6530b57cec5SDimitry Andric } 6540b57cec5SDimitry Andric 6550b57cec5SDimitry Andric while (!ExitWorklist.empty()) { 6560b57cec5SDimitry Andric BasicBlock *ExitingBB, *ExitBB; 6570b57cec5SDimitry Andric std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val(); 6580b57cec5SDimitry Andric 6590b57cec5SDimitry Andric for (Instruction &I : *ExitBB) { 6600b57cec5SDimitry Andric auto *PN = dyn_cast<PHINode>(&I); 6610b57cec5SDimitry Andric if (!PN) 6620b57cec5SDimitry Andric break; 6630b57cec5SDimitry Andric 6640b57cec5SDimitry Andric Value *Op = PN->getIncomingValueForBlock(ExitingBB); 6650b57cec5SDimitry Andric if (auto *OpI = dyn_cast<Instruction>(Op)) 6660b57cec5SDimitry Andric if (L->contains(OpI)) 6670b57cec5SDimitry Andric AddCostRecursively(*OpI, TripCount - 1); 6680b57cec5SDimitry Andric } 6690b57cec5SDimitry Andric } 6700b57cec5SDimitry Andric 671fe6060f1SDimitry Andric assert(UnrolledCost.isValid() && RolledDynamicCost.isValid() && 672fe6060f1SDimitry Andric "All instructions must have a valid cost, whether the " 673fe6060f1SDimitry Andric "loop is rolled or unrolled."); 674fe6060f1SDimitry Andric 6750b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Analysis finished:\n" 6760b57cec5SDimitry Andric << "UnrolledCost: " << UnrolledCost << ", " 6770b57cec5SDimitry Andric << "RolledDynamicCost: " << RolledDynamicCost << "\n"); 678fe6060f1SDimitry Andric return {{unsigned(*UnrolledCost.getValue()), 679fe6060f1SDimitry Andric unsigned(*RolledDynamicCost.getValue())}}; 6800b57cec5SDimitry Andric } 6810b57cec5SDimitry Andric 6825f757f3fSDimitry Andric UnrollCostEstimator::UnrollCostEstimator( 6835f757f3fSDimitry Andric const Loop *L, const TargetTransformInfo &TTI, 6840b57cec5SDimitry Andric const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) { 6850b57cec5SDimitry Andric CodeMetrics Metrics; 6860b57cec5SDimitry Andric for (BasicBlock *BB : L->blocks()) 687*0fca6ea1SDimitry Andric Metrics.analyzeBasicBlock(BB, TTI, EphValues, /* PrepareForLTO= */ false, 688*0fca6ea1SDimitry Andric L); 6895f757f3fSDimitry Andric NumInlineCandidates = Metrics.NumInlineCandidates; 6900b57cec5SDimitry Andric NotDuplicatable = Metrics.notDuplicatable; 691*0fca6ea1SDimitry Andric Convergence = Metrics.Convergence; 6925f757f3fSDimitry Andric LoopSize = Metrics.NumInsts; 693*0fca6ea1SDimitry Andric ConvergenceAllowsRuntime = 694*0fca6ea1SDimitry Andric Metrics.Convergence != ConvergenceKind::Uncontrolled && 695*0fca6ea1SDimitry Andric !getLoopConvergenceHeart(L); 6960b57cec5SDimitry Andric 6970b57cec5SDimitry Andric // Don't allow an estimate of size zero. This would allows unrolling of loops 6980b57cec5SDimitry Andric // with huge iteration counts, which is a compile time problem even if it's 6990b57cec5SDimitry Andric // not a problem for code quality. Also, the code using this size may assume 7000b57cec5SDimitry Andric // that each loop has at least three instructions (likely a conditional 7010b57cec5SDimitry Andric // branch, a comparison feeding that branch, and some kind of loop increment 7020b57cec5SDimitry Andric // feeding that comparison instruction). 703bdd1243dSDimitry Andric if (LoopSize.isValid() && LoopSize < BEInsns + 1) 70481ad6265SDimitry Andric // This is an open coded max() on InstructionCost 70581ad6265SDimitry Andric LoopSize = BEInsns + 1; 7065f757f3fSDimitry Andric } 7070b57cec5SDimitry Andric 708*0fca6ea1SDimitry Andric bool UnrollCostEstimator::canUnroll() const { 709*0fca6ea1SDimitry Andric switch (Convergence) { 710*0fca6ea1SDimitry Andric case ConvergenceKind::ExtendedLoop: 711*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " Convergence prevents unrolling.\n"); 712*0fca6ea1SDimitry Andric return false; 713*0fca6ea1SDimitry Andric default: 714*0fca6ea1SDimitry Andric break; 715*0fca6ea1SDimitry Andric } 716*0fca6ea1SDimitry Andric if (!LoopSize.isValid()) { 717*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " Invalid loop size prevents unrolling.\n"); 718*0fca6ea1SDimitry Andric return false; 719*0fca6ea1SDimitry Andric } 720*0fca6ea1SDimitry Andric if (NotDuplicatable) { 721*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " Non-duplicatable blocks prevent unrolling.\n"); 722*0fca6ea1SDimitry Andric return false; 723*0fca6ea1SDimitry Andric } 724*0fca6ea1SDimitry Andric return true; 725*0fca6ea1SDimitry Andric } 726*0fca6ea1SDimitry Andric 7275f757f3fSDimitry Andric uint64_t UnrollCostEstimator::getUnrolledLoopSize( 7285f757f3fSDimitry Andric const TargetTransformInfo::UnrollingPreferences &UP, 7295f757f3fSDimitry Andric unsigned CountOverwrite) const { 7305f757f3fSDimitry Andric unsigned LS = *LoopSize.getValue(); 7315f757f3fSDimitry Andric assert(LS >= UP.BEInsns && "LoopSize should not be less than BEInsns!"); 7325f757f3fSDimitry Andric if (CountOverwrite) 7335f757f3fSDimitry Andric return static_cast<uint64_t>(LS - UP.BEInsns) * CountOverwrite + UP.BEInsns; 7345f757f3fSDimitry Andric else 7355f757f3fSDimitry Andric return static_cast<uint64_t>(LS - UP.BEInsns) * UP.Count + UP.BEInsns; 7360b57cec5SDimitry Andric } 7370b57cec5SDimitry Andric 7380b57cec5SDimitry Andric // Returns the loop hint metadata node with the given name (for example, 7390b57cec5SDimitry Andric // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is 7400b57cec5SDimitry Andric // returned. 7415ffd83dbSDimitry Andric static MDNode *getUnrollMetadataForLoop(const Loop *L, StringRef Name) { 7420b57cec5SDimitry Andric if (MDNode *LoopID = L->getLoopID()) 7430b57cec5SDimitry Andric return GetUnrollMetadata(LoopID, Name); 7440b57cec5SDimitry Andric return nullptr; 7450b57cec5SDimitry Andric } 7460b57cec5SDimitry Andric 7470b57cec5SDimitry Andric // Returns true if the loop has an unroll(full) pragma. 7485ffd83dbSDimitry Andric static bool hasUnrollFullPragma(const Loop *L) { 7495ffd83dbSDimitry Andric return getUnrollMetadataForLoop(L, "llvm.loop.unroll.full"); 7500b57cec5SDimitry Andric } 7510b57cec5SDimitry Andric 7520b57cec5SDimitry Andric // Returns true if the loop has an unroll(enable) pragma. This metadata is used 7530b57cec5SDimitry Andric // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives. 7545ffd83dbSDimitry Andric static bool hasUnrollEnablePragma(const Loop *L) { 7555ffd83dbSDimitry Andric return getUnrollMetadataForLoop(L, "llvm.loop.unroll.enable"); 7560b57cec5SDimitry Andric } 7570b57cec5SDimitry Andric 7580b57cec5SDimitry Andric // Returns true if the loop has an runtime unroll(disable) pragma. 7595ffd83dbSDimitry Andric static bool hasRuntimeUnrollDisablePragma(const Loop *L) { 7605ffd83dbSDimitry Andric return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable"); 7610b57cec5SDimitry Andric } 7620b57cec5SDimitry Andric 7630b57cec5SDimitry Andric // If loop has an unroll_count pragma return the (necessarily 7640b57cec5SDimitry Andric // positive) value from the pragma. Otherwise return 0. 7655ffd83dbSDimitry Andric static unsigned unrollCountPragmaValue(const Loop *L) { 7665ffd83dbSDimitry Andric MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll.count"); 7670b57cec5SDimitry Andric if (MD) { 7680b57cec5SDimitry Andric assert(MD->getNumOperands() == 2 && 7690b57cec5SDimitry Andric "Unroll count hint metadata should have two operands."); 7700b57cec5SDimitry Andric unsigned Count = 7710b57cec5SDimitry Andric mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue(); 7720b57cec5SDimitry Andric assert(Count >= 1 && "Unroll count must be positive."); 7730b57cec5SDimitry Andric return Count; 7740b57cec5SDimitry Andric } 7750b57cec5SDimitry Andric return 0; 7760b57cec5SDimitry Andric } 7770b57cec5SDimitry Andric 7780b57cec5SDimitry Andric // Computes the boosting factor for complete unrolling. 7790b57cec5SDimitry Andric // If fully unrolling the loop would save a lot of RolledDynamicCost, it would 7800b57cec5SDimitry Andric // be beneficial to fully unroll the loop even if unrolledcost is large. We 7810b57cec5SDimitry Andric // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust 7820b57cec5SDimitry Andric // the unroll threshold. 7830b57cec5SDimitry Andric static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, 7840b57cec5SDimitry Andric unsigned MaxPercentThresholdBoost) { 7850b57cec5SDimitry Andric if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100) 7860b57cec5SDimitry Andric return 100; 7870b57cec5SDimitry Andric else if (Cost.UnrolledCost != 0) 7880b57cec5SDimitry Andric // The boosting factor is RolledDynamicCost / UnrolledCost 7890b57cec5SDimitry Andric return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost, 7900b57cec5SDimitry Andric MaxPercentThresholdBoost); 7910b57cec5SDimitry Andric else 7920b57cec5SDimitry Andric return MaxPercentThresholdBoost; 7930b57cec5SDimitry Andric } 7940b57cec5SDimitry Andric 795bdd1243dSDimitry Andric static std::optional<unsigned> 796349cc55cSDimitry Andric shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, 797349cc55cSDimitry Andric const unsigned TripMultiple, const unsigned TripCount, 7985f757f3fSDimitry Andric unsigned MaxTripCount, const UnrollCostEstimator UCE, 799349cc55cSDimitry Andric const TargetTransformInfo::UnrollingPreferences &UP) { 800349cc55cSDimitry Andric 801349cc55cSDimitry Andric // Using unroll pragma 802349cc55cSDimitry Andric // 1st priority is unroll count set by "unroll-count" option. 803349cc55cSDimitry Andric 804349cc55cSDimitry Andric if (PInfo.UserUnrollCount) { 805349cc55cSDimitry Andric if (UP.AllowRemainder && 806349cc55cSDimitry Andric UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold) 807349cc55cSDimitry Andric return (unsigned)UnrollCount; 808349cc55cSDimitry Andric } 809349cc55cSDimitry Andric 810349cc55cSDimitry Andric // 2nd priority is unroll count set by pragma. 811349cc55cSDimitry Andric if (PInfo.PragmaCount > 0) { 81281ad6265SDimitry Andric if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0))) 813349cc55cSDimitry Andric return PInfo.PragmaCount; 814349cc55cSDimitry Andric } 815349cc55cSDimitry Andric 816*0fca6ea1SDimitry Andric if (PInfo.PragmaFullUnroll && TripCount != 0) { 817*0fca6ea1SDimitry Andric // Certain cases with UBSAN can cause trip count to be calculated as 818*0fca6ea1SDimitry Andric // INT_MAX, Block full unrolling at a reasonable limit so that the compiler 819*0fca6ea1SDimitry Andric // doesn't hang trying to unroll the loop. See PR77842 820*0fca6ea1SDimitry Andric if (TripCount > PragmaUnrollFullMaxIterations) { 821*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll; trip count is too large\n"); 822*0fca6ea1SDimitry Andric return std::nullopt; 823*0fca6ea1SDimitry Andric } 824*0fca6ea1SDimitry Andric 825349cc55cSDimitry Andric return TripCount; 826*0fca6ea1SDimitry Andric } 82781ad6265SDimitry Andric 8285f757f3fSDimitry Andric if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount && 829cb14a3feSDimitry Andric MaxTripCount <= UP.MaxUpperBound) 8305f757f3fSDimitry Andric return MaxTripCount; 8315f757f3fSDimitry Andric 832349cc55cSDimitry Andric // if didn't return until here, should continue to other priorties 833bdd1243dSDimitry Andric return std::nullopt; 834349cc55cSDimitry Andric } 835349cc55cSDimitry Andric 836bdd1243dSDimitry Andric static std::optional<unsigned> shouldFullUnroll( 837349cc55cSDimitry Andric Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, 838349cc55cSDimitry Andric ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues, 839349cc55cSDimitry Andric const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, 840349cc55cSDimitry Andric const TargetTransformInfo::UnrollingPreferences &UP) { 8414824e7fdSDimitry Andric assert(FullUnrollTripCount && "should be non-zero!"); 842349cc55cSDimitry Andric 8434824e7fdSDimitry Andric if (FullUnrollTripCount > UP.FullUnrollMaxCount) 844bdd1243dSDimitry Andric return std::nullopt; 8454824e7fdSDimitry Andric 846349cc55cSDimitry Andric // When computing the unrolled size, note that BEInsns are not replicated 847349cc55cSDimitry Andric // like the rest of the loop body. 8484824e7fdSDimitry Andric if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) 849349cc55cSDimitry Andric return FullUnrollTripCount; 850349cc55cSDimitry Andric 851349cc55cSDimitry Andric // The loop isn't that small, but we still can fully unroll it if that 852349cc55cSDimitry Andric // helps to remove a significant number of instructions. 853349cc55cSDimitry Andric // To check that, run additional analysis on the loop. 854bdd1243dSDimitry Andric if (std::optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost( 855349cc55cSDimitry Andric L, FullUnrollTripCount, DT, SE, EphValues, TTI, 856349cc55cSDimitry Andric UP.Threshold * UP.MaxPercentThresholdBoost / 100, 857349cc55cSDimitry Andric UP.MaxIterationsCountToAnalyze)) { 858349cc55cSDimitry Andric unsigned Boost = 859349cc55cSDimitry Andric getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); 8604824e7fdSDimitry Andric if (Cost->UnrolledCost < UP.Threshold * Boost / 100) 861349cc55cSDimitry Andric return FullUnrollTripCount; 862349cc55cSDimitry Andric } 863bdd1243dSDimitry Andric return std::nullopt; 864349cc55cSDimitry Andric } 865349cc55cSDimitry Andric 866bdd1243dSDimitry Andric static std::optional<unsigned> 867349cc55cSDimitry Andric shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, 868349cc55cSDimitry Andric const UnrollCostEstimator UCE, 869349cc55cSDimitry Andric const TargetTransformInfo::UnrollingPreferences &UP) { 870349cc55cSDimitry Andric 8714824e7fdSDimitry Andric if (!TripCount) 872bdd1243dSDimitry Andric return std::nullopt; 8734824e7fdSDimitry Andric 874349cc55cSDimitry Andric if (!UP.Partial) { 875349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << " will not try to unroll partially because " 876349cc55cSDimitry Andric << "-unroll-allow-partial not given\n"); 8774824e7fdSDimitry Andric return 0; 878349cc55cSDimitry Andric } 8794824e7fdSDimitry Andric unsigned count = UP.Count; 880349cc55cSDimitry Andric if (count == 0) 881349cc55cSDimitry Andric count = TripCount; 882349cc55cSDimitry Andric if (UP.PartialThreshold != NoThreshold) { 883349cc55cSDimitry Andric // Reduce unroll count to be modulo of TripCount for partial unrolling. 884349cc55cSDimitry Andric if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) 885349cc55cSDimitry Andric count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) / 886349cc55cSDimitry Andric (LoopSize - UP.BEInsns); 887349cc55cSDimitry Andric if (count > UP.MaxCount) 888349cc55cSDimitry Andric count = UP.MaxCount; 889349cc55cSDimitry Andric while (count != 0 && TripCount % count != 0) 890349cc55cSDimitry Andric count--; 891349cc55cSDimitry Andric if (UP.AllowRemainder && count <= 1) { 892349cc55cSDimitry Andric // If there is no Count that is modulo of TripCount, set Count to 893349cc55cSDimitry Andric // largest power-of-two factor that satisfies the threshold limit. 894349cc55cSDimitry Andric // As we'll create fixup loop, do the type of unrolling only if 895349cc55cSDimitry Andric // remainder loop is allowed. 896349cc55cSDimitry Andric count = UP.DefaultUnrollRuntimeCount; 897349cc55cSDimitry Andric while (count != 0 && 898349cc55cSDimitry Andric UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) 899349cc55cSDimitry Andric count >>= 1; 900349cc55cSDimitry Andric } 901349cc55cSDimitry Andric if (count < 2) { 902349cc55cSDimitry Andric count = 0; 903349cc55cSDimitry Andric } 904349cc55cSDimitry Andric } else { 905349cc55cSDimitry Andric count = TripCount; 906349cc55cSDimitry Andric } 907349cc55cSDimitry Andric if (count > UP.MaxCount) 908349cc55cSDimitry Andric count = UP.MaxCount; 909349cc55cSDimitry Andric 910349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n"); 911349cc55cSDimitry Andric 912349cc55cSDimitry Andric return count; 913349cc55cSDimitry Andric } 9140b57cec5SDimitry Andric // Returns true if unroll count was set explicitly. 9150b57cec5SDimitry Andric // Calculates unroll count and writes it to UP.Count. 9160b57cec5SDimitry Andric // Unless IgnoreUser is true, will also use metadata and command-line options 9170b57cec5SDimitry Andric // that are specific to to the LoopUnroll pass (which, for instance, are 9180b57cec5SDimitry Andric // irrelevant for the LoopUnrollAndJam pass). 9190b57cec5SDimitry Andric // FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes 9200b57cec5SDimitry Andric // many LoopUnroll-specific options. The shared functionality should be 9210b57cec5SDimitry Andric // refactored into it own function. 9220b57cec5SDimitry Andric bool llvm::computeUnrollCount( 9230b57cec5SDimitry Andric Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, 9245f757f3fSDimitry Andric AssumptionCache *AC, ScalarEvolution &SE, 9255f757f3fSDimitry Andric const SmallPtrSetImpl<const Value *> &EphValues, 926fe6060f1SDimitry Andric OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, 9275f757f3fSDimitry Andric bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, 9285ffd83dbSDimitry Andric TargetTransformInfo::UnrollingPreferences &UP, 9295ffd83dbSDimitry Andric TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) { 9300b57cec5SDimitry Andric 9315f757f3fSDimitry Andric unsigned LoopSize = UCE.getRolledLoopSize(); 932fe6060f1SDimitry Andric 933349cc55cSDimitry Andric const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; 934349cc55cSDimitry Andric const bool PragmaFullUnroll = hasUnrollFullPragma(L); 935349cc55cSDimitry Andric const unsigned PragmaCount = unrollCountPragmaValue(L); 936349cc55cSDimitry Andric const bool PragmaEnableUnroll = hasUnrollEnablePragma(L); 937349cc55cSDimitry Andric 938349cc55cSDimitry Andric const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll || 939349cc55cSDimitry Andric PragmaEnableUnroll || UserUnrollCount; 940349cc55cSDimitry Andric 941349cc55cSDimitry Andric PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount, 942349cc55cSDimitry Andric PragmaEnableUnroll); 943fe6060f1SDimitry Andric // Use an explicit peel count that has been specified for testing. In this 944fe6060f1SDimitry Andric // case it's not permitted to also specify an explicit unroll count. 945fe6060f1SDimitry Andric if (PP.PeelCount) { 946fe6060f1SDimitry Andric if (UnrollCount.getNumOccurrences() > 0) { 947fe6060f1SDimitry Andric report_fatal_error("Cannot specify both explicit peel count and " 94881ad6265SDimitry Andric "explicit unroll count", /*GenCrashDiag=*/false); 949fe6060f1SDimitry Andric } 950fe6060f1SDimitry Andric UP.Count = 1; 951fe6060f1SDimitry Andric UP.Runtime = false; 952fe6060f1SDimitry Andric return true; 953fe6060f1SDimitry Andric } 9540b57cec5SDimitry Andric // Check for explicit Count. 9550b57cec5SDimitry Andric // 1st priority is unroll count set by "unroll-count" option. 9560b57cec5SDimitry Andric // 2nd priority is unroll count set by pragma. 9574824e7fdSDimitry Andric if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, 9585f757f3fSDimitry Andric MaxTripCount, UCE, UP)) { 959349cc55cSDimitry Andric UP.Count = *UnrollFactor; 960349cc55cSDimitry Andric 961349cc55cSDimitry Andric if (UserUnrollCount || (PragmaCount > 0)) { 9620b57cec5SDimitry Andric UP.AllowExpensiveTripCount = true; 9630b57cec5SDimitry Andric UP.Force = true; 9640b57cec5SDimitry Andric } 965349cc55cSDimitry Andric UP.Runtime |= (PragmaCount > 0); 966349cc55cSDimitry Andric return ExplicitUnroll; 967349cc55cSDimitry Andric } else { 9680b57cec5SDimitry Andric if (ExplicitUnroll && TripCount != 0) { 9690b57cec5SDimitry Andric // If the loop has an unrolling pragma, we want to be more aggressive with 9700b57cec5SDimitry Andric // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold 9710b57cec5SDimitry Andric // value which is larger than the default limits. 9720b57cec5SDimitry Andric UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold); 9730b57cec5SDimitry Andric UP.PartialThreshold = 9740b57cec5SDimitry Andric std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold); 9750b57cec5SDimitry Andric } 976349cc55cSDimitry Andric } 9770b57cec5SDimitry Andric 9784824e7fdSDimitry Andric // 3rd priority is exact full unrolling. This will eliminate all copies 9794824e7fdSDimitry Andric // of some exit test. 9804824e7fdSDimitry Andric UP.Count = 0; 9814824e7fdSDimitry Andric if (TripCount) { 9824824e7fdSDimitry Andric UP.Count = TripCount; 9834824e7fdSDimitry Andric if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues, 9844824e7fdSDimitry Andric TripCount, UCE, UP)) { 9854824e7fdSDimitry Andric UP.Count = *UnrollFactor; 9864824e7fdSDimitry Andric UseUpperBound = false; 9874824e7fdSDimitry Andric return ExplicitUnroll; 9884824e7fdSDimitry Andric } 9894824e7fdSDimitry Andric } 9908bcb0991SDimitry Andric 9914824e7fdSDimitry Andric // 4th priority is bounded unrolling. 9928bcb0991SDimitry Andric // We can unroll by the upper bound amount if it's generally allowed or if 9938bcb0991SDimitry Andric // we know that the loop is executed either the upper bound or zero times. 9948bcb0991SDimitry Andric // (MaxOrZero unrolling keeps only the first loop test, so the number of 9958bcb0991SDimitry Andric // loop tests remains the same compared to the non-unrolled version, whereas 9968bcb0991SDimitry Andric // the generic upper bound unrolling keeps all but the last loop test so the 9978bcb0991SDimitry Andric // number of loop tests goes up which may end up being worse on targets with 9988bcb0991SDimitry Andric // constrained branch predictor resources so is controlled by an option.) 9998bcb0991SDimitry Andric // In addition we only unroll small upper bounds. 10004824e7fdSDimitry Andric // Note that the cost of bounded unrolling is always strictly greater than 10014824e7fdSDimitry Andric // cost of exact full unrolling. As such, if we have an exact count and 10024824e7fdSDimitry Andric // found it unprofitable, we'll never chose to bounded unroll. 10034824e7fdSDimitry Andric if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) && 1004cb14a3feSDimitry Andric MaxTripCount <= UP.MaxUpperBound) { 10054824e7fdSDimitry Andric UP.Count = MaxTripCount; 10064824e7fdSDimitry Andric if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues, 10074824e7fdSDimitry Andric MaxTripCount, UCE, UP)) { 1008349cc55cSDimitry Andric UP.Count = *UnrollFactor; 10094824e7fdSDimitry Andric UseUpperBound = true; 10100b57cec5SDimitry Andric return ExplicitUnroll; 10114824e7fdSDimitry Andric } 10120b57cec5SDimitry Andric } 10130b57cec5SDimitry Andric 10144824e7fdSDimitry Andric // 5th priority is loop peeling. 1015bdd1243dSDimitry Andric computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold); 10165ffd83dbSDimitry Andric if (PP.PeelCount) { 10170b57cec5SDimitry Andric UP.Runtime = false; 10180b57cec5SDimitry Andric UP.Count = 1; 10190b57cec5SDimitry Andric return ExplicitUnroll; 10200b57cec5SDimitry Andric } 10210b57cec5SDimitry Andric 1022349cc55cSDimitry Andric // Before starting partial unrolling, set up.partial to true, 1023349cc55cSDimitry Andric // if user explicitly asked for unrolling 1024349cc55cSDimitry Andric if (TripCount) 1025349cc55cSDimitry Andric UP.Partial |= ExplicitUnroll; 1026349cc55cSDimitry Andric 10274824e7fdSDimitry Andric // 6th priority is partial unrolling. 10280b57cec5SDimitry Andric // Try partial unroll only when TripCount could be statically calculated. 10294824e7fdSDimitry Andric if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) { 1030349cc55cSDimitry Andric UP.Count = *UnrollFactor; 1031349cc55cSDimitry Andric 10320b57cec5SDimitry Andric if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && 10330b57cec5SDimitry Andric UP.Count != TripCount) 10340b57cec5SDimitry Andric ORE->emit([&]() { 10350b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, 10360b57cec5SDimitry Andric "FullUnrollAsDirectedTooLarge", 10370b57cec5SDimitry Andric L->getStartLoc(), L->getHeader()) 10380b57cec5SDimitry Andric << "Unable to fully unroll loop as directed by unroll pragma " 10390b57cec5SDimitry Andric "because " 10400b57cec5SDimitry Andric "unrolled size is too large."; 10410b57cec5SDimitry Andric }); 1042349cc55cSDimitry Andric 1043349cc55cSDimitry Andric if (UP.PartialThreshold != NoThreshold) { 1044349cc55cSDimitry Andric if (UP.Count == 0) { 1045349cc55cSDimitry Andric if (PragmaEnableUnroll) 1046349cc55cSDimitry Andric ORE->emit([&]() { 1047349cc55cSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, 1048349cc55cSDimitry Andric "UnrollAsDirectedTooLarge", 1049349cc55cSDimitry Andric L->getStartLoc(), L->getHeader()) 1050349cc55cSDimitry Andric << "Unable to unroll loop as directed by unroll(enable) " 1051349cc55cSDimitry Andric "pragma " 1052349cc55cSDimitry Andric "because unrolled size is too large."; 1053349cc55cSDimitry Andric }); 1054349cc55cSDimitry Andric } 1055349cc55cSDimitry Andric } 10560b57cec5SDimitry Andric return ExplicitUnroll; 10570b57cec5SDimitry Andric } 10580b57cec5SDimitry Andric assert(TripCount == 0 && 10590b57cec5SDimitry Andric "All cases when TripCount is constant should be covered here."); 10600b57cec5SDimitry Andric if (PragmaFullUnroll) 10610b57cec5SDimitry Andric ORE->emit([&]() { 10620b57cec5SDimitry Andric return OptimizationRemarkMissed( 10630b57cec5SDimitry Andric DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount", 10640b57cec5SDimitry Andric L->getStartLoc(), L->getHeader()) 10650b57cec5SDimitry Andric << "Unable to fully unroll loop as directed by unroll(full) " 10660b57cec5SDimitry Andric "pragma " 10670b57cec5SDimitry Andric "because loop has a runtime trip count."; 10680b57cec5SDimitry Andric }); 10690b57cec5SDimitry Andric 10704824e7fdSDimitry Andric // 7th priority is runtime unrolling. 10710b57cec5SDimitry Andric // Don't unroll a runtime trip count loop when it is disabled. 10725ffd83dbSDimitry Andric if (hasRuntimeUnrollDisablePragma(L)) { 10730b57cec5SDimitry Andric UP.Count = 0; 10740b57cec5SDimitry Andric return false; 10750b57cec5SDimitry Andric } 10760b57cec5SDimitry Andric 10778bcb0991SDimitry Andric // Don't unroll a small upper bound loop unless user or TTI asked to do so. 1078cb14a3feSDimitry Andric if (MaxTripCount && !UP.Force && MaxTripCount < UP.MaxUpperBound) { 10798bcb0991SDimitry Andric UP.Count = 0; 10808bcb0991SDimitry Andric return false; 10818bcb0991SDimitry Andric } 10828bcb0991SDimitry Andric 10830b57cec5SDimitry Andric // Check if the runtime trip count is too small when profile is available. 10840b57cec5SDimitry Andric if (L->getHeader()->getParent()->hasProfileData()) { 10850b57cec5SDimitry Andric if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) { 10860b57cec5SDimitry Andric if (*ProfileTripCount < FlatLoopTripCountThreshold) 10870b57cec5SDimitry Andric return false; 10880b57cec5SDimitry Andric else 10890b57cec5SDimitry Andric UP.AllowExpensiveTripCount = true; 10900b57cec5SDimitry Andric } 10910b57cec5SDimitry Andric } 10920b57cec5SDimitry Andric UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount; 10930b57cec5SDimitry Andric if (!UP.Runtime) { 10940b57cec5SDimitry Andric LLVM_DEBUG( 10950b57cec5SDimitry Andric dbgs() << " will not try to unroll loop with runtime trip count " 10960b57cec5SDimitry Andric << "-unroll-runtime not given\n"); 10970b57cec5SDimitry Andric UP.Count = 0; 10980b57cec5SDimitry Andric return false; 10990b57cec5SDimitry Andric } 11000b57cec5SDimitry Andric if (UP.Count == 0) 11010b57cec5SDimitry Andric UP.Count = UP.DefaultUnrollRuntimeCount; 11020b57cec5SDimitry Andric 11030b57cec5SDimitry Andric // Reduce unroll count to be the largest power-of-two factor of 11040b57cec5SDimitry Andric // the original count which satisfies the threshold limit. 11050b57cec5SDimitry Andric while (UP.Count != 0 && 1106fe6060f1SDimitry Andric UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold) 11070b57cec5SDimitry Andric UP.Count >>= 1; 11080b57cec5SDimitry Andric 11090b57cec5SDimitry Andric #ifndef NDEBUG 11100b57cec5SDimitry Andric unsigned OrigCount = UP.Count; 11110b57cec5SDimitry Andric #endif 11120b57cec5SDimitry Andric 11130b57cec5SDimitry Andric if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) { 11140b57cec5SDimitry Andric while (UP.Count != 0 && TripMultiple % UP.Count != 0) 11150b57cec5SDimitry Andric UP.Count >>= 1; 11160b57cec5SDimitry Andric LLVM_DEBUG( 11170b57cec5SDimitry Andric dbgs() << "Remainder loop is restricted (that could architecture " 11180b57cec5SDimitry Andric "specific or because the loop contains a convergent " 11190b57cec5SDimitry Andric "instruction), so unroll count must divide the trip " 11200b57cec5SDimitry Andric "multiple, " 11210b57cec5SDimitry Andric << TripMultiple << ". Reducing unroll count from " << OrigCount 11220b57cec5SDimitry Andric << " to " << UP.Count << ".\n"); 11230b57cec5SDimitry Andric 11240b57cec5SDimitry Andric using namespace ore; 11250b57cec5SDimitry Andric 1126349cc55cSDimitry Andric if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder) 11270b57cec5SDimitry Andric ORE->emit([&]() { 11280b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, 11290b57cec5SDimitry Andric "DifferentUnrollCountFromDirected", 11300b57cec5SDimitry Andric L->getStartLoc(), L->getHeader()) 11310b57cec5SDimitry Andric << "Unable to unroll loop the number of times directed by " 11320b57cec5SDimitry Andric "unroll_count pragma because remainder loop is restricted " 11330b57cec5SDimitry Andric "(that could architecture specific or because the loop " 11340b57cec5SDimitry Andric "contains a convergent instruction) and so must have an " 11350b57cec5SDimitry Andric "unroll " 11360b57cec5SDimitry Andric "count that divides the loop trip multiple of " 11370b57cec5SDimitry Andric << NV("TripMultiple", TripMultiple) << ". Unrolling instead " 11380b57cec5SDimitry Andric << NV("UnrollCount", UP.Count) << " time(s)."; 11390b57cec5SDimitry Andric }); 11400b57cec5SDimitry Andric } 11410b57cec5SDimitry Andric 11420b57cec5SDimitry Andric if (UP.Count > UP.MaxCount) 11430b57cec5SDimitry Andric UP.Count = UP.MaxCount; 11448bcb0991SDimitry Andric 11458bcb0991SDimitry Andric if (MaxTripCount && UP.Count > MaxTripCount) 11468bcb0991SDimitry Andric UP.Count = MaxTripCount; 11478bcb0991SDimitry Andric 11488bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Count 11490b57cec5SDimitry Andric << "\n"); 11500b57cec5SDimitry Andric if (UP.Count < 2) 11510b57cec5SDimitry Andric UP.Count = 0; 11520b57cec5SDimitry Andric return ExplicitUnroll; 11530b57cec5SDimitry Andric } 11540b57cec5SDimitry Andric 1155bdd1243dSDimitry Andric static LoopUnrollResult 1156bdd1243dSDimitry Andric tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, 11570b57cec5SDimitry Andric const TargetTransformInfo &TTI, AssumptionCache &AC, 11588bcb0991SDimitry Andric OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, 11598bcb0991SDimitry Andric ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, 116006c3fb27SDimitry Andric bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV, 1161bdd1243dSDimitry Andric std::optional<unsigned> ProvidedCount, 1162bdd1243dSDimitry Andric std::optional<unsigned> ProvidedThreshold, 1163bdd1243dSDimitry Andric std::optional<bool> ProvidedAllowPartial, 1164bdd1243dSDimitry Andric std::optional<bool> ProvidedRuntime, 1165bdd1243dSDimitry Andric std::optional<bool> ProvidedUpperBound, 1166bdd1243dSDimitry Andric std::optional<bool> ProvidedAllowPeeling, 1167bdd1243dSDimitry Andric std::optional<bool> ProvidedAllowProfileBasedPeeling, 1168*0fca6ea1SDimitry Andric std::optional<unsigned> ProvidedFullUnrollMaxCount, 1169*0fca6ea1SDimitry Andric AAResults *AA = nullptr) { 117006c3fb27SDimitry Andric 11710b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop Unroll: F[" 11720b57cec5SDimitry Andric << L->getHeader()->getParent()->getName() << "] Loop %" 11730b57cec5SDimitry Andric << L->getHeader()->getName() << "\n"); 11740b57cec5SDimitry Andric TransformationMode TM = hasUnrollTransformation(L); 11750b57cec5SDimitry Andric if (TM & TM_Disable) 11760b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 11770eae32dcSDimitry Andric 11780eae32dcSDimitry Andric // If this loop isn't forced to be unrolled, avoid unrolling it when the 11790eae32dcSDimitry Andric // parent loop has an explicit unroll-and-jam pragma. This is to prevent 11800eae32dcSDimitry Andric // automatic unrolling from interfering with the user requested 11810eae32dcSDimitry Andric // transformation. 11820eae32dcSDimitry Andric Loop *ParentL = L->getParentLoop(); 118304eeddc0SDimitry Andric if (ParentL != nullptr && 11840eae32dcSDimitry Andric hasUnrollAndJamTransformation(ParentL) == TM_ForcedByUser && 11850eae32dcSDimitry Andric hasUnrollTransformation(L) != TM_ForcedByUser) { 11860eae32dcSDimitry Andric LLVM_DEBUG(dbgs() << "Not unrolling loop since parent loop has" 11870eae32dcSDimitry Andric << " llvm.loop.unroll_and_jam.\n"); 11880eae32dcSDimitry Andric return LoopUnrollResult::Unmodified; 11890eae32dcSDimitry Andric } 11900eae32dcSDimitry Andric 11910eae32dcSDimitry Andric // If this loop isn't forced to be unrolled, avoid unrolling it when the 11920eae32dcSDimitry Andric // loop has an explicit unroll-and-jam pragma. This is to prevent automatic 11930eae32dcSDimitry Andric // unrolling from interfering with the user requested transformation. 11940eae32dcSDimitry Andric if (hasUnrollAndJamTransformation(L) == TM_ForcedByUser && 11950eae32dcSDimitry Andric hasUnrollTransformation(L) != TM_ForcedByUser) { 11960eae32dcSDimitry Andric LLVM_DEBUG( 11970eae32dcSDimitry Andric dbgs() 11980eae32dcSDimitry Andric << " Not unrolling loop since it has llvm.loop.unroll_and_jam.\n"); 11990eae32dcSDimitry Andric return LoopUnrollResult::Unmodified; 12000eae32dcSDimitry Andric } 12010eae32dcSDimitry Andric 12020b57cec5SDimitry Andric if (!L->isLoopSimplifyForm()) { 12030b57cec5SDimitry Andric LLVM_DEBUG( 12040b57cec5SDimitry Andric dbgs() << " Not unrolling loop which is not in loop-simplify form.\n"); 12050b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 12060b57cec5SDimitry Andric } 12070b57cec5SDimitry Andric 1208e8d8bef9SDimitry Andric // When automatic unrolling is disabled, do not unroll unless overridden for 12090b57cec5SDimitry Andric // this loop. 12100b57cec5SDimitry Andric if (OnlyWhenForced && !(TM & TM_Enable)) 12110b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 12120b57cec5SDimitry Andric 12130b57cec5SDimitry Andric bool OptForSize = L->getHeader()->getParent()->hasOptSize(); 12140b57cec5SDimitry Andric TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( 1215349cc55cSDimitry Andric L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount, 12160b57cec5SDimitry Andric ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, 12178bcb0991SDimitry Andric ProvidedFullUnrollMaxCount); 12185ffd83dbSDimitry Andric TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences( 1219e8d8bef9SDimitry Andric L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true); 12200b57cec5SDimitry Andric 12210b57cec5SDimitry Andric // Exit early if unrolling is disabled. For OptForSize, we pick the loop size 12220b57cec5SDimitry Andric // as threshold later on. 12230b57cec5SDimitry Andric if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) && 12240b57cec5SDimitry Andric !OptForSize) 12250b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 12260b57cec5SDimitry Andric 12270b57cec5SDimitry Andric SmallPtrSet<const Value *, 32> EphValues; 12280b57cec5SDimitry Andric CodeMetrics::collectEphemeralValues(L, &AC, EphValues); 12290b57cec5SDimitry Andric 12305f757f3fSDimitry Andric UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns); 12315f757f3fSDimitry Andric if (!UCE.canUnroll()) { 1232*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " Loop not considered unrollable.\n"); 123381ad6265SDimitry Andric return LoopUnrollResult::Unmodified; 123481ad6265SDimitry Andric } 123581ad6265SDimitry Andric 12365f757f3fSDimitry Andric unsigned LoopSize = UCE.getRolledLoopSize(); 12375f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); 12380b57cec5SDimitry Andric 12398bcb0991SDimitry Andric // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold 12408bcb0991SDimitry Andric // later), to (fully) unroll loops, if it does not increase code size. 12410b57cec5SDimitry Andric if (OptForSize) 12428bcb0991SDimitry Andric UP.Threshold = std::max(UP.Threshold, LoopSize + 1); 12430b57cec5SDimitry Andric 12445f757f3fSDimitry Andric if (UCE.NumInlineCandidates != 0) { 12450b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); 12460b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 12470b57cec5SDimitry Andric } 12480b57cec5SDimitry Andric 1249fe6060f1SDimitry Andric // Find the smallest exact trip count for any exit. This is an upper bound 1250fe6060f1SDimitry Andric // on the loop trip count, but an exit at an earlier iteration is still 1251fe6060f1SDimitry Andric // possible. An unroll by the smallest exact trip count guarantees that all 1252bdd1243dSDimitry Andric // branches relating to at least one exit can be eliminated. This is unlike 1253fe6060f1SDimitry Andric // the max trip count, which only guarantees that the backedge can be broken. 12540b57cec5SDimitry Andric unsigned TripCount = 0; 12550b57cec5SDimitry Andric unsigned TripMultiple = 1; 1256fe6060f1SDimitry Andric SmallVector<BasicBlock *, 8> ExitingBlocks; 1257fe6060f1SDimitry Andric L->getExitingBlocks(ExitingBlocks); 1258fe6060f1SDimitry Andric for (BasicBlock *ExitingBlock : ExitingBlocks) 1259fe6060f1SDimitry Andric if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock)) 1260fe6060f1SDimitry Andric if (!TripCount || TC < TripCount) 1261fe6060f1SDimitry Andric TripCount = TripMultiple = TC; 1262fe6060f1SDimitry Andric 1263fe6060f1SDimitry Andric if (!TripCount) { 1264fe6060f1SDimitry Andric // If no exact trip count is known, determine the trip multiple of either 1265fe6060f1SDimitry Andric // the loop latch or the single exiting block. 1266fe6060f1SDimitry Andric // TODO: Relax for multiple exits. 12670b57cec5SDimitry Andric BasicBlock *ExitingBlock = L->getLoopLatch(); 12680b57cec5SDimitry Andric if (!ExitingBlock || !L->isLoopExiting(ExitingBlock)) 12690b57cec5SDimitry Andric ExitingBlock = L->getExitingBlock(); 1270fe6060f1SDimitry Andric if (ExitingBlock) 12710b57cec5SDimitry Andric TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock); 12720b57cec5SDimitry Andric } 12730b57cec5SDimitry Andric 12740b57cec5SDimitry Andric // If the loop contains a convergent operation, the prelude we'd add 12750b57cec5SDimitry Andric // to do the first few instructions before we hit the unrolled loop 12760b57cec5SDimitry Andric // is unsafe -- it adds a control-flow dependency to the convergent 1277e8d8bef9SDimitry Andric // operation. Therefore restrict remainder loop (try unrolling without). 12780b57cec5SDimitry Andric // 1279*0fca6ea1SDimitry Andric // TODO: This is somewhat conservative; we could allow the remainder if the 1280*0fca6ea1SDimitry Andric // trip count is uniform. 1281*0fca6ea1SDimitry Andric UP.AllowRemainder &= UCE.ConvergenceAllowsRuntime; 12820b57cec5SDimitry Andric 12830b57cec5SDimitry Andric // Try to find the trip count upper bound if we cannot find the exact trip 12840b57cec5SDimitry Andric // count. 12858bcb0991SDimitry Andric unsigned MaxTripCount = 0; 12860b57cec5SDimitry Andric bool MaxOrZero = false; 12870b57cec5SDimitry Andric if (!TripCount) { 12880b57cec5SDimitry Andric MaxTripCount = SE.getSmallConstantMaxTripCount(L); 12890b57cec5SDimitry Andric MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L); 12900b57cec5SDimitry Andric } 12910b57cec5SDimitry Andric 12920b57cec5SDimitry Andric // computeUnrollCount() decides whether it is beneficial to use upper bound to 12930b57cec5SDimitry Andric // fully unroll the loop. 12940b57cec5SDimitry Andric bool UseUpperBound = false; 12950b57cec5SDimitry Andric bool IsCountSetExplicitly = computeUnrollCount( 12965f757f3fSDimitry Andric L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount, 12975f757f3fSDimitry Andric MaxOrZero, TripMultiple, UCE, UP, PP, UseUpperBound); 12980b57cec5SDimitry Andric if (!UP.Count) 12990b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 1300fe6060f1SDimitry Andric 1301*0fca6ea1SDimitry Andric UP.Runtime &= UCE.ConvergenceAllowsRuntime; 1302*0fca6ea1SDimitry Andric 1303fe6060f1SDimitry Andric if (PP.PeelCount) { 1304fe6060f1SDimitry Andric assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step"); 1305fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName() 1306fe6060f1SDimitry Andric << " with iteration count " << PP.PeelCount << "!\n"); 1307fe6060f1SDimitry Andric ORE.emit([&]() { 1308fe6060f1SDimitry Andric return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), 1309fe6060f1SDimitry Andric L->getHeader()) 1310fe6060f1SDimitry Andric << " peeled loop by " << ore::NV("PeelCount", PP.PeelCount) 1311fe6060f1SDimitry Andric << " iterations"; 1312fe6060f1SDimitry Andric }); 1313fe6060f1SDimitry Andric 1314bdd1243dSDimitry Andric ValueToValueMapTy VMap; 1315bdd1243dSDimitry Andric if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA, VMap)) { 1316*0fca6ea1SDimitry Andric simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI, nullptr); 1317fe6060f1SDimitry Andric // If the loop was peeled, we already "used up" the profile information 1318fe6060f1SDimitry Andric // we had, so we don't want to unroll or peel again. 1319fe6060f1SDimitry Andric if (PP.PeelProfiledIterations) 1320fe6060f1SDimitry Andric L->setLoopAlreadyUnrolled(); 1321fe6060f1SDimitry Andric return LoopUnrollResult::PartiallyUnrolled; 1322fe6060f1SDimitry Andric } 1323fe6060f1SDimitry Andric return LoopUnrollResult::Unmodified; 1324fe6060f1SDimitry Andric } 1325fe6060f1SDimitry Andric 132606c3fb27SDimitry Andric // Do not attempt partial/runtime unrolling in FullLoopUnrolling 1327*0fca6ea1SDimitry Andric if (OnlyFullUnroll && (UP.Count < TripCount || UP.Count < MaxTripCount)) { 132806c3fb27SDimitry Andric LLVM_DEBUG( 132906c3fb27SDimitry Andric dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n"); 133006c3fb27SDimitry Andric return LoopUnrollResult::Unmodified; 133106c3fb27SDimitry Andric } 133206c3fb27SDimitry Andric 1333fe6060f1SDimitry Andric // At this point, UP.Runtime indicates that run-time unrolling is allowed. 1334fe6060f1SDimitry Andric // However, we only want to actually perform it if we don't know the trip 1335fe6060f1SDimitry Andric // count and the unroll count doesn't divide the known trip multiple. 1336fe6060f1SDimitry Andric // TODO: This decision should probably be pushed up into 1337fe6060f1SDimitry Andric // computeUnrollCount(). 1338fe6060f1SDimitry Andric UP.Runtime &= TripCount == 0 && TripMultiple % UP.Count != 0; 13390b57cec5SDimitry Andric 13400b57cec5SDimitry Andric // Save loop properties before it is transformed. 13410b57cec5SDimitry Andric MDNode *OrigLoopID = L->getLoopID(); 13420b57cec5SDimitry Andric 13430b57cec5SDimitry Andric // Unroll the loop. 13440b57cec5SDimitry Andric Loop *RemainderLoop = nullptr; 1345*0fca6ea1SDimitry Andric UnrollLoopOptions ULO; 1346*0fca6ea1SDimitry Andric ULO.Count = UP.Count; 1347*0fca6ea1SDimitry Andric ULO.Force = UP.Force; 1348*0fca6ea1SDimitry Andric ULO.AllowExpensiveTripCount = UP.AllowExpensiveTripCount; 1349*0fca6ea1SDimitry Andric ULO.UnrollRemainder = UP.UnrollRemainder; 1350*0fca6ea1SDimitry Andric ULO.Runtime = UP.Runtime; 1351*0fca6ea1SDimitry Andric ULO.ForgetAllSCEV = ForgetAllSCEV; 1352*0fca6ea1SDimitry Andric ULO.Heart = getLoopConvergenceHeart(L); 13530b57cec5SDimitry Andric LoopUnrollResult UnrollResult = UnrollLoop( 1354*0fca6ea1SDimitry Andric L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA); 13550b57cec5SDimitry Andric if (UnrollResult == LoopUnrollResult::Unmodified) 13560b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 13570b57cec5SDimitry Andric 13580b57cec5SDimitry Andric if (RemainderLoop) { 1359bdd1243dSDimitry Andric std::optional<MDNode *> RemainderLoopID = 13600b57cec5SDimitry Andric makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll, 13610b57cec5SDimitry Andric LLVMLoopUnrollFollowupRemainder}); 136281ad6265SDimitry Andric if (RemainderLoopID) 1363bdd1243dSDimitry Andric RemainderLoop->setLoopID(*RemainderLoopID); 13640b57cec5SDimitry Andric } 13650b57cec5SDimitry Andric 13660b57cec5SDimitry Andric if (UnrollResult != LoopUnrollResult::FullyUnrolled) { 1367bdd1243dSDimitry Andric std::optional<MDNode *> NewLoopID = 13680b57cec5SDimitry Andric makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll, 13690b57cec5SDimitry Andric LLVMLoopUnrollFollowupUnrolled}); 137081ad6265SDimitry Andric if (NewLoopID) { 1371bdd1243dSDimitry Andric L->setLoopID(*NewLoopID); 13720b57cec5SDimitry Andric 13730b57cec5SDimitry Andric // Do not setLoopAlreadyUnrolled if loop attributes have been specified 13740b57cec5SDimitry Andric // explicitly. 13750b57cec5SDimitry Andric return UnrollResult; 13760b57cec5SDimitry Andric } 13770b57cec5SDimitry Andric } 13780b57cec5SDimitry Andric 13790b57cec5SDimitry Andric // If loop has an unroll count pragma or unrolled by explicitly set count 13800b57cec5SDimitry Andric // mark loop as unrolled to prevent unrolling beyond that requested. 1381fe6060f1SDimitry Andric if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly) 13820b57cec5SDimitry Andric L->setLoopAlreadyUnrolled(); 13830b57cec5SDimitry Andric 13840b57cec5SDimitry Andric return UnrollResult; 13850b57cec5SDimitry Andric } 13860b57cec5SDimitry Andric 13870b57cec5SDimitry Andric namespace { 13880b57cec5SDimitry Andric 13890b57cec5SDimitry Andric class LoopUnroll : public LoopPass { 13900b57cec5SDimitry Andric public: 13910b57cec5SDimitry Andric static char ID; // Pass ID, replacement for typeid 13920b57cec5SDimitry Andric 13930b57cec5SDimitry Andric int OptLevel; 13940b57cec5SDimitry Andric 13950b57cec5SDimitry Andric /// If false, use a cost model to determine whether unrolling of a loop is 13960b57cec5SDimitry Andric /// profitable. If true, only loops that explicitly request unrolling via 13970b57cec5SDimitry Andric /// metadata are considered. All other loops are skipped. 13980b57cec5SDimitry Andric bool OnlyWhenForced; 13990b57cec5SDimitry Andric 14000b57cec5SDimitry Andric /// If false, when SCEV is invalidated, only forget everything in the 14010b57cec5SDimitry Andric /// top-most loop (call forgetTopMostLoop), of the loop being processed. 14020b57cec5SDimitry Andric /// Otherwise, forgetAllLoops and rebuild when needed next. 14030b57cec5SDimitry Andric bool ForgetAllSCEV; 14040b57cec5SDimitry Andric 1405bdd1243dSDimitry Andric std::optional<unsigned> ProvidedCount; 1406bdd1243dSDimitry Andric std::optional<unsigned> ProvidedThreshold; 1407bdd1243dSDimitry Andric std::optional<bool> ProvidedAllowPartial; 1408bdd1243dSDimitry Andric std::optional<bool> ProvidedRuntime; 1409bdd1243dSDimitry Andric std::optional<bool> ProvidedUpperBound; 1410bdd1243dSDimitry Andric std::optional<bool> ProvidedAllowPeeling; 1411bdd1243dSDimitry Andric std::optional<bool> ProvidedAllowProfileBasedPeeling; 1412bdd1243dSDimitry Andric std::optional<unsigned> ProvidedFullUnrollMaxCount; 14130b57cec5SDimitry Andric 14140b57cec5SDimitry Andric LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false, 1415bdd1243dSDimitry Andric bool ForgetAllSCEV = false, 1416bdd1243dSDimitry Andric std::optional<unsigned> Threshold = std::nullopt, 1417bdd1243dSDimitry Andric std::optional<unsigned> Count = std::nullopt, 1418bdd1243dSDimitry Andric std::optional<bool> AllowPartial = std::nullopt, 1419bdd1243dSDimitry Andric std::optional<bool> Runtime = std::nullopt, 1420bdd1243dSDimitry Andric std::optional<bool> UpperBound = std::nullopt, 1421bdd1243dSDimitry Andric std::optional<bool> AllowPeeling = std::nullopt, 1422bdd1243dSDimitry Andric std::optional<bool> AllowProfileBasedPeeling = std::nullopt, 1423bdd1243dSDimitry Andric std::optional<unsigned> ProvidedFullUnrollMaxCount = std::nullopt) 14240b57cec5SDimitry Andric : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced), 14250b57cec5SDimitry Andric ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)), 14260b57cec5SDimitry Andric ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial), 14270b57cec5SDimitry Andric ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound), 14288bcb0991SDimitry Andric ProvidedAllowPeeling(AllowPeeling), 14298bcb0991SDimitry Andric ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling), 14308bcb0991SDimitry Andric ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) { 14310b57cec5SDimitry Andric initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); 14320b57cec5SDimitry Andric } 14330b57cec5SDimitry Andric 14340b57cec5SDimitry Andric bool runOnLoop(Loop *L, LPPassManager &LPM) override { 14350b57cec5SDimitry Andric if (skipLoop(L)) 14360b57cec5SDimitry Andric return false; 14370b57cec5SDimitry Andric 14380b57cec5SDimitry Andric Function &F = *L->getHeader()->getParent(); 14390b57cec5SDimitry Andric 14400b57cec5SDimitry Andric auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 14410b57cec5SDimitry Andric LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 14420b57cec5SDimitry Andric ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 14430b57cec5SDimitry Andric const TargetTransformInfo &TTI = 14440b57cec5SDimitry Andric getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 14450b57cec5SDimitry Andric auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 14460b57cec5SDimitry Andric // For the old PM, we can't use OptimizationRemarkEmitter as an analysis 14470b57cec5SDimitry Andric // pass. Function analyses need to be preserved across loop transformations 14480b57cec5SDimitry Andric // but ORE cannot be preserved (see comment before the pass definition). 14490b57cec5SDimitry Andric OptimizationRemarkEmitter ORE(&F); 14500b57cec5SDimitry Andric bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); 14510b57cec5SDimitry Andric 14520b57cec5SDimitry Andric LoopUnrollResult Result = tryToUnrollLoop( 14538bcb0991SDimitry Andric L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel, 145406c3fb27SDimitry Andric /*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount, 145506c3fb27SDimitry Andric ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, 145606c3fb27SDimitry Andric ProvidedUpperBound, ProvidedAllowPeeling, 145706c3fb27SDimitry Andric ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount); 14580b57cec5SDimitry Andric 14590b57cec5SDimitry Andric if (Result == LoopUnrollResult::FullyUnrolled) 14600b57cec5SDimitry Andric LPM.markLoopAsDeleted(*L); 14610b57cec5SDimitry Andric 14620b57cec5SDimitry Andric return Result != LoopUnrollResult::Unmodified; 14630b57cec5SDimitry Andric } 14640b57cec5SDimitry Andric 14650b57cec5SDimitry Andric /// This transformation requires natural loop information & requires that 14660b57cec5SDimitry Andric /// loop preheaders be inserted into the CFG... 14670b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 14680b57cec5SDimitry Andric AU.addRequired<AssumptionCacheTracker>(); 14690b57cec5SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 14700b57cec5SDimitry Andric // FIXME: Loop passes are required to preserve domtree, and for now we just 14710b57cec5SDimitry Andric // recreate dom info if anything gets unrolled. 14720b57cec5SDimitry Andric getLoopAnalysisUsage(AU); 14730b57cec5SDimitry Andric } 14740b57cec5SDimitry Andric }; 14750b57cec5SDimitry Andric 14760b57cec5SDimitry Andric } // end anonymous namespace 14770b57cec5SDimitry Andric 14780b57cec5SDimitry Andric char LoopUnroll::ID = 0; 14790b57cec5SDimitry Andric 14800b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) 14810b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 14820b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopPass) 14830b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 14840b57cec5SDimitry Andric INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) 14850b57cec5SDimitry Andric 14860b57cec5SDimitry Andric Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced, 14870b57cec5SDimitry Andric bool ForgetAllSCEV, int Threshold, int Count, 14880b57cec5SDimitry Andric int AllowPartial, int Runtime, int UpperBound, 14890b57cec5SDimitry Andric int AllowPeeling) { 14900b57cec5SDimitry Andric // TODO: It would make more sense for this function to take the optionals 14910b57cec5SDimitry Andric // directly, but that's dangerous since it would silently break out of tree 14920b57cec5SDimitry Andric // callers. 14930b57cec5SDimitry Andric return new LoopUnroll( 14940b57cec5SDimitry Andric OptLevel, OnlyWhenForced, ForgetAllSCEV, 1495bdd1243dSDimitry Andric Threshold == -1 ? std::nullopt : std::optional<unsigned>(Threshold), 1496bdd1243dSDimitry Andric Count == -1 ? std::nullopt : std::optional<unsigned>(Count), 1497bdd1243dSDimitry Andric AllowPartial == -1 ? std::nullopt : std::optional<bool>(AllowPartial), 1498bdd1243dSDimitry Andric Runtime == -1 ? std::nullopt : std::optional<bool>(Runtime), 1499bdd1243dSDimitry Andric UpperBound == -1 ? std::nullopt : std::optional<bool>(UpperBound), 1500bdd1243dSDimitry Andric AllowPeeling == -1 ? std::nullopt : std::optional<bool>(AllowPeeling)); 15010b57cec5SDimitry Andric } 15020b57cec5SDimitry Andric 15030b57cec5SDimitry Andric PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, 15040b57cec5SDimitry Andric LoopStandardAnalysisResults &AR, 15050b57cec5SDimitry Andric LPMUpdater &Updater) { 15065ffd83dbSDimitry Andric // For the new PM, we can't use OptimizationRemarkEmitter as an analysis 15075ffd83dbSDimitry Andric // pass. Function analyses need to be preserved across loop transformations 15085ffd83dbSDimitry Andric // but ORE cannot be preserved (see comment before the pass definition). 15095ffd83dbSDimitry Andric OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); 15100b57cec5SDimitry Andric 15110b57cec5SDimitry Andric // Keep track of the previous loop structure so we can identify new loops 15120b57cec5SDimitry Andric // created by unrolling. 15130b57cec5SDimitry Andric Loop *ParentL = L.getParentLoop(); 15140b57cec5SDimitry Andric SmallPtrSet<Loop *, 4> OldLoops; 15150b57cec5SDimitry Andric if (ParentL) 15160b57cec5SDimitry Andric OldLoops.insert(ParentL->begin(), ParentL->end()); 15170b57cec5SDimitry Andric else 15180b57cec5SDimitry Andric OldLoops.insert(AR.LI.begin(), AR.LI.end()); 15190b57cec5SDimitry Andric 15205ffd83dbSDimitry Andric std::string LoopName = std::string(L.getName()); 15210b57cec5SDimitry Andric 1522bdd1243dSDimitry Andric bool Changed = 1523bdd1243dSDimitry Andric tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE, 15240b57cec5SDimitry Andric /*BFI*/ nullptr, /*PSI*/ nullptr, 152506c3fb27SDimitry Andric /*PreserveLCSSA*/ true, OptLevel, /*OnlyFullUnroll*/ true, 152606c3fb27SDimitry Andric OnlyWhenForced, ForgetSCEV, /*Count*/ std::nullopt, 1527bdd1243dSDimitry Andric /*Threshold*/ std::nullopt, /*AllowPartial*/ false, 15280b57cec5SDimitry Andric /*Runtime*/ false, /*UpperBound*/ false, 1529e8d8bef9SDimitry Andric /*AllowPeeling*/ true, 15308bcb0991SDimitry Andric /*AllowProfileBasedPeeling*/ false, 1531bdd1243dSDimitry Andric /*FullUnrollMaxCount*/ std::nullopt) != 15328bcb0991SDimitry Andric LoopUnrollResult::Unmodified; 15330b57cec5SDimitry Andric if (!Changed) 15340b57cec5SDimitry Andric return PreservedAnalyses::all(); 15350b57cec5SDimitry Andric 15360b57cec5SDimitry Andric // The parent must not be damaged by unrolling! 15370b57cec5SDimitry Andric #ifndef NDEBUG 15380b57cec5SDimitry Andric if (ParentL) 15390b57cec5SDimitry Andric ParentL->verifyLoop(); 15400b57cec5SDimitry Andric #endif 15410b57cec5SDimitry Andric 15420b57cec5SDimitry Andric // Unrolling can do several things to introduce new loops into a loop nest: 15430b57cec5SDimitry Andric // - Full unrolling clones child loops within the current loop but then 15440b57cec5SDimitry Andric // removes the current loop making all of the children appear to be new 15450b57cec5SDimitry Andric // sibling loops. 15460b57cec5SDimitry Andric // 15470b57cec5SDimitry Andric // When a new loop appears as a sibling loop after fully unrolling, 15480b57cec5SDimitry Andric // its nesting structure has fundamentally changed and we want to revisit 15490b57cec5SDimitry Andric // it to reflect that. 15500b57cec5SDimitry Andric // 15510b57cec5SDimitry Andric // When unrolling has removed the current loop, we need to tell the 15520b57cec5SDimitry Andric // infrastructure that it is gone. 15530b57cec5SDimitry Andric // 15540b57cec5SDimitry Andric // Finally, we support a debugging/testing mode where we revisit child loops 15550b57cec5SDimitry Andric // as well. These are not expected to require further optimizations as either 15560b57cec5SDimitry Andric // they or the loop they were cloned from have been directly visited already. 15570b57cec5SDimitry Andric // But the debugging mode allows us to check this assumption. 15580b57cec5SDimitry Andric bool IsCurrentLoopValid = false; 15590b57cec5SDimitry Andric SmallVector<Loop *, 4> SibLoops; 15600b57cec5SDimitry Andric if (ParentL) 15610b57cec5SDimitry Andric SibLoops.append(ParentL->begin(), ParentL->end()); 15620b57cec5SDimitry Andric else 15630b57cec5SDimitry Andric SibLoops.append(AR.LI.begin(), AR.LI.end()); 15640b57cec5SDimitry Andric erase_if(SibLoops, [&](Loop *SibLoop) { 15650b57cec5SDimitry Andric if (SibLoop == &L) { 15660b57cec5SDimitry Andric IsCurrentLoopValid = true; 15670b57cec5SDimitry Andric return true; 15680b57cec5SDimitry Andric } 15690b57cec5SDimitry Andric 15700b57cec5SDimitry Andric // Otherwise erase the loop from the list if it was in the old loops. 1571e8d8bef9SDimitry Andric return OldLoops.contains(SibLoop); 15720b57cec5SDimitry Andric }); 15730b57cec5SDimitry Andric Updater.addSiblingLoops(SibLoops); 15740b57cec5SDimitry Andric 15750b57cec5SDimitry Andric if (!IsCurrentLoopValid) { 15760b57cec5SDimitry Andric Updater.markLoopAsDeleted(L, LoopName); 15770b57cec5SDimitry Andric } else { 15780b57cec5SDimitry Andric // We can only walk child loops if the current loop remained valid. 15790b57cec5SDimitry Andric if (UnrollRevisitChildLoops) { 15800b57cec5SDimitry Andric // Walk *all* of the child loops. 15810b57cec5SDimitry Andric SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end()); 15820b57cec5SDimitry Andric Updater.addChildLoops(ChildLoops); 15830b57cec5SDimitry Andric } 15840b57cec5SDimitry Andric } 15850b57cec5SDimitry Andric 15860b57cec5SDimitry Andric return getLoopPassPreservedAnalyses(); 15870b57cec5SDimitry Andric } 15880b57cec5SDimitry Andric 15890b57cec5SDimitry Andric PreservedAnalyses LoopUnrollPass::run(Function &F, 15900b57cec5SDimitry Andric FunctionAnalysisManager &AM) { 15910b57cec5SDimitry Andric auto &LI = AM.getResult<LoopAnalysis>(F); 159281ad6265SDimitry Andric // There are no loops in the function. Return before computing other expensive 159381ad6265SDimitry Andric // analyses. 159481ad6265SDimitry Andric if (LI.empty()) 159581ad6265SDimitry Andric return PreservedAnalyses::all(); 159681ad6265SDimitry Andric auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); 15970b57cec5SDimitry Andric auto &TTI = AM.getResult<TargetIRAnalysis>(F); 15980b57cec5SDimitry Andric auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 15990b57cec5SDimitry Andric auto &AC = AM.getResult<AssumptionAnalysis>(F); 16000b57cec5SDimitry Andric auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); 1601*0fca6ea1SDimitry Andric AAResults &AA = AM.getResult<AAManager>(F); 16020b57cec5SDimitry Andric 16030b57cec5SDimitry Andric LoopAnalysisManager *LAM = nullptr; 16040b57cec5SDimitry Andric if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F)) 16050b57cec5SDimitry Andric LAM = &LAMProxy->getManager(); 16060b57cec5SDimitry Andric 16075ffd83dbSDimitry Andric auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 16080b57cec5SDimitry Andric ProfileSummaryInfo *PSI = 16095ffd83dbSDimitry Andric MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 16100b57cec5SDimitry Andric auto *BFI = (PSI && PSI->hasProfileSummary()) ? 16110b57cec5SDimitry Andric &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr; 16120b57cec5SDimitry Andric 16130b57cec5SDimitry Andric bool Changed = false; 16140b57cec5SDimitry Andric 16150b57cec5SDimitry Andric // The unroller requires loops to be in simplified form, and also needs LCSSA. 16160b57cec5SDimitry Andric // Since simplification may add new inner loops, it has to run before the 16170b57cec5SDimitry Andric // legality and profitability checks. This means running the loop unroller 16180b57cec5SDimitry Andric // will simplify all loops, regardless of whether anything end up being 16190b57cec5SDimitry Andric // unrolled. 1620bdd1243dSDimitry Andric for (const auto &L : LI) { 16210b57cec5SDimitry Andric Changed |= 16220b57cec5SDimitry Andric simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */); 16230b57cec5SDimitry Andric Changed |= formLCSSARecursively(*L, DT, &LI, &SE); 16240b57cec5SDimitry Andric } 16250b57cec5SDimitry Andric 16265ffd83dbSDimitry Andric // Add the loop nests in the reverse order of LoopInfo. See method 16275ffd83dbSDimitry Andric // declaration. 16285ffd83dbSDimitry Andric SmallPriorityWorklist<Loop *, 4> Worklist; 16295ffd83dbSDimitry Andric appendLoopsToWorklist(LI, Worklist); 16300b57cec5SDimitry Andric 16310b57cec5SDimitry Andric while (!Worklist.empty()) { 16320b57cec5SDimitry Andric // Because the LoopInfo stores the loops in RPO, we walk the worklist 16330b57cec5SDimitry Andric // from back to front so that we work forward across the CFG, which 16340b57cec5SDimitry Andric // for unrolling is only needed to get optimization remarks emitted in 16350b57cec5SDimitry Andric // a forward order. 16360b57cec5SDimitry Andric Loop &L = *Worklist.pop_back_val(); 16370b57cec5SDimitry Andric #ifndef NDEBUG 16380b57cec5SDimitry Andric Loop *ParentL = L.getParentLoop(); 16390b57cec5SDimitry Andric #endif 16400b57cec5SDimitry Andric 16410b57cec5SDimitry Andric // Check if the profile summary indicates that the profiled application 16420b57cec5SDimitry Andric // has a huge working set size, in which case we disable peeling to avoid 16430b57cec5SDimitry Andric // bloating it further. 1644bdd1243dSDimitry Andric std::optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling; 16450b57cec5SDimitry Andric if (PSI && PSI->hasHugeWorkingSetSize()) 16460b57cec5SDimitry Andric LocalAllowPeeling = false; 16475ffd83dbSDimitry Andric std::string LoopName = std::string(L.getName()); 16480b57cec5SDimitry Andric // The API here is quite complex to call and we allow to select some 16490b57cec5SDimitry Andric // flavors of unrolling during construction time (by setting UnrollOpts). 16500b57cec5SDimitry Andric LoopUnrollResult Result = tryToUnrollLoop( 16510b57cec5SDimitry Andric &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI, 165206c3fb27SDimitry Andric /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*OnlyFullUnroll*/ false, 165306c3fb27SDimitry Andric UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV, 165406c3fb27SDimitry Andric /*Count*/ std::nullopt, 1655bdd1243dSDimitry Andric /*Threshold*/ std::nullopt, UnrollOpts.AllowPartial, 1656bdd1243dSDimitry Andric UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling, 1657*0fca6ea1SDimitry Andric UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount, 1658*0fca6ea1SDimitry Andric &AA); 16590b57cec5SDimitry Andric Changed |= Result != LoopUnrollResult::Unmodified; 16600b57cec5SDimitry Andric 16610b57cec5SDimitry Andric // The parent must not be damaged by unrolling! 16620b57cec5SDimitry Andric #ifndef NDEBUG 16630b57cec5SDimitry Andric if (Result != LoopUnrollResult::Unmodified && ParentL) 16640b57cec5SDimitry Andric ParentL->verifyLoop(); 16650b57cec5SDimitry Andric #endif 16660b57cec5SDimitry Andric 16670b57cec5SDimitry Andric // Clear any cached analysis results for L if we removed it completely. 16680b57cec5SDimitry Andric if (LAM && Result == LoopUnrollResult::FullyUnrolled) 16690b57cec5SDimitry Andric LAM->clear(L, LoopName); 16700b57cec5SDimitry Andric } 16710b57cec5SDimitry Andric 16720b57cec5SDimitry Andric if (!Changed) 16730b57cec5SDimitry Andric return PreservedAnalyses::all(); 16740b57cec5SDimitry Andric 16750b57cec5SDimitry Andric return getLoopPassPreservedAnalyses(); 16760b57cec5SDimitry Andric } 1677349cc55cSDimitry Andric 1678349cc55cSDimitry Andric void LoopUnrollPass::printPipeline( 1679349cc55cSDimitry Andric raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 1680349cc55cSDimitry Andric static_cast<PassInfoMixin<LoopUnrollPass> *>(this)->printPipeline( 1681349cc55cSDimitry Andric OS, MapClassName2PassName); 168206c3fb27SDimitry Andric OS << '<'; 1683bdd1243dSDimitry Andric if (UnrollOpts.AllowPartial != std::nullopt) 1684bdd1243dSDimitry Andric OS << (*UnrollOpts.AllowPartial ? "" : "no-") << "partial;"; 1685bdd1243dSDimitry Andric if (UnrollOpts.AllowPeeling != std::nullopt) 1686bdd1243dSDimitry Andric OS << (*UnrollOpts.AllowPeeling ? "" : "no-") << "peeling;"; 1687bdd1243dSDimitry Andric if (UnrollOpts.AllowRuntime != std::nullopt) 1688bdd1243dSDimitry Andric OS << (*UnrollOpts.AllowRuntime ? "" : "no-") << "runtime;"; 1689bdd1243dSDimitry Andric if (UnrollOpts.AllowUpperBound != std::nullopt) 1690bdd1243dSDimitry Andric OS << (*UnrollOpts.AllowUpperBound ? "" : "no-") << "upperbound;"; 1691bdd1243dSDimitry Andric if (UnrollOpts.AllowProfileBasedPeeling != std::nullopt) 1692bdd1243dSDimitry Andric OS << (*UnrollOpts.AllowProfileBasedPeeling ? "" : "no-") 1693349cc55cSDimitry Andric << "profile-peeling;"; 1694bdd1243dSDimitry Andric if (UnrollOpts.FullUnrollMaxCount != std::nullopt) 169506c3fb27SDimitry Andric OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ';'; 169606c3fb27SDimitry Andric OS << 'O' << UnrollOpts.OptLevel; 169706c3fb27SDimitry Andric OS << '>'; 1698349cc55cSDimitry Andric } 1699