10b57cec5SDimitry Andric //===- LoopUnrollAndJam.cpp - Loop unroll and jam pass --------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This pass implements an unroll and jam pass. Most of the work is done by 100b57cec5SDimitry Andric // Utils/UnrollLoopAndJam.cpp. 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" 145ffd83dbSDimitry Andric #include "llvm/ADT/ArrayRef.h" 155ffd83dbSDimitry Andric #include "llvm/ADT/PriorityWorklist.h" 160b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 170b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 180b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 190b57cec5SDimitry Andric #include "llvm/Analysis/CodeMetrics.h" 200b57cec5SDimitry Andric #include "llvm/Analysis/DependenceAnalysis.h" 210b57cec5SDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h" 220b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 2381ad6265SDimitry Andric #include "llvm/Analysis/LoopNestAnalysis.h" 24fe6060f1SDimitry Andric #include "llvm/Analysis/LoopPass.h" 250b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 260b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 270b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 280b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 290b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 300b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 310b57cec5SDimitry Andric #include "llvm/IR/Function.h" 320b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 330b57cec5SDimitry Andric #include "llvm/IR/Metadata.h" 340b57cec5SDimitry Andric #include "llvm/IR/PassManager.h" 350b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 360b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 375ffd83dbSDimitry Andric #include "llvm/Support/Compiler.h" 380b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 390b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 4081ad6265SDimitry Andric #include "llvm/Transforms/Scalar/LoopPassManager.h" 41e8d8bef9SDimitry Andric #include "llvm/Transforms/Utils/LoopPeel.h" 420b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 430b57cec5SDimitry Andric #include "llvm/Transforms/Utils/UnrollLoop.h" 440b57cec5SDimitry Andric #include <cassert> 450b57cec5SDimitry Andric #include <cstdint> 465ffd83dbSDimitry Andric 475ffd83dbSDimitry Andric namespace llvm { 485ffd83dbSDimitry Andric class Instruction; 495ffd83dbSDimitry Andric class Value; 505ffd83dbSDimitry Andric } // namespace llvm 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric using namespace llvm; 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric #define DEBUG_TYPE "loop-unroll-and-jam" 550b57cec5SDimitry Andric 560b57cec5SDimitry Andric /// @{ 570b57cec5SDimitry Andric /// Metadata attribute names 580b57cec5SDimitry Andric static const char *const LLVMLoopUnrollAndJamFollowupAll = 590b57cec5SDimitry Andric "llvm.loop.unroll_and_jam.followup_all"; 600b57cec5SDimitry Andric static const char *const LLVMLoopUnrollAndJamFollowupInner = 610b57cec5SDimitry Andric "llvm.loop.unroll_and_jam.followup_inner"; 620b57cec5SDimitry Andric static const char *const LLVMLoopUnrollAndJamFollowupOuter = 630b57cec5SDimitry Andric "llvm.loop.unroll_and_jam.followup_outer"; 640b57cec5SDimitry Andric static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner = 650b57cec5SDimitry Andric "llvm.loop.unroll_and_jam.followup_remainder_inner"; 660b57cec5SDimitry Andric static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter = 670b57cec5SDimitry Andric "llvm.loop.unroll_and_jam.followup_remainder_outer"; 680b57cec5SDimitry Andric /// @} 690b57cec5SDimitry Andric 700b57cec5SDimitry Andric static cl::opt<bool> 710b57cec5SDimitry Andric AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden, 720b57cec5SDimitry Andric cl::desc("Allows loops to be unroll-and-jammed.")); 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric static cl::opt<unsigned> UnrollAndJamCount( 750b57cec5SDimitry Andric "unroll-and-jam-count", cl::Hidden, 760b57cec5SDimitry Andric cl::desc("Use this unroll count for all loops including those with " 770b57cec5SDimitry Andric "unroll_and_jam_count pragma values, for testing purposes")); 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric static cl::opt<unsigned> UnrollAndJamThreshold( 800b57cec5SDimitry Andric "unroll-and-jam-threshold", cl::init(60), cl::Hidden, 810b57cec5SDimitry Andric cl::desc("Threshold to use for inner loop when doing unroll and jam.")); 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric static cl::opt<unsigned> PragmaUnrollAndJamThreshold( 840b57cec5SDimitry Andric "pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden, 850b57cec5SDimitry Andric cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or " 860b57cec5SDimitry Andric "unroll_count pragma.")); 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric // Returns the loop hint metadata node with the given name (for example, 890b57cec5SDimitry Andric // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is 900b57cec5SDimitry Andric // returned. 915ffd83dbSDimitry Andric static MDNode *getUnrollMetadataForLoop(const Loop *L, StringRef Name) { 920b57cec5SDimitry Andric if (MDNode *LoopID = L->getLoopID()) 930b57cec5SDimitry Andric return GetUnrollMetadata(LoopID, Name); 940b57cec5SDimitry Andric return nullptr; 950b57cec5SDimitry Andric } 960b57cec5SDimitry Andric 970b57cec5SDimitry Andric // Returns true if the loop has any metadata starting with Prefix. For example a 980b57cec5SDimitry Andric // Prefix of "llvm.loop.unroll." returns true if we have any unroll metadata. 995ffd83dbSDimitry Andric static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix) { 1000b57cec5SDimitry Andric if (MDNode *LoopID = L->getLoopID()) { 1010b57cec5SDimitry Andric // First operand should refer to the loop id itself. 1020b57cec5SDimitry Andric assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 1030b57cec5SDimitry Andric assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 1040b57cec5SDimitry Andric 1055ffd83dbSDimitry Andric for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) { 1065ffd83dbSDimitry Andric MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(I)); 1070b57cec5SDimitry Andric if (!MD) 1080b57cec5SDimitry Andric continue; 1090b57cec5SDimitry Andric 1100b57cec5SDimitry Andric MDString *S = dyn_cast<MDString>(MD->getOperand(0)); 1110b57cec5SDimitry Andric if (!S) 1120b57cec5SDimitry Andric continue; 1130b57cec5SDimitry Andric 1145f757f3fSDimitry Andric if (S->getString().starts_with(Prefix)) 1150b57cec5SDimitry Andric return true; 1160b57cec5SDimitry Andric } 1170b57cec5SDimitry Andric } 1180b57cec5SDimitry Andric return false; 1190b57cec5SDimitry Andric } 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric // Returns true if the loop has an unroll_and_jam(enable) pragma. 1225ffd83dbSDimitry Andric static bool hasUnrollAndJamEnablePragma(const Loop *L) { 1235ffd83dbSDimitry Andric return getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable"); 1240b57cec5SDimitry Andric } 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric // If loop has an unroll_and_jam_count pragma return the (necessarily 1270b57cec5SDimitry Andric // positive) value from the pragma. Otherwise return 0. 1285ffd83dbSDimitry Andric static unsigned unrollAndJamCountPragmaValue(const Loop *L) { 1295ffd83dbSDimitry Andric MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.count"); 1300b57cec5SDimitry Andric if (MD) { 1310b57cec5SDimitry Andric assert(MD->getNumOperands() == 2 && 1320b57cec5SDimitry Andric "Unroll count hint metadata should have two operands."); 1330b57cec5SDimitry Andric unsigned Count = 1340b57cec5SDimitry Andric mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue(); 1350b57cec5SDimitry Andric assert(Count >= 1 && "Unroll count must be positive."); 1360b57cec5SDimitry Andric return Count; 1370b57cec5SDimitry Andric } 1380b57cec5SDimitry Andric return 0; 1390b57cec5SDimitry Andric } 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric // Returns loop size estimation for unrolled loop. 1420b57cec5SDimitry Andric static uint64_t 1430b57cec5SDimitry Andric getUnrollAndJammedLoopSize(unsigned LoopSize, 1440b57cec5SDimitry Andric TargetTransformInfo::UnrollingPreferences &UP) { 1450b57cec5SDimitry Andric assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!"); 1460b57cec5SDimitry Andric return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns; 1470b57cec5SDimitry Andric } 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric // Calculates unroll and jam count and writes it to UP.Count. Returns true if 1500b57cec5SDimitry Andric // unroll count was set explicitly. 1510b57cec5SDimitry Andric static bool computeUnrollAndJamCount( 1520b57cec5SDimitry Andric Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT, 153bdd1243dSDimitry Andric LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, 1540b57cec5SDimitry Andric const SmallPtrSetImpl<const Value *> &EphValues, 1550b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE, unsigned OuterTripCount, 1565f757f3fSDimitry Andric unsigned OuterTripMultiple, const UnrollCostEstimator &OuterUCE, 1575f757f3fSDimitry Andric unsigned InnerTripCount, unsigned InnerLoopSize, 1585f757f3fSDimitry Andric TargetTransformInfo::UnrollingPreferences &UP, 1595ffd83dbSDimitry Andric TargetTransformInfo::PeelingPreferences &PP) { 1605f757f3fSDimitry Andric unsigned OuterLoopSize = OuterUCE.getRolledLoopSize(); 1610b57cec5SDimitry Andric // First up use computeUnrollCount from the loop unroller to get a count 1620b57cec5SDimitry Andric // for unrolling the outer loop, plus any loops requiring explicit 1630b57cec5SDimitry Andric // unrolling we leave to the unroller. This uses UP.Threshold / 1640b57cec5SDimitry Andric // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values. 1650b57cec5SDimitry Andric // We have already checked that the loop has no unroll.* pragmas. 1660b57cec5SDimitry Andric unsigned MaxTripCount = 0; 1670b57cec5SDimitry Andric bool UseUpperBound = false; 1680b57cec5SDimitry Andric bool ExplicitUnroll = computeUnrollCount( 169bdd1243dSDimitry Andric L, TTI, DT, LI, AC, SE, EphValues, ORE, OuterTripCount, MaxTripCount, 1705f757f3fSDimitry Andric /*MaxOrZero*/ false, OuterTripMultiple, OuterUCE, UP, PP, 1715ffd83dbSDimitry Andric UseUpperBound); 1720b57cec5SDimitry Andric if (ExplicitUnroll || UseUpperBound) { 1730b57cec5SDimitry Andric // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it 1740b57cec5SDimitry Andric // for the unroller instead. 1750b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by " 1760b57cec5SDimitry Andric "computeUnrollCount\n"); 1770b57cec5SDimitry Andric UP.Count = 0; 1780b57cec5SDimitry Andric return false; 1790b57cec5SDimitry Andric } 1800b57cec5SDimitry Andric 1810b57cec5SDimitry Andric // Override with any explicit Count from the "unroll-and-jam-count" option. 1820b57cec5SDimitry Andric bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0; 1830b57cec5SDimitry Andric if (UserUnrollCount) { 1840b57cec5SDimitry Andric UP.Count = UnrollAndJamCount; 1850b57cec5SDimitry Andric UP.Force = true; 1860b57cec5SDimitry Andric if (UP.AllowRemainder && 1870b57cec5SDimitry Andric getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold && 1880b57cec5SDimitry Andric getUnrollAndJammedLoopSize(InnerLoopSize, UP) < 1890b57cec5SDimitry Andric UP.UnrollAndJamInnerLoopThreshold) 1900b57cec5SDimitry Andric return true; 1910b57cec5SDimitry Andric } 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric // Check for unroll_and_jam pragmas 1945ffd83dbSDimitry Andric unsigned PragmaCount = unrollAndJamCountPragmaValue(L); 1950b57cec5SDimitry Andric if (PragmaCount > 0) { 1960b57cec5SDimitry Andric UP.Count = PragmaCount; 1970b57cec5SDimitry Andric UP.Runtime = true; 1980b57cec5SDimitry Andric UP.Force = true; 1990b57cec5SDimitry Andric if ((UP.AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) && 2000b57cec5SDimitry Andric getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold && 2010b57cec5SDimitry Andric getUnrollAndJammedLoopSize(InnerLoopSize, UP) < 2020b57cec5SDimitry Andric UP.UnrollAndJamInnerLoopThreshold) 2030b57cec5SDimitry Andric return true; 2040b57cec5SDimitry Andric } 2050b57cec5SDimitry Andric 2065ffd83dbSDimitry Andric bool PragmaEnableUnroll = hasUnrollAndJamEnablePragma(L); 2070b57cec5SDimitry Andric bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount; 2080b57cec5SDimitry Andric bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount; 2090b57cec5SDimitry Andric 2100b57cec5SDimitry Andric // If the loop has an unrolling pragma, we want to be more aggressive with 2110b57cec5SDimitry Andric // unrolling limits. 2120b57cec5SDimitry Andric if (ExplicitUnrollAndJam) 2130b57cec5SDimitry Andric UP.UnrollAndJamInnerLoopThreshold = PragmaUnrollAndJamThreshold; 2140b57cec5SDimitry Andric 2150b57cec5SDimitry Andric if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >= 2160b57cec5SDimitry Andric UP.UnrollAndJamInnerLoopThreshold) { 2170b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and " 2180b57cec5SDimitry Andric "inner loop too large\n"); 2190b57cec5SDimitry Andric UP.Count = 0; 2200b57cec5SDimitry Andric return false; 2210b57cec5SDimitry Andric } 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric // We have a sensible limit for the outer loop, now adjust it for the inner 2240b57cec5SDimitry Andric // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set 2250b57cec5SDimitry Andric // explicitly, we want to stick to it. 2260b57cec5SDimitry Andric if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) { 2270b57cec5SDimitry Andric while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >= 2280b57cec5SDimitry Andric UP.UnrollAndJamInnerLoopThreshold) 2290b57cec5SDimitry Andric UP.Count--; 2300b57cec5SDimitry Andric } 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric // If we are explicitly unroll and jamming, we are done. Otherwise there are a 2330b57cec5SDimitry Andric // number of extra performance heuristics to check. 2340b57cec5SDimitry Andric if (ExplicitUnrollAndJam) 2350b57cec5SDimitry Andric return true; 2360b57cec5SDimitry Andric 2370b57cec5SDimitry Andric // If the inner loop count is known and small, leave the entire loop nest to 2380b57cec5SDimitry Andric // be the unroller 2390b57cec5SDimitry Andric if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) { 2400b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is " 2410b57cec5SDimitry Andric "being left for the unroller\n"); 2420b57cec5SDimitry Andric UP.Count = 0; 2430b57cec5SDimitry Andric return false; 2440b57cec5SDimitry Andric } 2450b57cec5SDimitry Andric 2460b57cec5SDimitry Andric // Check for situations where UnJ is likely to be unprofitable. Including 2470b57cec5SDimitry Andric // subloops with more than 1 block. 2480b57cec5SDimitry Andric if (SubLoop->getBlocks().size() != 1) { 2490b57cec5SDimitry Andric LLVM_DEBUG( 2500b57cec5SDimitry Andric dbgs() << "Won't unroll-and-jam; More than one inner loop block\n"); 2510b57cec5SDimitry Andric UP.Count = 0; 2520b57cec5SDimitry Andric return false; 2530b57cec5SDimitry Andric } 2540b57cec5SDimitry Andric 2550b57cec5SDimitry Andric // Limit to loops where there is something to gain from unrolling and 2560b57cec5SDimitry Andric // jamming the loop. In this case, look for loads that are invariant in the 2570b57cec5SDimitry Andric // outer loop and can become shared. 2580b57cec5SDimitry Andric unsigned NumInvariant = 0; 2590b57cec5SDimitry Andric for (BasicBlock *BB : SubLoop->getBlocks()) { 2600b57cec5SDimitry Andric for (Instruction &I : *BB) { 2610b57cec5SDimitry Andric if (auto *Ld = dyn_cast<LoadInst>(&I)) { 2620b57cec5SDimitry Andric Value *V = Ld->getPointerOperand(); 2630b57cec5SDimitry Andric const SCEV *LSCEV = SE.getSCEVAtScope(V, L); 2640b57cec5SDimitry Andric if (SE.isLoopInvariant(LSCEV, L)) 2650b57cec5SDimitry Andric NumInvariant++; 2660b57cec5SDimitry Andric } 2670b57cec5SDimitry Andric } 2680b57cec5SDimitry Andric } 2690b57cec5SDimitry Andric if (NumInvariant == 0) { 2700b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n"); 2710b57cec5SDimitry Andric UP.Count = 0; 2720b57cec5SDimitry Andric return false; 2730b57cec5SDimitry Andric } 2740b57cec5SDimitry Andric 2750b57cec5SDimitry Andric return false; 2760b57cec5SDimitry Andric } 2770b57cec5SDimitry Andric 2780b57cec5SDimitry Andric static LoopUnrollResult 2790b57cec5SDimitry Andric tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, 2800b57cec5SDimitry Andric ScalarEvolution &SE, const TargetTransformInfo &TTI, 2810b57cec5SDimitry Andric AssumptionCache &AC, DependenceInfo &DI, 2820b57cec5SDimitry Andric OptimizationRemarkEmitter &ORE, int OptLevel) { 283bdd1243dSDimitry Andric TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( 284bdd1243dSDimitry Andric L, SE, TTI, nullptr, nullptr, ORE, OptLevel, std::nullopt, std::nullopt, 285bdd1243dSDimitry Andric std::nullopt, std::nullopt, std::nullopt, std::nullopt); 2865ffd83dbSDimitry Andric TargetTransformInfo::PeelingPreferences PP = 287bdd1243dSDimitry Andric gatherPeelingPreferences(L, SE, TTI, std::nullopt, std::nullopt); 288e8d8bef9SDimitry Andric 289e8d8bef9SDimitry Andric TransformationMode EnableMode = hasUnrollAndJamTransformation(L); 290e8d8bef9SDimitry Andric if (EnableMode & TM_Disable) 291e8d8bef9SDimitry Andric return LoopUnrollResult::Unmodified; 292e8d8bef9SDimitry Andric if (EnableMode & TM_ForcedByUser) 293e8d8bef9SDimitry Andric UP.UnrollAndJam = true; 294e8d8bef9SDimitry Andric 2950b57cec5SDimitry Andric if (AllowUnrollAndJam.getNumOccurrences() > 0) 2960b57cec5SDimitry Andric UP.UnrollAndJam = AllowUnrollAndJam; 2970b57cec5SDimitry Andric if (UnrollAndJamThreshold.getNumOccurrences() > 0) 2980b57cec5SDimitry Andric UP.UnrollAndJamInnerLoopThreshold = UnrollAndJamThreshold; 2990b57cec5SDimitry Andric // Exit early if unrolling is disabled. 3000b57cec5SDimitry Andric if (!UP.UnrollAndJam || UP.UnrollAndJamInnerLoopThreshold == 0) 3010b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 3020b57cec5SDimitry Andric 3030b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop Unroll and Jam: F[" 3040b57cec5SDimitry Andric << L->getHeader()->getParent()->getName() << "] Loop %" 3050b57cec5SDimitry Andric << L->getHeader()->getName() << "\n"); 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric // A loop with any unroll pragma (enabling/disabling/count/etc) is left for 3080b57cec5SDimitry Andric // the unroller, so long as it does not explicitly have unroll_and_jam 3090b57cec5SDimitry Andric // metadata. This means #pragma nounroll will disable unroll and jam as well 3100b57cec5SDimitry Andric // as unrolling 3115ffd83dbSDimitry Andric if (hasAnyUnrollPragma(L, "llvm.loop.unroll.") && 3125ffd83dbSDimitry Andric !hasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) { 3130b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Disabled due to pragma.\n"); 3140b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 3150b57cec5SDimitry Andric } 3160b57cec5SDimitry Andric 3175ffd83dbSDimitry Andric if (!isSafeToUnrollAndJam(L, SE, DT, DI, *LI)) { 3180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Disabled due to not being safe.\n"); 3190b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 3200b57cec5SDimitry Andric } 3210b57cec5SDimitry Andric 3220b57cec5SDimitry Andric // Approximate the loop size and collect useful info 3230b57cec5SDimitry Andric SmallPtrSet<const Value *, 32> EphValues; 3240b57cec5SDimitry Andric CodeMetrics::collectEphemeralValues(L, &AC, EphValues); 3255ffd83dbSDimitry Andric Loop *SubLoop = L->getSubLoops()[0]; 3265f757f3fSDimitry Andric UnrollCostEstimator InnerUCE(SubLoop, TTI, EphValues, UP.BEInsns); 3275f757f3fSDimitry Andric UnrollCostEstimator OuterUCE(L, TTI, EphValues, UP.BEInsns); 32881ad6265SDimitry Andric 3295f757f3fSDimitry Andric if (!InnerUCE.canUnroll() || !OuterUCE.canUnroll()) { 330*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " Loop not considered unrollable\n"); 33181ad6265SDimitry Andric return LoopUnrollResult::Unmodified; 33281ad6265SDimitry Andric } 33381ad6265SDimitry Andric 3345f757f3fSDimitry Andric unsigned InnerLoopSize = InnerUCE.getRolledLoopSize(); 3355f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterUCE.getRolledLoopSize() 3365f757f3fSDimitry Andric << "\n"); 3375f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSize << "\n"); 3385f757f3fSDimitry Andric 3395f757f3fSDimitry Andric if (InnerUCE.NumInlineCandidates != 0 || OuterUCE.NumInlineCandidates != 0) { 3400b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); 3410b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 3420b57cec5SDimitry Andric } 343*0fca6ea1SDimitry Andric // FIXME: The call to canUnroll() allows some controlled convergent 344*0fca6ea1SDimitry Andric // operations, but we block them here for future changes. 345*0fca6ea1SDimitry Andric if (InnerUCE.Convergence != ConvergenceKind::None || 346*0fca6ea1SDimitry Andric OuterUCE.Convergence != ConvergenceKind::None) { 3470b57cec5SDimitry Andric LLVM_DEBUG( 3480b57cec5SDimitry Andric dbgs() << " Not unrolling loop with convergent instructions.\n"); 3490b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 3500b57cec5SDimitry Andric } 3510b57cec5SDimitry Andric 3520b57cec5SDimitry Andric // Save original loop IDs for after the transformation. 3530b57cec5SDimitry Andric MDNode *OrigOuterLoopID = L->getLoopID(); 3540b57cec5SDimitry Andric MDNode *OrigSubLoopID = SubLoop->getLoopID(); 3550b57cec5SDimitry Andric 3560b57cec5SDimitry Andric // To assign the loop id of the epilogue, assign it before unrolling it so it 3570b57cec5SDimitry Andric // is applied to every inner loop of the epilogue. We later apply the loop ID 3580b57cec5SDimitry Andric // for the jammed inner loop. 359bdd1243dSDimitry Andric std::optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID( 3600b57cec5SDimitry Andric OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, 3610b57cec5SDimitry Andric LLVMLoopUnrollAndJamFollowupRemainderInner}); 36281ad6265SDimitry Andric if (NewInnerEpilogueLoopID) 363bdd1243dSDimitry Andric SubLoop->setLoopID(*NewInnerEpilogueLoopID); 3640b57cec5SDimitry Andric 3650b57cec5SDimitry Andric // Find trip count and trip multiple 3665ffd83dbSDimitry Andric BasicBlock *Latch = L->getLoopLatch(); 3675ffd83dbSDimitry Andric BasicBlock *SubLoopLatch = SubLoop->getLoopLatch(); 3680b57cec5SDimitry Andric unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch); 3690b57cec5SDimitry Andric unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch); 3700b57cec5SDimitry Andric unsigned InnerTripCount = SE.getSmallConstantTripCount(SubLoop, SubLoopLatch); 3710b57cec5SDimitry Andric 3720b57cec5SDimitry Andric // Decide if, and by how much, to unroll 3730b57cec5SDimitry Andric bool IsCountSetExplicitly = computeUnrollAndJamCount( 374bdd1243dSDimitry Andric L, SubLoop, TTI, DT, LI, &AC, SE, EphValues, &ORE, OuterTripCount, 3755f757f3fSDimitry Andric OuterTripMultiple, OuterUCE, InnerTripCount, InnerLoopSize, UP, PP); 3760b57cec5SDimitry Andric if (UP.Count <= 1) 3770b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 3780b57cec5SDimitry Andric // Unroll factor (Count) must be less or equal to TripCount. 3790b57cec5SDimitry Andric if (OuterTripCount && UP.Count > OuterTripCount) 3800b57cec5SDimitry Andric UP.Count = OuterTripCount; 3810b57cec5SDimitry Andric 3820b57cec5SDimitry Andric Loop *EpilogueOuterLoop = nullptr; 3830b57cec5SDimitry Andric LoopUnrollResult UnrollResult = UnrollAndJamLoop( 3840b57cec5SDimitry Andric L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI, 3855ffd83dbSDimitry Andric &SE, &DT, &AC, &TTI, &ORE, &EpilogueOuterLoop); 3860b57cec5SDimitry Andric 3870b57cec5SDimitry Andric // Assign new loop attributes. 3880b57cec5SDimitry Andric if (EpilogueOuterLoop) { 389bdd1243dSDimitry Andric std::optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID( 3900b57cec5SDimitry Andric OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, 3910b57cec5SDimitry Andric LLVMLoopUnrollAndJamFollowupRemainderOuter}); 39281ad6265SDimitry Andric if (NewOuterEpilogueLoopID) 393bdd1243dSDimitry Andric EpilogueOuterLoop->setLoopID(*NewOuterEpilogueLoopID); 3940b57cec5SDimitry Andric } 3950b57cec5SDimitry Andric 396bdd1243dSDimitry Andric std::optional<MDNode *> NewInnerLoopID = 3970b57cec5SDimitry Andric makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll, 3980b57cec5SDimitry Andric LLVMLoopUnrollAndJamFollowupInner}); 39981ad6265SDimitry Andric if (NewInnerLoopID) 400bdd1243dSDimitry Andric SubLoop->setLoopID(*NewInnerLoopID); 4010b57cec5SDimitry Andric else 4020b57cec5SDimitry Andric SubLoop->setLoopID(OrigSubLoopID); 4030b57cec5SDimitry Andric 4040b57cec5SDimitry Andric if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) { 405bdd1243dSDimitry Andric std::optional<MDNode *> NewOuterLoopID = makeFollowupLoopID( 4060b57cec5SDimitry Andric OrigOuterLoopID, 4070b57cec5SDimitry Andric {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter}); 40881ad6265SDimitry Andric if (NewOuterLoopID) { 409bdd1243dSDimitry Andric L->setLoopID(*NewOuterLoopID); 4100b57cec5SDimitry Andric 4110b57cec5SDimitry Andric // Do not setLoopAlreadyUnrolled if a followup was given. 4120b57cec5SDimitry Andric return UnrollResult; 4130b57cec5SDimitry Andric } 4140b57cec5SDimitry Andric } 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric // If loop has an unroll count pragma or unrolled by explicitly set count 4170b57cec5SDimitry Andric // mark loop as unrolled to prevent unrolling beyond that requested. 4180b57cec5SDimitry Andric if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly) 4190b57cec5SDimitry Andric L->setLoopAlreadyUnrolled(); 4200b57cec5SDimitry Andric 4210b57cec5SDimitry Andric return UnrollResult; 4220b57cec5SDimitry Andric } 4230b57cec5SDimitry Andric 424fe6060f1SDimitry Andric static bool tryToUnrollAndJamLoop(LoopNest &LN, DominatorTree &DT, LoopInfo &LI, 425480093f4SDimitry Andric ScalarEvolution &SE, 426480093f4SDimitry Andric const TargetTransformInfo &TTI, 427480093f4SDimitry Andric AssumptionCache &AC, DependenceInfo &DI, 428fe6060f1SDimitry Andric OptimizationRemarkEmitter &ORE, int OptLevel, 429fe6060f1SDimitry Andric LPMUpdater &U) { 430480093f4SDimitry Andric bool DidSomething = false; 431fe6060f1SDimitry Andric ArrayRef<Loop *> Loops = LN.getLoops(); 432fe6060f1SDimitry Andric Loop *OutmostLoop = &LN.getOutermostLoop(); 433480093f4SDimitry Andric 434fe6060f1SDimitry Andric // Add the loop nests in the reverse order of LN. See method 4355ffd83dbSDimitry Andric // declaration. 436480093f4SDimitry Andric SmallPriorityWorklist<Loop *, 4> Worklist; 437fe6060f1SDimitry Andric appendLoopsToWorklist(Loops, Worklist); 438480093f4SDimitry Andric while (!Worklist.empty()) { 439480093f4SDimitry Andric Loop *L = Worklist.pop_back_val(); 440fe6060f1SDimitry Andric std::string LoopName = std::string(L->getName()); 441480093f4SDimitry Andric LoopUnrollResult Result = 442480093f4SDimitry Andric tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel); 443480093f4SDimitry Andric if (Result != LoopUnrollResult::Unmodified) 444480093f4SDimitry Andric DidSomething = true; 445fe6060f1SDimitry Andric if (L == OutmostLoop && Result == LoopUnrollResult::FullyUnrolled) 446fe6060f1SDimitry Andric U.markLoopAsDeleted(*L, LoopName); 447480093f4SDimitry Andric } 448480093f4SDimitry Andric 449480093f4SDimitry Andric return DidSomething; 450480093f4SDimitry Andric } 451480093f4SDimitry Andric 452fe6060f1SDimitry Andric PreservedAnalyses LoopUnrollAndJamPass::run(LoopNest &LN, 453fe6060f1SDimitry Andric LoopAnalysisManager &AM, 454fe6060f1SDimitry Andric LoopStandardAnalysisResults &AR, 455fe6060f1SDimitry Andric LPMUpdater &U) { 456fe6060f1SDimitry Andric Function &F = *LN.getParent(); 4570b57cec5SDimitry Andric 458fe6060f1SDimitry Andric DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); 459fe6060f1SDimitry Andric OptimizationRemarkEmitter ORE(&F); 460fe6060f1SDimitry Andric 461fe6060f1SDimitry Andric if (!tryToUnrollAndJamLoop(LN, AR.DT, AR.LI, AR.SE, AR.TTI, AR.AC, DI, ORE, 462fe6060f1SDimitry Andric OptLevel, U)) 4630b57cec5SDimitry Andric return PreservedAnalyses::all(); 4640b57cec5SDimitry Andric 465fe6060f1SDimitry Andric auto PA = getLoopPassPreservedAnalyses(); 466fe6060f1SDimitry Andric PA.preserve<LoopNestAnalysis>(); 467fe6060f1SDimitry Andric return PA; 4680b57cec5SDimitry Andric } 469