xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- LoopUnrollAndJam.cpp - Loop unroll and jam pass --------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass implements an unroll and jam pass. Most of the work is done by
100b57cec5SDimitry Andric // Utils/UnrollLoopAndJam.cpp.
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
145ffd83dbSDimitry Andric #include "llvm/ADT/ArrayRef.h"
155ffd83dbSDimitry Andric #include "llvm/ADT/PriorityWorklist.h"
160b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
170b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
180b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
190b57cec5SDimitry Andric #include "llvm/Analysis/CodeMetrics.h"
200b57cec5SDimitry Andric #include "llvm/Analysis/DependenceAnalysis.h"
210b57cec5SDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
2381ad6265SDimitry Andric #include "llvm/Analysis/LoopNestAnalysis.h"
24fe6060f1SDimitry Andric #include "llvm/Analysis/LoopPass.h"
250b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
260b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
270b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
280b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
290b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
300b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
310b57cec5SDimitry Andric #include "llvm/IR/Function.h"
320b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
330b57cec5SDimitry Andric #include "llvm/IR/Metadata.h"
340b57cec5SDimitry Andric #include "llvm/IR/PassManager.h"
350b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
360b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
375ffd83dbSDimitry Andric #include "llvm/Support/Compiler.h"
380b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
390b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
4081ad6265SDimitry Andric #include "llvm/Transforms/Scalar/LoopPassManager.h"
41e8d8bef9SDimitry Andric #include "llvm/Transforms/Utils/LoopPeel.h"
420b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
430b57cec5SDimitry Andric #include "llvm/Transforms/Utils/UnrollLoop.h"
440b57cec5SDimitry Andric #include <cassert>
450b57cec5SDimitry Andric #include <cstdint>
465ffd83dbSDimitry Andric 
475ffd83dbSDimitry Andric namespace llvm {
485ffd83dbSDimitry Andric class Instruction;
495ffd83dbSDimitry Andric class Value;
505ffd83dbSDimitry Andric } // namespace llvm
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric using namespace llvm;
530b57cec5SDimitry Andric 
540b57cec5SDimitry Andric #define DEBUG_TYPE "loop-unroll-and-jam"
550b57cec5SDimitry Andric 
560b57cec5SDimitry Andric /// @{
570b57cec5SDimitry Andric /// Metadata attribute names
580b57cec5SDimitry Andric static const char *const LLVMLoopUnrollAndJamFollowupAll =
590b57cec5SDimitry Andric     "llvm.loop.unroll_and_jam.followup_all";
600b57cec5SDimitry Andric static const char *const LLVMLoopUnrollAndJamFollowupInner =
610b57cec5SDimitry Andric     "llvm.loop.unroll_and_jam.followup_inner";
620b57cec5SDimitry Andric static const char *const LLVMLoopUnrollAndJamFollowupOuter =
630b57cec5SDimitry Andric     "llvm.loop.unroll_and_jam.followup_outer";
640b57cec5SDimitry Andric static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner =
650b57cec5SDimitry Andric     "llvm.loop.unroll_and_jam.followup_remainder_inner";
660b57cec5SDimitry Andric static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter =
670b57cec5SDimitry Andric     "llvm.loop.unroll_and_jam.followup_remainder_outer";
680b57cec5SDimitry Andric /// @}
690b57cec5SDimitry Andric 
700b57cec5SDimitry Andric static cl::opt<bool>
710b57cec5SDimitry Andric     AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden,
720b57cec5SDimitry Andric                       cl::desc("Allows loops to be unroll-and-jammed."));
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric static cl::opt<unsigned> UnrollAndJamCount(
750b57cec5SDimitry Andric     "unroll-and-jam-count", cl::Hidden,
760b57cec5SDimitry Andric     cl::desc("Use this unroll count for all loops including those with "
770b57cec5SDimitry Andric              "unroll_and_jam_count pragma values, for testing purposes"));
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric static cl::opt<unsigned> UnrollAndJamThreshold(
800b57cec5SDimitry Andric     "unroll-and-jam-threshold", cl::init(60), cl::Hidden,
810b57cec5SDimitry Andric     cl::desc("Threshold to use for inner loop when doing unroll and jam."));
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric static cl::opt<unsigned> PragmaUnrollAndJamThreshold(
840b57cec5SDimitry Andric     "pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden,
850b57cec5SDimitry Andric     cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or "
860b57cec5SDimitry Andric              "unroll_count pragma."));
870b57cec5SDimitry Andric 
880b57cec5SDimitry Andric // Returns the loop hint metadata node with the given name (for example,
890b57cec5SDimitry Andric // "llvm.loop.unroll.count").  If no such metadata node exists, then nullptr is
900b57cec5SDimitry Andric // returned.
915ffd83dbSDimitry Andric static MDNode *getUnrollMetadataForLoop(const Loop *L, StringRef Name) {
920b57cec5SDimitry Andric   if (MDNode *LoopID = L->getLoopID())
930b57cec5SDimitry Andric     return GetUnrollMetadata(LoopID, Name);
940b57cec5SDimitry Andric   return nullptr;
950b57cec5SDimitry Andric }
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric // Returns true if the loop has any metadata starting with Prefix. For example a
980b57cec5SDimitry Andric // Prefix of "llvm.loop.unroll." returns true if we have any unroll metadata.
995ffd83dbSDimitry Andric static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix) {
1000b57cec5SDimitry Andric   if (MDNode *LoopID = L->getLoopID()) {
1010b57cec5SDimitry Andric     // First operand should refer to the loop id itself.
1020b57cec5SDimitry Andric     assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
1030b57cec5SDimitry Andric     assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
1040b57cec5SDimitry Andric 
1055ffd83dbSDimitry Andric     for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) {
1065ffd83dbSDimitry Andric       MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
1070b57cec5SDimitry Andric       if (!MD)
1080b57cec5SDimitry Andric         continue;
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric       MDString *S = dyn_cast<MDString>(MD->getOperand(0));
1110b57cec5SDimitry Andric       if (!S)
1120b57cec5SDimitry Andric         continue;
1130b57cec5SDimitry Andric 
1145f757f3fSDimitry Andric       if (S->getString().starts_with(Prefix))
1150b57cec5SDimitry Andric         return true;
1160b57cec5SDimitry Andric     }
1170b57cec5SDimitry Andric   }
1180b57cec5SDimitry Andric   return false;
1190b57cec5SDimitry Andric }
1200b57cec5SDimitry Andric 
1210b57cec5SDimitry Andric // Returns true if the loop has an unroll_and_jam(enable) pragma.
1225ffd83dbSDimitry Andric static bool hasUnrollAndJamEnablePragma(const Loop *L) {
1235ffd83dbSDimitry Andric   return getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable");
1240b57cec5SDimitry Andric }
1250b57cec5SDimitry Andric 
1260b57cec5SDimitry Andric // If loop has an unroll_and_jam_count pragma return the (necessarily
1270b57cec5SDimitry Andric // positive) value from the pragma.  Otherwise return 0.
1285ffd83dbSDimitry Andric static unsigned unrollAndJamCountPragmaValue(const Loop *L) {
1295ffd83dbSDimitry Andric   MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.count");
1300b57cec5SDimitry Andric   if (MD) {
1310b57cec5SDimitry Andric     assert(MD->getNumOperands() == 2 &&
1320b57cec5SDimitry Andric            "Unroll count hint metadata should have two operands.");
1330b57cec5SDimitry Andric     unsigned Count =
1340b57cec5SDimitry Andric         mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
1350b57cec5SDimitry Andric     assert(Count >= 1 && "Unroll count must be positive.");
1360b57cec5SDimitry Andric     return Count;
1370b57cec5SDimitry Andric   }
1380b57cec5SDimitry Andric   return 0;
1390b57cec5SDimitry Andric }
1400b57cec5SDimitry Andric 
1410b57cec5SDimitry Andric // Returns loop size estimation for unrolled loop.
1420b57cec5SDimitry Andric static uint64_t
1430b57cec5SDimitry Andric getUnrollAndJammedLoopSize(unsigned LoopSize,
1440b57cec5SDimitry Andric                            TargetTransformInfo::UnrollingPreferences &UP) {
1450b57cec5SDimitry Andric   assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
1460b57cec5SDimitry Andric   return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
1470b57cec5SDimitry Andric }
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric // Calculates unroll and jam count and writes it to UP.Count. Returns true if
1500b57cec5SDimitry Andric // unroll count was set explicitly.
1510b57cec5SDimitry Andric static bool computeUnrollAndJamCount(
1520b57cec5SDimitry Andric     Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT,
153bdd1243dSDimitry Andric     LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE,
1540b57cec5SDimitry Andric     const SmallPtrSetImpl<const Value *> &EphValues,
1550b57cec5SDimitry Andric     OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
1565f757f3fSDimitry Andric     unsigned OuterTripMultiple, const UnrollCostEstimator &OuterUCE,
1575f757f3fSDimitry Andric     unsigned InnerTripCount, unsigned InnerLoopSize,
1585f757f3fSDimitry Andric     TargetTransformInfo::UnrollingPreferences &UP,
1595ffd83dbSDimitry Andric     TargetTransformInfo::PeelingPreferences &PP) {
1605f757f3fSDimitry Andric   unsigned OuterLoopSize = OuterUCE.getRolledLoopSize();
1610b57cec5SDimitry Andric   // First up use computeUnrollCount from the loop unroller to get a count
1620b57cec5SDimitry Andric   // for unrolling the outer loop, plus any loops requiring explicit
1630b57cec5SDimitry Andric   // unrolling we leave to the unroller. This uses UP.Threshold /
1640b57cec5SDimitry Andric   // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values.
1650b57cec5SDimitry Andric   // We have already checked that the loop has no unroll.* pragmas.
1660b57cec5SDimitry Andric   unsigned MaxTripCount = 0;
1670b57cec5SDimitry Andric   bool UseUpperBound = false;
1680b57cec5SDimitry Andric   bool ExplicitUnroll = computeUnrollCount(
169bdd1243dSDimitry Andric     L, TTI, DT, LI, AC, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
1705f757f3fSDimitry Andric       /*MaxOrZero*/ false, OuterTripMultiple, OuterUCE, UP, PP,
1715ffd83dbSDimitry Andric       UseUpperBound);
1720b57cec5SDimitry Andric   if (ExplicitUnroll || UseUpperBound) {
1730b57cec5SDimitry Andric     // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
1740b57cec5SDimitry Andric     // for the unroller instead.
1750b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by "
1760b57cec5SDimitry Andric                          "computeUnrollCount\n");
1770b57cec5SDimitry Andric     UP.Count = 0;
1780b57cec5SDimitry Andric     return false;
1790b57cec5SDimitry Andric   }
1800b57cec5SDimitry Andric 
1810b57cec5SDimitry Andric   // Override with any explicit Count from the "unroll-and-jam-count" option.
1820b57cec5SDimitry Andric   bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0;
1830b57cec5SDimitry Andric   if (UserUnrollCount) {
1840b57cec5SDimitry Andric     UP.Count = UnrollAndJamCount;
1850b57cec5SDimitry Andric     UP.Force = true;
1860b57cec5SDimitry Andric     if (UP.AllowRemainder &&
1870b57cec5SDimitry Andric         getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
1880b57cec5SDimitry Andric         getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
1890b57cec5SDimitry Andric             UP.UnrollAndJamInnerLoopThreshold)
1900b57cec5SDimitry Andric       return true;
1910b57cec5SDimitry Andric   }
1920b57cec5SDimitry Andric 
1930b57cec5SDimitry Andric   // Check for unroll_and_jam pragmas
1945ffd83dbSDimitry Andric   unsigned PragmaCount = unrollAndJamCountPragmaValue(L);
1950b57cec5SDimitry Andric   if (PragmaCount > 0) {
1960b57cec5SDimitry Andric     UP.Count = PragmaCount;
1970b57cec5SDimitry Andric     UP.Runtime = true;
1980b57cec5SDimitry Andric     UP.Force = true;
1990b57cec5SDimitry Andric     if ((UP.AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) &&
2000b57cec5SDimitry Andric         getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
2010b57cec5SDimitry Andric         getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
2020b57cec5SDimitry Andric             UP.UnrollAndJamInnerLoopThreshold)
2030b57cec5SDimitry Andric       return true;
2040b57cec5SDimitry Andric   }
2050b57cec5SDimitry Andric 
2065ffd83dbSDimitry Andric   bool PragmaEnableUnroll = hasUnrollAndJamEnablePragma(L);
2070b57cec5SDimitry Andric   bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
2080b57cec5SDimitry Andric   bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
2090b57cec5SDimitry Andric 
2100b57cec5SDimitry Andric   // If the loop has an unrolling pragma, we want to be more aggressive with
2110b57cec5SDimitry Andric   // unrolling limits.
2120b57cec5SDimitry Andric   if (ExplicitUnrollAndJam)
2130b57cec5SDimitry Andric     UP.UnrollAndJamInnerLoopThreshold = PragmaUnrollAndJamThreshold;
2140b57cec5SDimitry Andric 
2150b57cec5SDimitry Andric   if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
2160b57cec5SDimitry Andric                                 UP.UnrollAndJamInnerLoopThreshold) {
2170b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and "
2180b57cec5SDimitry Andric                          "inner loop too large\n");
2190b57cec5SDimitry Andric     UP.Count = 0;
2200b57cec5SDimitry Andric     return false;
2210b57cec5SDimitry Andric   }
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric   // We have a sensible limit for the outer loop, now adjust it for the inner
2240b57cec5SDimitry Andric   // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set
2250b57cec5SDimitry Andric   // explicitly, we want to stick to it.
2260b57cec5SDimitry Andric   if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) {
2270b57cec5SDimitry Andric     while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
2280b57cec5SDimitry Andric                                 UP.UnrollAndJamInnerLoopThreshold)
2290b57cec5SDimitry Andric       UP.Count--;
2300b57cec5SDimitry Andric   }
2310b57cec5SDimitry Andric 
2320b57cec5SDimitry Andric   // If we are explicitly unroll and jamming, we are done. Otherwise there are a
2330b57cec5SDimitry Andric   // number of extra performance heuristics to check.
2340b57cec5SDimitry Andric   if (ExplicitUnrollAndJam)
2350b57cec5SDimitry Andric     return true;
2360b57cec5SDimitry Andric 
2370b57cec5SDimitry Andric   // If the inner loop count is known and small, leave the entire loop nest to
2380b57cec5SDimitry Andric   // be the unroller
2390b57cec5SDimitry Andric   if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) {
2400b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is "
2410b57cec5SDimitry Andric                          "being left for the unroller\n");
2420b57cec5SDimitry Andric     UP.Count = 0;
2430b57cec5SDimitry Andric     return false;
2440b57cec5SDimitry Andric   }
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric   // Check for situations where UnJ is likely to be unprofitable. Including
2470b57cec5SDimitry Andric   // subloops with more than 1 block.
2480b57cec5SDimitry Andric   if (SubLoop->getBlocks().size() != 1) {
2490b57cec5SDimitry Andric     LLVM_DEBUG(
2500b57cec5SDimitry Andric         dbgs() << "Won't unroll-and-jam; More than one inner loop block\n");
2510b57cec5SDimitry Andric     UP.Count = 0;
2520b57cec5SDimitry Andric     return false;
2530b57cec5SDimitry Andric   }
2540b57cec5SDimitry Andric 
2550b57cec5SDimitry Andric   // Limit to loops where there is something to gain from unrolling and
2560b57cec5SDimitry Andric   // jamming the loop. In this case, look for loads that are invariant in the
2570b57cec5SDimitry Andric   // outer loop and can become shared.
2580b57cec5SDimitry Andric   unsigned NumInvariant = 0;
2590b57cec5SDimitry Andric   for (BasicBlock *BB : SubLoop->getBlocks()) {
2600b57cec5SDimitry Andric     for (Instruction &I : *BB) {
2610b57cec5SDimitry Andric       if (auto *Ld = dyn_cast<LoadInst>(&I)) {
2620b57cec5SDimitry Andric         Value *V = Ld->getPointerOperand();
2630b57cec5SDimitry Andric         const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
2640b57cec5SDimitry Andric         if (SE.isLoopInvariant(LSCEV, L))
2650b57cec5SDimitry Andric           NumInvariant++;
2660b57cec5SDimitry Andric       }
2670b57cec5SDimitry Andric     }
2680b57cec5SDimitry Andric   }
2690b57cec5SDimitry Andric   if (NumInvariant == 0) {
2700b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n");
2710b57cec5SDimitry Andric     UP.Count = 0;
2720b57cec5SDimitry Andric     return false;
2730b57cec5SDimitry Andric   }
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric   return false;
2760b57cec5SDimitry Andric }
2770b57cec5SDimitry Andric 
2780b57cec5SDimitry Andric static LoopUnrollResult
2790b57cec5SDimitry Andric tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
2800b57cec5SDimitry Andric                       ScalarEvolution &SE, const TargetTransformInfo &TTI,
2810b57cec5SDimitry Andric                       AssumptionCache &AC, DependenceInfo &DI,
2820b57cec5SDimitry Andric                       OptimizationRemarkEmitter &ORE, int OptLevel) {
283bdd1243dSDimitry Andric   TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
284bdd1243dSDimitry Andric       L, SE, TTI, nullptr, nullptr, ORE, OptLevel, std::nullopt, std::nullopt,
285bdd1243dSDimitry Andric       std::nullopt, std::nullopt, std::nullopt, std::nullopt);
2865ffd83dbSDimitry Andric   TargetTransformInfo::PeelingPreferences PP =
287bdd1243dSDimitry Andric       gatherPeelingPreferences(L, SE, TTI, std::nullopt, std::nullopt);
288e8d8bef9SDimitry Andric 
289e8d8bef9SDimitry Andric   TransformationMode EnableMode = hasUnrollAndJamTransformation(L);
290e8d8bef9SDimitry Andric   if (EnableMode & TM_Disable)
291e8d8bef9SDimitry Andric     return LoopUnrollResult::Unmodified;
292e8d8bef9SDimitry Andric   if (EnableMode & TM_ForcedByUser)
293e8d8bef9SDimitry Andric     UP.UnrollAndJam = true;
294e8d8bef9SDimitry Andric 
2950b57cec5SDimitry Andric   if (AllowUnrollAndJam.getNumOccurrences() > 0)
2960b57cec5SDimitry Andric     UP.UnrollAndJam = AllowUnrollAndJam;
2970b57cec5SDimitry Andric   if (UnrollAndJamThreshold.getNumOccurrences() > 0)
2980b57cec5SDimitry Andric     UP.UnrollAndJamInnerLoopThreshold = UnrollAndJamThreshold;
2990b57cec5SDimitry Andric   // Exit early if unrolling is disabled.
3000b57cec5SDimitry Andric   if (!UP.UnrollAndJam || UP.UnrollAndJamInnerLoopThreshold == 0)
3010b57cec5SDimitry Andric     return LoopUnrollResult::Unmodified;
3020b57cec5SDimitry Andric 
3030b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Loop Unroll and Jam: F["
3040b57cec5SDimitry Andric                     << L->getHeader()->getParent()->getName() << "] Loop %"
3050b57cec5SDimitry Andric                     << L->getHeader()->getName() << "\n");
3060b57cec5SDimitry Andric 
3070b57cec5SDimitry Andric   // A loop with any unroll pragma (enabling/disabling/count/etc) is left for
3080b57cec5SDimitry Andric   // the unroller, so long as it does not explicitly have unroll_and_jam
3090b57cec5SDimitry Andric   // metadata. This means #pragma nounroll will disable unroll and jam as well
3100b57cec5SDimitry Andric   // as unrolling
3115ffd83dbSDimitry Andric   if (hasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
3125ffd83dbSDimitry Andric       !hasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) {
3130b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Disabled due to pragma.\n");
3140b57cec5SDimitry Andric     return LoopUnrollResult::Unmodified;
3150b57cec5SDimitry Andric   }
3160b57cec5SDimitry Andric 
3175ffd83dbSDimitry Andric   if (!isSafeToUnrollAndJam(L, SE, DT, DI, *LI)) {
3180b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Disabled due to not being safe.\n");
3190b57cec5SDimitry Andric     return LoopUnrollResult::Unmodified;
3200b57cec5SDimitry Andric   }
3210b57cec5SDimitry Andric 
3220b57cec5SDimitry Andric   // Approximate the loop size and collect useful info
3230b57cec5SDimitry Andric   SmallPtrSet<const Value *, 32> EphValues;
3240b57cec5SDimitry Andric   CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
3255ffd83dbSDimitry Andric   Loop *SubLoop = L->getSubLoops()[0];
3265f757f3fSDimitry Andric   UnrollCostEstimator InnerUCE(SubLoop, TTI, EphValues, UP.BEInsns);
3275f757f3fSDimitry Andric   UnrollCostEstimator OuterUCE(L, TTI, EphValues, UP.BEInsns);
32881ad6265SDimitry Andric 
3295f757f3fSDimitry Andric   if (!InnerUCE.canUnroll() || !OuterUCE.canUnroll()) {
330*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "  Loop not considered unrollable\n");
33181ad6265SDimitry Andric     return LoopUnrollResult::Unmodified;
33281ad6265SDimitry Andric   }
33381ad6265SDimitry Andric 
3345f757f3fSDimitry Andric   unsigned InnerLoopSize = InnerUCE.getRolledLoopSize();
3355f757f3fSDimitry Andric   LLVM_DEBUG(dbgs() << "  Outer Loop Size: " << OuterUCE.getRolledLoopSize()
3365f757f3fSDimitry Andric                     << "\n");
3375f757f3fSDimitry Andric   LLVM_DEBUG(dbgs() << "  Inner Loop Size: " << InnerLoopSize << "\n");
3385f757f3fSDimitry Andric 
3395f757f3fSDimitry Andric   if (InnerUCE.NumInlineCandidates != 0 || OuterUCE.NumInlineCandidates != 0) {
3400b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
3410b57cec5SDimitry Andric     return LoopUnrollResult::Unmodified;
3420b57cec5SDimitry Andric   }
343*0fca6ea1SDimitry Andric   // FIXME: The call to canUnroll() allows some controlled convergent
344*0fca6ea1SDimitry Andric   // operations, but we block them here for future changes.
345*0fca6ea1SDimitry Andric   if (InnerUCE.Convergence != ConvergenceKind::None ||
346*0fca6ea1SDimitry Andric       OuterUCE.Convergence != ConvergenceKind::None) {
3470b57cec5SDimitry Andric     LLVM_DEBUG(
3480b57cec5SDimitry Andric         dbgs() << "  Not unrolling loop with convergent instructions.\n");
3490b57cec5SDimitry Andric     return LoopUnrollResult::Unmodified;
3500b57cec5SDimitry Andric   }
3510b57cec5SDimitry Andric 
3520b57cec5SDimitry Andric   // Save original loop IDs for after the transformation.
3530b57cec5SDimitry Andric   MDNode *OrigOuterLoopID = L->getLoopID();
3540b57cec5SDimitry Andric   MDNode *OrigSubLoopID = SubLoop->getLoopID();
3550b57cec5SDimitry Andric 
3560b57cec5SDimitry Andric   // To assign the loop id of the epilogue, assign it before unrolling it so it
3570b57cec5SDimitry Andric   // is applied to every inner loop of the epilogue. We later apply the loop ID
3580b57cec5SDimitry Andric   // for the jammed inner loop.
359bdd1243dSDimitry Andric   std::optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(
3600b57cec5SDimitry Andric       OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
3610b57cec5SDimitry Andric                         LLVMLoopUnrollAndJamFollowupRemainderInner});
36281ad6265SDimitry Andric   if (NewInnerEpilogueLoopID)
363bdd1243dSDimitry Andric     SubLoop->setLoopID(*NewInnerEpilogueLoopID);
3640b57cec5SDimitry Andric 
3650b57cec5SDimitry Andric   // Find trip count and trip multiple
3665ffd83dbSDimitry Andric   BasicBlock *Latch = L->getLoopLatch();
3675ffd83dbSDimitry Andric   BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
3680b57cec5SDimitry Andric   unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch);
3690b57cec5SDimitry Andric   unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch);
3700b57cec5SDimitry Andric   unsigned InnerTripCount = SE.getSmallConstantTripCount(SubLoop, SubLoopLatch);
3710b57cec5SDimitry Andric 
3720b57cec5SDimitry Andric   // Decide if, and by how much, to unroll
3730b57cec5SDimitry Andric   bool IsCountSetExplicitly = computeUnrollAndJamCount(
374bdd1243dSDimitry Andric     L, SubLoop, TTI, DT, LI, &AC, SE, EphValues, &ORE, OuterTripCount,
3755f757f3fSDimitry Andric       OuterTripMultiple, OuterUCE, InnerTripCount, InnerLoopSize, UP, PP);
3760b57cec5SDimitry Andric   if (UP.Count <= 1)
3770b57cec5SDimitry Andric     return LoopUnrollResult::Unmodified;
3780b57cec5SDimitry Andric   // Unroll factor (Count) must be less or equal to TripCount.
3790b57cec5SDimitry Andric   if (OuterTripCount && UP.Count > OuterTripCount)
3800b57cec5SDimitry Andric     UP.Count = OuterTripCount;
3810b57cec5SDimitry Andric 
3820b57cec5SDimitry Andric   Loop *EpilogueOuterLoop = nullptr;
3830b57cec5SDimitry Andric   LoopUnrollResult UnrollResult = UnrollAndJamLoop(
3840b57cec5SDimitry Andric       L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI,
3855ffd83dbSDimitry Andric       &SE, &DT, &AC, &TTI, &ORE, &EpilogueOuterLoop);
3860b57cec5SDimitry Andric 
3870b57cec5SDimitry Andric   // Assign new loop attributes.
3880b57cec5SDimitry Andric   if (EpilogueOuterLoop) {
389bdd1243dSDimitry Andric     std::optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(
3900b57cec5SDimitry Andric         OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
3910b57cec5SDimitry Andric                           LLVMLoopUnrollAndJamFollowupRemainderOuter});
39281ad6265SDimitry Andric     if (NewOuterEpilogueLoopID)
393bdd1243dSDimitry Andric       EpilogueOuterLoop->setLoopID(*NewOuterEpilogueLoopID);
3940b57cec5SDimitry Andric   }
3950b57cec5SDimitry Andric 
396bdd1243dSDimitry Andric   std::optional<MDNode *> NewInnerLoopID =
3970b57cec5SDimitry Andric       makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
3980b57cec5SDimitry Andric                                            LLVMLoopUnrollAndJamFollowupInner});
39981ad6265SDimitry Andric   if (NewInnerLoopID)
400bdd1243dSDimitry Andric     SubLoop->setLoopID(*NewInnerLoopID);
4010b57cec5SDimitry Andric   else
4020b57cec5SDimitry Andric     SubLoop->setLoopID(OrigSubLoopID);
4030b57cec5SDimitry Andric 
4040b57cec5SDimitry Andric   if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) {
405bdd1243dSDimitry Andric     std::optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(
4060b57cec5SDimitry Andric         OrigOuterLoopID,
4070b57cec5SDimitry Andric         {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter});
40881ad6265SDimitry Andric     if (NewOuterLoopID) {
409bdd1243dSDimitry Andric       L->setLoopID(*NewOuterLoopID);
4100b57cec5SDimitry Andric 
4110b57cec5SDimitry Andric       // Do not setLoopAlreadyUnrolled if a followup was given.
4120b57cec5SDimitry Andric       return UnrollResult;
4130b57cec5SDimitry Andric     }
4140b57cec5SDimitry Andric   }
4150b57cec5SDimitry Andric 
4160b57cec5SDimitry Andric   // If loop has an unroll count pragma or unrolled by explicitly set count
4170b57cec5SDimitry Andric   // mark loop as unrolled to prevent unrolling beyond that requested.
4180b57cec5SDimitry Andric   if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
4190b57cec5SDimitry Andric     L->setLoopAlreadyUnrolled();
4200b57cec5SDimitry Andric 
4210b57cec5SDimitry Andric   return UnrollResult;
4220b57cec5SDimitry Andric }
4230b57cec5SDimitry Andric 
424fe6060f1SDimitry Andric static bool tryToUnrollAndJamLoop(LoopNest &LN, DominatorTree &DT, LoopInfo &LI,
425480093f4SDimitry Andric                                   ScalarEvolution &SE,
426480093f4SDimitry Andric                                   const TargetTransformInfo &TTI,
427480093f4SDimitry Andric                                   AssumptionCache &AC, DependenceInfo &DI,
428fe6060f1SDimitry Andric                                   OptimizationRemarkEmitter &ORE, int OptLevel,
429fe6060f1SDimitry Andric                                   LPMUpdater &U) {
430480093f4SDimitry Andric   bool DidSomething = false;
431fe6060f1SDimitry Andric   ArrayRef<Loop *> Loops = LN.getLoops();
432fe6060f1SDimitry Andric   Loop *OutmostLoop = &LN.getOutermostLoop();
433480093f4SDimitry Andric 
434fe6060f1SDimitry Andric   // Add the loop nests in the reverse order of LN. See method
4355ffd83dbSDimitry Andric   // declaration.
436480093f4SDimitry Andric   SmallPriorityWorklist<Loop *, 4> Worklist;
437fe6060f1SDimitry Andric   appendLoopsToWorklist(Loops, Worklist);
438480093f4SDimitry Andric   while (!Worklist.empty()) {
439480093f4SDimitry Andric     Loop *L = Worklist.pop_back_val();
440fe6060f1SDimitry Andric     std::string LoopName = std::string(L->getName());
441480093f4SDimitry Andric     LoopUnrollResult Result =
442480093f4SDimitry Andric         tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel);
443480093f4SDimitry Andric     if (Result != LoopUnrollResult::Unmodified)
444480093f4SDimitry Andric       DidSomething = true;
445fe6060f1SDimitry Andric     if (L == OutmostLoop && Result == LoopUnrollResult::FullyUnrolled)
446fe6060f1SDimitry Andric       U.markLoopAsDeleted(*L, LoopName);
447480093f4SDimitry Andric   }
448480093f4SDimitry Andric 
449480093f4SDimitry Andric   return DidSomething;
450480093f4SDimitry Andric }
451480093f4SDimitry Andric 
452fe6060f1SDimitry Andric PreservedAnalyses LoopUnrollAndJamPass::run(LoopNest &LN,
453fe6060f1SDimitry Andric                                             LoopAnalysisManager &AM,
454fe6060f1SDimitry Andric                                             LoopStandardAnalysisResults &AR,
455fe6060f1SDimitry Andric                                             LPMUpdater &U) {
456fe6060f1SDimitry Andric   Function &F = *LN.getParent();
4570b57cec5SDimitry Andric 
458fe6060f1SDimitry Andric   DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
459fe6060f1SDimitry Andric   OptimizationRemarkEmitter ORE(&F);
460fe6060f1SDimitry Andric 
461fe6060f1SDimitry Andric   if (!tryToUnrollAndJamLoop(LN, AR.DT, AR.LI, AR.SE, AR.TTI, AR.AC, DI, ORE,
462fe6060f1SDimitry Andric                              OptLevel, U))
4630b57cec5SDimitry Andric     return PreservedAnalyses::all();
4640b57cec5SDimitry Andric 
465fe6060f1SDimitry Andric   auto PA = getLoopPassPreservedAnalyses();
466fe6060f1SDimitry Andric   PA.preserve<LoopNestAnalysis>();
467fe6060f1SDimitry Andric   return PA;
4680b57cec5SDimitry Andric }
469