18bcb0991SDimitry Andric //===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==// 28bcb0991SDimitry Andric // 38bcb0991SDimitry Andric // The LLVM Compiler Infrastructure 48bcb0991SDimitry Andric // 58bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 68bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 78bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 88bcb0991SDimitry Andric // 98bcb0991SDimitry Andric //===----------------------------------------------------------------------===// 108bcb0991SDimitry Andric /// 118bcb0991SDimitry Andric /// \file 128bcb0991SDimitry Andric /// This file defines the implementation for the loop cache analysis. 138bcb0991SDimitry Andric /// The implementation is largely based on the following paper: 148bcb0991SDimitry Andric /// 158bcb0991SDimitry Andric /// Compiler Optimizations for Improving Data Locality 168bcb0991SDimitry Andric /// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng 178bcb0991SDimitry Andric /// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf 188bcb0991SDimitry Andric /// 198bcb0991SDimitry Andric /// The general approach taken to estimate the number of cache lines used by the 208bcb0991SDimitry Andric /// memory references in an inner loop is: 218bcb0991SDimitry Andric /// 1. Partition memory references that exhibit temporal or spacial reuse 228bcb0991SDimitry Andric /// into reference groups. 238bcb0991SDimitry Andric /// 2. For each loop L in the a loop nest LN: 248bcb0991SDimitry Andric /// a. Compute the cost of the reference group 258bcb0991SDimitry Andric /// b. Compute the loop cost by summing up the reference groups costs 268bcb0991SDimitry Andric //===----------------------------------------------------------------------===// 278bcb0991SDimitry Andric 288bcb0991SDimitry Andric #include "llvm/Analysis/LoopCacheAnalysis.h" 298bcb0991SDimitry Andric #include "llvm/ADT/BreadthFirstIterator.h" 308bcb0991SDimitry Andric #include "llvm/ADT/Sequence.h" 318bcb0991SDimitry Andric #include "llvm/ADT/SmallVector.h" 32e8d8bef9SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h" 33349cc55cSDimitry Andric #include "llvm/Analysis/Delinearization.h" 34e8d8bef9SDimitry Andric #include "llvm/Analysis/DependenceAnalysis.h" 35e8d8bef9SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 365ffd83dbSDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h" 37e8d8bef9SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 38480093f4SDimitry Andric #include "llvm/Support/CommandLine.h" 398bcb0991SDimitry Andric #include "llvm/Support/Debug.h" 408bcb0991SDimitry Andric 418bcb0991SDimitry Andric using namespace llvm; 428bcb0991SDimitry Andric 438bcb0991SDimitry Andric #define DEBUG_TYPE "loop-cache-cost" 448bcb0991SDimitry Andric 458bcb0991SDimitry Andric static cl::opt<unsigned> DefaultTripCount( 468bcb0991SDimitry Andric "default-trip-count", cl::init(100), cl::Hidden, 478bcb0991SDimitry Andric cl::desc("Use this to specify the default trip count of a loop")); 488bcb0991SDimitry Andric 498bcb0991SDimitry Andric // In this analysis two array references are considered to exhibit temporal 508bcb0991SDimitry Andric // reuse if they access either the same memory location, or a memory location 518bcb0991SDimitry Andric // with distance smaller than a configurable threshold. 528bcb0991SDimitry Andric static cl::opt<unsigned> TemporalReuseThreshold( 538bcb0991SDimitry Andric "temporal-reuse-threshold", cl::init(2), cl::Hidden, 548bcb0991SDimitry Andric cl::desc("Use this to specify the max. distance between array elements " 558bcb0991SDimitry Andric "accessed in a loop so that the elements are classified to have " 568bcb0991SDimitry Andric "temporal reuse")); 578bcb0991SDimitry Andric 588bcb0991SDimitry Andric /// Retrieve the innermost loop in the given loop nest \p Loops. It returns a 598bcb0991SDimitry Andric /// nullptr if any loops in the loop vector supplied has more than one sibling. 608bcb0991SDimitry Andric /// The loop vector is expected to contain loops collected in breadth-first 618bcb0991SDimitry Andric /// order. 628bcb0991SDimitry Andric static Loop *getInnerMostLoop(const LoopVectorTy &Loops) { 638bcb0991SDimitry Andric assert(!Loops.empty() && "Expecting a non-empy loop vector"); 648bcb0991SDimitry Andric 658bcb0991SDimitry Andric Loop *LastLoop = Loops.back(); 668bcb0991SDimitry Andric Loop *ParentLoop = LastLoop->getParentLoop(); 678bcb0991SDimitry Andric 688bcb0991SDimitry Andric if (ParentLoop == nullptr) { 698bcb0991SDimitry Andric assert(Loops.size() == 1 && "Expecting a single loop"); 708bcb0991SDimitry Andric return LastLoop; 718bcb0991SDimitry Andric } 728bcb0991SDimitry Andric 735ffd83dbSDimitry Andric return (llvm::is_sorted(Loops, 748bcb0991SDimitry Andric [](const Loop *L1, const Loop *L2) { 758bcb0991SDimitry Andric return L1->getLoopDepth() < L2->getLoopDepth(); 768bcb0991SDimitry Andric })) 778bcb0991SDimitry Andric ? LastLoop 788bcb0991SDimitry Andric : nullptr; 798bcb0991SDimitry Andric } 808bcb0991SDimitry Andric 818bcb0991SDimitry Andric static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, 828bcb0991SDimitry Andric const Loop &L, ScalarEvolution &SE) { 838bcb0991SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn); 848bcb0991SDimitry Andric if (!AR || !AR->isAffine()) 858bcb0991SDimitry Andric return false; 868bcb0991SDimitry Andric 878bcb0991SDimitry Andric assert(AR->getLoop() && "AR should have a loop"); 888bcb0991SDimitry Andric 898bcb0991SDimitry Andric // Check that start and increment are not add recurrences. 908bcb0991SDimitry Andric const SCEV *Start = AR->getStart(); 918bcb0991SDimitry Andric const SCEV *Step = AR->getStepRecurrence(SE); 928bcb0991SDimitry Andric if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step)) 938bcb0991SDimitry Andric return false; 948bcb0991SDimitry Andric 958bcb0991SDimitry Andric // Check that start and increment are both invariant in the loop. 968bcb0991SDimitry Andric if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L)) 978bcb0991SDimitry Andric return false; 988bcb0991SDimitry Andric 995ffd83dbSDimitry Andric const SCEV *StepRec = AR->getStepRecurrence(SE); 1005ffd83dbSDimitry Andric if (StepRec && SE.isKnownNegative(StepRec)) 1015ffd83dbSDimitry Andric StepRec = SE.getNegativeSCEV(StepRec); 1025ffd83dbSDimitry Andric 1035ffd83dbSDimitry Andric return StepRec == &ElemSize; 1048bcb0991SDimitry Andric } 1058bcb0991SDimitry Andric 10681ad6265SDimitry Andric /// Compute the trip count for the given loop \p L or assume a default value if 10781ad6265SDimitry Andric /// it is not a compile time constant. Return the SCEV expression for the trip 10881ad6265SDimitry Andric /// count. 10981ad6265SDimitry Andric static const SCEV *computeTripCount(const Loop &L, const SCEV &ElemSize, 11081ad6265SDimitry Andric ScalarEvolution &SE) { 1118bcb0991SDimitry Andric const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L); 11281ad6265SDimitry Andric const SCEV *TripCount = (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && 11381ad6265SDimitry Andric isa<SCEVConstant>(BackedgeTakenCount)) 11481ad6265SDimitry Andric ? SE.getTripCountFromExitCount(BackedgeTakenCount) 11581ad6265SDimitry Andric : nullptr; 11681ad6265SDimitry Andric 11781ad6265SDimitry Andric if (!TripCount) { 11881ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName() 11981ad6265SDimitry Andric << " could not be computed, using DefaultTripCount\n"); 12081ad6265SDimitry Andric TripCount = SE.getConstant(ElemSize.getType(), DefaultTripCount); 12181ad6265SDimitry Andric } 12281ad6265SDimitry Andric 12381ad6265SDimitry Andric return TripCount; 1248bcb0991SDimitry Andric } 1258bcb0991SDimitry Andric 1268bcb0991SDimitry Andric //===----------------------------------------------------------------------===// 1278bcb0991SDimitry Andric // IndexedReference implementation 1288bcb0991SDimitry Andric // 1298bcb0991SDimitry Andric raw_ostream &llvm::operator<<(raw_ostream &OS, const IndexedReference &R) { 1308bcb0991SDimitry Andric if (!R.IsValid) { 1318bcb0991SDimitry Andric OS << R.StoreOrLoadInst; 1328bcb0991SDimitry Andric OS << ", IsValid=false."; 1338bcb0991SDimitry Andric return OS; 1348bcb0991SDimitry Andric } 1358bcb0991SDimitry Andric 1368bcb0991SDimitry Andric OS << *R.BasePointer; 1378bcb0991SDimitry Andric for (const SCEV *Subscript : R.Subscripts) 1388bcb0991SDimitry Andric OS << "[" << *Subscript << "]"; 1398bcb0991SDimitry Andric 1408bcb0991SDimitry Andric OS << ", Sizes: "; 1418bcb0991SDimitry Andric for (const SCEV *Size : R.Sizes) 1428bcb0991SDimitry Andric OS << "[" << *Size << "]"; 1438bcb0991SDimitry Andric 1448bcb0991SDimitry Andric return OS; 1458bcb0991SDimitry Andric } 1468bcb0991SDimitry Andric 1478bcb0991SDimitry Andric IndexedReference::IndexedReference(Instruction &StoreOrLoadInst, 1488bcb0991SDimitry Andric const LoopInfo &LI, ScalarEvolution &SE) 1498bcb0991SDimitry Andric : StoreOrLoadInst(StoreOrLoadInst), SE(SE) { 1508bcb0991SDimitry Andric assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) && 1518bcb0991SDimitry Andric "Expecting a load or store instruction"); 1528bcb0991SDimitry Andric 1538bcb0991SDimitry Andric IsValid = delinearize(LI); 1548bcb0991SDimitry Andric if (IsValid) 1558bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this 1568bcb0991SDimitry Andric << "\n"); 1578bcb0991SDimitry Andric } 1588bcb0991SDimitry Andric 159bdd1243dSDimitry Andric std::optional<bool> 160bdd1243dSDimitry Andric IndexedReference::hasSpacialReuse(const IndexedReference &Other, unsigned CLS, 161e8d8bef9SDimitry Andric AAResults &AA) const { 1628bcb0991SDimitry Andric assert(IsValid && "Expecting a valid reference"); 1638bcb0991SDimitry Andric 1648bcb0991SDimitry Andric if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) { 1658bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(2) 1668bcb0991SDimitry Andric << "No spacial reuse: different base pointers\n"); 1678bcb0991SDimitry Andric return false; 1688bcb0991SDimitry Andric } 1698bcb0991SDimitry Andric 1708bcb0991SDimitry Andric unsigned NumSubscripts = getNumSubscripts(); 1718bcb0991SDimitry Andric if (NumSubscripts != Other.getNumSubscripts()) { 1728bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(2) 1738bcb0991SDimitry Andric << "No spacial reuse: different number of subscripts\n"); 1748bcb0991SDimitry Andric return false; 1758bcb0991SDimitry Andric } 1768bcb0991SDimitry Andric 1778bcb0991SDimitry Andric // all subscripts must be equal, except the leftmost one (the last one). 1788bcb0991SDimitry Andric for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) { 1798bcb0991SDimitry Andric if (getSubscript(SubNum) != Other.getSubscript(SubNum)) { 1808bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: " 1818bcb0991SDimitry Andric << "\n\t" << *getSubscript(SubNum) << "\n\t" 1828bcb0991SDimitry Andric << *Other.getSubscript(SubNum) << "\n"); 1838bcb0991SDimitry Andric return false; 1848bcb0991SDimitry Andric } 1858bcb0991SDimitry Andric } 1868bcb0991SDimitry Andric 1878bcb0991SDimitry Andric // the difference between the last subscripts must be less than the cache line 1888bcb0991SDimitry Andric // size. 1898bcb0991SDimitry Andric const SCEV *LastSubscript = getLastSubscript(); 1908bcb0991SDimitry Andric const SCEV *OtherLastSubscript = Other.getLastSubscript(); 1918bcb0991SDimitry Andric const SCEVConstant *Diff = dyn_cast<SCEVConstant>( 1928bcb0991SDimitry Andric SE.getMinusSCEV(LastSubscript, OtherLastSubscript)); 1938bcb0991SDimitry Andric 1948bcb0991SDimitry Andric if (Diff == nullptr) { 1958bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(2) 1968bcb0991SDimitry Andric << "No spacial reuse, difference between subscript:\n\t" 1978bcb0991SDimitry Andric << *LastSubscript << "\n\t" << OtherLastSubscript 1988bcb0991SDimitry Andric << "\nis not constant.\n"); 199bdd1243dSDimitry Andric return std::nullopt; 2008bcb0991SDimitry Andric } 2018bcb0991SDimitry Andric 2028bcb0991SDimitry Andric bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS); 2038bcb0991SDimitry Andric 2048bcb0991SDimitry Andric LLVM_DEBUG({ 2058bcb0991SDimitry Andric if (InSameCacheLine) 2068bcb0991SDimitry Andric dbgs().indent(2) << "Found spacial reuse.\n"; 2078bcb0991SDimitry Andric else 2088bcb0991SDimitry Andric dbgs().indent(2) << "No spacial reuse.\n"; 2098bcb0991SDimitry Andric }); 2108bcb0991SDimitry Andric 2118bcb0991SDimitry Andric return InSameCacheLine; 2128bcb0991SDimitry Andric } 2138bcb0991SDimitry Andric 214bdd1243dSDimitry Andric std::optional<bool> 215bdd1243dSDimitry Andric IndexedReference::hasTemporalReuse(const IndexedReference &Other, 216bdd1243dSDimitry Andric unsigned MaxDistance, const Loop &L, 217bdd1243dSDimitry Andric DependenceInfo &DI, AAResults &AA) const { 2188bcb0991SDimitry Andric assert(IsValid && "Expecting a valid reference"); 2198bcb0991SDimitry Andric 2208bcb0991SDimitry Andric if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) { 2218bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(2) 2228bcb0991SDimitry Andric << "No temporal reuse: different base pointer\n"); 2238bcb0991SDimitry Andric return false; 2248bcb0991SDimitry Andric } 2258bcb0991SDimitry Andric 2268bcb0991SDimitry Andric std::unique_ptr<Dependence> D = 2278bcb0991SDimitry Andric DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true); 2288bcb0991SDimitry Andric 2298bcb0991SDimitry Andric if (D == nullptr) { 2308bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n"); 2318bcb0991SDimitry Andric return false; 2328bcb0991SDimitry Andric } 2338bcb0991SDimitry Andric 2348bcb0991SDimitry Andric if (D->isLoopIndependent()) { 2358bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n"); 2368bcb0991SDimitry Andric return true; 2378bcb0991SDimitry Andric } 2388bcb0991SDimitry Andric 2398bcb0991SDimitry Andric // Check the dependence distance at every loop level. There is temporal reuse 2408bcb0991SDimitry Andric // if the distance at the given loop's depth is small (|d| <= MaxDistance) and 2418bcb0991SDimitry Andric // it is zero at every other loop level. 2428bcb0991SDimitry Andric int LoopDepth = L.getLoopDepth(); 2438bcb0991SDimitry Andric int Levels = D->getLevels(); 2448bcb0991SDimitry Andric for (int Level = 1; Level <= Levels; ++Level) { 2458bcb0991SDimitry Andric const SCEV *Distance = D->getDistance(Level); 2468bcb0991SDimitry Andric const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance); 2478bcb0991SDimitry Andric 2488bcb0991SDimitry Andric if (SCEVConst == nullptr) { 2498bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n"); 250bdd1243dSDimitry Andric return std::nullopt; 2518bcb0991SDimitry Andric } 2528bcb0991SDimitry Andric 2538bcb0991SDimitry Andric const ConstantInt &CI = *SCEVConst->getValue(); 2548bcb0991SDimitry Andric if (Level != LoopDepth && !CI.isZero()) { 2558bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(2) 2568bcb0991SDimitry Andric << "No temporal reuse: distance is not zero at depth=" << Level 2578bcb0991SDimitry Andric << "\n"); 2588bcb0991SDimitry Andric return false; 2598bcb0991SDimitry Andric } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) { 2608bcb0991SDimitry Andric LLVM_DEBUG( 2618bcb0991SDimitry Andric dbgs().indent(2) 2628bcb0991SDimitry Andric << "No temporal reuse: distance is greater than MaxDistance at depth=" 2638bcb0991SDimitry Andric << Level << "\n"); 2648bcb0991SDimitry Andric return false; 2658bcb0991SDimitry Andric } 2668bcb0991SDimitry Andric } 2678bcb0991SDimitry Andric 2688bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n"); 2698bcb0991SDimitry Andric return true; 2708bcb0991SDimitry Andric } 2718bcb0991SDimitry Andric 2728bcb0991SDimitry Andric CacheCostTy IndexedReference::computeRefCost(const Loop &L, 2738bcb0991SDimitry Andric unsigned CLS) const { 2748bcb0991SDimitry Andric assert(IsValid && "Expecting a valid reference"); 2758bcb0991SDimitry Andric LLVM_DEBUG({ 2768bcb0991SDimitry Andric dbgs().indent(2) << "Computing cache cost for:\n"; 2778bcb0991SDimitry Andric dbgs().indent(4) << *this << "\n"; 2788bcb0991SDimitry Andric }); 2798bcb0991SDimitry Andric 2808bcb0991SDimitry Andric // If the indexed reference is loop invariant the cost is one. 2818bcb0991SDimitry Andric if (isLoopInvariant(L)) { 2828bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n"); 2838bcb0991SDimitry Andric return 1; 2848bcb0991SDimitry Andric } 2858bcb0991SDimitry Andric 28681ad6265SDimitry Andric const SCEV *TripCount = computeTripCount(L, *Sizes.back(), SE); 28781ad6265SDimitry Andric assert(TripCount && "Expecting valid TripCount"); 2888bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n"); 2898bcb0991SDimitry Andric 29081ad6265SDimitry Andric const SCEV *RefCost = nullptr; 291fcaf7f86SDimitry Andric const SCEV *Stride = nullptr; 292fcaf7f86SDimitry Andric if (isConsecutive(L, Stride, CLS)) { 29381ad6265SDimitry Andric // If the indexed reference is 'consecutive' the cost is 29481ad6265SDimitry Andric // (TripCount*Stride)/CLS. 295fcaf7f86SDimitry Andric assert(Stride != nullptr && 296fcaf7f86SDimitry Andric "Stride should not be null for consecutive access!"); 297480093f4SDimitry Andric Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType()); 298349cc55cSDimitry Andric const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS); 2995ffd83dbSDimitry Andric Stride = SE.getNoopOrAnyExtend(Stride, WiderType); 30006c3fb27SDimitry Andric TripCount = SE.getNoopOrZeroExtend(TripCount, WiderType); 3018bcb0991SDimitry Andric const SCEV *Numerator = SE.getMulExpr(Stride, TripCount); 302*0fca6ea1SDimitry Andric // Round the fractional cost up to the nearest integer number. 303*0fca6ea1SDimitry Andric // The impact is the most significant when cost is calculated 304*0fca6ea1SDimitry Andric // to be a number less than one, because it makes more sense 305*0fca6ea1SDimitry Andric // to say one cache line is used rather than zero cache line 306*0fca6ea1SDimitry Andric // is used. 307*0fca6ea1SDimitry Andric RefCost = SE.getUDivCeilSCEV(Numerator, CacheLineSize); 3085ffd83dbSDimitry Andric 3098bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(4) 3108bcb0991SDimitry Andric << "Access is consecutive: RefCost=(TripCount*Stride)/CLS=" 3118bcb0991SDimitry Andric << *RefCost << "\n"); 31281ad6265SDimitry Andric } else { 31381ad6265SDimitry Andric // If the indexed reference is not 'consecutive' the cost is proportional to 31481ad6265SDimitry Andric // the trip count and the depth of the dimension which the subject loop 31581ad6265SDimitry Andric // subscript is accessing. We try to estimate this by multiplying the cost 31681ad6265SDimitry Andric // by the trip counts of loops corresponding to the inner dimensions. For 31781ad6265SDimitry Andric // example, given the indexed reference 'A[i][j][k]', and assuming the 31881ad6265SDimitry Andric // i-loop is in the innermost position, the cost would be equal to the 31981ad6265SDimitry Andric // iterations of the i-loop multiplied by iterations of the j-loop. 32081ad6265SDimitry Andric RefCost = TripCount; 32181ad6265SDimitry Andric 32281ad6265SDimitry Andric int Index = getSubscriptIndex(L); 323*0fca6ea1SDimitry Andric assert(Index >= 0 && "Could not locate a valid Index"); 32481ad6265SDimitry Andric 32581ad6265SDimitry Andric for (unsigned I = Index + 1; I < getNumSubscripts() - 1; ++I) { 32681ad6265SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(getSubscript(I)); 32781ad6265SDimitry Andric assert(AR && AR->getLoop() && "Expecting valid loop"); 32881ad6265SDimitry Andric const SCEV *TripCount = 32981ad6265SDimitry Andric computeTripCount(*AR->getLoop(), *Sizes.back(), SE); 33081ad6265SDimitry Andric Type *WiderType = SE.getWiderType(RefCost->getType(), TripCount->getType()); 33106c3fb27SDimitry Andric RefCost = SE.getMulExpr(SE.getNoopOrZeroExtend(RefCost, WiderType), 33206c3fb27SDimitry Andric SE.getNoopOrZeroExtend(TripCount, WiderType)); 33381ad6265SDimitry Andric } 33481ad6265SDimitry Andric 3358bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(4) 33681ad6265SDimitry Andric << "Access is not consecutive: RefCost=" << *RefCost << "\n"); 33781ad6265SDimitry Andric } 33881ad6265SDimitry Andric assert(RefCost && "Expecting a valid RefCost"); 3398bcb0991SDimitry Andric 3408bcb0991SDimitry Andric // Attempt to fold RefCost into a constant. 3418bcb0991SDimitry Andric if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost)) 34206c3fb27SDimitry Andric return ConstantCost->getValue()->getZExtValue(); 3438bcb0991SDimitry Andric 3448bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(4) 3458bcb0991SDimitry Andric << "RefCost is not a constant! Setting to RefCost=InvalidCost " 3468bcb0991SDimitry Andric "(invalid value).\n"); 3478bcb0991SDimitry Andric 3488bcb0991SDimitry Andric return CacheCost::InvalidCost; 3498bcb0991SDimitry Andric } 3508bcb0991SDimitry Andric 35181ad6265SDimitry Andric bool IndexedReference::tryDelinearizeFixedSize( 35281ad6265SDimitry Andric const SCEV *AccessFn, SmallVectorImpl<const SCEV *> &Subscripts) { 35381ad6265SDimitry Andric SmallVector<int, 4> ArraySizes; 35481ad6265SDimitry Andric if (!tryDelinearizeFixedSizeImpl(&SE, &StoreOrLoadInst, AccessFn, Subscripts, 35581ad6265SDimitry Andric ArraySizes)) 35681ad6265SDimitry Andric return false; 35781ad6265SDimitry Andric 35881ad6265SDimitry Andric // Populate Sizes with scev expressions to be used in calculations later. 35981ad6265SDimitry Andric for (auto Idx : seq<unsigned>(1, Subscripts.size())) 36081ad6265SDimitry Andric Sizes.push_back( 36181ad6265SDimitry Andric SE.getConstant(Subscripts[Idx]->getType(), ArraySizes[Idx - 1])); 36281ad6265SDimitry Andric 36381ad6265SDimitry Andric LLVM_DEBUG({ 36481ad6265SDimitry Andric dbgs() << "Delinearized subscripts of fixed-size array\n" 36581ad6265SDimitry Andric << "GEP:" << *getLoadStorePointerOperand(&StoreOrLoadInst) 36681ad6265SDimitry Andric << "\n"; 36781ad6265SDimitry Andric }); 36881ad6265SDimitry Andric return true; 36981ad6265SDimitry Andric } 37081ad6265SDimitry Andric 3718bcb0991SDimitry Andric bool IndexedReference::delinearize(const LoopInfo &LI) { 3728bcb0991SDimitry Andric assert(Subscripts.empty() && "Subscripts should be empty"); 3738bcb0991SDimitry Andric assert(Sizes.empty() && "Sizes should be empty"); 3748bcb0991SDimitry Andric assert(!IsValid && "Should be called once from the constructor"); 3758bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n"); 3768bcb0991SDimitry Andric 3778bcb0991SDimitry Andric const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst); 3788bcb0991SDimitry Andric const BasicBlock *BB = StoreOrLoadInst.getParent(); 3798bcb0991SDimitry Andric 380480093f4SDimitry Andric if (Loop *L = LI.getLoopFor(BB)) { 3818bcb0991SDimitry Andric const SCEV *AccessFn = 3828bcb0991SDimitry Andric SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L); 3838bcb0991SDimitry Andric 3848bcb0991SDimitry Andric BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn)); 3858bcb0991SDimitry Andric if (BasePointer == nullptr) { 3868bcb0991SDimitry Andric LLVM_DEBUG( 3878bcb0991SDimitry Andric dbgs().indent(2) 3888bcb0991SDimitry Andric << "ERROR: failed to delinearize, can't identify base pointer\n"); 3898bcb0991SDimitry Andric return false; 3908bcb0991SDimitry Andric } 3918bcb0991SDimitry Andric 39281ad6265SDimitry Andric bool IsFixedSize = false; 39381ad6265SDimitry Andric // Try to delinearize fixed-size arrays. 39481ad6265SDimitry Andric if (tryDelinearizeFixedSize(AccessFn, Subscripts)) { 39581ad6265SDimitry Andric IsFixedSize = true; 39681ad6265SDimitry Andric // The last element of Sizes is the element size. 39781ad6265SDimitry Andric Sizes.push_back(ElemSize); 3988bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName() 3998bcb0991SDimitry Andric << "', AccessFn: " << *AccessFn << "\n"); 40081ad6265SDimitry Andric } 4018bcb0991SDimitry Andric 40281ad6265SDimitry Andric AccessFn = SE.getMinusSCEV(AccessFn, BasePointer); 40381ad6265SDimitry Andric 40481ad6265SDimitry Andric // Try to delinearize parametric-size arrays. 40581ad6265SDimitry Andric if (!IsFixedSize) { 40681ad6265SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName() 40781ad6265SDimitry Andric << "', AccessFn: " << *AccessFn << "\n"); 408349cc55cSDimitry Andric llvm::delinearize(SE, AccessFn, Subscripts, Sizes, 4098bcb0991SDimitry Andric SE.getElementSize(&StoreOrLoadInst)); 41081ad6265SDimitry Andric } 4118bcb0991SDimitry Andric 4128bcb0991SDimitry Andric if (Subscripts.empty() || Sizes.empty() || 4138bcb0991SDimitry Andric Subscripts.size() != Sizes.size()) { 4148bcb0991SDimitry Andric // Attempt to determine whether we have a single dimensional array access. 4158bcb0991SDimitry Andric // before giving up. 4168bcb0991SDimitry Andric if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) { 4178bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(2) 4188bcb0991SDimitry Andric << "ERROR: failed to delinearize reference\n"); 4198bcb0991SDimitry Andric Subscripts.clear(); 4208bcb0991SDimitry Andric Sizes.clear(); 421480093f4SDimitry Andric return false; 4228bcb0991SDimitry Andric } 4238bcb0991SDimitry Andric 4245ffd83dbSDimitry Andric // The array may be accessed in reverse, for example: 4255ffd83dbSDimitry Andric // for (i = N; i > 0; i--) 4265ffd83dbSDimitry Andric // A[i] = 0; 4275ffd83dbSDimitry Andric // In this case, reconstruct the access function using the absolute value 4285ffd83dbSDimitry Andric // of the step recurrence. 4295ffd83dbSDimitry Andric const SCEVAddRecExpr *AccessFnAR = dyn_cast<SCEVAddRecExpr>(AccessFn); 4305ffd83dbSDimitry Andric const SCEV *StepRec = AccessFnAR ? AccessFnAR->getStepRecurrence(SE) : nullptr; 4315ffd83dbSDimitry Andric 4325ffd83dbSDimitry Andric if (StepRec && SE.isKnownNegative(StepRec)) 4335ffd83dbSDimitry Andric AccessFn = SE.getAddRecExpr(AccessFnAR->getStart(), 4345ffd83dbSDimitry Andric SE.getNegativeSCEV(StepRec), 4355ffd83dbSDimitry Andric AccessFnAR->getLoop(), 4365ffd83dbSDimitry Andric AccessFnAR->getNoWrapFlags()); 4378bcb0991SDimitry Andric const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize); 4388bcb0991SDimitry Andric Subscripts.push_back(Div); 4398bcb0991SDimitry Andric Sizes.push_back(ElemSize); 4408bcb0991SDimitry Andric } 4418bcb0991SDimitry Andric 4428bcb0991SDimitry Andric return all_of(Subscripts, [&](const SCEV *Subscript) { 4438bcb0991SDimitry Andric return isSimpleAddRecurrence(*Subscript, *L); 4448bcb0991SDimitry Andric }); 4458bcb0991SDimitry Andric } 4468bcb0991SDimitry Andric 4478bcb0991SDimitry Andric return false; 4488bcb0991SDimitry Andric } 4498bcb0991SDimitry Andric 4508bcb0991SDimitry Andric bool IndexedReference::isLoopInvariant(const Loop &L) const { 4518bcb0991SDimitry Andric Value *Addr = getPointerOperand(&StoreOrLoadInst); 4528bcb0991SDimitry Andric assert(Addr != nullptr && "Expecting either a load or a store instruction"); 4538bcb0991SDimitry Andric assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable"); 4548bcb0991SDimitry Andric 4558bcb0991SDimitry Andric if (SE.isLoopInvariant(SE.getSCEV(Addr), &L)) 4568bcb0991SDimitry Andric return true; 4578bcb0991SDimitry Andric 4588bcb0991SDimitry Andric // The indexed reference is loop invariant if none of the coefficients use 4598bcb0991SDimitry Andric // the loop induction variable. 4608bcb0991SDimitry Andric bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) { 4618bcb0991SDimitry Andric return isCoeffForLoopZeroOrInvariant(*Subscript, L); 4628bcb0991SDimitry Andric }); 4638bcb0991SDimitry Andric 4648bcb0991SDimitry Andric return allCoeffForLoopAreZero; 4658bcb0991SDimitry Andric } 4668bcb0991SDimitry Andric 467fcaf7f86SDimitry Andric bool IndexedReference::isConsecutive(const Loop &L, const SCEV *&Stride, 468fcaf7f86SDimitry Andric unsigned CLS) const { 4698bcb0991SDimitry Andric // The indexed reference is 'consecutive' if the only coefficient that uses 4708bcb0991SDimitry Andric // the loop induction variable is the last one... 4718bcb0991SDimitry Andric const SCEV *LastSubscript = Subscripts.back(); 4728bcb0991SDimitry Andric for (const SCEV *Subscript : Subscripts) { 4738bcb0991SDimitry Andric if (Subscript == LastSubscript) 4748bcb0991SDimitry Andric continue; 4758bcb0991SDimitry Andric if (!isCoeffForLoopZeroOrInvariant(*Subscript, L)) 4768bcb0991SDimitry Andric return false; 4778bcb0991SDimitry Andric } 4788bcb0991SDimitry Andric 4798bcb0991SDimitry Andric // ...and the access stride is less than the cache line size. 4808bcb0991SDimitry Andric const SCEV *Coeff = getLastCoefficient(); 4818bcb0991SDimitry Andric const SCEV *ElemSize = Sizes.back(); 482fcaf7f86SDimitry Andric Type *WiderType = SE.getWiderType(Coeff->getType(), ElemSize->getType()); 483fcaf7f86SDimitry Andric // FIXME: This assumes that all values are signed integers which may 484fcaf7f86SDimitry Andric // be incorrect in unusual codes and incorrectly use sext instead of zext. 485fcaf7f86SDimitry Andric // for (uint32_t i = 0; i < 512; ++i) { 486fcaf7f86SDimitry Andric // uint8_t trunc = i; 487fcaf7f86SDimitry Andric // A[trunc] = 42; 488fcaf7f86SDimitry Andric // } 489fcaf7f86SDimitry Andric // This consecutively iterates twice over A. If `trunc` is sign-extended, 490fcaf7f86SDimitry Andric // we would conclude that this may iterate backwards over the array. 491fcaf7f86SDimitry Andric // However, LoopCacheAnalysis is heuristic anyway and transformations must 492fcaf7f86SDimitry Andric // not result in wrong optimizations if the heuristic was incorrect. 493fcaf7f86SDimitry Andric Stride = SE.getMulExpr(SE.getNoopOrSignExtend(Coeff, WiderType), 494fcaf7f86SDimitry Andric SE.getNoopOrSignExtend(ElemSize, WiderType)); 4958bcb0991SDimitry Andric const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); 4968bcb0991SDimitry Andric 4975ffd83dbSDimitry Andric Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride; 4988bcb0991SDimitry Andric return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize); 4998bcb0991SDimitry Andric } 5008bcb0991SDimitry Andric 50181ad6265SDimitry Andric int IndexedReference::getSubscriptIndex(const Loop &L) const { 50281ad6265SDimitry Andric for (auto Idx : seq<int>(0, getNumSubscripts())) { 50381ad6265SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(getSubscript(Idx)); 50481ad6265SDimitry Andric if (AR && AR->getLoop() == &L) { 50581ad6265SDimitry Andric return Idx; 50681ad6265SDimitry Andric } 50781ad6265SDimitry Andric } 50881ad6265SDimitry Andric return -1; 50981ad6265SDimitry Andric } 51081ad6265SDimitry Andric 5118bcb0991SDimitry Andric const SCEV *IndexedReference::getLastCoefficient() const { 5128bcb0991SDimitry Andric const SCEV *LastSubscript = getLastSubscript(); 513349cc55cSDimitry Andric auto *AR = cast<SCEVAddRecExpr>(LastSubscript); 5148bcb0991SDimitry Andric return AR->getStepRecurrence(SE); 5158bcb0991SDimitry Andric } 5168bcb0991SDimitry Andric 5178bcb0991SDimitry Andric bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript, 5188bcb0991SDimitry Andric const Loop &L) const { 5198bcb0991SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript); 5208bcb0991SDimitry Andric return (AR != nullptr) ? AR->getLoop() != &L 5218bcb0991SDimitry Andric : SE.isLoopInvariant(&Subscript, &L); 5228bcb0991SDimitry Andric } 5238bcb0991SDimitry Andric 5248bcb0991SDimitry Andric bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript, 5258bcb0991SDimitry Andric const Loop &L) const { 5268bcb0991SDimitry Andric if (!isa<SCEVAddRecExpr>(Subscript)) 5278bcb0991SDimitry Andric return false; 5288bcb0991SDimitry Andric 5298bcb0991SDimitry Andric const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript); 5308bcb0991SDimitry Andric assert(AR->getLoop() && "AR should have a loop"); 5318bcb0991SDimitry Andric 5328bcb0991SDimitry Andric if (!AR->isAffine()) 5338bcb0991SDimitry Andric return false; 5348bcb0991SDimitry Andric 5358bcb0991SDimitry Andric const SCEV *Start = AR->getStart(); 5368bcb0991SDimitry Andric const SCEV *Step = AR->getStepRecurrence(SE); 5378bcb0991SDimitry Andric 5388bcb0991SDimitry Andric if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L)) 5398bcb0991SDimitry Andric return false; 5408bcb0991SDimitry Andric 5418bcb0991SDimitry Andric return true; 5428bcb0991SDimitry Andric } 5438bcb0991SDimitry Andric 5448bcb0991SDimitry Andric bool IndexedReference::isAliased(const IndexedReference &Other, 545e8d8bef9SDimitry Andric AAResults &AA) const { 5468bcb0991SDimitry Andric const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst); 5478bcb0991SDimitry Andric const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst); 5488bcb0991SDimitry Andric return AA.isMustAlias(Loc1, Loc2); 5498bcb0991SDimitry Andric } 5508bcb0991SDimitry Andric 5518bcb0991SDimitry Andric //===----------------------------------------------------------------------===// 5528bcb0991SDimitry Andric // CacheCost implementation 5538bcb0991SDimitry Andric // 5548bcb0991SDimitry Andric raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) { 5558bcb0991SDimitry Andric for (const auto &LC : CC.LoopCosts) { 5568bcb0991SDimitry Andric const Loop *L = LC.first; 5578bcb0991SDimitry Andric OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n"; 5588bcb0991SDimitry Andric } 5598bcb0991SDimitry Andric return OS; 5608bcb0991SDimitry Andric } 5618bcb0991SDimitry Andric 5628bcb0991SDimitry Andric CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, 5638bcb0991SDimitry Andric ScalarEvolution &SE, TargetTransformInfo &TTI, 564bdd1243dSDimitry Andric AAResults &AA, DependenceInfo &DI, 565bdd1243dSDimitry Andric std::optional<unsigned> TRT) 566bdd1243dSDimitry Andric : Loops(Loops), TRT(TRT.value_or(TemporalReuseThreshold)), LI(LI), SE(SE), 567bdd1243dSDimitry Andric TTI(TTI), AA(AA), DI(DI) { 5688bcb0991SDimitry Andric assert(!Loops.empty() && "Expecting a non-empty loop vector."); 5698bcb0991SDimitry Andric 5708bcb0991SDimitry Andric for (const Loop *L : Loops) { 5718bcb0991SDimitry Andric unsigned TripCount = SE.getSmallConstantTripCount(L); 5728bcb0991SDimitry Andric TripCount = (TripCount == 0) ? DefaultTripCount : TripCount; 5738bcb0991SDimitry Andric TripCounts.push_back({L, TripCount}); 5748bcb0991SDimitry Andric } 5758bcb0991SDimitry Andric 5768bcb0991SDimitry Andric calculateCacheFootprint(); 5778bcb0991SDimitry Andric } 5788bcb0991SDimitry Andric 5798bcb0991SDimitry Andric std::unique_ptr<CacheCost> 5808bcb0991SDimitry Andric CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, 581bdd1243dSDimitry Andric DependenceInfo &DI, std::optional<unsigned> TRT) { 582e8d8bef9SDimitry Andric if (!Root.isOutermost()) { 5838bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n"); 5848bcb0991SDimitry Andric return nullptr; 5858bcb0991SDimitry Andric } 5868bcb0991SDimitry Andric 5878bcb0991SDimitry Andric LoopVectorTy Loops; 588e8d8bef9SDimitry Andric append_range(Loops, breadth_first(&Root)); 5898bcb0991SDimitry Andric 5908bcb0991SDimitry Andric if (!getInnerMostLoop(Loops)) { 5918bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more " 5928bcb0991SDimitry Andric "than one innermost loop\n"); 5938bcb0991SDimitry Andric return nullptr; 5948bcb0991SDimitry Andric } 5958bcb0991SDimitry Andric 5968bcb0991SDimitry Andric return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT); 5978bcb0991SDimitry Andric } 5988bcb0991SDimitry Andric 5998bcb0991SDimitry Andric void CacheCost::calculateCacheFootprint() { 6008bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n"); 6018bcb0991SDimitry Andric ReferenceGroupsTy RefGroups; 6028bcb0991SDimitry Andric if (!populateReferenceGroups(RefGroups)) 6038bcb0991SDimitry Andric return; 6048bcb0991SDimitry Andric 6058bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n"); 6068bcb0991SDimitry Andric for (const Loop *L : Loops) { 607349cc55cSDimitry Andric assert(llvm::none_of( 608349cc55cSDimitry Andric LoopCosts, 609349cc55cSDimitry Andric [L](const LoopCacheCostTy &LCC) { return LCC.first == L; }) && 6108bcb0991SDimitry Andric "Should not add duplicate element"); 6118bcb0991SDimitry Andric CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups); 6128bcb0991SDimitry Andric LoopCosts.push_back(std::make_pair(L, LoopCost)); 6138bcb0991SDimitry Andric } 6148bcb0991SDimitry Andric 6158bcb0991SDimitry Andric sortLoopCosts(); 6168bcb0991SDimitry Andric RefGroups.clear(); 6178bcb0991SDimitry Andric } 6188bcb0991SDimitry Andric 6198bcb0991SDimitry Andric bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const { 6208bcb0991SDimitry Andric assert(RefGroups.empty() && "Reference groups should be empty"); 6218bcb0991SDimitry Andric 6228bcb0991SDimitry Andric unsigned CLS = TTI.getCacheLineSize(); 6238bcb0991SDimitry Andric Loop *InnerMostLoop = getInnerMostLoop(Loops); 6248bcb0991SDimitry Andric assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop"); 6258bcb0991SDimitry Andric 6268bcb0991SDimitry Andric for (BasicBlock *BB : InnerMostLoop->getBlocks()) { 6278bcb0991SDimitry Andric for (Instruction &I : *BB) { 6288bcb0991SDimitry Andric if (!isa<StoreInst>(I) && !isa<LoadInst>(I)) 6298bcb0991SDimitry Andric continue; 6308bcb0991SDimitry Andric 6318bcb0991SDimitry Andric std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE)); 6328bcb0991SDimitry Andric if (!R->isValid()) 6338bcb0991SDimitry Andric continue; 6348bcb0991SDimitry Andric 6358bcb0991SDimitry Andric bool Added = false; 6368bcb0991SDimitry Andric for (ReferenceGroupTy &RefGroup : RefGroups) { 63781ad6265SDimitry Andric const IndexedReference &Representative = *RefGroup.front(); 6388bcb0991SDimitry Andric LLVM_DEBUG({ 6398bcb0991SDimitry Andric dbgs() << "References:\n"; 6408bcb0991SDimitry Andric dbgs().indent(2) << *R << "\n"; 6418bcb0991SDimitry Andric dbgs().indent(2) << Representative << "\n"; 6428bcb0991SDimitry Andric }); 6438bcb0991SDimitry Andric 6445ffd83dbSDimitry Andric 6455ffd83dbSDimitry Andric // FIXME: Both positive and negative access functions will be placed 6465ffd83dbSDimitry Andric // into the same reference group, resulting in a bi-directional array 6475ffd83dbSDimitry Andric // access such as: 6485ffd83dbSDimitry Andric // for (i = N; i > 0; i--) 6495ffd83dbSDimitry Andric // A[i] = A[N - i]; 6505ffd83dbSDimitry Andric // having the same cost calculation as a single dimention access pattern 6515ffd83dbSDimitry Andric // for (i = 0; i < N; i++) 6525ffd83dbSDimitry Andric // A[i] = A[i]; 6535ffd83dbSDimitry Andric // when in actuality, depending on the array size, the first example 6545ffd83dbSDimitry Andric // should have a cost closer to 2x the second due to the two cache 6555ffd83dbSDimitry Andric // access per iteration from opposite ends of the array 656bdd1243dSDimitry Andric std::optional<bool> HasTemporalReuse = 6578bcb0991SDimitry Andric R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA); 658bdd1243dSDimitry Andric std::optional<bool> HasSpacialReuse = 6598bcb0991SDimitry Andric R->hasSpacialReuse(Representative, CLS, AA); 6608bcb0991SDimitry Andric 66181ad6265SDimitry Andric if ((HasTemporalReuse && *HasTemporalReuse) || 66281ad6265SDimitry Andric (HasSpacialReuse && *HasSpacialReuse)) { 6638bcb0991SDimitry Andric RefGroup.push_back(std::move(R)); 6648bcb0991SDimitry Andric Added = true; 6658bcb0991SDimitry Andric break; 6668bcb0991SDimitry Andric } 6678bcb0991SDimitry Andric } 6688bcb0991SDimitry Andric 6698bcb0991SDimitry Andric if (!Added) { 6708bcb0991SDimitry Andric ReferenceGroupTy RG; 6718bcb0991SDimitry Andric RG.push_back(std::move(R)); 6728bcb0991SDimitry Andric RefGroups.push_back(std::move(RG)); 6738bcb0991SDimitry Andric } 6748bcb0991SDimitry Andric } 6758bcb0991SDimitry Andric } 6768bcb0991SDimitry Andric 6778bcb0991SDimitry Andric if (RefGroups.empty()) 6788bcb0991SDimitry Andric return false; 6798bcb0991SDimitry Andric 6808bcb0991SDimitry Andric LLVM_DEBUG({ 6818bcb0991SDimitry Andric dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n"; 6828bcb0991SDimitry Andric int n = 1; 6838bcb0991SDimitry Andric for (const ReferenceGroupTy &RG : RefGroups) { 6848bcb0991SDimitry Andric dbgs().indent(2) << "RefGroup " << n << ":\n"; 6858bcb0991SDimitry Andric for (const auto &IR : RG) 6868bcb0991SDimitry Andric dbgs().indent(4) << *IR << "\n"; 6878bcb0991SDimitry Andric n++; 6888bcb0991SDimitry Andric } 6898bcb0991SDimitry Andric dbgs() << "\n"; 6908bcb0991SDimitry Andric }); 6918bcb0991SDimitry Andric 6928bcb0991SDimitry Andric return true; 6938bcb0991SDimitry Andric } 6948bcb0991SDimitry Andric 6958bcb0991SDimitry Andric CacheCostTy 6968bcb0991SDimitry Andric CacheCost::computeLoopCacheCost(const Loop &L, 6978bcb0991SDimitry Andric const ReferenceGroupsTy &RefGroups) const { 6988bcb0991SDimitry Andric if (!L.isLoopSimplifyForm()) 6998bcb0991SDimitry Andric return InvalidCost; 7008bcb0991SDimitry Andric 7018bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName() 7028bcb0991SDimitry Andric << "' as innermost loop.\n"); 7038bcb0991SDimitry Andric 7048bcb0991SDimitry Andric // Compute the product of the trip counts of each other loop in the nest. 7058bcb0991SDimitry Andric CacheCostTy TripCountsProduct = 1; 7068bcb0991SDimitry Andric for (const auto &TC : TripCounts) { 7078bcb0991SDimitry Andric if (TC.first == &L) 7088bcb0991SDimitry Andric continue; 7098bcb0991SDimitry Andric TripCountsProduct *= TC.second; 7108bcb0991SDimitry Andric } 7118bcb0991SDimitry Andric 7128bcb0991SDimitry Andric CacheCostTy LoopCost = 0; 7138bcb0991SDimitry Andric for (const ReferenceGroupTy &RG : RefGroups) { 7148bcb0991SDimitry Andric CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L); 7158bcb0991SDimitry Andric LoopCost += RefGroupCost * TripCountsProduct; 7168bcb0991SDimitry Andric } 7178bcb0991SDimitry Andric 7188bcb0991SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName() 7198bcb0991SDimitry Andric << "' has cost=" << LoopCost << "\n"); 7208bcb0991SDimitry Andric 7218bcb0991SDimitry Andric return LoopCost; 7228bcb0991SDimitry Andric } 7238bcb0991SDimitry Andric 7248bcb0991SDimitry Andric CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG, 7258bcb0991SDimitry Andric const Loop &L) const { 7268bcb0991SDimitry Andric assert(!RG.empty() && "Reference group should have at least one member."); 7278bcb0991SDimitry Andric 7288bcb0991SDimitry Andric const IndexedReference *Representative = RG.front().get(); 7298bcb0991SDimitry Andric return Representative->computeRefCost(L, TTI.getCacheLineSize()); 7308bcb0991SDimitry Andric } 7318bcb0991SDimitry Andric 7328bcb0991SDimitry Andric //===----------------------------------------------------------------------===// 7338bcb0991SDimitry Andric // LoopCachePrinterPass implementation 7348bcb0991SDimitry Andric // 7358bcb0991SDimitry Andric PreservedAnalyses LoopCachePrinterPass::run(Loop &L, LoopAnalysisManager &AM, 7368bcb0991SDimitry Andric LoopStandardAnalysisResults &AR, 7378bcb0991SDimitry Andric LPMUpdater &U) { 7388bcb0991SDimitry Andric Function *F = L.getHeader()->getParent(); 7398bcb0991SDimitry Andric DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI); 7408bcb0991SDimitry Andric 7418bcb0991SDimitry Andric if (auto CC = CacheCost::getCacheCost(L, AR, DI)) 7428bcb0991SDimitry Andric OS << *CC; 7438bcb0991SDimitry Andric 7448bcb0991SDimitry Andric return PreservedAnalyses::all(); 7458bcb0991SDimitry Andric } 746