10b57cec5SDimitry Andric //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements a Loop Data Prefetching Pass. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/LoopDataPrefetch.h" 14480093f4SDimitry Andric #include "llvm/InitializePasses.h" 150b57cec5SDimitry Andric 160b57cec5SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h" 170b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 180b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 190b57cec5SDimitry Andric #include "llvm/Analysis/CodeMetrics.h" 200b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 210b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 220b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 230b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h" 240b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 250b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 260b57cec5SDimitry Andric #include "llvm/IR/Function.h" 270b57cec5SDimitry Andric #include "llvm/IR/Module.h" 280b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 290b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 300b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h" 31349cc55cSDimitry Andric #include "llvm/Transforms/Utils.h" 325ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 33fe6060f1SDimitry Andric 34fe6060f1SDimitry Andric #define DEBUG_TYPE "loop-data-prefetch" 35fe6060f1SDimitry Andric 360b57cec5SDimitry Andric using namespace llvm; 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric // By default, we limit this to creating 16 PHIs (which is a little over half 390b57cec5SDimitry Andric // of the allocatable register set). 400b57cec5SDimitry Andric static cl::opt<bool> 410b57cec5SDimitry Andric PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), 420b57cec5SDimitry Andric cl::desc("Prefetch write addresses")); 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric static cl::opt<unsigned> 450b57cec5SDimitry Andric PrefetchDistance("prefetch-distance", 460b57cec5SDimitry Andric cl::desc("Number of instructions to prefetch ahead"), 470b57cec5SDimitry Andric cl::Hidden); 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric static cl::opt<unsigned> 500b57cec5SDimitry Andric MinPrefetchStride("min-prefetch-stride", 510b57cec5SDimitry Andric cl::desc("Min stride to add prefetches"), cl::Hidden); 520b57cec5SDimitry Andric 530b57cec5SDimitry Andric static cl::opt<unsigned> MaxPrefetchIterationsAhead( 540b57cec5SDimitry Andric "max-prefetch-iters-ahead", 550b57cec5SDimitry Andric cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden); 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric STATISTIC(NumPrefetches, "Number of prefetches inserted"); 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric namespace { 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric /// Loop prefetch implementation class. 620b57cec5SDimitry Andric class LoopDataPrefetch { 630b57cec5SDimitry Andric public: 645ffd83dbSDimitry Andric LoopDataPrefetch(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI, 655ffd83dbSDimitry Andric ScalarEvolution *SE, const TargetTransformInfo *TTI, 660b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE) 675ffd83dbSDimitry Andric : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {} 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric bool run(); 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric private: 720b57cec5SDimitry Andric bool runOnLoop(Loop *L); 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric /// Check if the stride of the accesses is large enough to 750b57cec5SDimitry Andric /// warrant a prefetch. 765ffd83dbSDimitry Andric bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride); 770b57cec5SDimitry Andric 785ffd83dbSDimitry Andric unsigned getMinPrefetchStride(unsigned NumMemAccesses, 795ffd83dbSDimitry Andric unsigned NumStridedMemAccesses, 805ffd83dbSDimitry Andric unsigned NumPrefetches, 815ffd83dbSDimitry Andric bool HasCall) { 820b57cec5SDimitry Andric if (MinPrefetchStride.getNumOccurrences() > 0) 830b57cec5SDimitry Andric return MinPrefetchStride; 845ffd83dbSDimitry Andric return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses, 855ffd83dbSDimitry Andric NumPrefetches, HasCall); 860b57cec5SDimitry Andric } 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric unsigned getPrefetchDistance() { 890b57cec5SDimitry Andric if (PrefetchDistance.getNumOccurrences() > 0) 900b57cec5SDimitry Andric return PrefetchDistance; 910b57cec5SDimitry Andric return TTI->getPrefetchDistance(); 920b57cec5SDimitry Andric } 930b57cec5SDimitry Andric 940b57cec5SDimitry Andric unsigned getMaxPrefetchIterationsAhead() { 950b57cec5SDimitry Andric if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0) 960b57cec5SDimitry Andric return MaxPrefetchIterationsAhead; 970b57cec5SDimitry Andric return TTI->getMaxPrefetchIterationsAhead(); 980b57cec5SDimitry Andric } 990b57cec5SDimitry Andric 1005ffd83dbSDimitry Andric bool doPrefetchWrites() { 1015ffd83dbSDimitry Andric if (PrefetchWrites.getNumOccurrences() > 0) 1025ffd83dbSDimitry Andric return PrefetchWrites; 1035ffd83dbSDimitry Andric return TTI->enableWritePrefetching(); 1045ffd83dbSDimitry Andric } 1055ffd83dbSDimitry Andric 1060b57cec5SDimitry Andric AssumptionCache *AC; 1075ffd83dbSDimitry Andric DominatorTree *DT; 1080b57cec5SDimitry Andric LoopInfo *LI; 1090b57cec5SDimitry Andric ScalarEvolution *SE; 1100b57cec5SDimitry Andric const TargetTransformInfo *TTI; 1110b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE; 1120b57cec5SDimitry Andric }; 1130b57cec5SDimitry Andric 1140b57cec5SDimitry Andric /// Legacy class for inserting loop data prefetches. 1150b57cec5SDimitry Andric class LoopDataPrefetchLegacyPass : public FunctionPass { 1160b57cec5SDimitry Andric public: 1170b57cec5SDimitry Andric static char ID; // Pass ID, replacement for typeid 1180b57cec5SDimitry Andric LoopDataPrefetchLegacyPass() : FunctionPass(ID) { 1190b57cec5SDimitry Andric initializeLoopDataPrefetchLegacyPassPass(*PassRegistry::getPassRegistry()); 1200b57cec5SDimitry Andric } 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 1230b57cec5SDimitry Andric AU.addRequired<AssumptionCacheTracker>(); 1245ffd83dbSDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 1250b57cec5SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>(); 1260b57cec5SDimitry Andric AU.addRequired<LoopInfoWrapperPass>(); 1270b57cec5SDimitry Andric AU.addPreserved<LoopInfoWrapperPass>(); 128349cc55cSDimitry Andric AU.addRequiredID(LoopSimplifyID); 129349cc55cSDimitry Andric AU.addPreservedID(LoopSimplifyID); 1300b57cec5SDimitry Andric AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 1310b57cec5SDimitry Andric AU.addRequired<ScalarEvolutionWrapperPass>(); 1320b57cec5SDimitry Andric AU.addPreserved<ScalarEvolutionWrapperPass>(); 1330b57cec5SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 1340b57cec5SDimitry Andric } 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric bool runOnFunction(Function &F) override; 1370b57cec5SDimitry Andric }; 1380b57cec5SDimitry Andric } 1390b57cec5SDimitry Andric 1400b57cec5SDimitry Andric char LoopDataPrefetchLegacyPass::ID = 0; 1410b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch", 1420b57cec5SDimitry Andric "Loop Data Prefetch", false, false) 1430b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 1440b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 1450b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 146349cc55cSDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 1470b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 1480b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 1490b57cec5SDimitry Andric INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch", 1500b57cec5SDimitry Andric "Loop Data Prefetch", false, false) 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric FunctionPass *llvm::createLoopDataPrefetchPass() { 1530b57cec5SDimitry Andric return new LoopDataPrefetchLegacyPass(); 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric 1565ffd83dbSDimitry Andric bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR, 1575ffd83dbSDimitry Andric unsigned TargetMinStride) { 1580b57cec5SDimitry Andric // No need to check if any stride goes. 1590b57cec5SDimitry Andric if (TargetMinStride <= 1) 1600b57cec5SDimitry Andric return true; 1610b57cec5SDimitry Andric 1620b57cec5SDimitry Andric const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); 1630b57cec5SDimitry Andric // If MinStride is set, don't prefetch unless we can ensure that stride is 1640b57cec5SDimitry Andric // larger. 1650b57cec5SDimitry Andric if (!ConstStride) 1660b57cec5SDimitry Andric return false; 1670b57cec5SDimitry Andric 1680b57cec5SDimitry Andric unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue()); 1690b57cec5SDimitry Andric return TargetMinStride <= AbsStride; 1700b57cec5SDimitry Andric } 1710b57cec5SDimitry Andric 1720b57cec5SDimitry Andric PreservedAnalyses LoopDataPrefetchPass::run(Function &F, 1730b57cec5SDimitry Andric FunctionAnalysisManager &AM) { 1745ffd83dbSDimitry Andric DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1750b57cec5SDimitry Andric LoopInfo *LI = &AM.getResult<LoopAnalysis>(F); 1760b57cec5SDimitry Andric ScalarEvolution *SE = &AM.getResult<ScalarEvolutionAnalysis>(F); 1770b57cec5SDimitry Andric AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F); 1780b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE = 1790b57cec5SDimitry Andric &AM.getResult<OptimizationRemarkEmitterAnalysis>(F); 1800b57cec5SDimitry Andric const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F); 1810b57cec5SDimitry Andric 1825ffd83dbSDimitry Andric LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE); 1830b57cec5SDimitry Andric bool Changed = LDP.run(); 1840b57cec5SDimitry Andric 1850b57cec5SDimitry Andric if (Changed) { 1860b57cec5SDimitry Andric PreservedAnalyses PA; 1870b57cec5SDimitry Andric PA.preserve<DominatorTreeAnalysis>(); 1880b57cec5SDimitry Andric PA.preserve<LoopAnalysis>(); 1890b57cec5SDimitry Andric return PA; 1900b57cec5SDimitry Andric } 1910b57cec5SDimitry Andric 1920b57cec5SDimitry Andric return PreservedAnalyses::all(); 1930b57cec5SDimitry Andric } 1940b57cec5SDimitry Andric 1950b57cec5SDimitry Andric bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) { 1960b57cec5SDimitry Andric if (skipFunction(F)) 1970b57cec5SDimitry Andric return false; 1980b57cec5SDimitry Andric 1995ffd83dbSDimitry Andric DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2000b57cec5SDimitry Andric LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2010b57cec5SDimitry Andric ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2020b57cec5SDimitry Andric AssumptionCache *AC = 2030b57cec5SDimitry Andric &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 2040b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE = 2050b57cec5SDimitry Andric &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 2060b57cec5SDimitry Andric const TargetTransformInfo *TTI = 2070b57cec5SDimitry Andric &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 2080b57cec5SDimitry Andric 2095ffd83dbSDimitry Andric LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE); 2100b57cec5SDimitry Andric return LDP.run(); 2110b57cec5SDimitry Andric } 2120b57cec5SDimitry Andric 2130b57cec5SDimitry Andric bool LoopDataPrefetch::run() { 2140b57cec5SDimitry Andric // If PrefetchDistance is not set, don't run the pass. This gives an 2150b57cec5SDimitry Andric // opportunity for targets to run this pass for selected subtargets only 216972a253aSDimitry Andric // (whose TTI sets PrefetchDistance and CacheLineSize). 217972a253aSDimitry Andric if (getPrefetchDistance() == 0 || TTI->getCacheLineSize() == 0) { 218972a253aSDimitry Andric LLVM_DEBUG(dbgs() << "Please set both PrefetchDistance and CacheLineSize " 219972a253aSDimitry Andric "for loop data prefetch.\n"); 2200b57cec5SDimitry Andric return false; 221972a253aSDimitry Andric } 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric bool MadeChange = false; 2240b57cec5SDimitry Andric 2250b57cec5SDimitry Andric for (Loop *I : *LI) 2260eae32dcSDimitry Andric for (Loop *L : depth_first(I)) 2270eae32dcSDimitry Andric MadeChange |= runOnLoop(L); 2280b57cec5SDimitry Andric 2290b57cec5SDimitry Andric return MadeChange; 2300b57cec5SDimitry Andric } 2310b57cec5SDimitry Andric 2325ffd83dbSDimitry Andric /// A record for a potential prefetch made during the initial scan of the 2335ffd83dbSDimitry Andric /// loop. This is used to let a single prefetch target multiple memory accesses. 2345ffd83dbSDimitry Andric struct Prefetch { 2355ffd83dbSDimitry Andric /// The address formula for this prefetch as returned by ScalarEvolution. 2365ffd83dbSDimitry Andric const SCEVAddRecExpr *LSCEVAddRec; 2375ffd83dbSDimitry Andric /// The point of insertion for the prefetch instruction. 23881ad6265SDimitry Andric Instruction *InsertPt = nullptr; 2395ffd83dbSDimitry Andric /// True if targeting a write memory access. 24081ad6265SDimitry Andric bool Writes = false; 2415ffd83dbSDimitry Andric /// The (first seen) prefetched instruction. 24281ad6265SDimitry Andric Instruction *MemI = nullptr; 2435ffd83dbSDimitry Andric 2445ffd83dbSDimitry Andric /// Constructor to create a new Prefetch for \p I. 24581ad6265SDimitry Andric Prefetch(const SCEVAddRecExpr *L, Instruction *I) : LSCEVAddRec(L) { 2465ffd83dbSDimitry Andric addInstruction(I); 2475ffd83dbSDimitry Andric }; 2485ffd83dbSDimitry Andric 2495ffd83dbSDimitry Andric /// Add the instruction \param I to this prefetch. If it's not the first 2505ffd83dbSDimitry Andric /// one, 'InsertPt' and 'Writes' will be updated as required. 2515ffd83dbSDimitry Andric /// \param PtrDiff the known constant address difference to the first added 2525ffd83dbSDimitry Andric /// instruction. 2535ffd83dbSDimitry Andric void addInstruction(Instruction *I, DominatorTree *DT = nullptr, 2545ffd83dbSDimitry Andric int64_t PtrDiff = 0) { 2555ffd83dbSDimitry Andric if (!InsertPt) { 2565ffd83dbSDimitry Andric MemI = I; 2575ffd83dbSDimitry Andric InsertPt = I; 2585ffd83dbSDimitry Andric Writes = isa<StoreInst>(I); 2595ffd83dbSDimitry Andric } else { 2605ffd83dbSDimitry Andric BasicBlock *PrefBB = InsertPt->getParent(); 2615ffd83dbSDimitry Andric BasicBlock *InsBB = I->getParent(); 2625ffd83dbSDimitry Andric if (PrefBB != InsBB) { 2635ffd83dbSDimitry Andric BasicBlock *DomBB = DT->findNearestCommonDominator(PrefBB, InsBB); 2645ffd83dbSDimitry Andric if (DomBB != PrefBB) 2655ffd83dbSDimitry Andric InsertPt = DomBB->getTerminator(); 2665ffd83dbSDimitry Andric } 2675ffd83dbSDimitry Andric 2685ffd83dbSDimitry Andric if (isa<StoreInst>(I) && PtrDiff == 0) 2695ffd83dbSDimitry Andric Writes = true; 2705ffd83dbSDimitry Andric } 2715ffd83dbSDimitry Andric } 2725ffd83dbSDimitry Andric }; 2735ffd83dbSDimitry Andric 2740b57cec5SDimitry Andric bool LoopDataPrefetch::runOnLoop(Loop *L) { 2750b57cec5SDimitry Andric bool MadeChange = false; 2760b57cec5SDimitry Andric 2770b57cec5SDimitry Andric // Only prefetch in the inner-most loop 278e8d8bef9SDimitry Andric if (!L->isInnermost()) 2790b57cec5SDimitry Andric return MadeChange; 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric SmallPtrSet<const Value *, 32> EphValues; 2820b57cec5SDimitry Andric CodeMetrics::collectEphemeralValues(L, AC, EphValues); 2830b57cec5SDimitry Andric 2840b57cec5SDimitry Andric // Calculate the number of iterations ahead to prefetch 2850b57cec5SDimitry Andric CodeMetrics Metrics; 2865ffd83dbSDimitry Andric bool HasCall = false; 2870b57cec5SDimitry Andric for (const auto BB : L->blocks()) { 2880b57cec5SDimitry Andric // If the loop already has prefetches, then assume that the user knows 2890b57cec5SDimitry Andric // what they are doing and don't add any more. 2905ffd83dbSDimitry Andric for (auto &I : *BB) { 2915ffd83dbSDimitry Andric if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) { 2925ffd83dbSDimitry Andric if (const Function *F = cast<CallBase>(I).getCalledFunction()) { 2930b57cec5SDimitry Andric if (F->getIntrinsicID() == Intrinsic::prefetch) 2940b57cec5SDimitry Andric return MadeChange; 2955ffd83dbSDimitry Andric if (TTI->isLoweredToCall(F)) 2965ffd83dbSDimitry Andric HasCall = true; 2975ffd83dbSDimitry Andric } else { // indirect call. 2985ffd83dbSDimitry Andric HasCall = true; 2995ffd83dbSDimitry Andric } 3005ffd83dbSDimitry Andric } 3015ffd83dbSDimitry Andric } 3020b57cec5SDimitry Andric Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 3030b57cec5SDimitry Andric } 30481ad6265SDimitry Andric 30581ad6265SDimitry Andric if (!Metrics.NumInsts.isValid()) 30681ad6265SDimitry Andric return MadeChange; 30781ad6265SDimitry Andric 30881ad6265SDimitry Andric unsigned LoopSize = *Metrics.NumInsts.getValue(); 3090b57cec5SDimitry Andric if (!LoopSize) 3100b57cec5SDimitry Andric LoopSize = 1; 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric unsigned ItersAhead = getPrefetchDistance() / LoopSize; 3130b57cec5SDimitry Andric if (!ItersAhead) 3140b57cec5SDimitry Andric ItersAhead = 1; 3150b57cec5SDimitry Andric 3160b57cec5SDimitry Andric if (ItersAhead > getMaxPrefetchIterationsAhead()) 3170b57cec5SDimitry Andric return MadeChange; 3180b57cec5SDimitry Andric 3195ffd83dbSDimitry Andric unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L); 3205ffd83dbSDimitry Andric if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1) 3215ffd83dbSDimitry Andric return MadeChange; 3220b57cec5SDimitry Andric 3235ffd83dbSDimitry Andric unsigned NumMemAccesses = 0; 3245ffd83dbSDimitry Andric unsigned NumStridedMemAccesses = 0; 3255ffd83dbSDimitry Andric SmallVector<Prefetch, 16> Prefetches; 3265ffd83dbSDimitry Andric for (const auto BB : L->blocks()) 3270b57cec5SDimitry Andric for (auto &I : *BB) { 3280b57cec5SDimitry Andric Value *PtrValue; 3290b57cec5SDimitry Andric Instruction *MemI; 3300b57cec5SDimitry Andric 3310b57cec5SDimitry Andric if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) { 3320b57cec5SDimitry Andric MemI = LMemI; 3330b57cec5SDimitry Andric PtrValue = LMemI->getPointerOperand(); 3340b57cec5SDimitry Andric } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) { 3355ffd83dbSDimitry Andric if (!doPrefetchWrites()) continue; 3360b57cec5SDimitry Andric MemI = SMemI; 3370b57cec5SDimitry Andric PtrValue = SMemI->getPointerOperand(); 3380b57cec5SDimitry Andric } else continue; 3390b57cec5SDimitry Andric 3400b57cec5SDimitry Andric unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); 341bdd1243dSDimitry Andric if (!TTI->shouldPrefetchAddressSpace(PtrAddrSpace)) 3420b57cec5SDimitry Andric continue; 3435ffd83dbSDimitry Andric NumMemAccesses++; 3440b57cec5SDimitry Andric if (L->isLoopInvariant(PtrValue)) 3450b57cec5SDimitry Andric continue; 3460b57cec5SDimitry Andric 3470b57cec5SDimitry Andric const SCEV *LSCEV = SE->getSCEV(PtrValue); 3480b57cec5SDimitry Andric const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV); 3490b57cec5SDimitry Andric if (!LSCEVAddRec) 3500b57cec5SDimitry Andric continue; 3515ffd83dbSDimitry Andric NumStridedMemAccesses++; 3520b57cec5SDimitry Andric 3535ffd83dbSDimitry Andric // We don't want to double prefetch individual cache lines. If this 3545ffd83dbSDimitry Andric // access is known to be within one cache line of some other one that 3555ffd83dbSDimitry Andric // has already been prefetched, then don't prefetch this one as well. 3560b57cec5SDimitry Andric bool DupPref = false; 3575ffd83dbSDimitry Andric for (auto &Pref : Prefetches) { 3585ffd83dbSDimitry Andric const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec); 3590b57cec5SDimitry Andric if (const SCEVConstant *ConstPtrDiff = 3600b57cec5SDimitry Andric dyn_cast<SCEVConstant>(PtrDiff)) { 3610b57cec5SDimitry Andric int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue()); 3620b57cec5SDimitry Andric if (PD < (int64_t) TTI->getCacheLineSize()) { 3635ffd83dbSDimitry Andric Pref.addInstruction(MemI, DT, PD); 3640b57cec5SDimitry Andric DupPref = true; 3650b57cec5SDimitry Andric break; 3660b57cec5SDimitry Andric } 3670b57cec5SDimitry Andric } 3680b57cec5SDimitry Andric } 3695ffd83dbSDimitry Andric if (!DupPref) 3705ffd83dbSDimitry Andric Prefetches.push_back(Prefetch(LSCEVAddRec, MemI)); 3715ffd83dbSDimitry Andric } 3725ffd83dbSDimitry Andric 3735ffd83dbSDimitry Andric unsigned TargetMinStride = 3745ffd83dbSDimitry Andric getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses, 3755ffd83dbSDimitry Andric Prefetches.size(), HasCall); 3765ffd83dbSDimitry Andric 3775ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead 3785ffd83dbSDimitry Andric << " iterations ahead (loop size: " << LoopSize << ") in " 3795ffd83dbSDimitry Andric << L->getHeader()->getParent()->getName() << ": " << *L); 3805ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Loop has: " 3815ffd83dbSDimitry Andric << NumMemAccesses << " memory accesses, " 3825ffd83dbSDimitry Andric << NumStridedMemAccesses << " strided memory accesses, " 3835ffd83dbSDimitry Andric << Prefetches.size() << " potential prefetch(es), " 3845ffd83dbSDimitry Andric << "a minimum stride of " << TargetMinStride << ", " 3855ffd83dbSDimitry Andric << (HasCall ? "calls" : "no calls") << ".\n"); 3865ffd83dbSDimitry Andric 3875ffd83dbSDimitry Andric for (auto &P : Prefetches) { 3885ffd83dbSDimitry Andric // Check if the stride of the accesses is large enough to warrant a 3895ffd83dbSDimitry Andric // prefetch. 3905ffd83dbSDimitry Andric if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride)) 3910b57cec5SDimitry Andric continue; 3920b57cec5SDimitry Andric 393fcaf7f86SDimitry Andric BasicBlock *BB = P.InsertPt->getParent(); 394*0fca6ea1SDimitry Andric SCEVExpander SCEVE(*SE, BB->getDataLayout(), "prefaddr"); 3955ffd83dbSDimitry Andric const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr( 3965ffd83dbSDimitry Andric SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead), 3975ffd83dbSDimitry Andric P.LSCEVAddRec->getStepRecurrence(*SE))); 398fcaf7f86SDimitry Andric if (!SCEVE.isSafeToExpand(NextLSCEV)) 3990b57cec5SDimitry Andric continue; 4000b57cec5SDimitry Andric 401bdd1243dSDimitry Andric unsigned PtrAddrSpace = NextLSCEV->getType()->getPointerAddressSpace(); 4025f757f3fSDimitry Andric Type *I8Ptr = PointerType::get(BB->getContext(), PtrAddrSpace); 4035ffd83dbSDimitry Andric Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt); 4040b57cec5SDimitry Andric 4055ffd83dbSDimitry Andric IRBuilder<> Builder(P.InsertPt); 4060b57cec5SDimitry Andric Module *M = BB->getParent()->getParent(); 4070b57cec5SDimitry Andric Type *I32 = Type::getInt32Ty(BB->getContext()); 4088bcb0991SDimitry Andric Function *PrefetchFunc = Intrinsic::getDeclaration( 4098bcb0991SDimitry Andric M, Intrinsic::prefetch, PrefPtrValue->getType()); 4100b57cec5SDimitry Andric Builder.CreateCall( 4110b57cec5SDimitry Andric PrefetchFunc, 4120b57cec5SDimitry Andric {PrefPtrValue, 4135ffd83dbSDimitry Andric ConstantInt::get(I32, P.Writes), 4140b57cec5SDimitry Andric ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)}); 4150b57cec5SDimitry Andric ++NumPrefetches; 4165ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << " Access: " 4175ffd83dbSDimitry Andric << *P.MemI->getOperand(isa<LoadInst>(P.MemI) ? 0 : 1) 4185ffd83dbSDimitry Andric << ", SCEV: " << *P.LSCEVAddRec << "\n"); 4190b57cec5SDimitry Andric ORE->emit([&]() { 4205ffd83dbSDimitry Andric return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI) 4210b57cec5SDimitry Andric << "prefetched memory access"; 4220b57cec5SDimitry Andric }); 4230b57cec5SDimitry Andric 4240b57cec5SDimitry Andric MadeChange = true; 4250b57cec5SDimitry Andric } 4260b57cec5SDimitry Andric 4270b57cec5SDimitry Andric return MadeChange; 4280b57cec5SDimitry Andric } 429