//===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a Loop Data Prefetching Pass.
//
//===----------------------------------------------------------------------===//
1209467b48Spatrick
1309467b48Spatrick #include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
1409467b48Spatrick #include "llvm/InitializePasses.h"
1509467b48Spatrick
1609467b48Spatrick #include "llvm/ADT/DepthFirstIterator.h"
1709467b48Spatrick #include "llvm/ADT/Statistic.h"
1809467b48Spatrick #include "llvm/Analysis/AssumptionCache.h"
1909467b48Spatrick #include "llvm/Analysis/CodeMetrics.h"
2009467b48Spatrick #include "llvm/Analysis/LoopInfo.h"
2109467b48Spatrick #include "llvm/Analysis/OptimizationRemarkEmitter.h"
2209467b48Spatrick #include "llvm/Analysis/ScalarEvolution.h"
2309467b48Spatrick #include "llvm/Analysis/ScalarEvolutionExpressions.h"
2409467b48Spatrick #include "llvm/Analysis/TargetTransformInfo.h"
2509467b48Spatrick #include "llvm/IR/Dominators.h"
2609467b48Spatrick #include "llvm/IR/Function.h"
2709467b48Spatrick #include "llvm/IR/Module.h"
2809467b48Spatrick #include "llvm/Support/CommandLine.h"
2909467b48Spatrick #include "llvm/Support/Debug.h"
3009467b48Spatrick #include "llvm/Transforms/Scalar.h"
31*d415bd75Srobert #include "llvm/Transforms/Utils.h"
32097a140dSpatrick #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
3373471bf0Spatrick
3473471bf0Spatrick #define DEBUG_TYPE "loop-data-prefetch"
3573471bf0Spatrick
3609467b48Spatrick using namespace llvm;
3709467b48Spatrick
3809467b48Spatrick // By default, we limit this to creating 16 PHIs (which is a little over half
3909467b48Spatrick // of the allocatable register set).
4009467b48Spatrick static cl::opt<bool>
4109467b48Spatrick PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
4209467b48Spatrick cl::desc("Prefetch write addresses"));
4309467b48Spatrick
4409467b48Spatrick static cl::opt<unsigned>
4509467b48Spatrick PrefetchDistance("prefetch-distance",
4609467b48Spatrick cl::desc("Number of instructions to prefetch ahead"),
4709467b48Spatrick cl::Hidden);
4809467b48Spatrick
4909467b48Spatrick static cl::opt<unsigned>
5009467b48Spatrick MinPrefetchStride("min-prefetch-stride",
5109467b48Spatrick cl::desc("Min stride to add prefetches"), cl::Hidden);
5209467b48Spatrick
5309467b48Spatrick static cl::opt<unsigned> MaxPrefetchIterationsAhead(
5409467b48Spatrick "max-prefetch-iters-ahead",
5509467b48Spatrick cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
5609467b48Spatrick
5709467b48Spatrick STATISTIC(NumPrefetches, "Number of prefetches inserted");
5809467b48Spatrick
5909467b48Spatrick namespace {
6009467b48Spatrick
6109467b48Spatrick /// Loop prefetch implementation class.
6209467b48Spatrick class LoopDataPrefetch {
6309467b48Spatrick public:
LoopDataPrefetch(AssumptionCache * AC,DominatorTree * DT,LoopInfo * LI,ScalarEvolution * SE,const TargetTransformInfo * TTI,OptimizationRemarkEmitter * ORE)64097a140dSpatrick LoopDataPrefetch(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
65097a140dSpatrick ScalarEvolution *SE, const TargetTransformInfo *TTI,
6609467b48Spatrick OptimizationRemarkEmitter *ORE)
67097a140dSpatrick : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
6809467b48Spatrick
6909467b48Spatrick bool run();
7009467b48Spatrick
7109467b48Spatrick private:
7209467b48Spatrick bool runOnLoop(Loop *L);
7309467b48Spatrick
7409467b48Spatrick /// Check if the stride of the accesses is large enough to
7509467b48Spatrick /// warrant a prefetch.
76097a140dSpatrick bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride);
7709467b48Spatrick
getMinPrefetchStride(unsigned NumMemAccesses,unsigned NumStridedMemAccesses,unsigned NumPrefetches,bool HasCall)78097a140dSpatrick unsigned getMinPrefetchStride(unsigned NumMemAccesses,
79097a140dSpatrick unsigned NumStridedMemAccesses,
80097a140dSpatrick unsigned NumPrefetches,
81097a140dSpatrick bool HasCall) {
8209467b48Spatrick if (MinPrefetchStride.getNumOccurrences() > 0)
8309467b48Spatrick return MinPrefetchStride;
84097a140dSpatrick return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
85097a140dSpatrick NumPrefetches, HasCall);
8609467b48Spatrick }
8709467b48Spatrick
getPrefetchDistance()8809467b48Spatrick unsigned getPrefetchDistance() {
8909467b48Spatrick if (PrefetchDistance.getNumOccurrences() > 0)
9009467b48Spatrick return PrefetchDistance;
9109467b48Spatrick return TTI->getPrefetchDistance();
9209467b48Spatrick }
9309467b48Spatrick
getMaxPrefetchIterationsAhead()9409467b48Spatrick unsigned getMaxPrefetchIterationsAhead() {
9509467b48Spatrick if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
9609467b48Spatrick return MaxPrefetchIterationsAhead;
9709467b48Spatrick return TTI->getMaxPrefetchIterationsAhead();
9809467b48Spatrick }
9909467b48Spatrick
doPrefetchWrites()100097a140dSpatrick bool doPrefetchWrites() {
101097a140dSpatrick if (PrefetchWrites.getNumOccurrences() > 0)
102097a140dSpatrick return PrefetchWrites;
103097a140dSpatrick return TTI->enableWritePrefetching();
104097a140dSpatrick }
105097a140dSpatrick
10609467b48Spatrick AssumptionCache *AC;
107097a140dSpatrick DominatorTree *DT;
10809467b48Spatrick LoopInfo *LI;
10909467b48Spatrick ScalarEvolution *SE;
11009467b48Spatrick const TargetTransformInfo *TTI;
11109467b48Spatrick OptimizationRemarkEmitter *ORE;
11209467b48Spatrick };
11309467b48Spatrick
11409467b48Spatrick /// Legacy class for inserting loop data prefetches.
11509467b48Spatrick class LoopDataPrefetchLegacyPass : public FunctionPass {
11609467b48Spatrick public:
11709467b48Spatrick static char ID; // Pass ID, replacement for typeid
LoopDataPrefetchLegacyPass()11809467b48Spatrick LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
11909467b48Spatrick initializeLoopDataPrefetchLegacyPassPass(*PassRegistry::getPassRegistry());
12009467b48Spatrick }
12109467b48Spatrick
getAnalysisUsage(AnalysisUsage & AU) const12209467b48Spatrick void getAnalysisUsage(AnalysisUsage &AU) const override {
12309467b48Spatrick AU.addRequired<AssumptionCacheTracker>();
124097a140dSpatrick AU.addRequired<DominatorTreeWrapperPass>();
12509467b48Spatrick AU.addPreserved<DominatorTreeWrapperPass>();
12609467b48Spatrick AU.addRequired<LoopInfoWrapperPass>();
12709467b48Spatrick AU.addPreserved<LoopInfoWrapperPass>();
128*d415bd75Srobert AU.addRequiredID(LoopSimplifyID);
129*d415bd75Srobert AU.addPreservedID(LoopSimplifyID);
13009467b48Spatrick AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
13109467b48Spatrick AU.addRequired<ScalarEvolutionWrapperPass>();
13209467b48Spatrick AU.addPreserved<ScalarEvolutionWrapperPass>();
13309467b48Spatrick AU.addRequired<TargetTransformInfoWrapperPass>();
13409467b48Spatrick }
13509467b48Spatrick
13609467b48Spatrick bool runOnFunction(Function &F) override;
13709467b48Spatrick };
13809467b48Spatrick }
13909467b48Spatrick
14009467b48Spatrick char LoopDataPrefetchLegacyPass::ID = 0;
14109467b48Spatrick INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
14209467b48Spatrick "Loop Data Prefetch", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)14309467b48Spatrick INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
14409467b48Spatrick INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
14509467b48Spatrick INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
146*d415bd75Srobert INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
14709467b48Spatrick INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
14809467b48Spatrick INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
14909467b48Spatrick INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
15009467b48Spatrick "Loop Data Prefetch", false, false)
15109467b48Spatrick
15209467b48Spatrick FunctionPass *llvm::createLoopDataPrefetchPass() {
15309467b48Spatrick return new LoopDataPrefetchLegacyPass();
15409467b48Spatrick }
15509467b48Spatrick
isStrideLargeEnough(const SCEVAddRecExpr * AR,unsigned TargetMinStride)156097a140dSpatrick bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR,
157097a140dSpatrick unsigned TargetMinStride) {
15809467b48Spatrick // No need to check if any stride goes.
15909467b48Spatrick if (TargetMinStride <= 1)
16009467b48Spatrick return true;
16109467b48Spatrick
16209467b48Spatrick const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
16309467b48Spatrick // If MinStride is set, don't prefetch unless we can ensure that stride is
16409467b48Spatrick // larger.
16509467b48Spatrick if (!ConstStride)
16609467b48Spatrick return false;
16709467b48Spatrick
16809467b48Spatrick unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
16909467b48Spatrick return TargetMinStride <= AbsStride;
17009467b48Spatrick }
17109467b48Spatrick
run(Function & F,FunctionAnalysisManager & AM)17209467b48Spatrick PreservedAnalyses LoopDataPrefetchPass::run(Function &F,
17309467b48Spatrick FunctionAnalysisManager &AM) {
174097a140dSpatrick DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
17509467b48Spatrick LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
17609467b48Spatrick ScalarEvolution *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
17709467b48Spatrick AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
17809467b48Spatrick OptimizationRemarkEmitter *ORE =
17909467b48Spatrick &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
18009467b48Spatrick const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
18109467b48Spatrick
182097a140dSpatrick LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
18309467b48Spatrick bool Changed = LDP.run();
18409467b48Spatrick
18509467b48Spatrick if (Changed) {
18609467b48Spatrick PreservedAnalyses PA;
18709467b48Spatrick PA.preserve<DominatorTreeAnalysis>();
18809467b48Spatrick PA.preserve<LoopAnalysis>();
18909467b48Spatrick return PA;
19009467b48Spatrick }
19109467b48Spatrick
19209467b48Spatrick return PreservedAnalyses::all();
19309467b48Spatrick }
19409467b48Spatrick
runOnFunction(Function & F)19509467b48Spatrick bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) {
19609467b48Spatrick if (skipFunction(F))
19709467b48Spatrick return false;
19809467b48Spatrick
199097a140dSpatrick DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
20009467b48Spatrick LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
20109467b48Spatrick ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
20209467b48Spatrick AssumptionCache *AC =
20309467b48Spatrick &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
20409467b48Spatrick OptimizationRemarkEmitter *ORE =
20509467b48Spatrick &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
20609467b48Spatrick const TargetTransformInfo *TTI =
20709467b48Spatrick &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
20809467b48Spatrick
209097a140dSpatrick LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
21009467b48Spatrick return LDP.run();
21109467b48Spatrick }
21209467b48Spatrick
run()21309467b48Spatrick bool LoopDataPrefetch::run() {
21409467b48Spatrick // If PrefetchDistance is not set, don't run the pass. This gives an
21509467b48Spatrick // opportunity for targets to run this pass for selected subtargets only
216*d415bd75Srobert // (whose TTI sets PrefetchDistance and CacheLineSize).
217*d415bd75Srobert if (getPrefetchDistance() == 0 || TTI->getCacheLineSize() == 0) {
218*d415bd75Srobert LLVM_DEBUG(dbgs() << "Please set both PrefetchDistance and CacheLineSize "
219*d415bd75Srobert "for loop data prefetch.\n");
22009467b48Spatrick return false;
221*d415bd75Srobert }
22209467b48Spatrick
22309467b48Spatrick bool MadeChange = false;
22409467b48Spatrick
22509467b48Spatrick for (Loop *I : *LI)
226*d415bd75Srobert for (Loop *L : depth_first(I))
227*d415bd75Srobert MadeChange |= runOnLoop(L);
22809467b48Spatrick
22909467b48Spatrick return MadeChange;
23009467b48Spatrick }
23109467b48Spatrick
232097a140dSpatrick /// A record for a potential prefetch made during the initial scan of the
233097a140dSpatrick /// loop. This is used to let a single prefetch target multiple memory accesses.
234097a140dSpatrick struct Prefetch {
235097a140dSpatrick /// The address formula for this prefetch as returned by ScalarEvolution.
236097a140dSpatrick const SCEVAddRecExpr *LSCEVAddRec;
237097a140dSpatrick /// The point of insertion for the prefetch instruction.
238*d415bd75Srobert Instruction *InsertPt = nullptr;
239097a140dSpatrick /// True if targeting a write memory access.
240*d415bd75Srobert bool Writes = false;
241097a140dSpatrick /// The (first seen) prefetched instruction.
242*d415bd75Srobert Instruction *MemI = nullptr;
243097a140dSpatrick
244097a140dSpatrick /// Constructor to create a new Prefetch for \p I.
PrefetchPrefetch245*d415bd75Srobert Prefetch(const SCEVAddRecExpr *L, Instruction *I) : LSCEVAddRec(L) {
246097a140dSpatrick addInstruction(I);
247097a140dSpatrick };
248097a140dSpatrick
249097a140dSpatrick /// Add the instruction \param I to this prefetch. If it's not the first
250097a140dSpatrick /// one, 'InsertPt' and 'Writes' will be updated as required.
251097a140dSpatrick /// \param PtrDiff the known constant address difference to the first added
252097a140dSpatrick /// instruction.
addInstructionPrefetch253097a140dSpatrick void addInstruction(Instruction *I, DominatorTree *DT = nullptr,
254097a140dSpatrick int64_t PtrDiff = 0) {
255097a140dSpatrick if (!InsertPt) {
256097a140dSpatrick MemI = I;
257097a140dSpatrick InsertPt = I;
258097a140dSpatrick Writes = isa<StoreInst>(I);
259097a140dSpatrick } else {
260097a140dSpatrick BasicBlock *PrefBB = InsertPt->getParent();
261097a140dSpatrick BasicBlock *InsBB = I->getParent();
262097a140dSpatrick if (PrefBB != InsBB) {
263097a140dSpatrick BasicBlock *DomBB = DT->findNearestCommonDominator(PrefBB, InsBB);
264097a140dSpatrick if (DomBB != PrefBB)
265097a140dSpatrick InsertPt = DomBB->getTerminator();
266097a140dSpatrick }
267097a140dSpatrick
268097a140dSpatrick if (isa<StoreInst>(I) && PtrDiff == 0)
269097a140dSpatrick Writes = true;
270097a140dSpatrick }
271097a140dSpatrick }
272097a140dSpatrick };
273097a140dSpatrick
runOnLoop(Loop * L)27409467b48Spatrick bool LoopDataPrefetch::runOnLoop(Loop *L) {
27509467b48Spatrick bool MadeChange = false;
27609467b48Spatrick
27709467b48Spatrick // Only prefetch in the inner-most loop
27873471bf0Spatrick if (!L->isInnermost())
27909467b48Spatrick return MadeChange;
28009467b48Spatrick
28109467b48Spatrick SmallPtrSet<const Value *, 32> EphValues;
28209467b48Spatrick CodeMetrics::collectEphemeralValues(L, AC, EphValues);
28309467b48Spatrick
28409467b48Spatrick // Calculate the number of iterations ahead to prefetch
28509467b48Spatrick CodeMetrics Metrics;
286097a140dSpatrick bool HasCall = false;
28709467b48Spatrick for (const auto BB : L->blocks()) {
28809467b48Spatrick // If the loop already has prefetches, then assume that the user knows
28909467b48Spatrick // what they are doing and don't add any more.
290097a140dSpatrick for (auto &I : *BB) {
291097a140dSpatrick if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
292097a140dSpatrick if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
29309467b48Spatrick if (F->getIntrinsicID() == Intrinsic::prefetch)
29409467b48Spatrick return MadeChange;
295097a140dSpatrick if (TTI->isLoweredToCall(F))
296097a140dSpatrick HasCall = true;
297097a140dSpatrick } else { // indirect call.
298097a140dSpatrick HasCall = true;
299097a140dSpatrick }
300097a140dSpatrick }
301097a140dSpatrick }
30209467b48Spatrick Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
30309467b48Spatrick }
304*d415bd75Srobert
305*d415bd75Srobert if (!Metrics.NumInsts.isValid())
306*d415bd75Srobert return MadeChange;
307*d415bd75Srobert
308*d415bd75Srobert unsigned LoopSize = *Metrics.NumInsts.getValue();
30909467b48Spatrick if (!LoopSize)
31009467b48Spatrick LoopSize = 1;
31109467b48Spatrick
31209467b48Spatrick unsigned ItersAhead = getPrefetchDistance() / LoopSize;
31309467b48Spatrick if (!ItersAhead)
31409467b48Spatrick ItersAhead = 1;
31509467b48Spatrick
31609467b48Spatrick if (ItersAhead > getMaxPrefetchIterationsAhead())
31709467b48Spatrick return MadeChange;
31809467b48Spatrick
319097a140dSpatrick unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L);
320097a140dSpatrick if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
321097a140dSpatrick return MadeChange;
32209467b48Spatrick
323097a140dSpatrick unsigned NumMemAccesses = 0;
324097a140dSpatrick unsigned NumStridedMemAccesses = 0;
325097a140dSpatrick SmallVector<Prefetch, 16> Prefetches;
326097a140dSpatrick for (const auto BB : L->blocks())
32709467b48Spatrick for (auto &I : *BB) {
32809467b48Spatrick Value *PtrValue;
32909467b48Spatrick Instruction *MemI;
33009467b48Spatrick
33109467b48Spatrick if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
33209467b48Spatrick MemI = LMemI;
33309467b48Spatrick PtrValue = LMemI->getPointerOperand();
33409467b48Spatrick } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
335097a140dSpatrick if (!doPrefetchWrites()) continue;
33609467b48Spatrick MemI = SMemI;
33709467b48Spatrick PtrValue = SMemI->getPointerOperand();
33809467b48Spatrick } else continue;
33909467b48Spatrick
34009467b48Spatrick unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
341*d415bd75Srobert if (!TTI->shouldPrefetchAddressSpace(PtrAddrSpace))
34209467b48Spatrick continue;
343097a140dSpatrick NumMemAccesses++;
34409467b48Spatrick if (L->isLoopInvariant(PtrValue))
34509467b48Spatrick continue;
34609467b48Spatrick
34709467b48Spatrick const SCEV *LSCEV = SE->getSCEV(PtrValue);
34809467b48Spatrick const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
34909467b48Spatrick if (!LSCEVAddRec)
35009467b48Spatrick continue;
351097a140dSpatrick NumStridedMemAccesses++;
35209467b48Spatrick
353097a140dSpatrick // We don't want to double prefetch individual cache lines. If this
354097a140dSpatrick // access is known to be within one cache line of some other one that
355097a140dSpatrick // has already been prefetched, then don't prefetch this one as well.
35609467b48Spatrick bool DupPref = false;
357097a140dSpatrick for (auto &Pref : Prefetches) {
358097a140dSpatrick const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);
35909467b48Spatrick if (const SCEVConstant *ConstPtrDiff =
36009467b48Spatrick dyn_cast<SCEVConstant>(PtrDiff)) {
36109467b48Spatrick int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
36209467b48Spatrick if (PD < (int64_t) TTI->getCacheLineSize()) {
363097a140dSpatrick Pref.addInstruction(MemI, DT, PD);
36409467b48Spatrick DupPref = true;
36509467b48Spatrick break;
36609467b48Spatrick }
36709467b48Spatrick }
36809467b48Spatrick }
369097a140dSpatrick if (!DupPref)
370097a140dSpatrick Prefetches.push_back(Prefetch(LSCEVAddRec, MemI));
371097a140dSpatrick }
372097a140dSpatrick
373097a140dSpatrick unsigned TargetMinStride =
374097a140dSpatrick getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
375097a140dSpatrick Prefetches.size(), HasCall);
376097a140dSpatrick
377097a140dSpatrick LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead
378097a140dSpatrick << " iterations ahead (loop size: " << LoopSize << ") in "
379097a140dSpatrick << L->getHeader()->getParent()->getName() << ": " << *L);
380097a140dSpatrick LLVM_DEBUG(dbgs() << "Loop has: "
381097a140dSpatrick << NumMemAccesses << " memory accesses, "
382097a140dSpatrick << NumStridedMemAccesses << " strided memory accesses, "
383097a140dSpatrick << Prefetches.size() << " potential prefetch(es), "
384097a140dSpatrick << "a minimum stride of " << TargetMinStride << ", "
385097a140dSpatrick << (HasCall ? "calls" : "no calls") << ".\n");
386097a140dSpatrick
387097a140dSpatrick for (auto &P : Prefetches) {
388097a140dSpatrick // Check if the stride of the accesses is large enough to warrant a
389097a140dSpatrick // prefetch.
390097a140dSpatrick if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
39109467b48Spatrick continue;
39209467b48Spatrick
393*d415bd75Srobert BasicBlock *BB = P.InsertPt->getParent();
394*d415bd75Srobert SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
395097a140dSpatrick const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
396097a140dSpatrick SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
397097a140dSpatrick P.LSCEVAddRec->getStepRecurrence(*SE)));
398*d415bd75Srobert if (!SCEVE.isSafeToExpand(NextLSCEV))
39909467b48Spatrick continue;
40009467b48Spatrick
401*d415bd75Srobert unsigned PtrAddrSpace = NextLSCEV->getType()->getPointerAddressSpace();
402*d415bd75Srobert Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), PtrAddrSpace);
403097a140dSpatrick Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);
40409467b48Spatrick
405097a140dSpatrick IRBuilder<> Builder(P.InsertPt);
40609467b48Spatrick Module *M = BB->getParent()->getParent();
40709467b48Spatrick Type *I32 = Type::getInt32Ty(BB->getContext());
40809467b48Spatrick Function *PrefetchFunc = Intrinsic::getDeclaration(
40909467b48Spatrick M, Intrinsic::prefetch, PrefPtrValue->getType());
41009467b48Spatrick Builder.CreateCall(
41109467b48Spatrick PrefetchFunc,
41209467b48Spatrick {PrefPtrValue,
413097a140dSpatrick ConstantInt::get(I32, P.Writes),
41409467b48Spatrick ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
41509467b48Spatrick ++NumPrefetches;
416097a140dSpatrick LLVM_DEBUG(dbgs() << " Access: "
417097a140dSpatrick << *P.MemI->getOperand(isa<LoadInst>(P.MemI) ? 0 : 1)
418097a140dSpatrick << ", SCEV: " << *P.LSCEVAddRec << "\n");
41909467b48Spatrick ORE->emit([&]() {
420097a140dSpatrick return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI)
42109467b48Spatrick << "prefetched memory access";
42209467b48Spatrick });
42309467b48Spatrick
42409467b48Spatrick MadeChange = true;
42509467b48Spatrick }
42609467b48Spatrick
42709467b48Spatrick return MadeChange;
42809467b48Spatrick }
429