xref: /openbsd-src/gnu/llvm/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
109467b48Spatrick //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This file implements a Loop Data Prefetching Pass.
1009467b48Spatrick //
1109467b48Spatrick //===----------------------------------------------------------------------===//
1209467b48Spatrick 
1309467b48Spatrick #include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
1409467b48Spatrick #include "llvm/InitializePasses.h"
1509467b48Spatrick 
1609467b48Spatrick #include "llvm/ADT/DepthFirstIterator.h"
1709467b48Spatrick #include "llvm/ADT/Statistic.h"
1809467b48Spatrick #include "llvm/Analysis/AssumptionCache.h"
1909467b48Spatrick #include "llvm/Analysis/CodeMetrics.h"
2009467b48Spatrick #include "llvm/Analysis/LoopInfo.h"
2109467b48Spatrick #include "llvm/Analysis/OptimizationRemarkEmitter.h"
2209467b48Spatrick #include "llvm/Analysis/ScalarEvolution.h"
2309467b48Spatrick #include "llvm/Analysis/ScalarEvolutionExpressions.h"
2409467b48Spatrick #include "llvm/Analysis/TargetTransformInfo.h"
2509467b48Spatrick #include "llvm/IR/Dominators.h"
2609467b48Spatrick #include "llvm/IR/Function.h"
2709467b48Spatrick #include "llvm/IR/Module.h"
2809467b48Spatrick #include "llvm/Support/CommandLine.h"
2909467b48Spatrick #include "llvm/Support/Debug.h"
3009467b48Spatrick #include "llvm/Transforms/Scalar.h"
31*d415bd75Srobert #include "llvm/Transforms/Utils.h"
32097a140dSpatrick #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
3373471bf0Spatrick 
3473471bf0Spatrick #define DEBUG_TYPE "loop-data-prefetch"
3573471bf0Spatrick 
3609467b48Spatrick using namespace llvm;
3709467b48Spatrick 
3809467b48Spatrick // By default, we limit this to creating 16 PHIs (which is a little over half
3909467b48Spatrick // of the allocatable register set).
4009467b48Spatrick static cl::opt<bool>
4109467b48Spatrick PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
4209467b48Spatrick                cl::desc("Prefetch write addresses"));
4309467b48Spatrick 
4409467b48Spatrick static cl::opt<unsigned>
4509467b48Spatrick     PrefetchDistance("prefetch-distance",
4609467b48Spatrick                      cl::desc("Number of instructions to prefetch ahead"),
4709467b48Spatrick                      cl::Hidden);
4809467b48Spatrick 
4909467b48Spatrick static cl::opt<unsigned>
5009467b48Spatrick     MinPrefetchStride("min-prefetch-stride",
5109467b48Spatrick                       cl::desc("Min stride to add prefetches"), cl::Hidden);
5209467b48Spatrick 
5309467b48Spatrick static cl::opt<unsigned> MaxPrefetchIterationsAhead(
5409467b48Spatrick     "max-prefetch-iters-ahead",
5509467b48Spatrick     cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
5609467b48Spatrick 
5709467b48Spatrick STATISTIC(NumPrefetches, "Number of prefetches inserted");
5809467b48Spatrick 
5909467b48Spatrick namespace {
6009467b48Spatrick 
6109467b48Spatrick /// Loop prefetch implementation class.
6209467b48Spatrick class LoopDataPrefetch {
6309467b48Spatrick public:
LoopDataPrefetch(AssumptionCache * AC,DominatorTree * DT,LoopInfo * LI,ScalarEvolution * SE,const TargetTransformInfo * TTI,OptimizationRemarkEmitter * ORE)64097a140dSpatrick   LoopDataPrefetch(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
65097a140dSpatrick                    ScalarEvolution *SE, const TargetTransformInfo *TTI,
6609467b48Spatrick                    OptimizationRemarkEmitter *ORE)
67097a140dSpatrick       : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
6809467b48Spatrick 
6909467b48Spatrick   bool run();
7009467b48Spatrick 
7109467b48Spatrick private:
7209467b48Spatrick   bool runOnLoop(Loop *L);
7309467b48Spatrick 
7409467b48Spatrick   /// Check if the stride of the accesses is large enough to
7509467b48Spatrick   /// warrant a prefetch.
76097a140dSpatrick   bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride);
7709467b48Spatrick 
getMinPrefetchStride(unsigned NumMemAccesses,unsigned NumStridedMemAccesses,unsigned NumPrefetches,bool HasCall)78097a140dSpatrick   unsigned getMinPrefetchStride(unsigned NumMemAccesses,
79097a140dSpatrick                                 unsigned NumStridedMemAccesses,
80097a140dSpatrick                                 unsigned NumPrefetches,
81097a140dSpatrick                                 bool HasCall) {
8209467b48Spatrick     if (MinPrefetchStride.getNumOccurrences() > 0)
8309467b48Spatrick       return MinPrefetchStride;
84097a140dSpatrick     return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
85097a140dSpatrick                                      NumPrefetches, HasCall);
8609467b48Spatrick   }
8709467b48Spatrick 
getPrefetchDistance()8809467b48Spatrick   unsigned getPrefetchDistance() {
8909467b48Spatrick     if (PrefetchDistance.getNumOccurrences() > 0)
9009467b48Spatrick       return PrefetchDistance;
9109467b48Spatrick     return TTI->getPrefetchDistance();
9209467b48Spatrick   }
9309467b48Spatrick 
getMaxPrefetchIterationsAhead()9409467b48Spatrick   unsigned getMaxPrefetchIterationsAhead() {
9509467b48Spatrick     if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
9609467b48Spatrick       return MaxPrefetchIterationsAhead;
9709467b48Spatrick     return TTI->getMaxPrefetchIterationsAhead();
9809467b48Spatrick   }
9909467b48Spatrick 
doPrefetchWrites()100097a140dSpatrick   bool doPrefetchWrites() {
101097a140dSpatrick     if (PrefetchWrites.getNumOccurrences() > 0)
102097a140dSpatrick       return PrefetchWrites;
103097a140dSpatrick     return TTI->enableWritePrefetching();
104097a140dSpatrick   }
105097a140dSpatrick 
10609467b48Spatrick   AssumptionCache *AC;
107097a140dSpatrick   DominatorTree *DT;
10809467b48Spatrick   LoopInfo *LI;
10909467b48Spatrick   ScalarEvolution *SE;
11009467b48Spatrick   const TargetTransformInfo *TTI;
11109467b48Spatrick   OptimizationRemarkEmitter *ORE;
11209467b48Spatrick };
11309467b48Spatrick 
11409467b48Spatrick /// Legacy class for inserting loop data prefetches.
11509467b48Spatrick class LoopDataPrefetchLegacyPass : public FunctionPass {
11609467b48Spatrick public:
11709467b48Spatrick   static char ID; // Pass ID, replacement for typeid
LoopDataPrefetchLegacyPass()11809467b48Spatrick   LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
11909467b48Spatrick     initializeLoopDataPrefetchLegacyPassPass(*PassRegistry::getPassRegistry());
12009467b48Spatrick   }
12109467b48Spatrick 
getAnalysisUsage(AnalysisUsage & AU) const12209467b48Spatrick   void getAnalysisUsage(AnalysisUsage &AU) const override {
12309467b48Spatrick     AU.addRequired<AssumptionCacheTracker>();
124097a140dSpatrick     AU.addRequired<DominatorTreeWrapperPass>();
12509467b48Spatrick     AU.addPreserved<DominatorTreeWrapperPass>();
12609467b48Spatrick     AU.addRequired<LoopInfoWrapperPass>();
12709467b48Spatrick     AU.addPreserved<LoopInfoWrapperPass>();
128*d415bd75Srobert     AU.addRequiredID(LoopSimplifyID);
129*d415bd75Srobert     AU.addPreservedID(LoopSimplifyID);
13009467b48Spatrick     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
13109467b48Spatrick     AU.addRequired<ScalarEvolutionWrapperPass>();
13209467b48Spatrick     AU.addPreserved<ScalarEvolutionWrapperPass>();
13309467b48Spatrick     AU.addRequired<TargetTransformInfoWrapperPass>();
13409467b48Spatrick   }
13509467b48Spatrick 
13609467b48Spatrick   bool runOnFunction(Function &F) override;
13709467b48Spatrick   };
13809467b48Spatrick }
13909467b48Spatrick 
14009467b48Spatrick char LoopDataPrefetchLegacyPass::ID = 0;
14109467b48Spatrick INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
14209467b48Spatrick                       "Loop Data Prefetch", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)14309467b48Spatrick INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
14409467b48Spatrick INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
14509467b48Spatrick INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
146*d415bd75Srobert INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
14709467b48Spatrick INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
14809467b48Spatrick INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
14909467b48Spatrick INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
15009467b48Spatrick                     "Loop Data Prefetch", false, false)
15109467b48Spatrick 
15209467b48Spatrick FunctionPass *llvm::createLoopDataPrefetchPass() {
15309467b48Spatrick   return new LoopDataPrefetchLegacyPass();
15409467b48Spatrick }
15509467b48Spatrick 
isStrideLargeEnough(const SCEVAddRecExpr * AR,unsigned TargetMinStride)156097a140dSpatrick bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR,
157097a140dSpatrick                                            unsigned TargetMinStride) {
15809467b48Spatrick   // No need to check if any stride goes.
15909467b48Spatrick   if (TargetMinStride <= 1)
16009467b48Spatrick     return true;
16109467b48Spatrick 
16209467b48Spatrick   const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
16309467b48Spatrick   // If MinStride is set, don't prefetch unless we can ensure that stride is
16409467b48Spatrick   // larger.
16509467b48Spatrick   if (!ConstStride)
16609467b48Spatrick     return false;
16709467b48Spatrick 
16809467b48Spatrick   unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
16909467b48Spatrick   return TargetMinStride <= AbsStride;
17009467b48Spatrick }
17109467b48Spatrick 
run(Function & F,FunctionAnalysisManager & AM)17209467b48Spatrick PreservedAnalyses LoopDataPrefetchPass::run(Function &F,
17309467b48Spatrick                                             FunctionAnalysisManager &AM) {
174097a140dSpatrick   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
17509467b48Spatrick   LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
17609467b48Spatrick   ScalarEvolution *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
17709467b48Spatrick   AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
17809467b48Spatrick   OptimizationRemarkEmitter *ORE =
17909467b48Spatrick       &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
18009467b48Spatrick   const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
18109467b48Spatrick 
182097a140dSpatrick   LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
18309467b48Spatrick   bool Changed = LDP.run();
18409467b48Spatrick 
18509467b48Spatrick   if (Changed) {
18609467b48Spatrick     PreservedAnalyses PA;
18709467b48Spatrick     PA.preserve<DominatorTreeAnalysis>();
18809467b48Spatrick     PA.preserve<LoopAnalysis>();
18909467b48Spatrick     return PA;
19009467b48Spatrick   }
19109467b48Spatrick 
19209467b48Spatrick   return PreservedAnalyses::all();
19309467b48Spatrick }
19409467b48Spatrick 
runOnFunction(Function & F)19509467b48Spatrick bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) {
19609467b48Spatrick   if (skipFunction(F))
19709467b48Spatrick     return false;
19809467b48Spatrick 
199097a140dSpatrick   DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
20009467b48Spatrick   LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
20109467b48Spatrick   ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
20209467b48Spatrick   AssumptionCache *AC =
20309467b48Spatrick       &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
20409467b48Spatrick   OptimizationRemarkEmitter *ORE =
20509467b48Spatrick       &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
20609467b48Spatrick   const TargetTransformInfo *TTI =
20709467b48Spatrick       &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
20809467b48Spatrick 
209097a140dSpatrick   LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
21009467b48Spatrick   return LDP.run();
21109467b48Spatrick }
21209467b48Spatrick 
run()21309467b48Spatrick bool LoopDataPrefetch::run() {
21409467b48Spatrick   // If PrefetchDistance is not set, don't run the pass.  This gives an
21509467b48Spatrick   // opportunity for targets to run this pass for selected subtargets only
216*d415bd75Srobert   // (whose TTI sets PrefetchDistance and CacheLineSize).
217*d415bd75Srobert   if (getPrefetchDistance() == 0 || TTI->getCacheLineSize() == 0) {
218*d415bd75Srobert     LLVM_DEBUG(dbgs() << "Please set both PrefetchDistance and CacheLineSize "
219*d415bd75Srobert                          "for loop data prefetch.\n");
22009467b48Spatrick     return false;
221*d415bd75Srobert   }
22209467b48Spatrick 
22309467b48Spatrick   bool MadeChange = false;
22409467b48Spatrick 
22509467b48Spatrick   for (Loop *I : *LI)
226*d415bd75Srobert     for (Loop *L : depth_first(I))
227*d415bd75Srobert       MadeChange |= runOnLoop(L);
22809467b48Spatrick 
22909467b48Spatrick   return MadeChange;
23009467b48Spatrick }
23109467b48Spatrick 
232097a140dSpatrick /// A record for a potential prefetch made during the initial scan of the
233097a140dSpatrick /// loop. This is used to let a single prefetch target multiple memory accesses.
234097a140dSpatrick struct Prefetch {
235097a140dSpatrick   /// The address formula for this prefetch as returned by ScalarEvolution.
236097a140dSpatrick   const SCEVAddRecExpr *LSCEVAddRec;
237097a140dSpatrick   /// The point of insertion for the prefetch instruction.
238*d415bd75Srobert   Instruction *InsertPt = nullptr;
239097a140dSpatrick   /// True if targeting a write memory access.
240*d415bd75Srobert   bool Writes = false;
241097a140dSpatrick   /// The (first seen) prefetched instruction.
242*d415bd75Srobert   Instruction *MemI = nullptr;
243097a140dSpatrick 
244097a140dSpatrick   /// Constructor to create a new Prefetch for \p I.
PrefetchPrefetch245*d415bd75Srobert   Prefetch(const SCEVAddRecExpr *L, Instruction *I) : LSCEVAddRec(L) {
246097a140dSpatrick     addInstruction(I);
247097a140dSpatrick   };
248097a140dSpatrick 
249097a140dSpatrick   /// Add the instruction \param I to this prefetch. If it's not the first
250097a140dSpatrick   /// one, 'InsertPt' and 'Writes' will be updated as required.
251097a140dSpatrick   /// \param PtrDiff the known constant address difference to the first added
252097a140dSpatrick   /// instruction.
addInstructionPrefetch253097a140dSpatrick   void addInstruction(Instruction *I, DominatorTree *DT = nullptr,
254097a140dSpatrick                       int64_t PtrDiff = 0) {
255097a140dSpatrick     if (!InsertPt) {
256097a140dSpatrick       MemI = I;
257097a140dSpatrick       InsertPt = I;
258097a140dSpatrick       Writes = isa<StoreInst>(I);
259097a140dSpatrick     } else {
260097a140dSpatrick       BasicBlock *PrefBB = InsertPt->getParent();
261097a140dSpatrick       BasicBlock *InsBB = I->getParent();
262097a140dSpatrick       if (PrefBB != InsBB) {
263097a140dSpatrick         BasicBlock *DomBB = DT->findNearestCommonDominator(PrefBB, InsBB);
264097a140dSpatrick         if (DomBB != PrefBB)
265097a140dSpatrick           InsertPt = DomBB->getTerminator();
266097a140dSpatrick       }
267097a140dSpatrick 
268097a140dSpatrick       if (isa<StoreInst>(I) && PtrDiff == 0)
269097a140dSpatrick         Writes = true;
270097a140dSpatrick     }
271097a140dSpatrick   }
272097a140dSpatrick };
273097a140dSpatrick 
runOnLoop(Loop * L)27409467b48Spatrick bool LoopDataPrefetch::runOnLoop(Loop *L) {
27509467b48Spatrick   bool MadeChange = false;
27609467b48Spatrick 
27709467b48Spatrick   // Only prefetch in the inner-most loop
27873471bf0Spatrick   if (!L->isInnermost())
27909467b48Spatrick     return MadeChange;
28009467b48Spatrick 
28109467b48Spatrick   SmallPtrSet<const Value *, 32> EphValues;
28209467b48Spatrick   CodeMetrics::collectEphemeralValues(L, AC, EphValues);
28309467b48Spatrick 
28409467b48Spatrick   // Calculate the number of iterations ahead to prefetch
28509467b48Spatrick   CodeMetrics Metrics;
286097a140dSpatrick   bool HasCall = false;
28709467b48Spatrick   for (const auto BB : L->blocks()) {
28809467b48Spatrick     // If the loop already has prefetches, then assume that the user knows
28909467b48Spatrick     // what they are doing and don't add any more.
290097a140dSpatrick     for (auto &I : *BB) {
291097a140dSpatrick       if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
292097a140dSpatrick         if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
29309467b48Spatrick           if (F->getIntrinsicID() == Intrinsic::prefetch)
29409467b48Spatrick             return MadeChange;
295097a140dSpatrick           if (TTI->isLoweredToCall(F))
296097a140dSpatrick             HasCall = true;
297097a140dSpatrick         } else { // indirect call.
298097a140dSpatrick           HasCall = true;
299097a140dSpatrick         }
300097a140dSpatrick       }
301097a140dSpatrick     }
30209467b48Spatrick     Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
30309467b48Spatrick   }
304*d415bd75Srobert 
305*d415bd75Srobert   if (!Metrics.NumInsts.isValid())
306*d415bd75Srobert     return MadeChange;
307*d415bd75Srobert 
308*d415bd75Srobert   unsigned LoopSize = *Metrics.NumInsts.getValue();
30909467b48Spatrick   if (!LoopSize)
31009467b48Spatrick     LoopSize = 1;
31109467b48Spatrick 
31209467b48Spatrick   unsigned ItersAhead = getPrefetchDistance() / LoopSize;
31309467b48Spatrick   if (!ItersAhead)
31409467b48Spatrick     ItersAhead = 1;
31509467b48Spatrick 
31609467b48Spatrick   if (ItersAhead > getMaxPrefetchIterationsAhead())
31709467b48Spatrick     return MadeChange;
31809467b48Spatrick 
319097a140dSpatrick   unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L);
320097a140dSpatrick   if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
321097a140dSpatrick     return MadeChange;
32209467b48Spatrick 
323097a140dSpatrick   unsigned NumMemAccesses = 0;
324097a140dSpatrick   unsigned NumStridedMemAccesses = 0;
325097a140dSpatrick   SmallVector<Prefetch, 16> Prefetches;
326097a140dSpatrick   for (const auto BB : L->blocks())
32709467b48Spatrick     for (auto &I : *BB) {
32809467b48Spatrick       Value *PtrValue;
32909467b48Spatrick       Instruction *MemI;
33009467b48Spatrick 
33109467b48Spatrick       if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
33209467b48Spatrick         MemI = LMemI;
33309467b48Spatrick         PtrValue = LMemI->getPointerOperand();
33409467b48Spatrick       } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
335097a140dSpatrick         if (!doPrefetchWrites()) continue;
33609467b48Spatrick         MemI = SMemI;
33709467b48Spatrick         PtrValue = SMemI->getPointerOperand();
33809467b48Spatrick       } else continue;
33909467b48Spatrick 
34009467b48Spatrick       unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
341*d415bd75Srobert       if (!TTI->shouldPrefetchAddressSpace(PtrAddrSpace))
34209467b48Spatrick         continue;
343097a140dSpatrick       NumMemAccesses++;
34409467b48Spatrick       if (L->isLoopInvariant(PtrValue))
34509467b48Spatrick         continue;
34609467b48Spatrick 
34709467b48Spatrick       const SCEV *LSCEV = SE->getSCEV(PtrValue);
34809467b48Spatrick       const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
34909467b48Spatrick       if (!LSCEVAddRec)
35009467b48Spatrick         continue;
351097a140dSpatrick       NumStridedMemAccesses++;
35209467b48Spatrick 
353097a140dSpatrick       // We don't want to double prefetch individual cache lines. If this
354097a140dSpatrick       // access is known to be within one cache line of some other one that
355097a140dSpatrick       // has already been prefetched, then don't prefetch this one as well.
35609467b48Spatrick       bool DupPref = false;
357097a140dSpatrick       for (auto &Pref : Prefetches) {
358097a140dSpatrick         const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);
35909467b48Spatrick         if (const SCEVConstant *ConstPtrDiff =
36009467b48Spatrick             dyn_cast<SCEVConstant>(PtrDiff)) {
36109467b48Spatrick           int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
36209467b48Spatrick           if (PD < (int64_t) TTI->getCacheLineSize()) {
363097a140dSpatrick             Pref.addInstruction(MemI, DT, PD);
36409467b48Spatrick             DupPref = true;
36509467b48Spatrick             break;
36609467b48Spatrick           }
36709467b48Spatrick         }
36809467b48Spatrick       }
369097a140dSpatrick       if (!DupPref)
370097a140dSpatrick         Prefetches.push_back(Prefetch(LSCEVAddRec, MemI));
371097a140dSpatrick     }
372097a140dSpatrick 
373097a140dSpatrick   unsigned TargetMinStride =
374097a140dSpatrick     getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
375097a140dSpatrick                          Prefetches.size(), HasCall);
376097a140dSpatrick 
377097a140dSpatrick   LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead
378097a140dSpatrick              << " iterations ahead (loop size: " << LoopSize << ") in "
379097a140dSpatrick              << L->getHeader()->getParent()->getName() << ": " << *L);
380097a140dSpatrick   LLVM_DEBUG(dbgs() << "Loop has: "
381097a140dSpatrick              << NumMemAccesses << " memory accesses, "
382097a140dSpatrick              << NumStridedMemAccesses << " strided memory accesses, "
383097a140dSpatrick              << Prefetches.size() << " potential prefetch(es), "
384097a140dSpatrick              << "a minimum stride of " << TargetMinStride << ", "
385097a140dSpatrick              << (HasCall ? "calls" : "no calls") << ".\n");
386097a140dSpatrick 
387097a140dSpatrick   for (auto &P : Prefetches) {
388097a140dSpatrick     // Check if the stride of the accesses is large enough to warrant a
389097a140dSpatrick     // prefetch.
390097a140dSpatrick     if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
39109467b48Spatrick       continue;
39209467b48Spatrick 
393*d415bd75Srobert     BasicBlock *BB = P.InsertPt->getParent();
394*d415bd75Srobert     SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
395097a140dSpatrick     const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
396097a140dSpatrick       SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
397097a140dSpatrick       P.LSCEVAddRec->getStepRecurrence(*SE)));
398*d415bd75Srobert     if (!SCEVE.isSafeToExpand(NextLSCEV))
39909467b48Spatrick       continue;
40009467b48Spatrick 
401*d415bd75Srobert     unsigned PtrAddrSpace = NextLSCEV->getType()->getPointerAddressSpace();
402*d415bd75Srobert     Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), PtrAddrSpace);
403097a140dSpatrick     Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);
40409467b48Spatrick 
405097a140dSpatrick     IRBuilder<> Builder(P.InsertPt);
40609467b48Spatrick     Module *M = BB->getParent()->getParent();
40709467b48Spatrick     Type *I32 = Type::getInt32Ty(BB->getContext());
40809467b48Spatrick     Function *PrefetchFunc = Intrinsic::getDeclaration(
40909467b48Spatrick         M, Intrinsic::prefetch, PrefPtrValue->getType());
41009467b48Spatrick     Builder.CreateCall(
41109467b48Spatrick         PrefetchFunc,
41209467b48Spatrick         {PrefPtrValue,
413097a140dSpatrick          ConstantInt::get(I32, P.Writes),
41409467b48Spatrick          ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
41509467b48Spatrick     ++NumPrefetches;
416097a140dSpatrick     LLVM_DEBUG(dbgs() << "  Access: "
417097a140dSpatrick                << *P.MemI->getOperand(isa<LoadInst>(P.MemI) ? 0 : 1)
418097a140dSpatrick                << ", SCEV: " << *P.LSCEVAddRec << "\n");
41909467b48Spatrick     ORE->emit([&]() {
420097a140dSpatrick         return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI)
42109467b48Spatrick           << "prefetched memory access";
42209467b48Spatrick       });
42309467b48Spatrick 
42409467b48Spatrick     MadeChange = true;
42509467b48Spatrick   }
42609467b48Spatrick 
42709467b48Spatrick   return MadeChange;
42809467b48Spatrick }
429