1 //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a Loop Data Prefetching Pass. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #define DEBUG_TYPE "loop-data-prefetch" 15 #include "llvm/Transforms/Scalar.h" 16 #include "llvm/ADT/DepthFirstIterator.h" 17 #include "llvm/ADT/Statistic.h" 18 #include "llvm/Analysis/AssumptionCache.h" 19 #include "llvm/Analysis/CodeMetrics.h" 20 #include "llvm/Analysis/InstructionSimplify.h" 21 #include "llvm/Analysis/LoopInfo.h" 22 #include "llvm/Analysis/ScalarEvolution.h" 23 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 24 #include "llvm/Analysis/ScalarEvolutionExpander.h" 25 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 26 #include "llvm/Analysis/TargetTransformInfo.h" 27 #include "llvm/Analysis/ValueTracking.h" 28 #include "llvm/IR/CFG.h" 29 #include "llvm/IR/Dominators.h" 30 #include "llvm/IR/Function.h" 31 #include "llvm/IR/IntrinsicInst.h" 32 #include "llvm/IR/Module.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 36 #include "llvm/Transforms/Utils/Local.h" 37 #include "llvm/Transforms/Utils/ValueMapper.h" 38 using namespace llvm; 39 40 // By default, we limit this to creating 16 PHIs (which is a little over half 41 // of the allocatable register set). 42 static cl::opt<bool> 43 PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), 44 cl::desc("Prefetch write addresses")); 45 46 namespace llvm { 47 void initializeLoopDataPrefetchPass(PassRegistry&); 48 } 49 50 namespace { 51 52 class LoopDataPrefetch : public FunctionPass { 53 public: 54 static char ID; // Pass ID, replacement for typeid 55 LoopDataPrefetch() : FunctionPass(ID) { 56 initializeLoopDataPrefetchPass(*PassRegistry::getPassRegistry()); 57 } 58 59 void getAnalysisUsage(AnalysisUsage &AU) const override { 60 AU.addRequired<AssumptionCacheTracker>(); 61 AU.addPreserved<DominatorTreeWrapperPass>(); 62 AU.addRequired<LoopInfoWrapperPass>(); 63 AU.addPreserved<LoopInfoWrapperPass>(); 64 AU.addRequired<ScalarEvolutionWrapperPass>(); 65 // FIXME: For some reason, preserving SE here breaks LSR (even if 66 // this pass changes nothing). 67 // AU.addPreserved<ScalarEvolutionWrapperPass>(); 68 AU.addRequired<TargetTransformInfoWrapperPass>(); 69 } 70 71 bool runOnFunction(Function &F) override; 72 bool runOnLoop(Loop *L); 73 74 private: 75 AssumptionCache *AC; 76 LoopInfo *LI; 77 ScalarEvolution *SE; 78 const TargetTransformInfo *TTI; 79 const DataLayout *DL; 80 }; 81 } 82 83 char LoopDataPrefetch::ID = 0; 84 INITIALIZE_PASS_BEGIN(LoopDataPrefetch, "loop-data-prefetch", 85 "Loop Data Prefetch", false, false) 86 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 87 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 88 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 89 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 90 INITIALIZE_PASS_END(LoopDataPrefetch, "loop-data-prefetch", 91 "Loop Data Prefetch", false, false) 92 93 FunctionPass *llvm::createLoopDataPrefetchPass() { return new LoopDataPrefetch(); } 94 95 bool LoopDataPrefetch::runOnFunction(Function &F) { 96 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 97 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 98 DL = &F.getParent()->getDataLayout(); 99 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 100 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 101 102 assert(TTI->getCacheLineSize() && "Cache line size is not set for target"); 103 assert(TTI->getPrefetchDistance() && 104 "Prefetch distance is not set for target"); 105 106 bool MadeChange = false; 107 108 for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) 109 for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) 110 MadeChange |= runOnLoop(*L); 111 112 return MadeChange; 113 } 114 115 bool LoopDataPrefetch::runOnLoop(Loop *L) { 116 bool MadeChange = false; 117 118 // Only prefetch in the inner-most loop 119 if (!L->empty()) 120 return MadeChange; 121 122 SmallPtrSet<const Value *, 32> EphValues; 123 CodeMetrics::collectEphemeralValues(L, AC, EphValues); 124 125 // Calculate the number of iterations ahead to prefetch 126 CodeMetrics Metrics; 127 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 128 I != IE; ++I) { 129 130 // If the loop already has prefetches, then assume that the user knows 131 // what he or she is doing and don't add any more. 132 for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); 133 J != JE; ++J) 134 if (CallInst *CI = dyn_cast<CallInst>(J)) 135 if (Function *F = CI->getCalledFunction()) 136 if (F->getIntrinsicID() == Intrinsic::prefetch) 137 return MadeChange; 138 139 Metrics.analyzeBasicBlock(*I, *TTI, EphValues); 140 } 141 unsigned LoopSize = Metrics.NumInsts; 142 if (!LoopSize) 143 LoopSize = 1; 144 145 unsigned ItersAhead = TTI->getPrefetchDistance() / LoopSize; 146 if (!ItersAhead) 147 ItersAhead = 1; 148 149 SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16> PrefLoads; 150 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 151 I != IE; ++I) { 152 for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); 153 J != JE; ++J) { 154 Value *PtrValue; 155 Instruction *MemI; 156 157 if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) { 158 MemI = LMemI; 159 PtrValue = LMemI->getPointerOperand(); 160 } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) { 161 if (!PrefetchWrites) continue; 162 MemI = SMemI; 163 PtrValue = SMemI->getPointerOperand(); 164 } else continue; 165 166 unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); 167 if (PtrAddrSpace) 168 continue; 169 170 if (L->isLoopInvariant(PtrValue)) 171 continue; 172 173 const SCEV *LSCEV = SE->getSCEV(PtrValue); 174 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV); 175 if (!LSCEVAddRec) 176 continue; 177 178 // We don't want to double prefetch individual cache lines. If this load 179 // is known to be within one cache line of some other load that has 180 // already been prefetched, then don't prefetch this one as well. 181 bool DupPref = false; 182 for (SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 183 16>::iterator K = PrefLoads.begin(), KE = PrefLoads.end(); 184 K != KE; ++K) { 185 const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, K->second); 186 if (const SCEVConstant *ConstPtrDiff = 187 dyn_cast<SCEVConstant>(PtrDiff)) { 188 int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue()); 189 if (PD < (int64_t) TTI->getCacheLineSize()) { 190 DupPref = true; 191 break; 192 } 193 } 194 } 195 if (DupPref) 196 continue; 197 198 const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr( 199 SE->getConstant(LSCEVAddRec->getType(), ItersAhead), 200 LSCEVAddRec->getStepRecurrence(*SE))); 201 if (!isSafeToExpand(NextLSCEV, *SE)) 202 continue; 203 204 PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec)); 205 206 Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace); 207 SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr"); 208 Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI); 209 210 IRBuilder<> Builder(MemI); 211 Module *M = (*I)->getParent()->getParent(); 212 Type *I32 = Type::getInt32Ty((*I)->getContext()); 213 Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch); 214 Builder.CreateCall( 215 PrefetchFunc, 216 {PrefPtrValue, 217 ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1), 218 ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)}); 219 220 MadeChange = true; 221 } 222 } 223 224 return MadeChange; 225 } 226 227