1 //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a Loop Data Prefetching Pass. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #define DEBUG_TYPE "loop-data-prefetch" 15 #include "llvm/Transforms/Scalar.h" 16 #include "llvm/ADT/DepthFirstIterator.h" 17 #include "llvm/ADT/Statistic.h" 18 #include "llvm/Analysis/AssumptionCache.h" 19 #include "llvm/Analysis/CodeMetrics.h" 20 #include "llvm/Analysis/InstructionSimplify.h" 21 #include "llvm/Analysis/LoopInfo.h" 22 #include "llvm/Analysis/ScalarEvolution.h" 23 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 24 #include "llvm/Analysis/ScalarEvolutionExpander.h" 25 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 26 #include "llvm/Analysis/TargetTransformInfo.h" 27 #include "llvm/Analysis/ValueTracking.h" 28 #include "llvm/IR/CFG.h" 29 #include "llvm/IR/Dominators.h" 30 #include "llvm/IR/Function.h" 31 #include "llvm/IR/IntrinsicInst.h" 32 #include "llvm/IR/Module.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 36 #include "llvm/Transforms/Utils/Local.h" 37 #include "llvm/Transforms/Utils/ValueMapper.h" 38 using namespace llvm; 39 40 // By default, we limit this to creating 16 PHIs (which is a little over half 41 // of the allocatable register set). 42 static cl::opt<bool> 43 PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), 44 cl::desc("Prefetch write addresses")); 45 46 static cl::opt<unsigned> 47 PrefetchDistance("prefetch-distance", 48 cl::desc("Number of instructions to prefetch ahead"), 49 cl::Hidden); 50 51 static cl::opt<unsigned> 52 MinPrefetchStride("min-prefetch-stride", 53 cl::desc("Min stride to add prefetches"), cl::Hidden); 54 55 static cl::opt<unsigned> MaxPrefetchIterationsAhead( 56 "max-prefetch-iters-ahead", 57 cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden); 58 59 STATISTIC(NumPrefetches, "Number of prefetches inserted"); 60 61 namespace llvm { 62 void initializeLoopDataPrefetchPass(PassRegistry&); 63 } 64 65 namespace { 66 67 class LoopDataPrefetch : public FunctionPass { 68 public: 69 static char ID; // Pass ID, replacement for typeid 70 LoopDataPrefetch() : FunctionPass(ID) { 71 initializeLoopDataPrefetchPass(*PassRegistry::getPassRegistry()); 72 } 73 74 void getAnalysisUsage(AnalysisUsage &AU) const override { 75 AU.addRequired<AssumptionCacheTracker>(); 76 AU.addPreserved<DominatorTreeWrapperPass>(); 77 AU.addRequired<LoopInfoWrapperPass>(); 78 AU.addPreserved<LoopInfoWrapperPass>(); 79 AU.addRequired<ScalarEvolutionWrapperPass>(); 80 // FIXME: For some reason, preserving SE here breaks LSR (even if 81 // this pass changes nothing). 82 // AU.addPreserved<ScalarEvolutionWrapperPass>(); 83 AU.addRequired<TargetTransformInfoWrapperPass>(); 84 } 85 86 bool runOnFunction(Function &F) override; 87 88 private: 89 bool runOnLoop(Loop *L); 90 91 /// \brief Check if the the stride of the accesses is large enough to 92 /// warrant a prefetch. 93 bool isStrideLargeEnough(const SCEVAddRecExpr *AR); 94 95 unsigned getMinPrefetchStride() { 96 if (MinPrefetchStride.getNumOccurrences() > 0) 97 return MinPrefetchStride; 98 return TTI->getMinPrefetchStride(); 99 } 100 101 unsigned getPrefetchDistance() { 102 if (PrefetchDistance.getNumOccurrences() > 0) 103 return PrefetchDistance; 104 return TTI->getPrefetchDistance(); 105 } 106 107 unsigned getMaxPrefetchIterationsAhead() { 108 if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0) 109 return MaxPrefetchIterationsAhead; 110 return TTI->getMaxPrefetchIterationsAhead(); 111 } 112 113 AssumptionCache *AC; 114 LoopInfo *LI; 115 ScalarEvolution *SE; 116 const TargetTransformInfo *TTI; 117 const DataLayout *DL; 118 }; 119 } 120 121 char LoopDataPrefetch::ID = 0; 122 INITIALIZE_PASS_BEGIN(LoopDataPrefetch, "loop-data-prefetch", 123 "Loop Data Prefetch", false, false) 124 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 125 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 126 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 127 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 128 INITIALIZE_PASS_END(LoopDataPrefetch, "loop-data-prefetch", 129 "Loop Data Prefetch", false, false) 130 131 FunctionPass *llvm::createLoopDataPrefetchPass() { return new LoopDataPrefetch(); } 132 133 bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR) { 134 unsigned TargetMinStride = getMinPrefetchStride(); 135 // No need to check if any stride goes. 136 if (TargetMinStride <= 1) 137 return true; 138 139 const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); 140 // If MinStride is set, don't prefetch unless we can ensure that stride is 141 // larger. 142 if (!ConstStride) 143 return false; 144 145 unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue()); 146 return TargetMinStride <= AbsStride; 147 } 148 149 bool LoopDataPrefetch::runOnFunction(Function &F) { 150 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 151 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 152 DL = &F.getParent()->getDataLayout(); 153 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 154 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 155 156 // If PrefetchDistance is not set, don't run the pass. This gives an 157 // opportunity for targets to run this pass for selected subtargets only 158 // (whose TTI sets PrefetchDistance). 159 if (getPrefetchDistance() == 0) 160 return false; 161 assert(TTI->getCacheLineSize() && "Cache line size is not set for target"); 162 163 bool MadeChange = false; 164 165 for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) 166 for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) 167 MadeChange |= runOnLoop(*L); 168 169 return MadeChange; 170 } 171 172 bool LoopDataPrefetch::runOnLoop(Loop *L) { 173 bool MadeChange = false; 174 175 // Only prefetch in the inner-most loop 176 if (!L->empty()) 177 return MadeChange; 178 179 SmallPtrSet<const Value *, 32> EphValues; 180 CodeMetrics::collectEphemeralValues(L, AC, EphValues); 181 182 // Calculate the number of iterations ahead to prefetch 183 CodeMetrics Metrics; 184 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 185 I != IE; ++I) { 186 187 // If the loop already has prefetches, then assume that the user knows 188 // what he or she is doing and don't add any more. 189 for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); 190 J != JE; ++J) 191 if (CallInst *CI = dyn_cast<CallInst>(J)) 192 if (Function *F = CI->getCalledFunction()) 193 if (F->getIntrinsicID() == Intrinsic::prefetch) 194 return MadeChange; 195 196 Metrics.analyzeBasicBlock(*I, *TTI, EphValues); 197 } 198 unsigned LoopSize = Metrics.NumInsts; 199 if (!LoopSize) 200 LoopSize = 1; 201 202 unsigned ItersAhead = getPrefetchDistance() / LoopSize; 203 if (!ItersAhead) 204 ItersAhead = 1; 205 206 if (ItersAhead > getMaxPrefetchIterationsAhead()) 207 return MadeChange; 208 209 DEBUG(dbgs() << "Prefetching " << ItersAhead 210 << " iterations ahead (loop size: " << LoopSize << ") in " 211 << L->getHeader()->getParent()->getName() << ": " << *L); 212 213 SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16> PrefLoads; 214 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 215 I != IE; ++I) { 216 for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); 217 J != JE; ++J) { 218 Value *PtrValue; 219 Instruction *MemI; 220 221 if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) { 222 MemI = LMemI; 223 PtrValue = LMemI->getPointerOperand(); 224 } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) { 225 if (!PrefetchWrites) continue; 226 MemI = SMemI; 227 PtrValue = SMemI->getPointerOperand(); 228 } else continue; 229 230 unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); 231 if (PtrAddrSpace) 232 continue; 233 234 if (L->isLoopInvariant(PtrValue)) 235 continue; 236 237 const SCEV *LSCEV = SE->getSCEV(PtrValue); 238 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV); 239 if (!LSCEVAddRec) 240 continue; 241 242 // Check if the the stride of the accesses is large enough to warrant a 243 // prefetch. 244 if (!isStrideLargeEnough(LSCEVAddRec)) 245 continue; 246 247 // We don't want to double prefetch individual cache lines. If this load 248 // is known to be within one cache line of some other load that has 249 // already been prefetched, then don't prefetch this one as well. 250 bool DupPref = false; 251 for (SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 252 16>::iterator K = PrefLoads.begin(), KE = PrefLoads.end(); 253 K != KE; ++K) { 254 const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, K->second); 255 if (const SCEVConstant *ConstPtrDiff = 256 dyn_cast<SCEVConstant>(PtrDiff)) { 257 int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue()); 258 if (PD < (int64_t) TTI->getCacheLineSize()) { 259 DupPref = true; 260 break; 261 } 262 } 263 } 264 if (DupPref) 265 continue; 266 267 const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr( 268 SE->getConstant(LSCEVAddRec->getType(), ItersAhead), 269 LSCEVAddRec->getStepRecurrence(*SE))); 270 if (!isSafeToExpand(NextLSCEV, *SE)) 271 continue; 272 273 PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec)); 274 275 Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace); 276 SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr"); 277 Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI); 278 279 IRBuilder<> Builder(MemI); 280 Module *M = (*I)->getParent()->getParent(); 281 Type *I32 = Type::getInt32Ty((*I)->getContext()); 282 Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch); 283 Builder.CreateCall( 284 PrefetchFunc, 285 {PrefPtrValue, 286 ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1), 287 ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)}); 288 ++NumPrefetches; 289 DEBUG(dbgs() << " Access: " << *PtrValue << ", SCEV: " << *LSCEV 290 << "\n"); 291 292 MadeChange = true; 293 } 294 } 295 296 return MadeChange; 297 } 298 299