//===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a Loop Data Prefetching Pass.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "loop-data-prefetch"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

static cl::opt<bool>
PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
               cl::desc("Prefetch write addresses"));

static cl::opt<unsigned>
    PrefetchDistance("prefetch-distance",
                     cl::desc("Number of instructions to prefetch ahead"),
                     cl::Hidden);

static cl::opt<unsigned>
    MinPrefetchStride("min-prefetch-stride",
                      cl::desc("Min stride to add prefetches"), cl::Hidden);

static cl::opt<unsigned> MaxPrefetchIterationsAhead(
    "max-prefetch-iters-ahead",
    cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);

STATISTIC(NumPrefetches, "Number of prefetches inserted");

namespace llvm {
void initializeLoopDataPrefetchPass(PassRegistry &);
}

namespace {

class LoopDataPrefetch : public FunctionPass {
public:
  static char ID; // Pass ID, replacement for typeid
  LoopDataPrefetch() : FunctionPass(ID) {
    initializeLoopDataPrefetchPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    // FIXME: For some reason, preserving SE here breaks LSR (even if
    // this pass changes nothing).
    // AU.addPreserved<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override;

private:
  bool runOnLoop(Loop *L);

  /// \brief Check if the stride of the accesses is large enough to
  /// warrant a prefetch.
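  /// With a small stride, several consecutive iterations typically touch the
  /// same cache line, so emitting a prefetch for every access would mostly
  /// request lines that are already resident or already being fetched.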
  bool isStrideLargeEnough(const SCEVAddRecExpr *AR);

  unsigned getMinPrefetchStride() {
    if (MinPrefetchStride.getNumOccurrences() > 0)
      return MinPrefetchStride;
    return TTI->getMinPrefetchStride();
  }

  unsigned getPrefetchDistance() {
    if (PrefetchDistance.getNumOccurrences() > 0)
      return PrefetchDistance;
    return TTI->getPrefetchDistance();
  }

  unsigned getMaxPrefetchIterationsAhead() {
    if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
      return MaxPrefetchIterationsAhead;
    return TTI->getMaxPrefetchIterationsAhead();
  }

  AssumptionCache *AC;
  LoopInfo *LI;
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  const DataLayout *DL;
  OptimizationRemarkEmitter *ORE;
};
}

char LoopDataPrefetch::ID = 0;
INITIALIZE_PASS_BEGIN(LoopDataPrefetch, "loop-data-prefetch",
                      "Loop Data Prefetch", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(LoopDataPrefetch, "loop-data-prefetch",
                    "Loop Data Prefetch", false, false)

FunctionPass *llvm::createLoopDataPrefetchPass() {
  return new LoopDataPrefetch();
}

bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR) {
  unsigned TargetMinStride = getMinPrefetchStride();
  // No need to check the stride if any stride is acceptable.
  if (TargetMinStride <= 1)
    return true;

  const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  // If MinStride is set, don't prefetch unless we can ensure that stride is
  // larger.
  if (!ConstStride)
    return false;

  unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
  return TargetMinStride <= AbsStride;
}

bool LoopDataPrefetch::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  DL = &F.getParent()->getDataLayout();
  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  // If PrefetchDistance is not set, don't run the pass. This gives an
  // opportunity for targets to run this pass for selected subtargets only
  // (whose TTI sets PrefetchDistance).
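  // Note that the -prefetch-distance flag takes precedence over the TTI value
  // (see getPrefetchDistance() above), so the pass can still be exercised on
  // targets that do not report a prefetch distance themselves.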
  if (getPrefetchDistance() == 0)
    return false;
  assert(TTI->getCacheLineSize() && "Cache line size is not set for target");

  bool MadeChange = false;

  for (Loop *I : *LI)
    for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
      MadeChange |= runOnLoop(*L);

  return MadeChange;
}

bool LoopDataPrefetch::runOnLoop(Loop *L) {
  bool MadeChange = false;

  // Only prefetch in the innermost loop.
  if (!L->empty())
    return MadeChange;

  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(L, AC, EphValues);

  // Calculate the number of iterations ahead to prefetch.
  CodeMetrics Metrics;
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I) {

    // If the loop already has prefetches, then assume that the user knows
    // what they are doing and don't add any more.
    for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
         J != JE; ++J)
      if (CallInst *CI = dyn_cast<CallInst>(J))
        if (Function *F = CI->getCalledFunction())
          if (F->getIntrinsicID() == Intrinsic::prefetch)
            return MadeChange;

    Metrics.analyzeBasicBlock(*I, *TTI, EphValues);
  }
  unsigned LoopSize = Metrics.NumInsts;
  if (!LoopSize)
    LoopSize = 1;

  unsigned ItersAhead = getPrefetchDistance() / LoopSize;
  if (!ItersAhead)
    ItersAhead = 1;

  if (ItersAhead > getMaxPrefetchIterationsAhead())
    return MadeChange;

  Function *F = L->getHeader()->getParent();
  DEBUG(dbgs() << "Prefetching " << ItersAhead
               << " iterations ahead (loop size: " << LoopSize << ") in "
               << F->getName() << ": " << *L);

  SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16> PrefLoads;
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I) {
    for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
         J != JE; ++J) {
      Value *PtrValue;
      Instruction *MemI;

      if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
        MemI = LMemI;
        PtrValue = LMemI->getPointerOperand();
      } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
        if (!PrefetchWrites) continue;
        MemI = SMemI;
        PtrValue = SMemI->getPointerOperand();
      } else continue;

      unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
      if (PtrAddrSpace)
        continue;

      if (L->isLoopInvariant(PtrValue))
        continue;

      const SCEV *LSCEV = SE->getSCEV(PtrValue);
      const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
      if (!LSCEVAddRec)
        continue;

      // Check if the stride of the accesses is large enough to warrant a
      // prefetch.
      if (!isStrideLargeEnough(LSCEVAddRec))
        continue;

      // We don't want to double prefetch individual cache lines. If this load
      // is known to be within one cache line of some other load that has
      // already been prefetched, then don't prefetch this one as well.
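      // The distance between the two address recurrences is computed
      // symbolically with SCEV; only a difference that folds to a constant
      // smaller than the cache line size suppresses the new prefetch.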
      bool DupPref = false;
      for (const auto &PrefLoad : PrefLoads) {
        const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, PrefLoad.second);
        if (const SCEVConstant *ConstPtrDiff =
                dyn_cast<SCEVConstant>(PtrDiff)) {
          int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
          if (PD < (int64_t) TTI->getCacheLineSize()) {
            DupPref = true;
            break;
          }
        }
      }
      if (DupPref)
        continue;

      const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr(
        SE->getConstant(LSCEVAddRec->getType(), ItersAhead),
        LSCEVAddRec->getStepRecurrence(*SE)));
      if (!isSafeToExpand(NextLSCEV, *SE))
        continue;

      PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));

      Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace);
      SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr");
      Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);

      IRBuilder<> Builder(MemI);
      Module *M = (*I)->getParent()->getParent();
      Type *I32 = Type::getInt32Ty((*I)->getContext());
      Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch);
      Builder.CreateCall(
          PrefetchFunc,
          {PrefPtrValue,
           ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1),
           ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
      ++NumPrefetches;
      DEBUG(dbgs() << "  Access: " << *PtrValue << ", SCEV: " << *LSCEV
                   << "\n");
      ORE->emitOptimizationRemark(DEBUG_TYPE, MemI, "prefetched memory access");

      MadeChange = true;
    }
  }

  return MadeChange;
}
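// Note: the pass is effectively a no-op unless the target's TTI reports a
// nonzero prefetch distance (or -prefetch-distance is given explicitly), and
// it expects TTI->getCacheLineSize() to be nonzero as well, so targets opt in
// by implementing these hooks for the relevant subtargets.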