1 //===- MemProfiler.cpp - memory allocation and access profiler ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is a part of MemProfiler. Memory accesses are instrumented 10 // to increment the access count held in a shadow memory location, or 11 // alternatively to call into the runtime. Memory intrinsic calls (memmove, 12 // memcpy, memset) are changed to call the memory profiling runtime version 13 // instead. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/Instrumentation/MemProfiler.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Analysis/MemoryBuiltins.h" 22 #include "llvm/Analysis/MemoryProfileInfo.h" 23 #include "llvm/Analysis/TargetLibraryInfo.h" 24 #include "llvm/Analysis/ValueTracking.h" 25 #include "llvm/IR/Constant.h" 26 #include "llvm/IR/DataLayout.h" 27 #include "llvm/IR/DiagnosticInfo.h" 28 #include "llvm/IR/Function.h" 29 #include "llvm/IR/GlobalValue.h" 30 #include "llvm/IR/IRBuilder.h" 31 #include "llvm/IR/Instruction.h" 32 #include "llvm/IR/IntrinsicInst.h" 33 #include "llvm/IR/Module.h" 34 #include "llvm/IR/Type.h" 35 #include "llvm/IR/Value.h" 36 #include "llvm/ProfileData/InstrProf.h" 37 #include "llvm/ProfileData/InstrProfReader.h" 38 #include "llvm/Support/BLAKE3.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/HashBuilder.h" 42 #include "llvm/Support/VirtualFileSystem.h" 43 #include "llvm/TargetParser/Triple.h" 44 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 45 #include "llvm/Transforms/Utils/LongestCommonSequence.h" 46 #include "llvm/Transforms/Utils/ModuleUtils.h" 47 #include <map> 48 #include <set> 49 50 using namespace llvm; 51 using namespace llvm::memprof; 52 53 #define DEBUG_TYPE "memprof" 54 55 namespace llvm { 56 extern cl::opt<bool> PGOWarnMissing; 57 extern cl::opt<bool> NoPGOWarnMismatch; 58 extern cl::opt<bool> NoPGOWarnMismatchComdatWeak; 59 } // namespace llvm 60 61 constexpr int LLVM_MEM_PROFILER_VERSION = 1; 62 63 // Size of memory mapped to a single shadow location. 64 constexpr uint64_t DefaultMemGranularity = 64; 65 66 // Size of memory mapped to a single histogram bucket. 67 constexpr uint64_t HistogramGranularity = 8; 68 69 // Scale from granularity down to shadow size. 70 constexpr uint64_t DefaultShadowScale = 3; 71 72 constexpr char MemProfModuleCtorName[] = "memprof.module_ctor"; 73 constexpr uint64_t MemProfCtorAndDtorPriority = 1; 74 // On Emscripten, the system needs more than one priorities for constructors. 75 constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50; 76 constexpr char MemProfInitName[] = "__memprof_init"; 77 constexpr char MemProfVersionCheckNamePrefix[] = 78 "__memprof_version_mismatch_check_v"; 79 80 constexpr char MemProfShadowMemoryDynamicAddress[] = 81 "__memprof_shadow_memory_dynamic_address"; 82 83 constexpr char MemProfFilenameVar[] = "__memprof_profile_filename"; 84 85 constexpr char MemProfHistogramFlagVar[] = "__memprof_histogram"; 86 87 // Command-line flags. 88 89 static cl::opt<bool> ClInsertVersionCheck( 90 "memprof-guard-against-version-mismatch", 91 cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden, 92 cl::init(true)); 93 94 // This flag may need to be replaced with -f[no-]memprof-reads. 95 static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads", 96 cl::desc("instrument read instructions"), 97 cl::Hidden, cl::init(true)); 98 99 static cl::opt<bool> 100 ClInstrumentWrites("memprof-instrument-writes", 101 cl::desc("instrument write instructions"), cl::Hidden, 102 cl::init(true)); 103 104 static cl::opt<bool> ClInstrumentAtomics( 105 "memprof-instrument-atomics", 106 cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, 107 cl::init(true)); 108 109 static cl::opt<bool> ClUseCalls( 110 "memprof-use-callbacks", 111 cl::desc("Use callbacks instead of inline instrumentation sequences."), 112 cl::Hidden, cl::init(false)); 113 114 static cl::opt<std::string> 115 ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix", 116 cl::desc("Prefix for memory access callbacks"), 117 cl::Hidden, cl::init("__memprof_")); 118 119 // These flags allow to change the shadow mapping. 120 // The shadow mapping looks like 121 // Shadow = ((Mem & mask) >> scale) + offset 122 123 static cl::opt<int> ClMappingScale("memprof-mapping-scale", 124 cl::desc("scale of memprof shadow mapping"), 125 cl::Hidden, cl::init(DefaultShadowScale)); 126 127 static cl::opt<int> 128 ClMappingGranularity("memprof-mapping-granularity", 129 cl::desc("granularity of memprof shadow mapping"), 130 cl::Hidden, cl::init(DefaultMemGranularity)); 131 132 static cl::opt<bool> ClStack("memprof-instrument-stack", 133 cl::desc("Instrument scalar stack variables"), 134 cl::Hidden, cl::init(false)); 135 136 // Debug flags. 137 138 static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden, 139 cl::init(0)); 140 141 static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden, 142 cl::desc("Debug func")); 143 144 static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"), 145 cl::Hidden, cl::init(-1)); 146 147 static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"), 148 cl::Hidden, cl::init(-1)); 149 150 // By default disable matching of allocation profiles onto operator new that 151 // already explicitly pass a hot/cold hint, since we don't currently 152 // override these hints anyway. 153 static cl::opt<bool> ClMemProfMatchHotColdNew( 154 "memprof-match-hot-cold-new", 155 cl::desc( 156 "Match allocation profiles onto existing hot/cold operator new calls"), 157 cl::Hidden, cl::init(false)); 158 159 static cl::opt<bool> ClHistogram("memprof-histogram", 160 cl::desc("Collect access count histograms"), 161 cl::Hidden, cl::init(false)); 162 163 static cl::opt<bool> 164 ClPrintMemProfMatchInfo("memprof-print-match-info", 165 cl::desc("Print matching stats for each allocation " 166 "context in this module's profiles"), 167 cl::Hidden, cl::init(false)); 168 169 static cl::opt<std::string> 170 MemprofRuntimeDefaultOptions("memprof-runtime-default-options", 171 cl::desc("The default memprof options"), 172 cl::Hidden, cl::init("")); 173 174 extern cl::opt<bool> MemProfReportHintedSizes; 175 176 static cl::opt<unsigned> MinMatchedColdBytePercent( 177 "memprof-matching-cold-threshold", cl::init(100), cl::Hidden, 178 cl::desc("Min percent of cold bytes matched to hint allocation cold")); 179 180 // Instrumentation statistics 181 STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); 182 STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); 183 STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads"); 184 STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes"); 185 186 // Matching statistics 187 STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); 188 STATISTIC(NumOfMemProfMismatch, 189 "Number of functions having mismatched memory profile hash."); 190 STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile."); 191 STATISTIC(NumOfMemProfAllocContextProfiles, 192 "Number of alloc contexts in memory profile."); 193 STATISTIC(NumOfMemProfCallSiteProfiles, 194 "Number of callsites in memory profile."); 195 STATISTIC(NumOfMemProfMatchedAllocContexts, 196 "Number of matched memory profile alloc contexts."); 197 STATISTIC(NumOfMemProfMatchedAllocs, 198 "Number of matched memory profile allocs."); 199 STATISTIC(NumOfMemProfMatchedCallSites, 200 "Number of matched memory profile callsites."); 201 202 namespace { 203 204 /// This struct defines the shadow mapping using the rule: 205 /// shadow = ((mem & mask) >> Scale) ADD DynamicShadowOffset. 206 struct ShadowMapping { 207 ShadowMapping() { 208 Scale = ClMappingScale; 209 Granularity = ClHistogram ? HistogramGranularity : ClMappingGranularity; 210 Mask = ~(Granularity - 1); 211 } 212 213 int Scale; 214 int Granularity; 215 uint64_t Mask; // Computed as ~(Granularity-1) 216 }; 217 218 static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) { 219 return TargetTriple.isOSEmscripten() ? MemProfEmscriptenCtorAndDtorPriority 220 : MemProfCtorAndDtorPriority; 221 } 222 223 struct InterestingMemoryAccess { 224 Value *Addr = nullptr; 225 bool IsWrite; 226 Type *AccessTy; 227 Value *MaybeMask = nullptr; 228 }; 229 230 /// Instrument the code in module to profile memory accesses. 231 class MemProfiler { 232 public: 233 MemProfiler(Module &M) { 234 C = &(M.getContext()); 235 LongSize = M.getDataLayout().getPointerSizeInBits(); 236 IntptrTy = Type::getIntNTy(*C, LongSize); 237 PtrTy = PointerType::getUnqual(*C); 238 } 239 240 /// If it is an interesting memory access, populate information 241 /// about the access and return a InterestingMemoryAccess struct. 242 /// Otherwise return std::nullopt. 243 std::optional<InterestingMemoryAccess> 244 isInterestingMemoryAccess(Instruction *I) const; 245 246 void instrumentMop(Instruction *I, const DataLayout &DL, 247 InterestingMemoryAccess &Access); 248 void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, 249 Value *Addr, bool IsWrite); 250 void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask, 251 Instruction *I, Value *Addr, Type *AccessTy, 252 bool IsWrite); 253 void instrumentMemIntrinsic(MemIntrinsic *MI); 254 Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); 255 bool instrumentFunction(Function &F); 256 bool maybeInsertMemProfInitAtFunctionEntry(Function &F); 257 bool insertDynamicShadowAtFunctionEntry(Function &F); 258 259 private: 260 void initializeCallbacks(Module &M); 261 262 LLVMContext *C; 263 int LongSize; 264 Type *IntptrTy; 265 PointerType *PtrTy; 266 ShadowMapping Mapping; 267 268 // These arrays is indexed by AccessIsWrite 269 FunctionCallee MemProfMemoryAccessCallback[2]; 270 271 FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset; 272 Value *DynamicShadowOffset = nullptr; 273 }; 274 275 class ModuleMemProfiler { 276 public: 277 ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); } 278 279 bool instrumentModule(Module &); 280 281 private: 282 Triple TargetTriple; 283 ShadowMapping Mapping; 284 Function *MemProfCtorFunction = nullptr; 285 }; 286 287 } // end anonymous namespace 288 289 MemProfilerPass::MemProfilerPass() = default; 290 291 PreservedAnalyses MemProfilerPass::run(Function &F, 292 AnalysisManager<Function> &AM) { 293 assert((!ClHistogram || ClMappingGranularity == DefaultMemGranularity) && 294 "Memprof with histogram only supports default mapping granularity"); 295 Module &M = *F.getParent(); 296 MemProfiler Profiler(M); 297 if (Profiler.instrumentFunction(F)) 298 return PreservedAnalyses::none(); 299 return PreservedAnalyses::all(); 300 } 301 302 ModuleMemProfilerPass::ModuleMemProfilerPass() = default; 303 304 PreservedAnalyses ModuleMemProfilerPass::run(Module &M, 305 AnalysisManager<Module> &AM) { 306 307 ModuleMemProfiler Profiler(M); 308 if (Profiler.instrumentModule(M)) 309 return PreservedAnalyses::none(); 310 return PreservedAnalyses::all(); 311 } 312 313 Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) { 314 // (Shadow & mask) >> scale 315 Shadow = IRB.CreateAnd(Shadow, Mapping.Mask); 316 Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); 317 // (Shadow >> scale) | offset 318 assert(DynamicShadowOffset); 319 return IRB.CreateAdd(Shadow, DynamicShadowOffset); 320 } 321 322 // Instrument memset/memmove/memcpy 323 void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) { 324 IRBuilder<> IRB(MI); 325 if (isa<MemTransferInst>(MI)) { 326 IRB.CreateCall(isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy, 327 {MI->getOperand(0), MI->getOperand(1), 328 IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); 329 } else if (isa<MemSetInst>(MI)) { 330 IRB.CreateCall( 331 MemProfMemset, 332 {MI->getOperand(0), 333 IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false), 334 IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); 335 } 336 MI->eraseFromParent(); 337 } 338 339 std::optional<InterestingMemoryAccess> 340 MemProfiler::isInterestingMemoryAccess(Instruction *I) const { 341 // Do not instrument the load fetching the dynamic shadow address. 342 if (DynamicShadowOffset == I) 343 return std::nullopt; 344 345 InterestingMemoryAccess Access; 346 347 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 348 if (!ClInstrumentReads) 349 return std::nullopt; 350 Access.IsWrite = false; 351 Access.AccessTy = LI->getType(); 352 Access.Addr = LI->getPointerOperand(); 353 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 354 if (!ClInstrumentWrites) 355 return std::nullopt; 356 Access.IsWrite = true; 357 Access.AccessTy = SI->getValueOperand()->getType(); 358 Access.Addr = SI->getPointerOperand(); 359 } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { 360 if (!ClInstrumentAtomics) 361 return std::nullopt; 362 Access.IsWrite = true; 363 Access.AccessTy = RMW->getValOperand()->getType(); 364 Access.Addr = RMW->getPointerOperand(); 365 } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { 366 if (!ClInstrumentAtomics) 367 return std::nullopt; 368 Access.IsWrite = true; 369 Access.AccessTy = XCHG->getCompareOperand()->getType(); 370 Access.Addr = XCHG->getPointerOperand(); 371 } else if (auto *CI = dyn_cast<CallInst>(I)) { 372 auto *F = CI->getCalledFunction(); 373 if (F && (F->getIntrinsicID() == Intrinsic::masked_load || 374 F->getIntrinsicID() == Intrinsic::masked_store)) { 375 unsigned OpOffset = 0; 376 if (F->getIntrinsicID() == Intrinsic::masked_store) { 377 if (!ClInstrumentWrites) 378 return std::nullopt; 379 // Masked store has an initial operand for the value. 380 OpOffset = 1; 381 Access.AccessTy = CI->getArgOperand(0)->getType(); 382 Access.IsWrite = true; 383 } else { 384 if (!ClInstrumentReads) 385 return std::nullopt; 386 Access.AccessTy = CI->getType(); 387 Access.IsWrite = false; 388 } 389 390 auto *BasePtr = CI->getOperand(0 + OpOffset); 391 Access.MaybeMask = CI->getOperand(2 + OpOffset); 392 Access.Addr = BasePtr; 393 } 394 } 395 396 if (!Access.Addr) 397 return std::nullopt; 398 399 // Do not instrument accesses from different address spaces; we cannot deal 400 // with them. 401 Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType()); 402 if (PtrTy->getPointerAddressSpace() != 0) 403 return std::nullopt; 404 405 // Ignore swifterror addresses. 406 // swifterror memory addresses are mem2reg promoted by instruction 407 // selection. As such they cannot have regular uses like an instrumentation 408 // function and it makes no sense to track them as memory. 409 if (Access.Addr->isSwiftError()) 410 return std::nullopt; 411 412 // Peel off GEPs and BitCasts. 413 auto *Addr = Access.Addr->stripInBoundsOffsets(); 414 415 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { 416 // Do not instrument PGO counter updates. 417 if (GV->hasSection()) { 418 StringRef SectionName = GV->getSection(); 419 // Check if the global is in the PGO counters section. 420 auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat(); 421 if (SectionName.ends_with( 422 getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false))) 423 return std::nullopt; 424 } 425 426 // Do not instrument accesses to LLVM internal variables. 427 if (GV->getName().starts_with("__llvm")) 428 return std::nullopt; 429 } 430 431 return Access; 432 } 433 434 void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask, 435 Instruction *I, Value *Addr, 436 Type *AccessTy, bool IsWrite) { 437 auto *VTy = cast<FixedVectorType>(AccessTy); 438 unsigned Num = VTy->getNumElements(); 439 auto *Zero = ConstantInt::get(IntptrTy, 0); 440 for (unsigned Idx = 0; Idx < Num; ++Idx) { 441 Value *InstrumentedAddress = nullptr; 442 Instruction *InsertBefore = I; 443 if (auto *Vector = dyn_cast<ConstantVector>(Mask)) { 444 // dyn_cast as we might get UndefValue 445 if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) { 446 if (Masked->isZero()) 447 // Mask is constant false, so no instrumentation needed. 448 continue; 449 // If we have a true or undef value, fall through to instrumentAddress. 450 // with InsertBefore == I 451 } 452 } else { 453 IRBuilder<> IRB(I); 454 Value *MaskElem = IRB.CreateExtractElement(Mask, Idx); 455 Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false); 456 InsertBefore = ThenTerm; 457 } 458 459 IRBuilder<> IRB(InsertBefore); 460 InstrumentedAddress = 461 IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)}); 462 instrumentAddress(I, InsertBefore, InstrumentedAddress, IsWrite); 463 } 464 } 465 466 void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL, 467 InterestingMemoryAccess &Access) { 468 // Skip instrumentation of stack accesses unless requested. 469 if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) { 470 if (Access.IsWrite) 471 ++NumSkippedStackWrites; 472 else 473 ++NumSkippedStackReads; 474 return; 475 } 476 477 if (Access.IsWrite) 478 NumInstrumentedWrites++; 479 else 480 NumInstrumentedReads++; 481 482 if (Access.MaybeMask) { 483 instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr, 484 Access.AccessTy, Access.IsWrite); 485 } else { 486 // Since the access counts will be accumulated across the entire allocation, 487 // we only update the shadow access count for the first location and thus 488 // don't need to worry about alignment and type size. 489 instrumentAddress(I, I, Access.Addr, Access.IsWrite); 490 } 491 } 492 493 void MemProfiler::instrumentAddress(Instruction *OrigIns, 494 Instruction *InsertBefore, Value *Addr, 495 bool IsWrite) { 496 IRBuilder<> IRB(InsertBefore); 497 Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); 498 499 if (ClUseCalls) { 500 IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong); 501 return; 502 } 503 504 Type *ShadowTy = ClHistogram ? Type::getInt8Ty(*C) : Type::getInt64Ty(*C); 505 Type *ShadowPtrTy = PointerType::get(ShadowTy, 0); 506 507 Value *ShadowPtr = memToShadow(AddrLong, IRB); 508 Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy); 509 Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr); 510 // If we are profiling with histograms, add overflow protection at 255. 511 if (ClHistogram) { 512 Value *MaxCount = ConstantInt::get(Type::getInt8Ty(*C), 255); 513 Value *Cmp = IRB.CreateICmpULT(ShadowValue, MaxCount); 514 Instruction *IncBlock = 515 SplitBlockAndInsertIfThen(Cmp, InsertBefore, /*Unreachable=*/false); 516 IRB.SetInsertPoint(IncBlock); 517 } 518 Value *Inc = ConstantInt::get(ShadowTy, 1); 519 ShadowValue = IRB.CreateAdd(ShadowValue, Inc); 520 IRB.CreateStore(ShadowValue, ShadowAddr); 521 } 522 523 // Create the variable for the profile file name. 524 void createProfileFileNameVar(Module &M) { 525 const MDString *MemProfFilename = 526 dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename")); 527 if (!MemProfFilename) 528 return; 529 assert(!MemProfFilename->getString().empty() && 530 "Unexpected MemProfProfileFilename metadata with empty string"); 531 Constant *ProfileNameConst = ConstantDataArray::getString( 532 M.getContext(), MemProfFilename->getString(), true); 533 GlobalVariable *ProfileNameVar = new GlobalVariable( 534 M, ProfileNameConst->getType(), /*isConstant=*/true, 535 GlobalValue::WeakAnyLinkage, ProfileNameConst, MemProfFilenameVar); 536 Triple TT(M.getTargetTriple()); 537 if (TT.supportsCOMDAT()) { 538 ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage); 539 ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar)); 540 } 541 } 542 543 // Set MemprofHistogramFlag as a Global veriable in IR. This makes it accessible 544 // to the runtime, changing shadow count behavior. 545 void createMemprofHistogramFlagVar(Module &M) { 546 const StringRef VarName(MemProfHistogramFlagVar); 547 Type *IntTy1 = Type::getInt1Ty(M.getContext()); 548 auto MemprofHistogramFlag = new GlobalVariable( 549 M, IntTy1, true, GlobalValue::WeakAnyLinkage, 550 Constant::getIntegerValue(IntTy1, APInt(1, ClHistogram)), VarName); 551 Triple TT(M.getTargetTriple()); 552 if (TT.supportsCOMDAT()) { 553 MemprofHistogramFlag->setLinkage(GlobalValue::ExternalLinkage); 554 MemprofHistogramFlag->setComdat(M.getOrInsertComdat(VarName)); 555 } 556 appendToCompilerUsed(M, MemprofHistogramFlag); 557 } 558 559 void createMemprofDefaultOptionsVar(Module &M) { 560 Constant *OptionsConst = ConstantDataArray::getString( 561 M.getContext(), MemprofRuntimeDefaultOptions, /*AddNull=*/true); 562 GlobalVariable *OptionsVar = 563 new GlobalVariable(M, OptionsConst->getType(), /*isConstant=*/true, 564 GlobalValue::WeakAnyLinkage, OptionsConst, 565 "__memprof_default_options_str"); 566 Triple TT(M.getTargetTriple()); 567 if (TT.supportsCOMDAT()) { 568 OptionsVar->setLinkage(GlobalValue::ExternalLinkage); 569 OptionsVar->setComdat(M.getOrInsertComdat(OptionsVar->getName())); 570 } 571 } 572 573 bool ModuleMemProfiler::instrumentModule(Module &M) { 574 575 // Create a module constructor. 576 std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION); 577 std::string VersionCheckName = 578 ClInsertVersionCheck ? (MemProfVersionCheckNamePrefix + MemProfVersion) 579 : ""; 580 std::tie(MemProfCtorFunction, std::ignore) = 581 createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName, 582 MemProfInitName, /*InitArgTypes=*/{}, 583 /*InitArgs=*/{}, VersionCheckName); 584 585 const uint64_t Priority = getCtorAndDtorPriority(TargetTriple); 586 appendToGlobalCtors(M, MemProfCtorFunction, Priority); 587 588 createProfileFileNameVar(M); 589 590 createMemprofHistogramFlagVar(M); 591 592 createMemprofDefaultOptionsVar(M); 593 594 return true; 595 } 596 597 void MemProfiler::initializeCallbacks(Module &M) { 598 IRBuilder<> IRB(*C); 599 600 for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { 601 const std::string TypeStr = AccessIsWrite ? "store" : "load"; 602 const std::string HistPrefix = ClHistogram ? "hist_" : ""; 603 604 SmallVector<Type *, 2> Args1{1, IntptrTy}; 605 MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction( 606 ClMemoryAccessCallbackPrefix + HistPrefix + TypeStr, 607 FunctionType::get(IRB.getVoidTy(), Args1, false)); 608 } 609 MemProfMemmove = M.getOrInsertFunction( 610 ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy); 611 MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy", 612 PtrTy, PtrTy, PtrTy, IntptrTy); 613 MemProfMemset = 614 M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", PtrTy, 615 PtrTy, IRB.getInt32Ty(), IntptrTy); 616 } 617 618 bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) { 619 // For each NSObject descendant having a +load method, this method is invoked 620 // by the ObjC runtime before any of the static constructors is called. 621 // Therefore we need to instrument such methods with a call to __memprof_init 622 // at the beginning in order to initialize our runtime before any access to 623 // the shadow memory. 624 // We cannot just ignore these methods, because they may call other 625 // instrumented functions. 626 if (F.getName().contains(" load]")) { 627 FunctionCallee MemProfInitFunction = 628 declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {}); 629 IRBuilder<> IRB(&F.front(), F.front().begin()); 630 IRB.CreateCall(MemProfInitFunction, {}); 631 return true; 632 } 633 return false; 634 } 635 636 bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) { 637 IRBuilder<> IRB(&F.front().front()); 638 Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal( 639 MemProfShadowMemoryDynamicAddress, IntptrTy); 640 if (F.getParent()->getPICLevel() == PICLevel::NotPIC) 641 cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true); 642 DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress); 643 return true; 644 } 645 646 bool MemProfiler::instrumentFunction(Function &F) { 647 if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) 648 return false; 649 if (ClDebugFunc == F.getName()) 650 return false; 651 if (F.getName().starts_with("__memprof_")) 652 return false; 653 654 bool FunctionModified = false; 655 656 // If needed, insert __memprof_init. 657 // This function needs to be called even if the function body is not 658 // instrumented. 659 if (maybeInsertMemProfInitAtFunctionEntry(F)) 660 FunctionModified = true; 661 662 LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n"); 663 664 initializeCallbacks(*F.getParent()); 665 666 SmallVector<Instruction *, 16> ToInstrument; 667 668 // Fill the set of memory operations to instrument. 669 for (auto &BB : F) { 670 for (auto &Inst : BB) { 671 if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst)) 672 ToInstrument.push_back(&Inst); 673 } 674 } 675 676 if (ToInstrument.empty()) { 677 LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified 678 << " " << F << "\n"); 679 680 return FunctionModified; 681 } 682 683 FunctionModified |= insertDynamicShadowAtFunctionEntry(F); 684 685 int NumInstrumented = 0; 686 for (auto *Inst : ToInstrument) { 687 if (ClDebugMin < 0 || ClDebugMax < 0 || 688 (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { 689 std::optional<InterestingMemoryAccess> Access = 690 isInterestingMemoryAccess(Inst); 691 if (Access) 692 instrumentMop(Inst, F.getDataLayout(), *Access); 693 else 694 instrumentMemIntrinsic(cast<MemIntrinsic>(Inst)); 695 } 696 NumInstrumented++; 697 } 698 699 if (NumInstrumented > 0) 700 FunctionModified = true; 701 702 LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " " 703 << F << "\n"); 704 705 return FunctionModified; 706 } 707 708 static void addCallsiteMetadata(Instruction &I, 709 ArrayRef<uint64_t> InlinedCallStack, 710 LLVMContext &Ctx) { 711 I.setMetadata(LLVMContext::MD_callsite, 712 buildCallstackMetadata(InlinedCallStack, Ctx)); 713 } 714 715 static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, 716 uint32_t Column) { 717 llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little> 718 HashBuilder; 719 HashBuilder.add(Function, LineOffset, Column); 720 llvm::BLAKE3Result<8> Hash = HashBuilder.final(); 721 uint64_t Id; 722 std::memcpy(&Id, Hash.data(), sizeof(Hash)); 723 return Id; 724 } 725 726 static uint64_t computeStackId(const memprof::Frame &Frame) { 727 return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column); 728 } 729 730 // Helper to generate a single hash id for a given callstack, used for emitting 731 // matching statistics and useful for uniquing such statistics across modules. 732 static uint64_t computeFullStackId(ArrayRef<Frame> CallStack) { 733 llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little> 734 HashBuilder; 735 for (auto &F : CallStack) 736 HashBuilder.add(F.Function, F.LineOffset, F.Column); 737 llvm::BLAKE3Result<8> Hash = HashBuilder.final(); 738 uint64_t Id; 739 std::memcpy(&Id, Hash.data(), sizeof(Hash)); 740 return Id; 741 } 742 743 static AllocationType addCallStack(CallStackTrie &AllocTrie, 744 const AllocationInfo *AllocInfo, 745 uint64_t FullStackId) { 746 SmallVector<uint64_t> StackIds; 747 for (const auto &StackFrame : AllocInfo->CallStack) 748 StackIds.push_back(computeStackId(StackFrame)); 749 auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(), 750 AllocInfo->Info.getAllocCount(), 751 AllocInfo->Info.getTotalLifetime()); 752 std::vector<ContextTotalSize> ContextSizeInfo; 753 if (MemProfReportHintedSizes) { 754 auto TotalSize = AllocInfo->Info.getTotalSize(); 755 assert(TotalSize); 756 assert(FullStackId != 0); 757 ContextSizeInfo.push_back({FullStackId, TotalSize}); 758 } 759 AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo)); 760 return AllocType; 761 } 762 763 // Helper to compare the InlinedCallStack computed from an instruction's debug 764 // info to a list of Frames from profile data (either the allocation data or a 765 // callsite). For callsites, the StartIndex to use in the Frame array may be 766 // non-zero. 767 static bool 768 stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack, 769 ArrayRef<uint64_t> InlinedCallStack) { 770 auto StackFrame = ProfileCallStack.begin(); 771 auto InlCallStackIter = InlinedCallStack.begin(); 772 for (; StackFrame != ProfileCallStack.end() && 773 InlCallStackIter != InlinedCallStack.end(); 774 ++StackFrame, ++InlCallStackIter) { 775 uint64_t StackId = computeStackId(*StackFrame); 776 if (StackId != *InlCallStackIter) 777 return false; 778 } 779 // Return true if we found and matched all stack ids from the call 780 // instruction. 781 return InlCallStackIter == InlinedCallStack.end(); 782 } 783 784 static bool isAllocationWithHotColdVariant(const Function *Callee, 785 const TargetLibraryInfo &TLI) { 786 if (!Callee) 787 return false; 788 LibFunc Func; 789 if (!TLI.getLibFunc(*Callee, Func)) 790 return false; 791 switch (Func) { 792 case LibFunc_Znwm: 793 case LibFunc_ZnwmRKSt9nothrow_t: 794 case LibFunc_ZnwmSt11align_val_t: 795 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: 796 case LibFunc_Znam: 797 case LibFunc_ZnamRKSt9nothrow_t: 798 case LibFunc_ZnamSt11align_val_t: 799 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: 800 case LibFunc_size_returning_new: 801 case LibFunc_size_returning_new_aligned: 802 return true; 803 case LibFunc_Znwm12__hot_cold_t: 804 case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t: 805 case LibFunc_ZnwmSt11align_val_t12__hot_cold_t: 806 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t: 807 case LibFunc_Znam12__hot_cold_t: 808 case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t: 809 case LibFunc_ZnamSt11align_val_t12__hot_cold_t: 810 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t: 811 case LibFunc_size_returning_new_hot_cold: 812 case LibFunc_size_returning_new_aligned_hot_cold: 813 return ClMemProfMatchHotColdNew; 814 default: 815 return false; 816 } 817 } 818 819 struct AllocMatchInfo { 820 uint64_t TotalSize = 0; 821 AllocationType AllocType = AllocationType::None; 822 bool Matched = false; 823 }; 824 825 DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> 826 memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI) { 827 DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls; 828 829 auto GetOffset = [](const DILocation *DIL) { 830 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & 831 0xffff; 832 }; 833 834 for (Function &F : M) { 835 if (F.isDeclaration()) 836 continue; 837 838 for (auto &BB : F) { 839 for (auto &I : BB) { 840 if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I)) 841 continue; 842 843 auto *CB = dyn_cast<CallBase>(&I); 844 auto *CalledFunction = CB->getCalledFunction(); 845 // Disregard indirect calls and intrinsics. 846 if (!CalledFunction || CalledFunction->isIntrinsic()) 847 continue; 848 849 StringRef CalleeName = CalledFunction->getName(); 850 bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI); 851 for (const DILocation *DIL = I.getDebugLoc(); DIL; 852 DIL = DIL->getInlinedAt()) { 853 StringRef CallerName = DIL->getSubprogramLinkageName(); 854 assert(!CallerName.empty() && 855 "Be sure to enable -fdebug-info-for-profiling"); 856 uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName); 857 uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName); 858 // Pretend that we are calling a function with GUID == 0 if we are 859 // calling a heap allocation function. 860 if (IsAlloc) 861 CalleeGUID = 0; 862 LineLocation Loc = {GetOffset(DIL), DIL->getColumn()}; 863 Calls[CallerGUID].emplace_back(Loc, CalleeGUID); 864 CalleeName = CallerName; 865 // FIXME: Recognize other frames that are associated with heap 866 // allocation functions. It may be too early to reset IsAlloc to 867 // false here. 868 IsAlloc = false; 869 } 870 } 871 } 872 } 873 874 // Sort each call list by the source location. 875 for (auto &[CallerGUID, CallList] : Calls) { 876 llvm::sort(CallList); 877 CallList.erase(llvm::unique(CallList), CallList.end()); 878 } 879 880 return Calls; 881 } 882 883 DenseMap<uint64_t, LocToLocMap> 884 memprof::computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader, 885 const TargetLibraryInfo &TLI) { 886 DenseMap<uint64_t, LocToLocMap> UndriftMaps; 887 888 DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromProfile = 889 MemProfReader->getMemProfCallerCalleePairs(); 890 DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromIR = 891 extractCallsFromIR(M, TLI); 892 893 // Compute an undrift map for each CallerGUID. 894 for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) { 895 auto It = CallsFromProfile.find(CallerGUID); 896 if (It == CallsFromProfile.end()) 897 continue; 898 const auto &ProfileAnchors = It->second; 899 900 LocToLocMap Matchings; 901 longestCommonSequence<LineLocation, GlobalValue::GUID>( 902 ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(), 903 [&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); }); 904 bool Inserted = UndriftMaps.try_emplace(CallerGUID, Matchings).second; 905 906 // The insertion must succeed because we visit each GUID exactly once. 907 assert(Inserted); 908 (void)Inserted; 909 } 910 911 return UndriftMaps; 912 } 913 914 static void 915 readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, 916 const TargetLibraryInfo &TLI, 917 std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) { 918 auto &Ctx = M.getContext(); 919 // Previously we used getIRPGOFuncName() here. If F is local linkage, 920 // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But 921 // llvm-profdata uses FuncName in dwarf to create GUID which doesn't 922 // contain FileName's prefix. It caused local linkage function can't 923 // find MemProfRecord. So we use getName() now. 924 // 'unique-internal-linkage-names' can make MemProf work better for local 925 // linkage function. 926 auto FuncName = F.getName(); 927 auto FuncGUID = Function::getGUID(FuncName); 928 std::optional<memprof::MemProfRecord> MemProfRec; 929 auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec); 930 if (Err) { 931 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) { 932 auto Err = IPE.get(); 933 bool SkipWarning = false; 934 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName 935 << ": "); 936 if (Err == instrprof_error::unknown_function) { 937 NumOfMemProfMissing++; 938 SkipWarning = !PGOWarnMissing; 939 LLVM_DEBUG(dbgs() << "unknown function"); 940 } else if (Err == instrprof_error::hash_mismatch) { 941 NumOfMemProfMismatch++; 942 SkipWarning = 943 NoPGOWarnMismatch || 944 (NoPGOWarnMismatchComdatWeak && 945 (F.hasComdat() || 946 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 947 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 948 } 949 950 if (SkipWarning) 951 return; 952 953 std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() + 954 Twine(" Hash = ") + std::to_string(FuncGUID)) 955 .str(); 956 957 Ctx.diagnose( 958 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 959 }); 960 return; 961 } 962 963 NumOfMemProfFunc++; 964 965 // Detect if there are non-zero column numbers in the profile. If not, 966 // treat all column numbers as 0 when matching (i.e. ignore any non-zero 967 // columns in the IR). The profiled binary might have been built with 968 // column numbers disabled, for example. 969 bool ProfileHasColumns = false; 970 971 // Build maps of the location hash to all profile data with that leaf location 972 // (allocation info and the callsites). 973 std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo; 974 // A hash function for std::unordered_set<ArrayRef<Frame>> to work. 975 struct CallStackHash { 976 size_t operator()(ArrayRef<Frame> CS) const { 977 return computeFullStackId(CS); 978 } 979 }; 980 // For the callsites we need to record slices of the frame array (see comments 981 // below where the map entries are added). 982 std::map<uint64_t, std::unordered_set<ArrayRef<Frame>, CallStackHash>> 983 LocHashToCallSites; 984 for (auto &AI : MemProfRec->AllocSites) { 985 NumOfMemProfAllocContextProfiles++; 986 // Associate the allocation info with the leaf frame. The later matching 987 // code will match any inlined call sequences in the IR with a longer prefix 988 // of call stack frames. 989 uint64_t StackId = computeStackId(AI.CallStack[0]); 990 LocHashToAllocInfo[StackId].insert(&AI); 991 ProfileHasColumns |= AI.CallStack[0].Column; 992 } 993 for (auto &CS : MemProfRec->CallSites) { 994 NumOfMemProfCallSiteProfiles++; 995 // Need to record all frames from leaf up to and including this function, 996 // as any of these may or may not have been inlined at this point. 997 unsigned Idx = 0; 998 for (auto &StackFrame : CS) { 999 uint64_t StackId = computeStackId(StackFrame); 1000 LocHashToCallSites[StackId].insert(ArrayRef<Frame>(CS).drop_front(Idx++)); 1001 ProfileHasColumns |= StackFrame.Column; 1002 // Once we find this function, we can stop recording. 1003 if (StackFrame.Function == FuncGUID) 1004 break; 1005 } 1006 assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID); 1007 } 1008 1009 auto GetOffset = [](const DILocation *DIL) { 1010 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & 1011 0xffff; 1012 }; 1013 1014 // Now walk the instructions, looking up the associated profile data using 1015 // debug locations. 1016 for (auto &BB : F) { 1017 for (auto &I : BB) { 1018 if (I.isDebugOrPseudoInst()) 1019 continue; 1020 // We are only interested in calls (allocation or interior call stack 1021 // context calls). 1022 auto *CI = dyn_cast<CallBase>(&I); 1023 if (!CI) 1024 continue; 1025 auto *CalledFunction = CI->getCalledFunction(); 1026 if (CalledFunction && CalledFunction->isIntrinsic()) 1027 continue; 1028 // List of call stack ids computed from the location hashes on debug 1029 // locations (leaf to inlined at root). 1030 SmallVector<uint64_t, 8> InlinedCallStack; 1031 // Was the leaf location found in one of the profile maps? 1032 bool LeafFound = false; 1033 // If leaf was found in a map, iterators pointing to its location in both 1034 // of the maps. It might exist in neither, one, or both (the latter case 1035 // can happen because we don't currently have discriminators to 1036 // distinguish the case when a single line/col maps to both an allocation 1037 // and another callsite). 1038 std::map<uint64_t, std::set<const AllocationInfo *>>::iterator 1039 AllocInfoIter; 1040 decltype(LocHashToCallSites)::iterator CallSitesIter; 1041 for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr; 1042 DIL = DIL->getInlinedAt()) { 1043 // Use C++ linkage name if possible. Need to compile with 1044 // -fdebug-info-for-profiling to get linkage name. 1045 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); 1046 if (Name.empty()) 1047 Name = DIL->getScope()->getSubprogram()->getName(); 1048 auto CalleeGUID = Function::getGUID(Name); 1049 auto StackId = computeStackId(CalleeGUID, GetOffset(DIL), 1050 ProfileHasColumns ? DIL->getColumn() : 0); 1051 // Check if we have found the profile's leaf frame. If yes, collect 1052 // the rest of the call's inlined context starting here. If not, see if 1053 // we find a match further up the inlined context (in case the profile 1054 // was missing debug frames at the leaf). 1055 if (!LeafFound) { 1056 AllocInfoIter = LocHashToAllocInfo.find(StackId); 1057 CallSitesIter = LocHashToCallSites.find(StackId); 1058 if (AllocInfoIter != LocHashToAllocInfo.end() || 1059 CallSitesIter != LocHashToCallSites.end()) 1060 LeafFound = true; 1061 } 1062 if (LeafFound) 1063 InlinedCallStack.push_back(StackId); 1064 } 1065 // If leaf not in either of the maps, skip inst. 1066 if (!LeafFound) 1067 continue; 1068 1069 // First add !memprof metadata from allocation info, if we found the 1070 // instruction's leaf location in that map, and if the rest of the 1071 // instruction's locations match the prefix Frame locations on an 1072 // allocation context with the same leaf. 1073 if (AllocInfoIter != LocHashToAllocInfo.end()) { 1074 // Only consider allocations which support hinting. 1075 if (!isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI)) 1076 continue; 1077 // We may match this instruction's location list to multiple MIB 1078 // contexts. Add them to a Trie specialized for trimming the contexts to 1079 // the minimal needed to disambiguate contexts with unique behavior. 1080 CallStackTrie AllocTrie; 1081 uint64_t TotalSize = 0; 1082 uint64_t TotalColdSize = 0; 1083 for (auto *AllocInfo : AllocInfoIter->second) { 1084 // Check the full inlined call stack against this one. 1085 // If we found and thus matched all frames on the call, include 1086 // this MIB. 1087 if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack, 1088 InlinedCallStack)) { 1089 NumOfMemProfMatchedAllocContexts++; 1090 uint64_t FullStackId = 0; 1091 if (ClPrintMemProfMatchInfo || MemProfReportHintedSizes) 1092 FullStackId = computeFullStackId(AllocInfo->CallStack); 1093 auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId); 1094 TotalSize += AllocInfo->Info.getTotalSize(); 1095 if (AllocType == AllocationType::Cold) 1096 TotalColdSize += AllocInfo->Info.getTotalSize(); 1097 // Record information about the allocation if match info printing 1098 // was requested. 1099 if (ClPrintMemProfMatchInfo) { 1100 assert(FullStackId != 0); 1101 FullStackIdToAllocMatchInfo[FullStackId] = { 1102 AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true}; 1103 } 1104 } 1105 } 1106 // If the threshold for the percent of cold bytes is less than 100%, 1107 // and not all bytes are cold, see if we should still hint this 1108 // allocation as cold without context sensitivity. 1109 if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 && 1110 TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) { 1111 AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold, 1112 "dominant"); 1113 continue; 1114 } 1115 1116 // We might not have matched any to the full inlined call stack. 1117 // But if we did, create and attach metadata, or a function attribute if 1118 // all contexts have identical profiled behavior. 1119 if (!AllocTrie.empty()) { 1120 NumOfMemProfMatchedAllocs++; 1121 // MemprofMDAttached will be false if a function attribute was 1122 // attached. 1123 bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI); 1124 assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof)); 1125 if (MemprofMDAttached) { 1126 // Add callsite metadata for the instruction's location list so that 1127 // it simpler later on to identify which part of the MIB contexts 1128 // are from this particular instruction (including during inlining, 1129 // when the callsite metadata will be updated appropriately). 1130 // FIXME: can this be changed to strip out the matching stack 1131 // context ids from the MIB contexts and not add any callsite 1132 // metadata here to save space? 1133 addCallsiteMetadata(I, InlinedCallStack, Ctx); 1134 } 1135 } 1136 continue; 1137 } 1138 1139 // Otherwise, add callsite metadata. If we reach here then we found the 1140 // instruction's leaf location in the callsites map and not the allocation 1141 // map. 1142 assert(CallSitesIter != LocHashToCallSites.end()); 1143 for (auto CallStackIdx : CallSitesIter->second) { 1144 // If we found and thus matched all frames on the call, create and 1145 // attach call stack metadata. 1146 if (stackFrameIncludesInlinedCallStack(CallStackIdx, 1147 InlinedCallStack)) { 1148 NumOfMemProfMatchedCallSites++; 1149 addCallsiteMetadata(I, InlinedCallStack, Ctx); 1150 // Only need to find one with a matching call stack and add a single 1151 // callsite metadata. 1152 break; 1153 } 1154 } 1155 } 1156 } 1157 } 1158 1159 MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile, 1160 IntrusiveRefCntPtr<vfs::FileSystem> FS) 1161 : MemoryProfileFileName(MemoryProfileFile), FS(FS) { 1162 if (!FS) 1163 this->FS = vfs::getRealFileSystem(); 1164 } 1165 1166 PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { 1167 // Return immediately if the module doesn't contain any function. 1168 if (M.empty()) 1169 return PreservedAnalyses::all(); 1170 1171 LLVM_DEBUG(dbgs() << "Read in memory profile:"); 1172 auto &Ctx = M.getContext(); 1173 auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS); 1174 if (Error E = ReaderOrErr.takeError()) { 1175 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1176 Ctx.diagnose( 1177 DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message())); 1178 }); 1179 return PreservedAnalyses::all(); 1180 } 1181 1182 std::unique_ptr<IndexedInstrProfReader> MemProfReader = 1183 std::move(ReaderOrErr.get()); 1184 if (!MemProfReader) { 1185 Ctx.diagnose(DiagnosticInfoPGOProfile( 1186 MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader"))); 1187 return PreservedAnalyses::all(); 1188 } 1189 1190 if (!MemProfReader->hasMemoryProfile()) { 1191 Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), 1192 "Not a memory profile")); 1193 return PreservedAnalyses::all(); 1194 } 1195 1196 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1197 1198 // Map from the stack has of each allocation context in the function profiles 1199 // to the total profiled size (bytes), allocation type, and whether we matched 1200 // it to an allocation in the IR. 1201 std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo; 1202 1203 for (auto &F : M) { 1204 if (F.isDeclaration()) 1205 continue; 1206 1207 const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F); 1208 readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo); 1209 } 1210 1211 if (ClPrintMemProfMatchInfo) { 1212 for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo) 1213 errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType) 1214 << " context with id " << Id << " has total profiled size " 1215 << Info.TotalSize << (Info.Matched ? " is" : " not") 1216 << " matched\n"; 1217 } 1218 1219 return PreservedAnalyses::none(); 1220 } 1221