//===- MemProfiler.cpp - memory allocation and access profiler ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of MemProfiler. Memory accesses are instrumented
// to increment the access count held in a shadow memory location, or
// alternatively to call into the runtime. Memory intrinsic calls (memmove,
// memcpy, memset) are changed to call the memory profiling runtime version
// instead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LongestCommonSequence.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <map>
#include <set>

using namespace llvm;
using namespace llvm::memprof;

#define DEBUG_TYPE "memprof"

namespace llvm {
extern cl::opt<bool> PGOWarnMissing;
extern cl::opt<bool> NoPGOWarnMismatch;
extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
} // namespace llvm

constexpr int LLVM_MEM_PROFILER_VERSION = 1;

// Size of memory mapped to a single shadow location.
constexpr uint64_t DefaultMemGranularity = 64;

// Size of memory mapped to a single histogram bucket.
constexpr uint64_t HistogramGranularity = 8;

// Scale from granularity down to shadow size.
constexpr uint64_t DefaultShadowScale = 3;

constexpr char MemProfModuleCtorName[] = "memprof.module_ctor";
constexpr uint64_t MemProfCtorAndDtorPriority = 1;
// On Emscripten, the system needs more than one priority for constructors.
constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50;
constexpr char MemProfInitName[] = "__memprof_init";
constexpr char MemProfVersionCheckNamePrefix[] =
    "__memprof_version_mismatch_check_v";

constexpr char MemProfShadowMemoryDynamicAddress[] =
    "__memprof_shadow_memory_dynamic_address";

constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";

constexpr char MemProfHistogramFlagVar[] = "__memprof_histogram";
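
// Worked example of the mapping (addresses here are illustrative): with the
// default granularity of 64 and scale of 3, the shadow address is
//   Shadow = ((Mem & ~63) >> 3) + DynamicOffset,
// so a hypothetical access at Mem = 0x1040 (with a zero offset) increments
// the 8-byte counter at shadow slot 0x208, shared by all accesses to
// 0x1040..0x107f. In histogram mode the granularity drops to 8 bytes and each
// bucket is a single byte, saturated at 255 (see instrumentAddress below).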

// Command-line flags.

static cl::opt<bool> ClInsertVersionCheck(
    "memprof-guard-against-version-mismatch",
    cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,
    cl::init(true));

// This flag may need to be replaced with -f[no-]memprof-reads.
static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("memprof-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "memprof-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClUseCalls(
    "memprof-use-callbacks",
    cl::desc("Use callbacks instead of inline instrumentation sequences."),
    cl::Hidden, cl::init(false));

static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__memprof_"));

// These flags allow changing the shadow mapping. The shadow mapping looks
// like:
//    Shadow = ((Mem & mask) >> scale) + offset

static cl::opt<int> ClMappingScale("memprof-mapping-scale",
                                   cl::desc("scale of memprof shadow mapping"),
                                   cl::Hidden, cl::init(DefaultShadowScale));

static cl::opt<int>
    ClMappingGranularity("memprof-mapping-granularity",
                         cl::desc("granularity of memprof shadow mapping"),
                         cl::Hidden, cl::init(DefaultMemGranularity));

static cl::opt<bool> ClStack("memprof-instrument-stack",
                             cl::desc("Instrument scalar stack variables"),
                             cl::Hidden, cl::init(false));

// Debug flags.

static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,
                            cl::init(0));

static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden,
                                        cl::desc("Debug func"));

static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"),
                               cl::Hidden, cl::init(-1));

static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
                               cl::Hidden, cl::init(-1));

// By default, disable matching of allocation profiles onto operator new calls
// that already explicitly pass a hot/cold hint, since we don't currently
// override these hints anyway.
static cl::opt<bool> ClMemProfMatchHotColdNew(
    "memprof-match-hot-cold-new",
    cl::desc(
        "Match allocation profiles onto existing hot/cold operator new calls"),
    cl::Hidden, cl::init(false));

static cl::opt<bool> ClHistogram("memprof-histogram",
                                 cl::desc("Collect access count histograms"),
                                 cl::Hidden, cl::init(false));

static cl::opt<bool>
    ClPrintMemProfMatchInfo("memprof-print-match-info",
                            cl::desc("Print matching stats for each allocation "
                                     "context in this module's profiles"),
                            cl::Hidden, cl::init(false));

static cl::opt<std::string>
    MemprofRuntimeDefaultOptions("memprof-runtime-default-options",
                                 cl::desc("The default memprof options"),
                                 cl::Hidden, cl::init(""));

extern cl::opt<bool> MemProfReportHintedSizes;

// Instrumentation statistics.
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");

// Matching statistics.
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfMemProfMismatch,
          "Number of functions having mismatched memory profile hash.");
STATISTIC(NumOfMemProfFunc,
          "Number of functions having valid memory profile.");
STATISTIC(NumOfMemProfAllocContextProfiles,
          "Number of alloc contexts in memory profile.");
STATISTIC(NumOfMemProfCallSiteProfiles,
          "Number of callsites in memory profile.");
STATISTIC(NumOfMemProfMatchedAllocContexts,
          "Number of matched memory profile alloc contexts.");
STATISTIC(NumOfMemProfMatchedAllocs,
          "Number of matched memory profile allocs.");
STATISTIC(NumOfMemProfMatchedCallSites,
          "Number of matched memory profile callsites.");

namespace {

/// This struct defines the shadow mapping using the rule:
///   shadow = ((mem & mask) >> Scale) + DynamicShadowOffset.
struct ShadowMapping {
  ShadowMapping() {
    Scale = ClMappingScale;
    Granularity = ClHistogram ? HistogramGranularity : ClMappingGranularity;
    Mask = ~(Granularity - 1);
  }

  int Scale;
  int Granularity;
  uint64_t Mask; // Computed as ~(Granularity-1)
};

static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) {
  return TargetTriple.isOSEmscripten() ? MemProfEmscriptenCtorAndDtorPriority
                                       : MemProfCtorAndDtorPriority;
}

struct InterestingMemoryAccess {
  Value *Addr = nullptr;
  bool IsWrite;
  Type *AccessTy;
  Value *MaybeMask = nullptr;
};

/// Instrument the code in module to profile memory accesses.
class MemProfiler {
public:
  MemProfiler(Module &M) {
    C = &(M.getContext());
    LongSize = M.getDataLayout().getPointerSizeInBits();
    IntptrTy = Type::getIntNTy(*C, LongSize);
    PtrTy = PointerType::getUnqual(*C);
  }

  /// If it is an interesting memory access, populate information
  /// about the access and return an InterestingMemoryAccess struct.
  /// Otherwise return std::nullopt.
  std::optional<InterestingMemoryAccess>
  isInterestingMemoryAccess(Instruction *I) const;

  void instrumentMop(Instruction *I, const DataLayout &DL,
                     InterestingMemoryAccess &Access);
  void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
                         Value *Addr, bool IsWrite);
  void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                   Instruction *I, Value *Addr, Type *AccessTy,
                                   bool IsWrite);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFunction(Function &F);
  bool maybeInsertMemProfInitAtFunctionEntry(Function &F);
  bool insertDynamicShadowAtFunctionEntry(Function &F);

private:
  void initializeCallbacks(Module &M);

  LLVMContext *C;
  int LongSize;
  Type *IntptrTy;
  PointerType *PtrTy;
  ShadowMapping Mapping;

  // This array is indexed by AccessIsWrite.
  FunctionCallee MemProfMemoryAccessCallback[2];

  FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset;
  Value *DynamicShadowOffset = nullptr;
};

class ModuleMemProfiler {
public:
  ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); }

  bool instrumentModule(Module &);

private:
  Triple TargetTriple;
  ShadowMapping Mapping;
  Function *MemProfCtorFunction = nullptr;
};

} // end anonymous namespace

MemProfilerPass::MemProfilerPass() = default;

PreservedAnalyses MemProfilerPass::run(Function &F,
                                       AnalysisManager<Function> &AM) {
  assert((!ClHistogram || ClMappingGranularity == DefaultMemGranularity) &&
         "Memprof with histogram only supports default mapping granularity");
  Module &M = *F.getParent();
  MemProfiler Profiler(M);
  if (Profiler.instrumentFunction(F))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}

ModuleMemProfilerPass::ModuleMemProfilerPass() = default;

PreservedAnalyses ModuleMemProfilerPass::run(Module &M,
                                             AnalysisManager<Module> &AM) {
  ModuleMemProfiler Profiler(M);
  if (Profiler.instrumentModule(M))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}

Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
  // (Shadow & mask) >> scale
  Shadow = IRB.CreateAnd(Shadow, Mapping.Mask);
  Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
  // ((Shadow & mask) >> scale) + offset
  assert(DynamicShadowOffset);
  return IRB.CreateAdd(Shadow, DynamicShadowOffset);
}

// Instrument memset/memmove/memcpy by redirecting them to the memory
// profiling runtime versions.
void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  if (isa<MemTransferInst>(MI)) {
    IRB.CreateCall(isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy,
                   {MI->getOperand(0), MI->getOperand(1),
                    IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  } else if (isa<MemSetInst>(MI)) {
    IRB.CreateCall(
        MemProfMemset,
        {MI->getOperand(0),
         IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  }
  MI->eraseFromParent();
}
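
// For example, with the default "__memprof_" callback prefix, a call
// "memcpy(Dst, Src, N)" is rewritten by instrumentMemIntrinsic above into
// "__memprof_memcpy(Dst, Src, (uintptr_t)N)", where the runtime version is
// expected to perform the copy while recording the accesses. Similarly, in
// callback mode (-memprof-use-callbacks) plain loads and stores call
// __memprof_load / __memprof_store. (Dst/Src/N are illustrative names; the
// callback names follow from initializeCallbacks below.)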

std::optional<InterestingMemoryAccess>
MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
  // Do not instrument the load fetching the dynamic shadow address.
  if (DynamicShadowOffset == I)
    return std::nullopt;

  InterestingMemoryAccess Access;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads)
      return std::nullopt;
    Access.IsWrite = false;
    Access.AccessTy = LI->getType();
    Access.Addr = LI->getPointerOperand();
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = SI->getValueOperand()->getType();
    Access.Addr = SI->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = RMW->getValOperand()->getType();
    Access.Addr = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = XCHG->getCompareOperand()->getType();
    Access.Addr = XCHG->getPointerOperand();
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    auto *F = CI->getCalledFunction();
    if (F && (F->getIntrinsicID() == Intrinsic::masked_load ||
              F->getIntrinsicID() == Intrinsic::masked_store)) {
      unsigned OpOffset = 0;
      if (F->getIntrinsicID() == Intrinsic::masked_store) {
        if (!ClInstrumentWrites)
          return std::nullopt;
        // Masked store has an initial operand for the value.
        OpOffset = 1;
        Access.AccessTy = CI->getArgOperand(0)->getType();
        Access.IsWrite = true;
      } else {
        if (!ClInstrumentReads)
          return std::nullopt;
        Access.AccessTy = CI->getType();
        Access.IsWrite = false;
      }

      auto *BasePtr = CI->getOperand(0 + OpOffset);
      Access.MaybeMask = CI->getOperand(2 + OpOffset);
      Access.Addr = BasePtr;
    }
  }

  if (!Access.Addr)
    return std::nullopt;

  // Do not instrument accesses from different address spaces; we cannot deal
  // with them.
  Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType());
  if (PtrTy->getPointerAddressSpace() != 0)
    return std::nullopt;

  // Ignore swifterror addresses.
  // swifterror memory addresses are mem2reg promoted by instruction
  // selection. As such they cannot have regular uses like an instrumentation
  // function and it makes no sense to track them as memory.
  if (Access.Addr->isSwiftError())
    return std::nullopt;

  // Peel off GEPs and BitCasts.
  auto *Addr = Access.Addr->stripInBoundsOffsets();

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
    // Do not instrument PGO counter updates.
    if (GV->hasSection()) {
      StringRef SectionName = GV->getSection();
      // Check if the global is in the PGO counters section.
      auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();
      if (SectionName.ends_with(
              getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
        return std::nullopt;
    }
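
    // (For example, on ELF targets getInstrProfSectionName(IPSK_cnts, ...)
    // yields "__llvm_prf_cnts", so updates to PGO counter globals placed in
    // that section are skipped rather than profiled.)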

    // Do not instrument accesses to LLVM internal variables.
    if (GV->getName().starts_with("__llvm"))
      return std::nullopt;
  }

  return Access;
}

void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                              Instruction *I, Value *Addr,
                                              Type *AccessTy, bool IsWrite) {
  auto *VTy = cast<FixedVectorType>(AccessTy);
  unsigned Num = VTy->getNumElements();
  auto *Zero = ConstantInt::get(IntptrTy, 0);
  for (unsigned Idx = 0; Idx < Num; ++Idx) {
    Value *InstrumentedAddress = nullptr;
    Instruction *InsertBefore = I;
    if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
      // dyn_cast as we might get UndefValue
      if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
        if (Masked->isZero())
          // Mask is constant false, so no instrumentation needed.
          continue;
        // If we have a true or undef value, fall through to
        // instrumentAddress with InsertBefore == I.
      }
    } else {
      IRBuilder<> IRB(I);
      Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
      Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
      InsertBefore = ThenTerm;
    }

    IRBuilder<> IRB(InsertBefore);
    InstrumentedAddress =
        IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
    instrumentAddress(I, InsertBefore, InstrumentedAddress, IsWrite);
  }
}

void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
                                InterestingMemoryAccess &Access) {
  // Skip instrumentation of stack accesses unless requested.
  if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) {
    if (Access.IsWrite)
      ++NumSkippedStackWrites;
    else
      ++NumSkippedStackReads;
    return;
  }

  if (Access.IsWrite)
    NumInstrumentedWrites++;
  else
    NumInstrumentedReads++;

  if (Access.MaybeMask) {
    instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,
                                Access.AccessTy, Access.IsWrite);
  } else {
    // Since the access counts will be accumulated across the entire
    // allocation, we only update the shadow access count for the first
    // location and thus don't need to worry about alignment and type size.
    instrumentAddress(I, I, Access.Addr, Access.IsWrite);
  }
}

void MemProfiler::instrumentAddress(Instruction *OrigIns,
                                    Instruction *InsertBefore, Value *Addr,
                                    bool IsWrite) {
  IRBuilder<> IRB(InsertBefore);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);

  if (ClUseCalls) {
    IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong);
    return;
  }

  Type *ShadowTy = ClHistogram ? Type::getInt8Ty(*C) : Type::getInt64Ty(*C);
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);

  Value *ShadowPtr = memToShadow(AddrLong, IRB);
  Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);
  Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);
  // If we are profiling with histograms, add overflow protection at 255.
  if (ClHistogram) {
    Value *MaxCount = ConstantInt::get(Type::getInt8Ty(*C), 255);
    Value *Cmp = IRB.CreateICmpULT(ShadowValue, MaxCount);
    Instruction *IncBlock =
        SplitBlockAndInsertIfThen(Cmp, InsertBefore, /*Unreachable=*/false);
    IRB.SetInsertPoint(IncBlock);
  }
  Value *Inc = ConstantInt::get(ShadowTy, 1);
  ShadowValue = IRB.CreateAdd(ShadowValue, Inc);
  IRB.CreateStore(ShadowValue, ShadowAddr);
}
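
// Rough sketch of the inline sequence emitted above for a non-histogram
// access to %addr (IR value names are illustrative):
//   %a    = ptrtoint ptr %addr to i64
//   %m    = and i64 %a, ~(Granularity - 1)
//   %s    = lshr i64 %m, Scale
//   %slot = add i64 %s, %dynamic_shadow_offset
//   %p    = inttoptr i64 %slot to ptr
//   %cnt  = load i64, ptr %p
//   %inc  = add i64 %cnt, 1
//   store i64 %inc, ptr %p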

// Create the variable for the profile file name.
void createProfileFileNameVar(Module &M) {
  const MDString *MemProfFilename =
      dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename"));
  if (!MemProfFilename)
    return;
  assert(!MemProfFilename->getString().empty() &&
         "Unexpected MemProfProfileFilename metadata with empty string");
  Constant *ProfileNameConst = ConstantDataArray::getString(
      M.getContext(), MemProfFilename->getString(), true);
  GlobalVariable *ProfileNameVar = new GlobalVariable(
      M, ProfileNameConst->getType(), /*isConstant=*/true,
      GlobalValue::WeakAnyLinkage, ProfileNameConst, MemProfFilenameVar);
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
    ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar));
  }
}

// Set MemprofHistogramFlag as a global variable in IR. This makes it
// accessible to the runtime, changing shadow count behavior.
void createMemprofHistogramFlagVar(Module &M) {
  const StringRef VarName(MemProfHistogramFlagVar);
  Type *IntTy1 = Type::getInt1Ty(M.getContext());
  auto MemprofHistogramFlag = new GlobalVariable(
      M, IntTy1, true, GlobalValue::WeakAnyLinkage,
      Constant::getIntegerValue(IntTy1, APInt(1, ClHistogram)), VarName);
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    MemprofHistogramFlag->setLinkage(GlobalValue::ExternalLinkage);
    MemprofHistogramFlag->setComdat(M.getOrInsertComdat(VarName));
  }
  appendToCompilerUsed(M, MemprofHistogramFlag);
}

void createMemprofDefaultOptionsVar(Module &M) {
  Constant *OptionsConst = ConstantDataArray::getString(
      M.getContext(), MemprofRuntimeDefaultOptions, /*AddNull=*/true);
  GlobalVariable *OptionsVar =
      new GlobalVariable(M, OptionsConst->getType(), /*isConstant=*/true,
                         GlobalValue::WeakAnyLinkage, OptionsConst,
                         "__memprof_default_options_str");
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    OptionsVar->setLinkage(GlobalValue::ExternalLinkage);
    OptionsVar->setComdat(M.getOrInsertComdat(OptionsVar->getName()));
  }
}

bool ModuleMemProfiler::instrumentModule(Module &M) {
  // Create a module constructor.
  std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION);
  std::string VersionCheckName =
      ClInsertVersionCheck ? (MemProfVersionCheckNamePrefix + MemProfVersion)
                           : "";
  std::tie(MemProfCtorFunction, std::ignore) =
      createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName,
                                          MemProfInitName, /*InitArgTypes=*/{},
                                          /*InitArgs=*/{}, VersionCheckName);

  const uint64_t Priority = getCtorAndDtorPriority(TargetTriple);
  appendToGlobalCtors(M, MemProfCtorFunction, Priority);

  createProfileFileNameVar(M);
  createMemprofHistogramFlagVar(M);
  createMemprofDefaultOptionsVar(M);

  return true;
}

void MemProfiler::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(*C);

  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
    const std::string TypeStr = AccessIsWrite ? "store" : "load";
    const std::string HistPrefix = ClHistogram ? "hist_" : "";

    SmallVector<Type *, 2> Args1{1, IntptrTy};
    MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction(
        ClMemoryAccessCallbackPrefix + HistPrefix + TypeStr,
        FunctionType::get(IRB.getVoidTy(), Args1, false));
  }
  MemProfMemmove = M.getOrInsertFunction(
      ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy);
  MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy",
                                        PtrTy, PtrTy, PtrTy, IntptrTy);
  MemProfMemset =
      M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", PtrTy,
                            PtrTy, IRB.getInt32Ty(), IntptrTy);
}

bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) {
  // For each NSObject descendant having a +load method, this method is invoked
  // by the ObjC runtime before any of the static constructors is called.
  // Therefore we need to instrument such methods with a call to __memprof_init
  // at the beginning in order to initialize our runtime before any access to
  // the shadow memory.
  // We cannot just ignore these methods, because they may call other
  // instrumented functions.
  if (F.getName().contains(" load]")) {
    FunctionCallee MemProfInitFunction =
        declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {});
    IRBuilder<> IRB(&F.front(), F.front().begin());
    IRB.CreateCall(MemProfInitFunction, {});
    return true;
  }
  return false;
}

bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) {
  IRBuilder<> IRB(&F.front().front());
  Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
      MemProfShadowMemoryDynamicAddress, IntptrTy);
  if (F.getParent()->getPICLevel() == PICLevel::NotPIC)
    cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true);
  DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
  return true;
}

bool MemProfiler::instrumentFunction(Function &F) {
  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
    return false;
  if (ClDebugFunc == F.getName())
    return false;
  if (F.getName().starts_with("__memprof_"))
    return false;

  bool FunctionModified = false;

  // If needed, insert __memprof_init.
  // This function needs to be called even if the function body is not
  // instrumented.
  if (maybeInsertMemProfInitAtFunctionEntry(F))
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n");

  initializeCallbacks(*F.getParent());

  SmallVector<Instruction *, 16> ToInstrument;

  // Fill the set of memory operations to instrument.
  for (auto &BB : F) {
    for (auto &Inst : BB) {
      if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst))
        ToInstrument.push_back(&Inst);
    }
  }

  if (ToInstrument.empty()) {
    LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified
                      << " " << F << "\n");

    return FunctionModified;
  }

  FunctionModified |= insertDynamicShadowAtFunctionEntry(F);

  int NumInstrumented = 0;
  for (auto *Inst : ToInstrument) {
    if (ClDebugMin < 0 || ClDebugMax < 0 ||
        (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
      std::optional<InterestingMemoryAccess> Access =
          isInterestingMemoryAccess(Inst);
      if (Access)
        instrumentMop(Inst, F.getDataLayout(), *Access);
      else
        instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
    }
    NumInstrumented++;
  }

  if (NumInstrumented > 0)
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " "
                    << F << "\n");

  return FunctionModified;
}

static void addCallsiteMetadata(Instruction &I,
                                ArrayRef<uint64_t> InlinedCallStack,
                                LLVMContext &Ctx) {
  I.setMetadata(LLVMContext::MD_callsite,
                buildCallstackMetadata(InlinedCallStack, Ctx));
}

static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
                               uint32_t Column) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  HashBuilder.add(Function, LineOffset, Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  std::memcpy(&Id, Hash.data(), sizeof(Hash));
  return Id;
}

static uint64_t computeStackId(const memprof::Frame &Frame) {
  return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
}

// Helper to generate a single hash id for a given callstack, used for emitting
// matching statistics and useful for uniquing such statistics across modules.
static uint64_t
computeFullStackId(const std::vector<memprof::Frame> &CallStack) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  for (auto &F : CallStack)
    HashBuilder.add(F.Function, F.LineOffset, F.Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  std::memcpy(&Id, Hash.data(), sizeof(Hash));
  return Id;
}

static AllocationType addCallStack(CallStackTrie &AllocTrie,
                                   const AllocationInfo *AllocInfo,
                                   uint64_t FullStackId) {
  SmallVector<uint64_t> StackIds;
  for (const auto &StackFrame : AllocInfo->CallStack)
    StackIds.push_back(computeStackId(StackFrame));
  auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
                                AllocInfo->Info.getAllocCount(),
                                AllocInfo->Info.getTotalLifetime());
  std::vector<ContextTotalSize> ContextSizeInfo;
  if (MemProfReportHintedSizes) {
    auto TotalSize = AllocInfo->Info.getTotalSize();
    assert(TotalSize);
    assert(FullStackId != 0);
    ContextSizeInfo.push_back({FullStackId, TotalSize});
  }
  AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo));
  return AllocType;
}
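
// For illustration: a profiled allocation context may record the frames
// [foo, bar, main] (leaf first), while an instruction's inlined call stack in
// the IR contains only [foo, bar] because main was not inlined into bar. The
// helper below treats this as a match: every frame of the IR inlined stack
// must equal the corresponding prefix of the profile stack. (Function names
// here are hypothetical.)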

// Helper to compare the InlinedCallStack computed from an instruction's debug
// info to a list of Frames from profile data (either the allocation data or a
// callsite). For callsites, the StartIndex to use in the Frame array may be
// non-zero.
static bool
stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
                                   ArrayRef<uint64_t> InlinedCallStack,
                                   unsigned StartIndex = 0) {
  auto StackFrame = ProfileCallStack.begin() + StartIndex;
  auto InlCallStackIter = InlinedCallStack.begin();
  for (; StackFrame != ProfileCallStack.end() &&
         InlCallStackIter != InlinedCallStack.end();
       ++StackFrame, ++InlCallStackIter) {
    uint64_t StackId = computeStackId(*StackFrame);
    if (StackId != *InlCallStackIter)
      return false;
  }
  // Return true if we found and matched all stack ids from the call
  // instruction.
  return InlCallStackIter == InlinedCallStack.end();
}

static bool isAllocationWithHotColdVariant(const Function *Callee,
                                           const TargetLibraryInfo &TLI) {
  if (!Callee)
    return false;
  LibFunc Func;
  if (!TLI.getLibFunc(*Callee, Func))
    return false;
  switch (Func) {
  case LibFunc_Znwm:
  case LibFunc_ZnwmRKSt9nothrow_t:
  case LibFunc_ZnwmSt11align_val_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
  case LibFunc_Znam:
  case LibFunc_ZnamRKSt9nothrow_t:
  case LibFunc_ZnamSt11align_val_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
  case LibFunc_size_returning_new:
  case LibFunc_size_returning_new_aligned:
    return true;
  case LibFunc_Znwm12__hot_cold_t:
  case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_Znam12__hot_cold_t:
  case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_size_returning_new_hot_cold:
  case LibFunc_size_returning_new_aligned_hot_cold:
    return ClMemProfMatchHotColdNew;
  default:
    return false;
  }
}

struct AllocMatchInfo {
  uint64_t TotalSize = 0;
  AllocationType AllocType = AllocationType::None;
  bool Matched = false;
};

DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI) {
  DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls;

  auto GetOffset = [](const DILocation *DIL) {
    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
           0xffff;
  };

  for (Function &F : M) {
    if (F.isDeclaration())
      continue;

    for (auto &BB : F) {
      for (auto &I : BB) {
        if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
          continue;

        auto *CB = dyn_cast<CallBase>(&I);
        auto *CalledFunction = CB->getCalledFunction();
        // Disregard indirect calls and intrinsics.
        if (!CalledFunction || CalledFunction->isIntrinsic())
          continue;

        StringRef CalleeName = CalledFunction->getName();
        bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
        for (const DILocation *DIL = I.getDebugLoc(); DIL;
             DIL = DIL->getInlinedAt()) {
          StringRef CallerName = DIL->getSubprogramLinkageName();
          assert(!CallerName.empty() &&
                 "Be sure to enable -fdebug-info-for-profiling");
          uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName);
          uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName);
          // Pretend that we are calling a function with GUID == 0 if we are
          // calling a heap allocation function.
          if (IsAlloc)
            CalleeGUID = 0;
          LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
          Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
          CalleeName = CallerName;
          // FIXME: Recognize other frames that are associated with heap
          // allocation functions. It may be too early to reset IsAlloc to
          // false here.
          IsAlloc = false;
        }
      }
    }
  }

  // Sort each call list by the source location and drop duplicate entries.
  for (auto &[CallerGUID, CallList] : Calls) {
    llvm::sort(CallList);
    CallList.erase(llvm::unique(CallList), CallList.end());
  }

  return Calls;
}

DenseMap<uint64_t, LocToLocMap>
memprof::computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader,
                           const TargetLibraryInfo &TLI) {
  DenseMap<uint64_t, LocToLocMap> UndriftMaps;

  DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromProfile =
      MemProfReader->getMemProfCallerCalleePairs();
  DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromIR =
      extractCallsFromIR(M, TLI);

  // Compute an undrift map for each CallerGUID.
  for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
    auto It = CallsFromProfile.find(CallerGUID);
    if (It == CallsFromProfile.end())
      continue;
    const auto &ProfileAnchors = It->second;

    LocToLocMap Matchings;
    longestCommonSequence<LineLocation, GlobalValue::GUID>(
        ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),
        [&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); });
    bool Inserted = UndriftMaps.try_emplace(CallerGUID, Matchings).second;

    // The insertion must succeed because we visit each GUID exactly once.
    assert(Inserted);
    (void)Inserted;
  }

  return UndriftMaps;
}
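
// For illustration: if the profile recorded a call to some callee GUID at
// line offset 3 within a caller, but later edits moved that call so it now
// sits at line offset 5 in the IR, the longest common sequence over
// (location, callee GUID) anchors pairs the two and records a profile-to-IR
// mapping from {3, col} to {5, col}, which later matching can use to undo
// the drift. (The offsets here are hypothetical.)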

static void
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
            const TargetLibraryInfo &TLI,
            std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
  auto &Ctx = M.getContext();
  // Previously we used getIRPGOFuncName() here. If F has local linkage,
  // getIRPGOFuncName() returns FuncName with a 'FileName;' prefix. But
  // llvm-profdata computes the GUID from the FuncName in DWARF, which lacks
  // that prefix, so local-linkage functions could not find their
  // MemProfRecord. Hence we use getName() now.
  // 'unique-internal-linkage-names' can make MemProf work better for local
  // linkage functions.
  auto FuncName = F.getName();
  auto FuncGUID = Function::getGUID(FuncName);
  std::optional<memprof::MemProfRecord> MemProfRec;
  auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
  if (Err) {
    handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
      auto Err = IPE.get();
      bool SkipWarning = false;
      LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
                        << ": ");
      if (Err == instrprof_error::unknown_function) {
        NumOfMemProfMissing++;
        SkipWarning = !PGOWarnMissing;
        LLVM_DEBUG(dbgs() << "unknown function");
      } else if (Err == instrprof_error::hash_mismatch) {
        NumOfMemProfMismatch++;
        SkipWarning =
            NoPGOWarnMismatch ||
            (NoPGOWarnMismatchComdatWeak &&
             (F.hasComdat() ||
              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
      }

      if (SkipWarning)
        return;

      std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
                         Twine(" Hash = ") + std::to_string(FuncGUID))
                            .str();

      Ctx.diagnose(
          DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
    });
    return;
  }

  NumOfMemProfFunc++;

  // Detect if there are non-zero column numbers in the profile. If not,
  // treat all column numbers as 0 when matching (i.e. ignore any non-zero
  // columns in the IR). The profiled binary might have been built with
  // column numbers disabled, for example.
  bool ProfileHasColumns = false;

  // Build maps of the location hash to all profile data with that leaf
  // location (allocation info and the callsites).
  std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
  // For the callsites we need to record the index of the associated frame in
  // the frame array (see comments below where the map entries are added).
  std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *, unsigned>>>
      LocHashToCallSites;
  for (auto &AI : MemProfRec->AllocSites) {
    NumOfMemProfAllocContextProfiles++;
    // Associate the allocation info with the leaf frame. The later matching
    // code will match any inlined call sequences in the IR with a longer
    // prefix of call stack frames.
    uint64_t StackId = computeStackId(AI.CallStack[0]);
    LocHashToAllocInfo[StackId].insert(&AI);
    ProfileHasColumns |= AI.CallStack[0].Column;
  }
  for (auto &CS : MemProfRec->CallSites) {
    NumOfMemProfCallSiteProfiles++;
    // Need to record all frames from leaf up to and including this function,
    // as any of these may or may not have been inlined at this point.
    unsigned Idx = 0;
    for (auto &StackFrame : CS) {
      uint64_t StackId = computeStackId(StackFrame);
      LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
      ProfileHasColumns |= StackFrame.Column;
      // Once we find this function, we can stop recording.
      if (StackFrame.Function == FuncGUID)
        break;
    }
    assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
  }

  auto GetOffset = [](const DILocation *DIL) {
    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
           0xffff;
  };
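
  // For example, for a call on line 107 of a function whose DISubprogram
  // begins at line 100, GetOffset returns 7. Matching on function-relative
  // line offsets (truncated to 16 bits) keeps stack ids stable when changes
  // elsewhere in the file shift the function's absolute line numbers.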

  // Now walk the instructions, looking up the associated profile data using
  // debug locations.
  for (auto &BB : F) {
    for (auto &I : BB) {
      if (I.isDebugOrPseudoInst())
        continue;
      // We are only interested in calls (allocation or interior call stack
      // context calls).
      auto *CI = dyn_cast<CallBase>(&I);
      if (!CI)
        continue;
      auto *CalledFunction = CI->getCalledFunction();
      if (CalledFunction && CalledFunction->isIntrinsic())
        continue;
      // List of call stack ids computed from the location hashes on debug
      // locations (leaf to inlined at root).
      SmallVector<uint64_t, 8> InlinedCallStack;
      // Was the leaf location found in one of the profile maps?
      bool LeafFound = false;
      // If leaf was found in a map, iterators pointing to its location in both
      // of the maps. It might exist in neither, one, or both (the latter case
      // can happen because we don't currently have discriminators to
      // distinguish the case when a single line/col maps to both an allocation
      // and another callsite).
      std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
          AllocInfoIter;
      std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *,
                                            unsigned>>>::iterator CallSitesIter;
      for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
           DIL = DIL->getInlinedAt()) {
        // Use C++ linkage name if possible. Need to compile with
        // -fdebug-info-for-profiling to get linkage name.
        StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
        if (Name.empty())
          Name = DIL->getScope()->getSubprogram()->getName();
        auto CalleeGUID = Function::getGUID(Name);
        auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
                                      ProfileHasColumns ? DIL->getColumn() : 0);
        // Check if we have found the profile's leaf frame. If yes, collect
        // the rest of the call's inlined context starting here. If not, see if
        // we find a match further up the inlined context (in case the profile
        // was missing debug frames at the leaf).
        if (!LeafFound) {
          AllocInfoIter = LocHashToAllocInfo.find(StackId);
          CallSitesIter = LocHashToCallSites.find(StackId);
          if (AllocInfoIter != LocHashToAllocInfo.end() ||
              CallSitesIter != LocHashToCallSites.end())
            LeafFound = true;
        }
        if (LeafFound)
          InlinedCallStack.push_back(StackId);
      }
      // If leaf not in either of the maps, skip inst.
      if (!LeafFound)
        continue;

      // First add !memprof metadata from allocation info, if we found the
      // instruction's leaf location in that map, and if the rest of the
      // instruction's locations match the prefix Frame locations on an
      // allocation context with the same leaf.
      if (AllocInfoIter != LocHashToAllocInfo.end()) {
        // Only consider allocations which support hinting.
        if (!isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI))
          continue;
        // We may match this instruction's location list to multiple MIB
        // contexts. Add them to a Trie specialized for trimming the contexts
        // to the minimal needed to disambiguate contexts with unique behavior.
        CallStackTrie AllocTrie;
        for (auto *AllocInfo : AllocInfoIter->second) {
          // Check the full inlined call stack against this one. If we found
          // and thus matched all frames on the call, include this MIB.
          if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
                                                 InlinedCallStack)) {
            NumOfMemProfMatchedAllocContexts++;
            uint64_t FullStackId = 0;
            if (ClPrintMemProfMatchInfo || MemProfReportHintedSizes)
              FullStackId = computeFullStackId(AllocInfo->CallStack);
            auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
            // Record information about the allocation if match info printing
            // was requested.
            if (ClPrintMemProfMatchInfo) {
              assert(FullStackId != 0);
              FullStackIdToAllocMatchInfo[FullStackId] = {
                  AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
            }
          }
        }
        // We might not have matched any to the full inlined call stack.
        // But if we did, create and attach metadata, or a function attribute
        // if all contexts have identical profiled behavior.
        if (!AllocTrie.empty()) {
          NumOfMemProfMatchedAllocs++;
          // MemprofMDAttached will be false if a function attribute was
          // attached.
          bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
          assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
          if (MemprofMDAttached) {
            // Add callsite metadata for the instruction's location list so
            // that it is simpler later on to identify which part of the MIB
            // contexts are from this particular instruction (including during
            // inlining, when the callsite metadata will be updated
            // appropriately).
            // FIXME: can this be changed to strip out the matching stack
            // context ids from the MIB contexts and not add any callsite
            // metadata here to save space?
            addCallsiteMetadata(I, InlinedCallStack, Ctx);
          }
        }
        continue;
      }

      // Otherwise, add callsite metadata. If we reach here then we found the
      // instruction's leaf location in the callsites map and not the
      // allocation map.
      assert(CallSitesIter != LocHashToCallSites.end());
      for (auto CallStackIdx : CallSitesIter->second) {
        // If we found and thus matched all frames on the call, create and
        // attach call stack metadata.
        if (stackFrameIncludesInlinedCallStack(
                *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
          NumOfMemProfMatchedCallSites++;
          addCallsiteMetadata(I, InlinedCallStack, Ctx);
          // Only need to find one with a matching call stack and add a single
          // callsite metadata.
          break;
        }
      }
    }
  }
}

MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
                               IntrusiveRefCntPtr<vfs::FileSystem> FS)
    : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
  if (!FS)
    this->FS = vfs::getRealFileSystem();
}
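
// MemProfUsePass::run below reads the indexed profile once per module and
// then annotates each function: readMemprof() attaches !memprof metadata
// (MIB contexts) to matched allocation calls and !callsite metadata to
// interior call sites, which downstream consumers (e.g., the
// MemProfContextDisambiguation pass) use to clone and hint allocations.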

PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
  // Return immediately if the module doesn't contain any function.
  if (M.empty())
    return PreservedAnalyses::all();

  LLVM_DEBUG(dbgs() << "Read in memory profile:");
  auto &Ctx = M.getContext();
  auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
  if (Error E = ReaderOrErr.takeError()) {
    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
      Ctx.diagnose(
          DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
    });
    return PreservedAnalyses::all();
  }

  std::unique_ptr<IndexedInstrProfReader> MemProfReader =
      std::move(ReaderOrErr.get());
  if (!MemProfReader) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(
        MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
    return PreservedAnalyses::all();
  }

  if (!MemProfReader->hasMemoryProfile()) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
                                          "Not a memory profile"));
    return PreservedAnalyses::all();
  }

  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  // Map from the stack hash of each allocation context in the function
  // profiles to the total profiled size (bytes), allocation type, and whether
  // we matched it to an allocation in the IR.
  std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;

  for (auto &F : M) {
    if (F.isDeclaration())
      continue;

    const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
    readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo);
  }

  if (ClPrintMemProfMatchInfo) {
    for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
      errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
             << " context with id " << Id << " has total profiled size "
             << Info.TotalSize << (Info.Matched ? " is" : " not")
             << " matched\n";
  }

  return PreservedAnalyses::none();
}