//===- MemProfiler.cpp - memory allocation and access profiler ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of MemProfiler. Memory accesses are instrumented
// to increment the access count held in a shadow memory location, or
// alternatively to call into the runtime. Memory intrinsic calls (memmove,
// memcpy, memset) are changed to call the memory profiling runtime version
// instead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LongestCommonSequence.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <map>
#include <set>

using namespace llvm;
using namespace llvm::memprof;

#define DEBUG_TYPE "memprof"

namespace llvm {
extern cl::opt<bool> PGOWarnMissing;
extern cl::opt<bool> NoPGOWarnMismatch;
extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
} // namespace llvm

constexpr int LLVM_MEM_PROFILER_VERSION = 1;

// Size of memory mapped to a single shadow location.
constexpr uint64_t DefaultMemGranularity = 64;

// Size of memory mapped to a single histogram bucket.
constexpr uint64_t HistogramGranularity = 8;

// Scale from granularity down to shadow size.
constexpr uint64_t DefaultShadowScale = 3;

constexpr char MemProfModuleCtorName[] = "memprof.module_ctor";
constexpr uint64_t MemProfCtorAndDtorPriority = 1;
// On Emscripten, the system needs more than one priority level for
// constructors.
constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50;
constexpr char MemProfInitName[] = "__memprof_init";
constexpr char MemProfVersionCheckNamePrefix[] =
    "__memprof_version_mismatch_check_v";

constexpr char MemProfShadowMemoryDynamicAddress[] =
    "__memprof_shadow_memory_dynamic_address";

constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";

constexpr char MemProfHistogramFlagVar[] = "__memprof_histogram";
// Command-line flags.

static cl::opt<bool> ClInsertVersionCheck(
    "memprof-guard-against-version-mismatch",
    cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,
    cl::init(true));

// This flag may need to be replaced with -f[no-]memprof-reads.
static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("memprof-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "memprof-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClUseCalls(
    "memprof-use-callbacks",
    cl::desc("Use callbacks instead of inline instrumentation sequences."),
    cl::Hidden, cl::init(false));

static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__memprof_"));

// These flags allow changing the shadow mapping.
// The shadow mapping looks like
//    Shadow = ((Mem & mask) >> scale) + offset

static cl::opt<int> ClMappingScale("memprof-mapping-scale",
                                   cl::desc("scale of memprof shadow mapping"),
                                   cl::Hidden, cl::init(DefaultShadowScale));

static cl::opt<int>
    ClMappingGranularity("memprof-mapping-granularity",
                         cl::desc("granularity of memprof shadow mapping"),
                         cl::Hidden, cl::init(DefaultMemGranularity));
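// For illustration, with the default granularity of 64 bytes and scale of 3,
// every 64-byte chunk of memory maps to an 8-byte shadow counter. For an
// example address:
//   Shadow = ((0x7f0012345678 & ~63) >> 3) + DynamicShadowOffset
//          = (0x7f0012345640 >> 3) + DynamicShadowOffset
//          = 0x0fe002468ac8 + DynamicShadowOffset
// so all accesses within one 64-byte chunk increment the same counter.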
static cl::opt<bool> ClStack("memprof-instrument-stack",
                             cl::desc("Instrument scalar stack variables"),
                             cl::Hidden, cl::init(false));

// Debug flags.

static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,
                            cl::init(0));

static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden,
                                        cl::desc("Debug func"));

static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"),
                               cl::Hidden, cl::init(-1));

static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
                               cl::Hidden, cl::init(-1));

// By default, disable matching of allocation profiles onto operator new calls
// that already explicitly pass a hot/cold hint, since we don't currently
// override these hints anyway.
static cl::opt<bool> ClMemProfMatchHotColdNew(
    "memprof-match-hot-cold-new",
    cl::desc(
        "Match allocation profiles onto existing hot/cold operator new calls"),
    cl::Hidden, cl::init(false));

static cl::opt<bool> ClHistogram("memprof-histogram",
                                 cl::desc("Collect access count histograms"),
                                 cl::Hidden, cl::init(false));

static cl::opt<bool>
    ClPrintMemProfMatchInfo("memprof-print-match-info",
                            cl::desc("Print matching stats for each allocation "
                                     "context in this module's profiles"),
                            cl::Hidden, cl::init(false));

static cl::opt<std::string>
    MemprofRuntimeDefaultOptions("memprof-runtime-default-options",
                                 cl::desc("The default memprof options"),
                                 cl::Hidden, cl::init(""));

static cl::opt<bool>
    SalvageStaleProfile("memprof-salvage-stale-profile",
                        cl::desc("Salvage stale MemProf profile"),
                        cl::init(false), cl::Hidden);

cl::opt<unsigned> MinClonedColdBytePercent(
    "memprof-cloning-cold-threshold", cl::init(100), cl::Hidden,
    cl::desc("Min percent of cold bytes to hint alloc cold during cloning"));

extern cl::opt<bool> MemProfReportHintedSizes;

static cl::opt<unsigned> MinMatchedColdBytePercent(
    "memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
    cl::desc("Min percent of cold bytes matched to hint allocation cold"));

// Instrumentation statistics
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");

// Matching statistics
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfMemProfMismatch,
          "Number of functions having mismatched memory profile hash.");
STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
STATISTIC(NumOfMemProfAllocContextProfiles,
          "Number of alloc contexts in memory profile.");
STATISTIC(NumOfMemProfCallSiteProfiles,
          "Number of callsites in memory profile.");
STATISTIC(NumOfMemProfMatchedAllocContexts,
          "Number of matched memory profile alloc contexts.");
STATISTIC(NumOfMemProfMatchedAllocs,
          "Number of matched memory profile allocs.");
STATISTIC(NumOfMemProfMatchedCallSites,
          "Number of matched memory profile callsites.");

namespace {

/// This struct defines the shadow mapping using the rule:
///   shadow = ((mem & mask) >> Scale) + DynamicShadowOffset.
struct ShadowMapping {
  ShadowMapping() {
    Scale = ClMappingScale;
    Granularity = ClHistogram ? HistogramGranularity : ClMappingGranularity;
    Mask = ~(Granularity - 1);
  }

  int Scale;
  int Granularity;
  uint64_t Mask; // Computed as ~(Granularity-1)
};

static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) {
  return TargetTriple.isOSEmscripten() ? MemProfEmscriptenCtorAndDtorPriority
                                       : MemProfCtorAndDtorPriority;
}

struct InterestingMemoryAccess {
  Value *Addr = nullptr;
  bool IsWrite;
  Type *AccessTy;
  Value *MaybeMask = nullptr;
};
/// Instrument the code in the module to profile memory accesses.
class MemProfiler {
public:
  MemProfiler(Module &M) {
    C = &(M.getContext());
    LongSize = M.getDataLayout().getPointerSizeInBits();
    IntptrTy = Type::getIntNTy(*C, LongSize);
    PtrTy = PointerType::getUnqual(*C);
  }

  /// If it is an interesting memory access, populate information
  /// about the access and return an InterestingMemoryAccess struct.
  /// Otherwise return std::nullopt.
  std::optional<InterestingMemoryAccess>
  isInterestingMemoryAccess(Instruction *I) const;

  void instrumentMop(Instruction *I, const DataLayout &DL,
                     InterestingMemoryAccess &Access);
  void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
                         Value *Addr, bool IsWrite);
  void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                   Instruction *I, Value *Addr, Type *AccessTy,
                                   bool IsWrite);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFunction(Function &F);
  bool maybeInsertMemProfInitAtFunctionEntry(Function &F);
  bool insertDynamicShadowAtFunctionEntry(Function &F);

private:
  void initializeCallbacks(Module &M);

  LLVMContext *C;
  int LongSize;
  Type *IntptrTy;
  PointerType *PtrTy;
  ShadowMapping Mapping;

  // This array is indexed by AccessIsWrite.
  FunctionCallee MemProfMemoryAccessCallback[2];

  FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset;
  Value *DynamicShadowOffset = nullptr;
};

class ModuleMemProfiler {
public:
  ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); }

  bool instrumentModule(Module &);

private:
  Triple TargetTriple;
  ShadowMapping Mapping;
  Function *MemProfCtorFunction = nullptr;
};

} // end anonymous namespace

MemProfilerPass::MemProfilerPass() = default;

PreservedAnalyses MemProfilerPass::run(Function &F,
                                       AnalysisManager<Function> &AM) {
  assert((!ClHistogram || ClMappingGranularity == DefaultMemGranularity) &&
         "Memprof with histogram only supports default mapping granularity");
  Module &M = *F.getParent();
  MemProfiler Profiler(M);
  if (Profiler.instrumentFunction(F))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}

ModuleMemProfilerPass::ModuleMemProfilerPass() = default;

PreservedAnalyses ModuleMemProfilerPass::run(Module &M,
                                             AnalysisManager<Module> &AM) {

  ModuleMemProfiler Profiler(M);
  if (Profiler.instrumentModule(M))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}

Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
  // (Shadow & mask) >> scale
  Shadow = IRB.CreateAnd(Shadow, Mapping.Mask);
  Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
  // ((Shadow & mask) >> scale) + offset
  assert(DynamicShadowOffset);
  return IRB.CreateAdd(Shadow, DynamicShadowOffset);
}
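// A sketch of the IR memToShadow emits for an address %addr with the default
// mapping parameters (mask ~63, scale 3); register names are illustrative:
//   %masked = and i64 %addr, -64
//   %scaled = lshr i64 %masked, 3
//   %shadow = add i64 %scaled, %dynamic_shadow_offset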
// Instrument memset/memmove/memcpy
void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  if (isa<MemTransferInst>(MI)) {
    IRB.CreateCall(isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy,
                   {MI->getOperand(0), MI->getOperand(1),
                    IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  } else if (isa<MemSetInst>(MI)) {
    IRB.CreateCall(
        MemProfMemset,
        {MI->getOperand(0),
         IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  }
  MI->eraseFromParent();
}
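// For example, with the default callback prefix a memcpy intrinsic such as
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 false)
// is replaced by a call into the profiling runtime (a sketch):
//   call ptr @__memprof_memcpy(ptr %dst, ptr %src, i64 %n)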
std::optional<InterestingMemoryAccess>
MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
  // Do not instrument the load fetching the dynamic shadow address.
  if (DynamicShadowOffset == I)
    return std::nullopt;

  InterestingMemoryAccess Access;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads)
      return std::nullopt;
    Access.IsWrite = false;
    Access.AccessTy = LI->getType();
    Access.Addr = LI->getPointerOperand();
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = SI->getValueOperand()->getType();
    Access.Addr = SI->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = RMW->getValOperand()->getType();
    Access.Addr = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = XCHG->getCompareOperand()->getType();
    Access.Addr = XCHG->getPointerOperand();
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    auto *F = CI->getCalledFunction();
    if (F && (F->getIntrinsicID() == Intrinsic::masked_load ||
              F->getIntrinsicID() == Intrinsic::masked_store)) {
      unsigned OpOffset = 0;
      if (F->getIntrinsicID() == Intrinsic::masked_store) {
        if (!ClInstrumentWrites)
          return std::nullopt;
        // Masked store has an initial operand for the value.
        OpOffset = 1;
        Access.AccessTy = CI->getArgOperand(0)->getType();
        Access.IsWrite = true;
      } else {
        if (!ClInstrumentReads)
          return std::nullopt;
        Access.AccessTy = CI->getType();
        Access.IsWrite = false;
      }

      auto *BasePtr = CI->getOperand(0 + OpOffset);
      Access.MaybeMask = CI->getOperand(2 + OpOffset);
      Access.Addr = BasePtr;
    }
  }

  if (!Access.Addr)
    return std::nullopt;

  // Do not instrument accesses from different address spaces; we cannot deal
  // with them.
  Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType());
  if (PtrTy->getPointerAddressSpace() != 0)
    return std::nullopt;

  // Ignore swifterror addresses.
  // swifterror memory addresses are mem2reg promoted by instruction
  // selection. As such they cannot have regular uses like an instrumentation
  // function and it makes no sense to track them as memory.
  if (Access.Addr->isSwiftError())
    return std::nullopt;

  // Peel off GEPs and BitCasts.
  auto *Addr = Access.Addr->stripInBoundsOffsets();

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
    // Do not instrument PGO counter updates.
    if (GV->hasSection()) {
      StringRef SectionName = GV->getSection();
      // Check if the global is in the PGO counters section.
      auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();
      if (SectionName.ends_with(
              getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
        return std::nullopt;
    }

    // Do not instrument accesses to LLVM internal variables.
    if (GV->getName().starts_with("__llvm"))
      return std::nullopt;
  }

  return Access;
}

void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                              Instruction *I, Value *Addr,
                                              Type *AccessTy, bool IsWrite) {
  auto *VTy = cast<FixedVectorType>(AccessTy);
  unsigned Num = VTy->getNumElements();
  auto *Zero = ConstantInt::get(IntptrTy, 0);
  for (unsigned Idx = 0; Idx < Num; ++Idx) {
    Value *InstrumentedAddress = nullptr;
    Instruction *InsertBefore = I;
    if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
      // dyn_cast as we might get UndefValue
      if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
        if (Masked->isZero())
          // Mask is constant false, so no instrumentation needed.
          continue;
        // If we have a true or undef value, fall through to instrumentAddress
        // with InsertBefore == I.
      }
    } else {
      IRBuilder<> IRB(I);
      Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
      Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
      InsertBefore = ThenTerm;
    }

    IRBuilder<> IRB(InsertBefore);
    InstrumentedAddress =
        IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
    instrumentAddress(I, InsertBefore, InstrumentedAddress, IsWrite);
  }
}

void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
                                InterestingMemoryAccess &Access) {
  // Skip instrumentation of stack accesses unless requested.
  if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) {
    if (Access.IsWrite)
      ++NumSkippedStackWrites;
    else
      ++NumSkippedStackReads;
    return;
  }

  if (Access.IsWrite)
    NumInstrumentedWrites++;
  else
    NumInstrumentedReads++;

  if (Access.MaybeMask) {
    instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,
                                Access.AccessTy, Access.IsWrite);
  } else {
    // Since the access counts will be accumulated across the entire
    // allocation, we only update the shadow access count for the first
    // location and thus don't need to worry about alignment and type size.
    instrumentAddress(I, I, Access.Addr, Access.IsWrite);
  }
}

void MemProfiler::instrumentAddress(Instruction *OrigIns,
                                    Instruction *InsertBefore, Value *Addr,
                                    bool IsWrite) {
  IRBuilder<> IRB(InsertBefore);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);

  if (ClUseCalls) {
    IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong);
    return;
  }

  Type *ShadowTy = ClHistogram ? Type::getInt8Ty(*C) : Type::getInt64Ty(*C);
  Type *ShadowPtrTy = PointerType::get(*C, 0);

  Value *ShadowPtr = memToShadow(AddrLong, IRB);
  Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);
  Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);
  // If we are profiling with histograms, add overflow protection at 255.
  if (ClHistogram) {
    Value *MaxCount = ConstantInt::get(Type::getInt8Ty(*C), 255);
    Value *Cmp = IRB.CreateICmpULT(ShadowValue, MaxCount);
    Instruction *IncBlock =
        SplitBlockAndInsertIfThen(Cmp, InsertBefore, /*Unreachable=*/false);
    IRB.SetInsertPoint(IncBlock);
  }
  Value *Inc = ConstantInt::get(ShadowTy, 1);
  ShadowValue = IRB.CreateAdd(ShadowValue, Inc);
  IRB.CreateStore(ShadowValue, ShadowAddr);
}
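// Without histograms, the inline sequence emitted before the access is a
// plain 64-bit counter increment of the shadow location (a sketch, with
// illustrative register names):
//   %addr.int = ptrtoint ptr %addr to i64
//   ; ... memToShadow arithmetic ...
//   %shadow.ptr = inttoptr i64 %shadow to ptr
//   %count = load i64, ptr %shadow.ptr
//   %count.inc = add i64 %count, 1
//   store i64 %count.inc, ptr %shadow.ptr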
// Create the variable for the profile file name.
void createProfileFileNameVar(Module &M) {
  const MDString *MemProfFilename =
      dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename"));
  if (!MemProfFilename)
    return;
  assert(!MemProfFilename->getString().empty() &&
         "Unexpected MemProfProfileFilename metadata with empty string");
  Constant *ProfileNameConst = ConstantDataArray::getString(
      M.getContext(), MemProfFilename->getString(), true);
  GlobalVariable *ProfileNameVar = new GlobalVariable(
      M, ProfileNameConst->getType(), /*isConstant=*/true,
      GlobalValue::WeakAnyLinkage, ProfileNameConst, MemProfFilenameVar);
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
    ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar));
  }
}

// Set MemprofHistogramFlag as a global variable in the IR. This makes it
// accessible to the runtime, changing shadow count behavior.
void createMemprofHistogramFlagVar(Module &M) {
  const StringRef VarName(MemProfHistogramFlagVar);
  Type *IntTy1 = Type::getInt1Ty(M.getContext());
  auto MemprofHistogramFlag = new GlobalVariable(
      M, IntTy1, true, GlobalValue::WeakAnyLinkage,
      Constant::getIntegerValue(IntTy1, APInt(1, ClHistogram)), VarName);
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    MemprofHistogramFlag->setLinkage(GlobalValue::ExternalLinkage);
    MemprofHistogramFlag->setComdat(M.getOrInsertComdat(VarName));
  }
  appendToCompilerUsed(M, MemprofHistogramFlag);
}

void createMemprofDefaultOptionsVar(Module &M) {
  Constant *OptionsConst = ConstantDataArray::getString(
      M.getContext(), MemprofRuntimeDefaultOptions, /*AddNull=*/true);
  GlobalVariable *OptionsVar =
      new GlobalVariable(M, OptionsConst->getType(), /*isConstant=*/true,
                         GlobalValue::WeakAnyLinkage, OptionsConst,
                         "__memprof_default_options_str");
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    OptionsVar->setLinkage(GlobalValue::ExternalLinkage);
    OptionsVar->setComdat(M.getOrInsertComdat(OptionsVar->getName()));
  }
}
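// As an illustration, these helpers emit globals along the lines of the
// following sketch (exact initializers depend on the build configuration):
//   @__memprof_profile_filename = constant [...] c"...\00", comdat
//   @__memprof_histogram = constant i1 false, comdat
//   @__memprof_default_options_str = constant [1 x i8] zeroinitializer, comdat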
bool ModuleMemProfiler::instrumentModule(Module &M) {

  // Create a module constructor.
  std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION);
  std::string VersionCheckName =
      ClInsertVersionCheck ? (MemProfVersionCheckNamePrefix + MemProfVersion)
                           : "";
  std::tie(MemProfCtorFunction, std::ignore) =
      createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName,
                                          MemProfInitName, /*InitArgTypes=*/{},
                                          /*InitArgs=*/{}, VersionCheckName);

  const uint64_t Priority = getCtorAndDtorPriority(TargetTriple);
  appendToGlobalCtors(M, MemProfCtorFunction, Priority);

  createProfileFileNameVar(M);

  createMemprofHistogramFlagVar(M);

  createMemprofDefaultOptionsVar(M);

  return true;
}

void MemProfiler::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(*C);

  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
    const std::string TypeStr = AccessIsWrite ? "store" : "load";
    const std::string HistPrefix = ClHistogram ? "hist_" : "";

    SmallVector<Type *, 2> Args1{1, IntptrTy};
    MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction(
        ClMemoryAccessCallbackPrefix + HistPrefix + TypeStr,
        FunctionType::get(IRB.getVoidTy(), Args1, false));
  }
  MemProfMemmove = M.getOrInsertFunction(
      ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy);
  MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy",
                                        PtrTy, PtrTy, PtrTy, IntptrTy);
  MemProfMemset =
      M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", PtrTy,
                            PtrTy, IRB.getInt32Ty(), IntptrTy);
}

bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) {
  // For each NSObject descendant having a +load method, this method is invoked
  // by the ObjC runtime before any of the static constructors is called.
  // Therefore we need to instrument such methods with a call to __memprof_init
  // at the beginning in order to initialize our runtime before any access to
  // the shadow memory.
  // We cannot just ignore these methods, because they may call other
  // instrumented functions.
  if (F.getName().contains(" load]")) {
    FunctionCallee MemProfInitFunction =
        declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {});
    IRBuilder<> IRB(&F.front(), F.front().begin());
    IRB.CreateCall(MemProfInitFunction, {});
    return true;
  }
  return false;
}

bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) {
  IRBuilder<> IRB(&F.front().front());
  Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
      MemProfShadowMemoryDynamicAddress, IntptrTy);
  if (F.getParent()->getPICLevel() == PICLevel::NotPIC)
    cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true);
  DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
  return true;
}
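// With the default "__memprof_" prefix, the callbacks resolved above are
// __memprof_load / __memprof_store (or __memprof_hist_load /
// __memprof_hist_store in histogram mode), plus __memprof_memmove,
// __memprof_memcpy, and __memprof_memset. Each instrumented function also
// begins by loading the shadow base (a sketch):
//   %dyn.shadow = load i64, ptr @__memprof_shadow_memory_dynamic_address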
bool MemProfiler::instrumentFunction(Function &F) {
  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
    return false;
  if (ClDebugFunc == F.getName())
    return false;
  if (F.getName().starts_with("__memprof_"))
    return false;

  bool FunctionModified = false;

  // If needed, insert __memprof_init.
  // This function needs to be called even if the function body is not
  // instrumented.
  if (maybeInsertMemProfInitAtFunctionEntry(F))
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n");

  initializeCallbacks(*F.getParent());

  SmallVector<Instruction *, 16> ToInstrument;

  // Fill the set of memory operations to instrument.
  for (auto &BB : F) {
    for (auto &Inst : BB) {
      if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst))
        ToInstrument.push_back(&Inst);
    }
  }

  if (ToInstrument.empty()) {
    LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified
                      << " " << F << "\n");

    return FunctionModified;
  }

  FunctionModified |= insertDynamicShadowAtFunctionEntry(F);

  int NumInstrumented = 0;
  for (auto *Inst : ToInstrument) {
    if (ClDebugMin < 0 || ClDebugMax < 0 ||
        (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
      std::optional<InterestingMemoryAccess> Access =
          isInterestingMemoryAccess(Inst);
      if (Access)
        instrumentMop(Inst, F.getDataLayout(), *Access);
      else
        instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
    }
    NumInstrumented++;
  }

  if (NumInstrumented > 0)
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " "
                    << F << "\n");

  return FunctionModified;
}

static void addCallsiteMetadata(Instruction &I,
                                ArrayRef<uint64_t> InlinedCallStack,
                                LLVMContext &Ctx) {
  I.setMetadata(LLVMContext::MD_callsite,
                buildCallstackMetadata(InlinedCallStack, Ctx));
}

static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
                               uint32_t Column) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  HashBuilder.add(Function, LineOffset, Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  std::memcpy(&Id, Hash.data(), sizeof(Hash));
  return Id;
}

static uint64_t computeStackId(const memprof::Frame &Frame) {
  return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
}

// Helper to generate a single hash id for a given callstack, used for emitting
// matching statistics and useful for uniquing such statistics across modules.
static uint64_t computeFullStackId(ArrayRef<Frame> CallStack) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  for (auto &F : CallStack)
    HashBuilder.add(F.Function, F.LineOffset, F.Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  std::memcpy(&Id, Hash.data(), sizeof(Hash));
  return Id;
}
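// Note that both hash helpers fold (Function GUID, LineOffset, Column)
// triples into a BLAKE3 digest truncated to 8 bytes, so for a single-frame
// call stack computeFullStackId(Stack) equals computeStackId(Stack[0]).
// Because the inputs are just GUIDs and debug locations, the resulting ids
// are stable across modules as long as the debug info agrees.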
static AllocationType addCallStack(CallStackTrie &AllocTrie,
                                   const AllocationInfo *AllocInfo,
                                   uint64_t FullStackId) {
  SmallVector<uint64_t> StackIds;
  for (const auto &StackFrame : AllocInfo->CallStack)
    StackIds.push_back(computeStackId(StackFrame));
  auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
                                AllocInfo->Info.getAllocCount(),
                                AllocInfo->Info.getTotalLifetime());
  std::vector<ContextTotalSize> ContextSizeInfo;
  if (MemProfReportHintedSizes || MinClonedColdBytePercent < 100) {
    auto TotalSize = AllocInfo->Info.getTotalSize();
    assert(TotalSize);
    assert(FullStackId != 0);
    ContextSizeInfo.push_back({FullStackId, TotalSize});
  }
  AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo));
  return AllocType;
}

// Helper to compare the InlinedCallStack computed from an instruction's debug
// info to a list of Frames from profile data (either the allocation data or a
// callsite). For callsites, the StartIndex to use in the Frame array may be
// non-zero.
static bool
stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
                                   ArrayRef<uint64_t> InlinedCallStack) {
  auto StackFrame = ProfileCallStack.begin();
  auto InlCallStackIter = InlinedCallStack.begin();
  for (; StackFrame != ProfileCallStack.end() &&
         InlCallStackIter != InlinedCallStack.end();
       ++StackFrame, ++InlCallStackIter) {
    uint64_t StackId = computeStackId(*StackFrame);
    if (StackId != *InlCallStackIter)
      return false;
  }
  // Return true if we found and matched all stack ids from the call
  // instruction.
  return InlCallStackIter == InlinedCallStack.end();
}

static bool isAllocationWithHotColdVariant(const Function *Callee,
                                           const TargetLibraryInfo &TLI) {
  if (!Callee)
    return false;
  LibFunc Func;
  if (!TLI.getLibFunc(*Callee, Func))
    return false;
  switch (Func) {
  case LibFunc_Znwm:
  case LibFunc_ZnwmRKSt9nothrow_t:
  case LibFunc_ZnwmSt11align_val_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
  case LibFunc_Znam:
  case LibFunc_ZnamRKSt9nothrow_t:
  case LibFunc_ZnamSt11align_val_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
  case LibFunc_size_returning_new:
  case LibFunc_size_returning_new_aligned:
    return true;
  case LibFunc_Znwm12__hot_cold_t:
  case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_Znam12__hot_cold_t:
  case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_size_returning_new_hot_cold:
  case LibFunc_size_returning_new_aligned_hot_cold:
    return ClMemProfMatchHotColdNew;
  default:
    return false;
  }
}
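// For reference, the mangled names above decode as follows: _Znwm is
// operator new(unsigned long) and _Znam is operator new[](unsigned long);
// the suffixes denote extra parameters, e.g. St11align_val_t is
// std::align_val_t and 12__hot_cold_t is the __hot_cold_t hint. The second
// group already takes an explicit hot/cold hint and is only matched under
// -memprof-match-hot-cold-new.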
struct AllocMatchInfo {
  uint64_t TotalSize = 0;
  AllocationType AllocType = AllocationType::None;
  bool Matched = false;
};

DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
                            function_ref<bool(uint64_t)> IsPresentInProfile) {
  DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls;

  auto GetOffset = [](const DILocation *DIL) {
    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
           0xffff;
  };

  for (Function &F : M) {
    if (F.isDeclaration())
      continue;

    for (auto &BB : F) {
      for (auto &I : BB) {
        if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
          continue;

        auto *CB = dyn_cast<CallBase>(&I);
        auto *CalledFunction = CB->getCalledFunction();
        // Disregard indirect calls and intrinsics.
        if (!CalledFunction || CalledFunction->isIntrinsic())
          continue;

        StringRef CalleeName = CalledFunction->getName();
        // True if we are calling a heap allocation function that supports
        // hot/cold variants.
        bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
        // True for the first iteration below, indicating that we are looking
        // at a leaf node.
        bool IsLeaf = true;
        for (const DILocation *DIL = I.getDebugLoc(); DIL;
             DIL = DIL->getInlinedAt()) {
          StringRef CallerName = DIL->getSubprogramLinkageName();
          assert(!CallerName.empty() &&
                 "Be sure to enable -fdebug-info-for-profiling");
          uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName);
          uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName);
          // Pretend that we are calling a function with GUID == 0 if we are
          // in the inline stack leading to a heap allocation function.
          if (IsAlloc) {
            if (IsLeaf) {
              // For leaf nodes, set CalleeGUID to 0 without consulting
              // IsPresentInProfile.
              CalleeGUID = 0;
            } else if (!IsPresentInProfile(CalleeGUID)) {
              // In addition to the leaf case above, continue to set CalleeGUID
              // to 0 as long as we don't see CalleeGUID in the profile.
              CalleeGUID = 0;
            } else {
              // Once we encounter a callee that exists in the profile, stop
              // setting CalleeGUID to 0.
              IsAlloc = false;
            }
          }

          LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
          Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
          CalleeName = CallerName;
          IsLeaf = false;
        }
      }
    }
  }

  // Sort each call list by source location and remove duplicate entries.
  for (auto &[CallerGUID, CallList] : Calls) {
    llvm::sort(CallList);
    CallList.erase(llvm::unique(CallList), CallList.end());
  }

  return Calls;
}

DenseMap<uint64_t, LocToLocMap>
memprof::computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader,
                           const TargetLibraryInfo &TLI) {
  DenseMap<uint64_t, LocToLocMap> UndriftMaps;

  DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromProfile =
      MemProfReader->getMemProfCallerCalleePairs();
  DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromIR =
      extractCallsFromIR(M, TLI, [&](uint64_t GUID) {
        return CallsFromProfile.contains(GUID);
      });

  // Compute an undrift map for each CallerGUID.
  for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
    auto It = CallsFromProfile.find(CallerGUID);
    if (It == CallsFromProfile.end())
      continue;
    const auto &ProfileAnchors = It->second;

    LocToLocMap Matchings;
    longestCommonSequence<LineLocation, GlobalValue::GUID>(
        ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),
        [&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); });
    bool Inserted = UndriftMaps.try_emplace(CallerGUID, Matchings).second;

    // The insertion must succeed because we visit each GUID exactly once.
    assert(Inserted);
    (void)Inserted;
  }

  return UndriftMaps;
}
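// A worked example of undrifting (with hypothetical offsets): suppose the
// profile recorded a call to callee G at function-relative line offset 10,
// but source edits have since moved that call to offset 12 in the current IR.
// The longest common sequence over the callee-GUID anchors pairs the two call
// sites, so Matchings maps {10, col} -> {12, col}, and the stale profile
// locations are rewritten to the new offsets below before matching.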
// Given a MemProfRecord, undrift all the source locations present in the
// record in place.
static void
undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
                     memprof::MemProfRecord &MemProfRec) {
  // Undrift a call stack in place.
  auto UndriftCallStack = [&](std::vector<Frame> &CallStack) {
    for (auto &F : CallStack) {
      auto I = UndriftMaps.find(F.Function);
      if (I == UndriftMaps.end())
        continue;
      auto J = I->second.find(LineLocation(F.LineOffset, F.Column));
      if (J == I->second.end())
        continue;
      auto &NewLoc = J->second;
      F.LineOffset = NewLoc.LineOffset;
      F.Column = NewLoc.Column;
    }
  };

  for (auto &AS : MemProfRec.AllocSites)
    UndriftCallStack(AS.CallStack);

  for (auto &CS : MemProfRec.CallSites)
    UndriftCallStack(CS);
}

static void
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
            const TargetLibraryInfo &TLI,
            std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
            DenseMap<uint64_t, LocToLocMap> &UndriftMaps) {
  auto &Ctx = M.getContext();
  // Previously we used getIRPGOFuncName() here. If F has local linkage,
  // getIRPGOFuncName() returns FuncName with a 'FileName;' prefix. But
  // llvm-profdata creates the GUID from the FuncName in DWARF, which doesn't
  // contain the FileName prefix, so local linkage functions could not find
  // their MemProfRecord. That is why we use getName() now.
  // 'unique-internal-linkage-names' can make MemProf work better for local
  // linkage functions.
  auto FuncName = F.getName();
  auto FuncGUID = Function::getGUID(FuncName);
  std::optional<memprof::MemProfRecord> MemProfRec;
  auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
  if (Err) {
    handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
      auto Err = IPE.get();
      bool SkipWarning = false;
      LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
                        << ": ");
      if (Err == instrprof_error::unknown_function) {
        NumOfMemProfMissing++;
        SkipWarning = !PGOWarnMissing;
        LLVM_DEBUG(dbgs() << "unknown function");
      } else if (Err == instrprof_error::hash_mismatch) {
        NumOfMemProfMismatch++;
        SkipWarning =
            NoPGOWarnMismatch ||
            (NoPGOWarnMismatchComdatWeak &&
             (F.hasComdat() ||
              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
      }

      if (SkipWarning)
        return;

      std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
                         Twine(" Hash = ") + std::to_string(FuncGUID))
                            .str();

      Ctx.diagnose(
          DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
    });
    return;
  }

  NumOfMemProfFunc++;

  // If requested, undrift MemProfRecord so that the source locations in it
  // match those in the IR.
  if (SalvageStaleProfile)
    undriftMemProfRecord(UndriftMaps, *MemProfRec);

  // Detect if there are non-zero column numbers in the profile. If not,
  // treat all column numbers as 0 when matching (i.e. ignore any non-zero
  // columns in the IR). The profiled binary might have been built with
  // column numbers disabled, for example.
  bool ProfileHasColumns = false;
  // Build maps of the location hash to all profile data with that leaf
  // location (allocation info and the callsites).
  std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
  // A hash function so that std::unordered_set<ArrayRef<Frame>> works.
  struct CallStackHash {
    size_t operator()(ArrayRef<Frame> CS) const {
      return computeFullStackId(CS);
    }
  };
  // For the callsites we need to record slices of the frame array (see
  // comments below where the map entries are added).
  std::map<uint64_t, std::unordered_set<ArrayRef<Frame>, CallStackHash>>
      LocHashToCallSites;
  for (auto &AI : MemProfRec->AllocSites) {
    NumOfMemProfAllocContextProfiles++;
    // Associate the allocation info with the leaf frame. The later matching
    // code will match any inlined call sequences in the IR with a longer
    // prefix of call stack frames.
    uint64_t StackId = computeStackId(AI.CallStack[0]);
    LocHashToAllocInfo[StackId].insert(&AI);
    ProfileHasColumns |= AI.CallStack[0].Column;
  }
  for (auto &CS : MemProfRec->CallSites) {
    NumOfMemProfCallSiteProfiles++;
    // Need to record all frames from leaf up to and including this function,
    // as any of these may or may not have been inlined at this point.
    unsigned Idx = 0;
    for (auto &StackFrame : CS) {
      uint64_t StackId = computeStackId(StackFrame);
      LocHashToCallSites[StackId].insert(ArrayRef<Frame>(CS).drop_front(Idx++));
      ProfileHasColumns |= StackFrame.Column;
      // Once we find this function, we can stop recording.
      if (StackFrame.Function == FuncGUID)
        break;
    }
    assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
  }

  auto GetOffset = [](const DILocation *DIL) {
    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
           0xffff;
  };
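  // To illustrate the matching below with a hypothetical example: if bar()
  // contains the allocation call and was inlined into foo(), the
  // instruction's debug location list walks leaf to root and produces stack
  // ids for [bar:offset.col, foo:offset.col]. An allocation context in the
  // profile whose leading frames hash to those same ids (possibly continuing
  // further up into foo's callers) is considered a match.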
  // Now walk the instructions, looking up the associated profile data using
  // debug locations.
  for (auto &BB : F) {
    for (auto &I : BB) {
      if (I.isDebugOrPseudoInst())
        continue;
      // We are only interested in calls (allocation or interior call stack
      // context calls).
      auto *CI = dyn_cast<CallBase>(&I);
      if (!CI)
        continue;
      auto *CalledFunction = CI->getCalledFunction();
      if (CalledFunction && CalledFunction->isIntrinsic())
        continue;
      // List of call stack ids computed from the location hashes on debug
      // locations (leaf to inlined at root).
      SmallVector<uint64_t, 8> InlinedCallStack;
      // Was the leaf location found in one of the profile maps?
      bool LeafFound = false;
      // If leaf was found in a map, iterators pointing to its location in both
      // of the maps. It might exist in neither, one, or both (the latter case
      // can happen because we don't currently have discriminators to
      // distinguish the case when a single line/col maps to both an allocation
      // and another callsite).
      std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
          AllocInfoIter;
      decltype(LocHashToCallSites)::iterator CallSitesIter;
      for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
           DIL = DIL->getInlinedAt()) {
        // Use C++ linkage name if possible. Need to compile with
        // -fdebug-info-for-profiling to get linkage name.
        StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
        if (Name.empty())
          Name = DIL->getScope()->getSubprogram()->getName();
        auto CalleeGUID = Function::getGUID(Name);
        auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
                                      ProfileHasColumns ? DIL->getColumn() : 0);
        // Check if we have found the profile's leaf frame. If yes, collect
        // the rest of the call's inlined context starting here. If not, see if
        // we find a match further up the inlined context (in case the profile
        // was missing debug frames at the leaf).
        if (!LeafFound) {
          AllocInfoIter = LocHashToAllocInfo.find(StackId);
          CallSitesIter = LocHashToCallSites.find(StackId);
          if (AllocInfoIter != LocHashToAllocInfo.end() ||
              CallSitesIter != LocHashToCallSites.end())
            LeafFound = true;
        }
        if (LeafFound)
          InlinedCallStack.push_back(StackId);
      }
      // If leaf not in either of the maps, skip inst.
      if (!LeafFound)
        continue;

      // First add !memprof metadata from allocation info, if we found the
      // instruction's leaf location in that map, and if the rest of the
      // instruction's locations match the prefix Frame locations on an
      // allocation context with the same leaf.
      if (AllocInfoIter != LocHashToAllocInfo.end()) {
        // Only consider allocations which support hinting.
        if (!isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI))
          continue;
        // We may match this instruction's location list to multiple MIB
        // contexts. Add them to a Trie specialized for trimming the contexts
        // to the minimal needed to disambiguate contexts with unique behavior.
        CallStackTrie AllocTrie;
        uint64_t TotalSize = 0;
        uint64_t TotalColdSize = 0;
        for (auto *AllocInfo : AllocInfoIter->second) {
          // Check the full inlined call stack against this one.
          // If we found and thus matched all frames on the call, include
          // this MIB.
          if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
                                                 InlinedCallStack)) {
            NumOfMemProfMatchedAllocContexts++;
            uint64_t FullStackId = 0;
            if (ClPrintMemProfMatchInfo || MemProfReportHintedSizes ||
                MinClonedColdBytePercent < 100)
              FullStackId = computeFullStackId(AllocInfo->CallStack);
            auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
            TotalSize += AllocInfo->Info.getTotalSize();
            if (AllocType == AllocationType::Cold)
              TotalColdSize += AllocInfo->Info.getTotalSize();
            // Record information about the allocation if match info printing
            // was requested.
            if (ClPrintMemProfMatchInfo) {
              assert(FullStackId != 0);
              FullStackIdToAllocMatchInfo[FullStackId] = {
                  AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
            }
          }
        }
        // If the threshold for the percent of cold bytes is less than 100%,
        // and not all bytes are cold, see if we should still hint this
        // allocation as cold without context sensitivity.
        if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 &&
            TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) {
          AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold,
                                                "dominant");
          continue;
        }

        // We might not have matched any to the full inlined call stack.
        // But if we did, create and attach metadata, or a function attribute
        // if all contexts have identical profiled behavior.
        if (!AllocTrie.empty()) {
          NumOfMemProfMatchedAllocs++;
          // MemprofMDAttached will be false if a function attribute was
          // attached.
          bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
          assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
          if (MemprofMDAttached) {
            // Add callsite metadata for the instruction's location list so
            // that it is simpler later on to identify which part of the MIB
            // contexts are from this particular instruction (including during
            // inlining, when the callsite metadata will be updated
            // appropriately).
            // FIXME: can this be changed to strip out the matching stack
            // context ids from the MIB contexts and not add any callsite
            // metadata here to save space?
            addCallsiteMetadata(I, InlinedCallStack, Ctx);
          }
        }
        continue;
      }

      // Otherwise, add callsite metadata. If we reach here then we found the
      // instruction's leaf location in the callsites map and not the
      // allocation map.
      assert(CallSitesIter != LocHashToCallSites.end());
      for (auto CallStackIdx : CallSitesIter->second) {
        // If we found and thus matched all frames on the call, create and
        // attach call stack metadata.
        if (stackFrameIncludesInlinedCallStack(CallStackIdx,
                                               InlinedCallStack)) {
          NumOfMemProfMatchedCallSites++;
          addCallsiteMetadata(I, InlinedCallStack, Ctx);
          // Only need to find one with a matching call stack and add a single
          // callsite metadata.
          break;
        }
      }
    }
  }
}
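// For reference, a successfully matched allocation call ends up carrying
// metadata along these lines (a sketch; ids and types are illustrative):
//   %call = call ptr @_Znwm(i64 32), !memprof !0, !callsite !3
//   !0 = !{!1}                       ; list of MIB nodes
//   !1 = !{!2, !"cold"}              ; call stack + allocation type
//   !2 = !{i64 123, i64 456}         ; stack ids, leaf first
//   !3 = !{i64 123}                  ; this instruction's stack ids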
MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
                               IntrusiveRefCntPtr<vfs::FileSystem> FS)
    : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
  if (!FS)
    this->FS = vfs::getRealFileSystem();
}

PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
  // Return immediately if the module doesn't contain any function.
  if (M.empty())
    return PreservedAnalyses::all();

  LLVM_DEBUG(dbgs() << "Read in memory profile:");
  auto &Ctx = M.getContext();
  auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
  if (Error E = ReaderOrErr.takeError()) {
    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
      Ctx.diagnose(
          DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
    });
    return PreservedAnalyses::all();
  }

  std::unique_ptr<IndexedInstrProfReader> MemProfReader =
      std::move(ReaderOrErr.get());
  if (!MemProfReader) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(
        MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
    return PreservedAnalyses::all();
  }

  if (!MemProfReader->hasMemoryProfile()) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
                                          "Not a memory profile"));
    return PreservedAnalyses::all();
  }

  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
  DenseMap<uint64_t, LocToLocMap> UndriftMaps;
  if (SalvageStaleProfile)
    UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
  // Map from the stack hash of each allocation context in the function
  // profiles to the total profiled size (bytes), allocation type, and whether
  // we matched it to an allocation in the IR.
  std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;

  for (auto &F : M) {
    if (F.isDeclaration())
      continue;

    const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
    readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
                UndriftMaps);
  }

  if (ClPrintMemProfMatchInfo) {
    for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
      errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
             << " context with id " << Id << " has total profiled size "
             << Info.TotalSize << (Info.Matched ? " is" : " not")
             << " matched\n";
  }

  return PreservedAnalyses::none();
}
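// Example end-to-end usage (a sketch; exact flag spellings and raw profile
// file names may differ across LLVM versions): instrument with clang, run the
// binary to produce a raw memory profile, index it with llvm-profdata, then
// feed it back through this pass:
//   clang -fmemory-profile -fdebug-info-for-profiling -g app.cpp -o app
//   ./app && llvm-profdata merge <raw profiles> -o memprof.profdata
//   opt -passes='memprof-use<profile-filename=memprof.profdata>' app.bc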