//===- MemProfiler.cpp - memory allocation and access profiler ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of MemProfiler. Memory accesses are instrumented
// to increment the access count held in a shadow memory location, or
// alternatively to call into the runtime. Memory intrinsic calls (memmove,
// memcpy, memset) are changed to call the memory profiling runtime version
// instead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LongestCommonSequence.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <map>
#include <set>

using namespace llvm;
using namespace llvm::memprof;

#define DEBUG_TYPE "memprof"

// PGO warning options that are defined in another translation unit and only
// declared here.
namespace llvm {
extern cl::opt<bool> PGOWarnMissing;
extern cl::opt<bool> NoPGOWarnMismatch;
extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
} // namespace llvm

constexpr int LLVM_MEM_PROFILER_VERSION = 1;

// Size of memory mapped to a single shadow location. Also the default value of
// -memprof-mapping-granularity.
constexpr uint64_t DefaultMemGranularity = 64;

// Size of memory mapped to a single histogram bucket. ShadowMapping uses this
// instead of the command-line granularity when -memprof-histogram is set.
constexpr uint64_t HistogramGranularity = 8;

// Scale from granularity down to shadow size. Also the default value of
// -memprof-mapping-scale.
constexpr uint64_t DefaultShadowScale = 3;

constexpr char MemProfModuleCtorName[] = "memprof.module_ctor";
constexpr uint64_t MemProfCtorAndDtorPriority = 1;
// On Emscripten, the system needs more than one priority for constructors.
constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50;
constexpr char MemProfInitName[] = "__memprof_init";
constexpr char MemProfVersionCheckNamePrefix[] =
    "__memprof_version_mismatch_check_v";

constexpr char MemProfShadowMemoryDynamicAddress[] =
    "__memprof_shadow_memory_dynamic_address";

// Name of the global created by createProfileFileNameVar().
constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";

// Name of the global created by createMemprofHistogramFlagVar().
constexpr char MemProfHistogramFlagVar[] = "__memprof_histogram";

// Command-line flags.

// Hidden option, enabled by default.
static cl::opt<bool> ClInsertVersionCheck(
    "memprof-guard-against-version-mismatch",
    cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,
    cl::init(true));

// This flag may need to be replaced with -f[no-]memprof-reads.
// When false, isInterestingMemoryAccess() rejects loads and masked loads.
static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

// When false, isInterestingMemoryAccess() rejects stores and masked stores.
static cl::opt<bool>
    ClInstrumentWrites("memprof-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

// When false, isInterestingMemoryAccess() rejects atomicrmw and cmpxchg.
static cl::opt<bool> ClInstrumentAtomics(
    "memprof-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

// When true, instrumentAddress() emits a call to the access callback instead
// of the inline shadow load/increment/store sequence.
static cl::opt<bool> ClUseCalls(
    "memprof-use-callbacks",
    cl::desc("Use callbacks instead of inline instrumentation sequences."),
    cl::Hidden, cl::init(false));

static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__memprof_"));

// These flags allow changing the shadow mapping.
// The shadow mapping looks like
//    Shadow = ((Mem & mask) >> scale) + offset

// Read by the ShadowMapping constructor to set ShadowMapping::Scale.
static cl::opt<int> ClMappingScale("memprof-mapping-scale",
                                   cl::desc("scale of memprof shadow mapping"),
                                   cl::Hidden, cl::init(DefaultShadowScale));

// Read by the ShadowMapping constructor when -memprof-histogram is not set.
static cl::opt<int>
    ClMappingGranularity("memprof-mapping-granularity",
                         cl::desc("granularity of memprof shadow mapping"),
                         cl::Hidden, cl::init(DefaultMemGranularity));

// When false (the default), instrumentMop() skips accesses whose underlying
// object is an alloca and only counts them in the "skipped stack" statistics.
static cl::opt<bool> ClStack("memprof-instrument-stack",
                             cl::desc("Instrument scalar stack variables"),
                             cl::Hidden, cl::init(false));

// Debug flags.

static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,
                            cl::init(0));

static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden,
                                        cl::desc("Debug func"));

static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"),
                               cl::Hidden, cl::init(-1));

static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
                               cl::Hidden, cl::init(-1));

// By default disable matching of allocation profiles onto operator new that
// already explicitly pass a hot/cold hint, since we don't currently
// override these hints anyway.
static cl::opt<bool> ClMemProfMatchHotColdNew(
    "memprof-match-hot-cold-new",
    cl::desc(
        "Match allocation profiles onto existing hot/cold operator new calls"),
    cl::Hidden, cl::init(false));

// Selects the 8-bit saturating shadow counters and HistogramGranularity; see
// ShadowMapping and MemProfiler::instrumentAddress().
static cl::opt<bool> ClHistogram("memprof-histogram",
                                 cl::desc("Collect access count histograms"),
                                 cl::Hidden, cl::init(false));

static cl::opt<bool>
    ClPrintMemProfMatchInfo("memprof-print-match-info",
                            cl::desc("Print matching stats for each allocation "
                                     "context in this module's profiles"),
                            cl::Hidden, cl::init(false));

// String stored (null-terminated) in the global emitted by
// createMemprofDefaultOptionsVar().
static cl::opt<std::string>
    MemprofRuntimeDefaultOptions("memprof-runtime-default-options",
                                 cl::desc("The default memprof options"),
                                 cl::Hidden, cl::init(""));

static cl::opt<bool>
    SalvageStaleProfile("memprof-salvage-stale-profile",
                        cl::desc("Salvage stale MemProf profile"),
                        cl::init(false), cl::Hidden);

// Not declared static, so this option has external linkage.
cl::opt<unsigned> MinClonedColdBytePercent(
    "memprof-cloning-cold-threshold", cl::init(100), cl::Hidden,
    cl::desc("Min percent of cold bytes to hint alloc cold during cloning"));

// Declared here only; the definition is not in this part of the file.
extern cl::opt<bool> MemProfReportHintedSizes;

static cl::opt<unsigned> MinMatchedColdBytePercent(
    "memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
    cl::desc("Min percent of cold bytes matched to hint allocation cold"));

// Instrumentation statistics
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");

// Matching statistics
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfMemProfMismatch,
          "Number of functions having mismatched memory profile hash.");
STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
STATISTIC(NumOfMemProfAllocContextProfiles,
          "Number of alloc contexts in memory profile.");
STATISTIC(NumOfMemProfCallSiteProfiles,
          "Number of callsites in memory profile.");
STATISTIC(NumOfMemProfMatchedAllocContexts,
          "Number of matched memory profile alloc contexts.");
STATISTIC(NumOfMemProfMatchedAllocs,
          "Number of matched memory profile allocs.");
STATISTIC(NumOfMemProfMatchedCallSites,
          "Number of matched memory profile callsites.");

namespace {

/// This struct defines the shadow mapping using the rule:
214226d80ebSTeresa Johnson /// shadow = ((mem & mask) >> Scale) ADD DynamicShadowOffset. 215226d80ebSTeresa Johnson struct ShadowMapping { 216226d80ebSTeresa Johnson ShadowMapping() { 217226d80ebSTeresa Johnson Scale = ClMappingScale; 21817993eb1SMatthew Weingarten Granularity = ClHistogram ? HistogramGranularity : ClMappingGranularity; 219226d80ebSTeresa Johnson Mask = ~(Granularity - 1); 220226d80ebSTeresa Johnson } 221226d80ebSTeresa Johnson 222226d80ebSTeresa Johnson int Scale; 223226d80ebSTeresa Johnson int Granularity; 224226d80ebSTeresa Johnson uint64_t Mask; // Computed as ~(Granularity-1) 225226d80ebSTeresa Johnson }; 226226d80ebSTeresa Johnson 227226d80ebSTeresa Johnson static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) { 228226d80ebSTeresa Johnson return TargetTriple.isOSEmscripten() ? MemProfEmscriptenCtorAndDtorPriority 229226d80ebSTeresa Johnson : MemProfCtorAndDtorPriority; 230226d80ebSTeresa Johnson } 231226d80ebSTeresa Johnson 232226d80ebSTeresa Johnson struct InterestingMemoryAccess { 233226d80ebSTeresa Johnson Value *Addr = nullptr; 234226d80ebSTeresa Johnson bool IsWrite; 2357cc3e141SNikita Popov Type *AccessTy; 236226d80ebSTeresa Johnson Value *MaybeMask = nullptr; 237226d80ebSTeresa Johnson }; 238226d80ebSTeresa Johnson 239226d80ebSTeresa Johnson /// Instrument the code in module to profile memory accesses. 240226d80ebSTeresa Johnson class MemProfiler { 241226d80ebSTeresa Johnson public: 242226d80ebSTeresa Johnson MemProfiler(Module &M) { 243226d80ebSTeresa Johnson C = &(M.getContext()); 244226d80ebSTeresa Johnson LongSize = M.getDataLayout().getPointerSizeInBits(); 245226d80ebSTeresa Johnson IntptrTy = Type::getIntNTy(*C, LongSize); 2467ca135cdSFangrui Song PtrTy = PointerType::getUnqual(*C); 247226d80ebSTeresa Johnson } 248226d80ebSTeresa Johnson 249226d80ebSTeresa Johnson /// If it is an interesting memory access, populate information 250226d80ebSTeresa Johnson /// about the access and return a InterestingMemoryAccess struct. 
2513c09ed00SKazu Hirata /// Otherwise return std::nullopt. 2521b9ca45aSFangrui Song std::optional<InterestingMemoryAccess> 253226d80ebSTeresa Johnson isInterestingMemoryAccess(Instruction *I) const; 254226d80ebSTeresa Johnson 255226d80ebSTeresa Johnson void instrumentMop(Instruction *I, const DataLayout &DL, 256226d80ebSTeresa Johnson InterestingMemoryAccess &Access); 257226d80ebSTeresa Johnson void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, 258a7395891SEnna1 Value *Addr, bool IsWrite); 259226d80ebSTeresa Johnson void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask, 26086f45575SGuillaume Chatelet Instruction *I, Value *Addr, Type *AccessTy, 261226d80ebSTeresa Johnson bool IsWrite); 262226d80ebSTeresa Johnson void instrumentMemIntrinsic(MemIntrinsic *MI); 263226d80ebSTeresa Johnson Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); 264226d80ebSTeresa Johnson bool instrumentFunction(Function &F); 265226d80ebSTeresa Johnson bool maybeInsertMemProfInitAtFunctionEntry(Function &F); 266226d80ebSTeresa Johnson bool insertDynamicShadowAtFunctionEntry(Function &F); 267226d80ebSTeresa Johnson 268226d80ebSTeresa Johnson private: 269226d80ebSTeresa Johnson void initializeCallbacks(Module &M); 270226d80ebSTeresa Johnson 271226d80ebSTeresa Johnson LLVMContext *C; 272226d80ebSTeresa Johnson int LongSize; 273226d80ebSTeresa Johnson Type *IntptrTy; 2747ca135cdSFangrui Song PointerType *PtrTy; 275226d80ebSTeresa Johnson ShadowMapping Mapping; 276226d80ebSTeresa Johnson 277226d80ebSTeresa Johnson // These arrays is indexed by AccessIsWrite 278226d80ebSTeresa Johnson FunctionCallee MemProfMemoryAccessCallback[2]; 279226d80ebSTeresa Johnson 280226d80ebSTeresa Johnson FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset; 281226d80ebSTeresa Johnson Value *DynamicShadowOffset = nullptr; 282226d80ebSTeresa Johnson }; 283226d80ebSTeresa Johnson 284226d80ebSTeresa Johnson class ModuleMemProfiler { 285226d80ebSTeresa Johnson public: 
286226d80ebSTeresa Johnson ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); } 287226d80ebSTeresa Johnson 288226d80ebSTeresa Johnson bool instrumentModule(Module &); 289226d80ebSTeresa Johnson 290226d80ebSTeresa Johnson private: 291226d80ebSTeresa Johnson Triple TargetTriple; 292226d80ebSTeresa Johnson ShadowMapping Mapping; 293226d80ebSTeresa Johnson Function *MemProfCtorFunction = nullptr; 294226d80ebSTeresa Johnson }; 295226d80ebSTeresa Johnson 296226d80ebSTeresa Johnson } // end anonymous namespace 297226d80ebSTeresa Johnson 2983a3cb929SKazu Hirata MemProfilerPass::MemProfilerPass() = default; 299226d80ebSTeresa Johnson 300226d80ebSTeresa Johnson PreservedAnalyses MemProfilerPass::run(Function &F, 301226d80ebSTeresa Johnson AnalysisManager<Function> &AM) { 30217993eb1SMatthew Weingarten assert((!ClHistogram || ClMappingGranularity == DefaultMemGranularity) && 30317993eb1SMatthew Weingarten "Memprof with histogram only supports default mapping granularity"); 304226d80ebSTeresa Johnson Module &M = *F.getParent(); 305226d80ebSTeresa Johnson MemProfiler Profiler(M); 306226d80ebSTeresa Johnson if (Profiler.instrumentFunction(F)) 307226d80ebSTeresa Johnson return PreservedAnalyses::none(); 308226d80ebSTeresa Johnson return PreservedAnalyses::all(); 309226d80ebSTeresa Johnson } 310226d80ebSTeresa Johnson 3113a3cb929SKazu Hirata ModuleMemProfilerPass::ModuleMemProfilerPass() = default; 312226d80ebSTeresa Johnson 313226d80ebSTeresa Johnson PreservedAnalyses ModuleMemProfilerPass::run(Module &M, 314226d80ebSTeresa Johnson AnalysisManager<Module> &AM) { 31530b93db5SMatthew Weingarten 316226d80ebSTeresa Johnson ModuleMemProfiler Profiler(M); 317226d80ebSTeresa Johnson if (Profiler.instrumentModule(M)) 318226d80ebSTeresa Johnson return PreservedAnalyses::none(); 319226d80ebSTeresa Johnson return PreservedAnalyses::all(); 320226d80ebSTeresa Johnson } 321226d80ebSTeresa Johnson 322226d80ebSTeresa Johnson Value *MemProfiler::memToShadow(Value *Shadow, 
IRBuilder<> &IRB) { 323226d80ebSTeresa Johnson // (Shadow & mask) >> scale 324226d80ebSTeresa Johnson Shadow = IRB.CreateAnd(Shadow, Mapping.Mask); 325226d80ebSTeresa Johnson Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); 326226d80ebSTeresa Johnson // (Shadow >> scale) | offset 327226d80ebSTeresa Johnson assert(DynamicShadowOffset); 328226d80ebSTeresa Johnson return IRB.CreateAdd(Shadow, DynamicShadowOffset); 329226d80ebSTeresa Johnson } 330226d80ebSTeresa Johnson 331226d80ebSTeresa Johnson // Instrument memset/memmove/memcpy 332226d80ebSTeresa Johnson void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) { 333226d80ebSTeresa Johnson IRBuilder<> IRB(MI); 334226d80ebSTeresa Johnson if (isa<MemTransferInst>(MI)) { 3357ca135cdSFangrui Song IRB.CreateCall(isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy, 3367ca135cdSFangrui Song {MI->getOperand(0), MI->getOperand(1), 337226d80ebSTeresa Johnson IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); 338226d80ebSTeresa Johnson } else if (isa<MemSetInst>(MI)) { 339226d80ebSTeresa Johnson IRB.CreateCall( 340226d80ebSTeresa Johnson MemProfMemset, 3417ca135cdSFangrui Song {MI->getOperand(0), 342226d80ebSTeresa Johnson IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false), 343226d80ebSTeresa Johnson IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); 344226d80ebSTeresa Johnson } 345226d80ebSTeresa Johnson MI->eraseFromParent(); 346226d80ebSTeresa Johnson } 347226d80ebSTeresa Johnson 3481b9ca45aSFangrui Song std::optional<InterestingMemoryAccess> 349226d80ebSTeresa Johnson MemProfiler::isInterestingMemoryAccess(Instruction *I) const { 350226d80ebSTeresa Johnson // Do not instrument the load fetching the dynamic shadow address. 
351226d80ebSTeresa Johnson if (DynamicShadowOffset == I) 352343de685SKazu Hirata return std::nullopt; 353226d80ebSTeresa Johnson 354226d80ebSTeresa Johnson InterestingMemoryAccess Access; 355226d80ebSTeresa Johnson 356226d80ebSTeresa Johnson if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 357226d80ebSTeresa Johnson if (!ClInstrumentReads) 358343de685SKazu Hirata return std::nullopt; 359226d80ebSTeresa Johnson Access.IsWrite = false; 3607cc3e141SNikita Popov Access.AccessTy = LI->getType(); 361226d80ebSTeresa Johnson Access.Addr = LI->getPointerOperand(); 362226d80ebSTeresa Johnson } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 363226d80ebSTeresa Johnson if (!ClInstrumentWrites) 364343de685SKazu Hirata return std::nullopt; 365226d80ebSTeresa Johnson Access.IsWrite = true; 3667cc3e141SNikita Popov Access.AccessTy = SI->getValueOperand()->getType(); 367226d80ebSTeresa Johnson Access.Addr = SI->getPointerOperand(); 368226d80ebSTeresa Johnson } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { 369226d80ebSTeresa Johnson if (!ClInstrumentAtomics) 370343de685SKazu Hirata return std::nullopt; 371226d80ebSTeresa Johnson Access.IsWrite = true; 3727cc3e141SNikita Popov Access.AccessTy = RMW->getValOperand()->getType(); 373226d80ebSTeresa Johnson Access.Addr = RMW->getPointerOperand(); 374226d80ebSTeresa Johnson } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { 375226d80ebSTeresa Johnson if (!ClInstrumentAtomics) 376343de685SKazu Hirata return std::nullopt; 377226d80ebSTeresa Johnson Access.IsWrite = true; 3787cc3e141SNikita Popov Access.AccessTy = XCHG->getCompareOperand()->getType(); 379226d80ebSTeresa Johnson Access.Addr = XCHG->getPointerOperand(); 380226d80ebSTeresa Johnson } else if (auto *CI = dyn_cast<CallInst>(I)) { 381226d80ebSTeresa Johnson auto *F = CI->getCalledFunction(); 382226d80ebSTeresa Johnson if (F && (F->getIntrinsicID() == Intrinsic::masked_load || 383226d80ebSTeresa Johnson F->getIntrinsicID() == Intrinsic::masked_store)) 
{ 384226d80ebSTeresa Johnson unsigned OpOffset = 0; 385226d80ebSTeresa Johnson if (F->getIntrinsicID() == Intrinsic::masked_store) { 386226d80ebSTeresa Johnson if (!ClInstrumentWrites) 387343de685SKazu Hirata return std::nullopt; 388226d80ebSTeresa Johnson // Masked store has an initial operand for the value. 389226d80ebSTeresa Johnson OpOffset = 1; 3907cc3e141SNikita Popov Access.AccessTy = CI->getArgOperand(0)->getType(); 391226d80ebSTeresa Johnson Access.IsWrite = true; 392226d80ebSTeresa Johnson } else { 393226d80ebSTeresa Johnson if (!ClInstrumentReads) 394343de685SKazu Hirata return std::nullopt; 3957cc3e141SNikita Popov Access.AccessTy = CI->getType(); 396226d80ebSTeresa Johnson Access.IsWrite = false; 397226d80ebSTeresa Johnson } 398226d80ebSTeresa Johnson 399226d80ebSTeresa Johnson auto *BasePtr = CI->getOperand(0 + OpOffset); 400226d80ebSTeresa Johnson Access.MaybeMask = CI->getOperand(2 + OpOffset); 401226d80ebSTeresa Johnson Access.Addr = BasePtr; 402226d80ebSTeresa Johnson } 403226d80ebSTeresa Johnson } 404226d80ebSTeresa Johnson 405226d80ebSTeresa Johnson if (!Access.Addr) 406343de685SKazu Hirata return std::nullopt; 407226d80ebSTeresa Johnson 4080e37ef01SKazu Hirata // Do not instrument accesses from different address spaces; we cannot deal 409226d80ebSTeresa Johnson // with them. 410226d80ebSTeresa Johnson Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType()); 411226d80ebSTeresa Johnson if (PtrTy->getPointerAddressSpace() != 0) 412343de685SKazu Hirata return std::nullopt; 413226d80ebSTeresa Johnson 414226d80ebSTeresa Johnson // Ignore swifterror addresses. 415226d80ebSTeresa Johnson // swifterror memory addresses are mem2reg promoted by instruction 416226d80ebSTeresa Johnson // selection. As such they cannot have regular uses like an instrumentation 417226d80ebSTeresa Johnson // function and it makes no sense to track them as memory. 
418226d80ebSTeresa Johnson if (Access.Addr->isSwiftError()) 419343de685SKazu Hirata return std::nullopt; 420226d80ebSTeresa Johnson 421a0b5af46STeresa Johnson // Peel off GEPs and BitCasts. 422a0b5af46STeresa Johnson auto *Addr = Access.Addr->stripInBoundsOffsets(); 423a0b5af46STeresa Johnson 424a0b5af46STeresa Johnson if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { 425a0b5af46STeresa Johnson // Do not instrument PGO counter updates. 426a0b5af46STeresa Johnson if (GV->hasSection()) { 427a0b5af46STeresa Johnson StringRef SectionName = GV->getSection(); 428a0b5af46STeresa Johnson // Check if the global is in the PGO counters section. 429a0b5af46STeresa Johnson auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat(); 4303ca4fe80SSimon Pilgrim if (SectionName.ends_with( 431a0b5af46STeresa Johnson getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false))) 432343de685SKazu Hirata return std::nullopt; 433a0b5af46STeresa Johnson } 434a0b5af46STeresa Johnson 435a0b5af46STeresa Johnson // Do not instrument accesses to LLVM internal variables. 
4363ca4fe80SSimon Pilgrim if (GV->getName().starts_with("__llvm")) 437343de685SKazu Hirata return std::nullopt; 438a0b5af46STeresa Johnson } 439a0b5af46STeresa Johnson 440226d80ebSTeresa Johnson return Access; 441226d80ebSTeresa Johnson } 442226d80ebSTeresa Johnson 443226d80ebSTeresa Johnson void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask, 444226d80ebSTeresa Johnson Instruction *I, Value *Addr, 4457cc3e141SNikita Popov Type *AccessTy, bool IsWrite) { 4467cc3e141SNikita Popov auto *VTy = cast<FixedVectorType>(AccessTy); 447226d80ebSTeresa Johnson unsigned Num = VTy->getNumElements(); 448226d80ebSTeresa Johnson auto *Zero = ConstantInt::get(IntptrTy, 0); 449226d80ebSTeresa Johnson for (unsigned Idx = 0; Idx < Num; ++Idx) { 450226d80ebSTeresa Johnson Value *InstrumentedAddress = nullptr; 451226d80ebSTeresa Johnson Instruction *InsertBefore = I; 452226d80ebSTeresa Johnson if (auto *Vector = dyn_cast<ConstantVector>(Mask)) { 453226d80ebSTeresa Johnson // dyn_cast as we might get UndefValue 454226d80ebSTeresa Johnson if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) { 455226d80ebSTeresa Johnson if (Masked->isZero()) 456226d80ebSTeresa Johnson // Mask is constant false, so no instrumentation needed. 457226d80ebSTeresa Johnson continue; 458226d80ebSTeresa Johnson // If we have a true or undef value, fall through to instrumentAddress. 
459226d80ebSTeresa Johnson // with InsertBefore == I 460226d80ebSTeresa Johnson } 461226d80ebSTeresa Johnson } else { 462226d80ebSTeresa Johnson IRBuilder<> IRB(I); 463226d80ebSTeresa Johnson Value *MaskElem = IRB.CreateExtractElement(Mask, Idx); 464226d80ebSTeresa Johnson Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false); 465226d80ebSTeresa Johnson InsertBefore = ThenTerm; 466226d80ebSTeresa Johnson } 467226d80ebSTeresa Johnson 468226d80ebSTeresa Johnson IRBuilder<> IRB(InsertBefore); 469226d80ebSTeresa Johnson InstrumentedAddress = 470226d80ebSTeresa Johnson IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)}); 471a7395891SEnna1 instrumentAddress(I, InsertBefore, InstrumentedAddress, IsWrite); 472226d80ebSTeresa Johnson } 473226d80ebSTeresa Johnson } 474226d80ebSTeresa Johnson 475226d80ebSTeresa Johnson void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL, 476226d80ebSTeresa Johnson InterestingMemoryAccess &Access) { 47788cb3e2cSTeresa Johnson // Skip instrumentation of stack accesses unless requested. 
47888cb3e2cSTeresa Johnson if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) { 47988cb3e2cSTeresa Johnson if (Access.IsWrite) 48088cb3e2cSTeresa Johnson ++NumSkippedStackWrites; 48188cb3e2cSTeresa Johnson else 48288cb3e2cSTeresa Johnson ++NumSkippedStackReads; 48388cb3e2cSTeresa Johnson return; 48488cb3e2cSTeresa Johnson } 48588cb3e2cSTeresa Johnson 486226d80ebSTeresa Johnson if (Access.IsWrite) 487226d80ebSTeresa Johnson NumInstrumentedWrites++; 488226d80ebSTeresa Johnson else 489226d80ebSTeresa Johnson NumInstrumentedReads++; 490226d80ebSTeresa Johnson 491226d80ebSTeresa Johnson if (Access.MaybeMask) { 492226d80ebSTeresa Johnson instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr, 49386f45575SGuillaume Chatelet Access.AccessTy, Access.IsWrite); 494226d80ebSTeresa Johnson } else { 495226d80ebSTeresa Johnson // Since the access counts will be accumulated across the entire allocation, 496226d80ebSTeresa Johnson // we only update the shadow access count for the first location and thus 497226d80ebSTeresa Johnson // don't need to worry about alignment and type size. 498a7395891SEnna1 instrumentAddress(I, I, Access.Addr, Access.IsWrite); 499226d80ebSTeresa Johnson } 500226d80ebSTeresa Johnson } 501226d80ebSTeresa Johnson 502226d80ebSTeresa Johnson void MemProfiler::instrumentAddress(Instruction *OrigIns, 503226d80ebSTeresa Johnson Instruction *InsertBefore, Value *Addr, 504a7395891SEnna1 bool IsWrite) { 505226d80ebSTeresa Johnson IRBuilder<> IRB(InsertBefore); 506226d80ebSTeresa Johnson Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); 507226d80ebSTeresa Johnson 508226d80ebSTeresa Johnson if (ClUseCalls) { 509226d80ebSTeresa Johnson IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong); 510226d80ebSTeresa Johnson return; 511226d80ebSTeresa Johnson } 512226d80ebSTeresa Johnson 51317993eb1SMatthew Weingarten Type *ShadowTy = ClHistogram ? 
Type::getInt8Ty(*C) : Type::getInt64Ty(*C); 514*416f1c46SMats Jun Larsen Type *ShadowPtrTy = PointerType::get(*C, 0); 51517993eb1SMatthew Weingarten 516226d80ebSTeresa Johnson Value *ShadowPtr = memToShadow(AddrLong, IRB); 517226d80ebSTeresa Johnson Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy); 518226d80ebSTeresa Johnson Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr); 51917993eb1SMatthew Weingarten // If we are profiling with histograms, add overflow protection at 255. 52017993eb1SMatthew Weingarten if (ClHistogram) { 52117993eb1SMatthew Weingarten Value *MaxCount = ConstantInt::get(Type::getInt8Ty(*C), 255); 52217993eb1SMatthew Weingarten Value *Cmp = IRB.CreateICmpULT(ShadowValue, MaxCount); 52317993eb1SMatthew Weingarten Instruction *IncBlock = 52417993eb1SMatthew Weingarten SplitBlockAndInsertIfThen(Cmp, InsertBefore, /*Unreachable=*/false); 52517993eb1SMatthew Weingarten IRB.SetInsertPoint(IncBlock); 52617993eb1SMatthew Weingarten } 52717993eb1SMatthew Weingarten Value *Inc = ConstantInt::get(ShadowTy, 1); 528226d80ebSTeresa Johnson ShadowValue = IRB.CreateAdd(ShadowValue, Inc); 529226d80ebSTeresa Johnson IRB.CreateStore(ShadowValue, ShadowAddr); 530226d80ebSTeresa Johnson } 531226d80ebSTeresa Johnson 5320949f96dSTeresa Johnson // Create the variable for the profile file name. 
5330949f96dSTeresa Johnson void createProfileFileNameVar(Module &M) { 5340949f96dSTeresa Johnson const MDString *MemProfFilename = 5350949f96dSTeresa Johnson dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename")); 5360949f96dSTeresa Johnson if (!MemProfFilename) 5370949f96dSTeresa Johnson return; 5380949f96dSTeresa Johnson assert(!MemProfFilename->getString().empty() && 5390949f96dSTeresa Johnson "Unexpected MemProfProfileFilename metadata with empty string"); 5400949f96dSTeresa Johnson Constant *ProfileNameConst = ConstantDataArray::getString( 5410949f96dSTeresa Johnson M.getContext(), MemProfFilename->getString(), true); 5420949f96dSTeresa Johnson GlobalVariable *ProfileNameVar = new GlobalVariable( 5430949f96dSTeresa Johnson M, ProfileNameConst->getType(), /*isConstant=*/true, 5440949f96dSTeresa Johnson GlobalValue::WeakAnyLinkage, ProfileNameConst, MemProfFilenameVar); 5450949f96dSTeresa Johnson Triple TT(M.getTargetTriple()); 5460949f96dSTeresa Johnson if (TT.supportsCOMDAT()) { 5470949f96dSTeresa Johnson ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage); 5480949f96dSTeresa Johnson ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar)); 5490949f96dSTeresa Johnson } 5500949f96dSTeresa Johnson } 5510949f96dSTeresa Johnson 55230b93db5SMatthew Weingarten // Set MemprofHistogramFlag as a Global veriable in IR. This makes it accessible 55330b93db5SMatthew Weingarten // to the runtime, changing shadow count behavior. 
55430b93db5SMatthew Weingarten void createMemprofHistogramFlagVar(Module &M) { 55530b93db5SMatthew Weingarten const StringRef VarName(MemProfHistogramFlagVar); 55630b93db5SMatthew Weingarten Type *IntTy1 = Type::getInt1Ty(M.getContext()); 55730b93db5SMatthew Weingarten auto MemprofHistogramFlag = new GlobalVariable( 55830b93db5SMatthew Weingarten M, IntTy1, true, GlobalValue::WeakAnyLinkage, 55930b93db5SMatthew Weingarten Constant::getIntegerValue(IntTy1, APInt(1, ClHistogram)), VarName); 56030b93db5SMatthew Weingarten Triple TT(M.getTargetTriple()); 56130b93db5SMatthew Weingarten if (TT.supportsCOMDAT()) { 56230b93db5SMatthew Weingarten MemprofHistogramFlag->setLinkage(GlobalValue::ExternalLinkage); 56330b93db5SMatthew Weingarten MemprofHistogramFlag->setComdat(M.getOrInsertComdat(VarName)); 56430b93db5SMatthew Weingarten } 56530b93db5SMatthew Weingarten appendToCompilerUsed(M, MemprofHistogramFlag); 56630b93db5SMatthew Weingarten } 56730b93db5SMatthew Weingarten 5682e33ed9eSEllis Hoag void createMemprofDefaultOptionsVar(Module &M) { 5692e33ed9eSEllis Hoag Constant *OptionsConst = ConstantDataArray::getString( 5702e33ed9eSEllis Hoag M.getContext(), MemprofRuntimeDefaultOptions, /*AddNull=*/true); 5712e33ed9eSEllis Hoag GlobalVariable *OptionsVar = 5722e33ed9eSEllis Hoag new GlobalVariable(M, OptionsConst->getType(), /*isConstant=*/true, 5732e33ed9eSEllis Hoag GlobalValue::WeakAnyLinkage, OptionsConst, 5742e33ed9eSEllis Hoag "__memprof_default_options_str"); 5752e33ed9eSEllis Hoag Triple TT(M.getTargetTriple()); 5762e33ed9eSEllis Hoag if (TT.supportsCOMDAT()) { 5772e33ed9eSEllis Hoag OptionsVar->setLinkage(GlobalValue::ExternalLinkage); 5782e33ed9eSEllis Hoag OptionsVar->setComdat(M.getOrInsertComdat(OptionsVar->getName())); 5792e33ed9eSEllis Hoag } 5802e33ed9eSEllis Hoag } 5812e33ed9eSEllis Hoag 582226d80ebSTeresa Johnson bool ModuleMemProfiler::instrumentModule(Module &M) { 58330b93db5SMatthew Weingarten 584226d80ebSTeresa Johnson // Create a module constructor. 
585226d80ebSTeresa Johnson std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION); 586226d80ebSTeresa Johnson std::string VersionCheckName = 587226d80ebSTeresa Johnson ClInsertVersionCheck ? (MemProfVersionCheckNamePrefix + MemProfVersion) 588226d80ebSTeresa Johnson : ""; 589226d80ebSTeresa Johnson std::tie(MemProfCtorFunction, std::ignore) = 590226d80ebSTeresa Johnson createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName, 591226d80ebSTeresa Johnson MemProfInitName, /*InitArgTypes=*/{}, 592226d80ebSTeresa Johnson /*InitArgs=*/{}, VersionCheckName); 593226d80ebSTeresa Johnson 594226d80ebSTeresa Johnson const uint64_t Priority = getCtorAndDtorPriority(TargetTriple); 595226d80ebSTeresa Johnson appendToGlobalCtors(M, MemProfCtorFunction, Priority); 596226d80ebSTeresa Johnson 5970949f96dSTeresa Johnson createProfileFileNameVar(M); 5980949f96dSTeresa Johnson 59930b93db5SMatthew Weingarten createMemprofHistogramFlagVar(M); 60030b93db5SMatthew Weingarten 6012e33ed9eSEllis Hoag createMemprofDefaultOptionsVar(M); 6022e33ed9eSEllis Hoag 603226d80ebSTeresa Johnson return true; 604226d80ebSTeresa Johnson } 605226d80ebSTeresa Johnson 606226d80ebSTeresa Johnson void MemProfiler::initializeCallbacks(Module &M) { 607226d80ebSTeresa Johnson IRBuilder<> IRB(*C); 608226d80ebSTeresa Johnson 609226d80ebSTeresa Johnson for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { 610226d80ebSTeresa Johnson const std::string TypeStr = AccessIsWrite ? "store" : "load"; 61130b93db5SMatthew Weingarten const std::string HistPrefix = ClHistogram ? 
"hist_" : ""; 612226d80ebSTeresa Johnson 613226d80ebSTeresa Johnson SmallVector<Type *, 2> Args1{1, IntptrTy}; 61430b93db5SMatthew Weingarten MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction( 61530b93db5SMatthew Weingarten ClMemoryAccessCallbackPrefix + HistPrefix + TypeStr, 616226d80ebSTeresa Johnson FunctionType::get(IRB.getVoidTy(), Args1, false)); 617226d80ebSTeresa Johnson } 618226d80ebSTeresa Johnson MemProfMemmove = M.getOrInsertFunction( 6197ca135cdSFangrui Song ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy); 620226d80ebSTeresa Johnson MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy", 6217ca135cdSFangrui Song PtrTy, PtrTy, PtrTy, IntptrTy); 6227ca135cdSFangrui Song MemProfMemset = 6237ca135cdSFangrui Song M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", PtrTy, 6247ca135cdSFangrui Song PtrTy, IRB.getInt32Ty(), IntptrTy); 625226d80ebSTeresa Johnson } 626226d80ebSTeresa Johnson 627226d80ebSTeresa Johnson bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) { 628226d80ebSTeresa Johnson // For each NSObject descendant having a +load method, this method is invoked 629226d80ebSTeresa Johnson // by the ObjC runtime before any of the static constructors is called. 630226d80ebSTeresa Johnson // Therefore we need to instrument such methods with a call to __memprof_init 631226d80ebSTeresa Johnson // at the beginning in order to initialize our runtime before any access to 632226d80ebSTeresa Johnson // the shadow memory. 633226d80ebSTeresa Johnson // We cannot just ignore these methods, because they may call other 634226d80ebSTeresa Johnson // instrumented functions. 
6351daf2994SKazu Hirata if (F.getName().contains(" load]")) { 636226d80ebSTeresa Johnson FunctionCallee MemProfInitFunction = 637226d80ebSTeresa Johnson declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {}); 638d75f9dd1SStephen Tozer IRBuilder<> IRB(&F.front(), F.front().begin()); 639226d80ebSTeresa Johnson IRB.CreateCall(MemProfInitFunction, {}); 640226d80ebSTeresa Johnson return true; 641226d80ebSTeresa Johnson } 642226d80ebSTeresa Johnson return false; 643226d80ebSTeresa Johnson } 644226d80ebSTeresa Johnson 645226d80ebSTeresa Johnson bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) { 646226d80ebSTeresa Johnson IRBuilder<> IRB(&F.front().front()); 647226d80ebSTeresa Johnson Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal( 648226d80ebSTeresa Johnson MemProfShadowMemoryDynamicAddress, IntptrTy); 649204d0d51SFangrui Song if (F.getParent()->getPICLevel() == PICLevel::NotPIC) 65084d5768dSSimon Pilgrim cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true); 651226d80ebSTeresa Johnson DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress); 652226d80ebSTeresa Johnson return true; 653226d80ebSTeresa Johnson } 654226d80ebSTeresa Johnson 655226d80ebSTeresa Johnson bool MemProfiler::instrumentFunction(Function &F) { 656226d80ebSTeresa Johnson if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) 657226d80ebSTeresa Johnson return false; 658226d80ebSTeresa Johnson if (ClDebugFunc == F.getName()) 659226d80ebSTeresa Johnson return false; 6603ca4fe80SSimon Pilgrim if (F.getName().starts_with("__memprof_")) 661226d80ebSTeresa Johnson return false; 662226d80ebSTeresa Johnson 663226d80ebSTeresa Johnson bool FunctionModified = false; 664226d80ebSTeresa Johnson 665226d80ebSTeresa Johnson // If needed, insert __memprof_init. 666226d80ebSTeresa Johnson // This function needs to be called even if the function body is not 667226d80ebSTeresa Johnson // instrumented. 
668226d80ebSTeresa Johnson if (maybeInsertMemProfInitAtFunctionEntry(F)) 669226d80ebSTeresa Johnson FunctionModified = true; 670226d80ebSTeresa Johnson 671226d80ebSTeresa Johnson LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n"); 672226d80ebSTeresa Johnson 673226d80ebSTeresa Johnson initializeCallbacks(*F.getParent()); 674226d80ebSTeresa Johnson 675226d80ebSTeresa Johnson SmallVector<Instruction *, 16> ToInstrument; 676226d80ebSTeresa Johnson 677226d80ebSTeresa Johnson // Fill the set of memory operations to instrument. 678226d80ebSTeresa Johnson for (auto &BB : F) { 679226d80ebSTeresa Johnson for (auto &Inst : BB) { 680226d80ebSTeresa Johnson if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst)) 681226d80ebSTeresa Johnson ToInstrument.push_back(&Inst); 682226d80ebSTeresa Johnson } 683226d80ebSTeresa Johnson } 684226d80ebSTeresa Johnson 685084b65f7STeresa Johnson if (ToInstrument.empty()) { 686084b65f7STeresa Johnson LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified 687084b65f7STeresa Johnson << " " << F << "\n"); 688084b65f7STeresa Johnson 689084b65f7STeresa Johnson return FunctionModified; 690084b65f7STeresa Johnson } 691084b65f7STeresa Johnson 692084b65f7STeresa Johnson FunctionModified |= insertDynamicShadowAtFunctionEntry(F); 693084b65f7STeresa Johnson 694226d80ebSTeresa Johnson int NumInstrumented = 0; 695226d80ebSTeresa Johnson for (auto *Inst : ToInstrument) { 696226d80ebSTeresa Johnson if (ClDebugMin < 0 || ClDebugMax < 0 || 697226d80ebSTeresa Johnson (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { 6981b9ca45aSFangrui Song std::optional<InterestingMemoryAccess> Access = 699226d80ebSTeresa Johnson isInterestingMemoryAccess(Inst); 700226d80ebSTeresa Johnson if (Access) 7019df71d76SNikita Popov instrumentMop(Inst, F.getDataLayout(), *Access); 702226d80ebSTeresa Johnson else 703226d80ebSTeresa Johnson instrumentMemIntrinsic(cast<MemIntrinsic>(Inst)); 704226d80ebSTeresa Johnson } 
705226d80ebSTeresa Johnson NumInstrumented++; 706226d80ebSTeresa Johnson } 707226d80ebSTeresa Johnson 708226d80ebSTeresa Johnson if (NumInstrumented > 0) 709226d80ebSTeresa Johnson FunctionModified = true; 710226d80ebSTeresa Johnson 711226d80ebSTeresa Johnson LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " " 712226d80ebSTeresa Johnson << F << "\n"); 713226d80ebSTeresa Johnson 714226d80ebSTeresa Johnson return FunctionModified; 715226d80ebSTeresa Johnson } 71695014050STeresa Johnson 71795014050STeresa Johnson static void addCallsiteMetadata(Instruction &I, 718890c4becSKazu Hirata ArrayRef<uint64_t> InlinedCallStack, 71995014050STeresa Johnson LLVMContext &Ctx) { 72095014050STeresa Johnson I.setMetadata(LLVMContext::MD_callsite, 72195014050STeresa Johnson buildCallstackMetadata(InlinedCallStack, Ctx)); 72295014050STeresa Johnson } 72395014050STeresa Johnson 72495014050STeresa Johnson static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, 72595014050STeresa Johnson uint32_t Column) { 726d7b18d50SKazu Hirata llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little> 72795014050STeresa Johnson HashBuilder; 72895014050STeresa Johnson HashBuilder.add(Function, LineOffset, Column); 72995014050STeresa Johnson llvm::BLAKE3Result<8> Hash = HashBuilder.final(); 73095014050STeresa Johnson uint64_t Id; 73195014050STeresa Johnson std::memcpy(&Id, Hash.data(), sizeof(Hash)); 73295014050STeresa Johnson return Id; 73395014050STeresa Johnson } 73495014050STeresa Johnson 73595014050STeresa Johnson static uint64_t computeStackId(const memprof::Frame &Frame) { 73695014050STeresa Johnson return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column); 73795014050STeresa Johnson } 73895014050STeresa Johnson 7397536474eSTeresa Johnson // Helper to generate a single hash id for a given callstack, used for emitting 7407536474eSTeresa Johnson // matching statistics and useful for uniquing such statistics across modules. 
7417c294eb7SKazu Hirata static uint64_t computeFullStackId(ArrayRef<Frame> CallStack) { 7427536474eSTeresa Johnson llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little> 7437536474eSTeresa Johnson HashBuilder; 7447536474eSTeresa Johnson for (auto &F : CallStack) 7457536474eSTeresa Johnson HashBuilder.add(F.Function, F.LineOffset, F.Column); 7467536474eSTeresa Johnson llvm::BLAKE3Result<8> Hash = HashBuilder.final(); 7477536474eSTeresa Johnson uint64_t Id; 7487536474eSTeresa Johnson std::memcpy(&Id, Hash.data(), sizeof(Hash)); 7497536474eSTeresa Johnson return Id; 7507536474eSTeresa Johnson } 7517536474eSTeresa Johnson 7527536474eSTeresa Johnson static AllocationType addCallStack(CallStackTrie &AllocTrie, 7539513f2fdSTeresa Johnson const AllocationInfo *AllocInfo, 7549513f2fdSTeresa Johnson uint64_t FullStackId) { 75595014050STeresa Johnson SmallVector<uint64_t> StackIds; 75695014050STeresa Johnson for (const auto &StackFrame : AllocInfo->CallStack) 75795014050STeresa Johnson StackIds.push_back(computeStackId(StackFrame)); 75895014050STeresa Johnson auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(), 75995014050STeresa Johnson AllocInfo->Info.getAllocCount(), 76095014050STeresa Johnson AllocInfo->Info.getTotalLifetime()); 7619513f2fdSTeresa Johnson std::vector<ContextTotalSize> ContextSizeInfo; 762c7451ffcSTeresa Johnson if (MemProfReportHintedSizes || MinClonedColdBytePercent < 100) { 7639513f2fdSTeresa Johnson auto TotalSize = AllocInfo->Info.getTotalSize(); 7648c1bd67dSTeresa Johnson assert(TotalSize); 7659513f2fdSTeresa Johnson assert(FullStackId != 0); 7669513f2fdSTeresa Johnson ContextSizeInfo.push_back({FullStackId, TotalSize}); 7678c1bd67dSTeresa Johnson } 7689513f2fdSTeresa Johnson AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo)); 7697536474eSTeresa Johnson return AllocType; 77095014050STeresa Johnson } 77195014050STeresa Johnson 77295014050STeresa Johnson // Helper to compare the 
InlinedCallStack computed from an instruction's debug 77395014050STeresa Johnson // info to a list of Frames from profile data (either the allocation data or a 77495014050STeresa Johnson // callsite). For callsites, the StartIndex to use in the Frame array may be 77595014050STeresa Johnson // non-zero. 77695014050STeresa Johnson static bool 77795014050STeresa Johnson stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack, 7787c294eb7SKazu Hirata ArrayRef<uint64_t> InlinedCallStack) { 7797c294eb7SKazu Hirata auto StackFrame = ProfileCallStack.begin(); 78095014050STeresa Johnson auto InlCallStackIter = InlinedCallStack.begin(); 78195014050STeresa Johnson for (; StackFrame != ProfileCallStack.end() && 78295014050STeresa Johnson InlCallStackIter != InlinedCallStack.end(); 78395014050STeresa Johnson ++StackFrame, ++InlCallStackIter) { 78495014050STeresa Johnson uint64_t StackId = computeStackId(*StackFrame); 78595014050STeresa Johnson if (StackId != *InlCallStackIter) 78695014050STeresa Johnson return false; 78795014050STeresa Johnson } 78895014050STeresa Johnson // Return true if we found and matched all stack ids from the call 78995014050STeresa Johnson // instruction. 
79095014050STeresa Johnson return InlCallStackIter == InlinedCallStack.end(); 79195014050STeresa Johnson } 79295014050STeresa Johnson 79377b7d9deSKazu Hirata static bool isAllocationWithHotColdVariant(const Function *Callee, 794e5cbe8fdSTeresa Johnson const TargetLibraryInfo &TLI) { 795e5cbe8fdSTeresa Johnson if (!Callee) 796e5cbe8fdSTeresa Johnson return false; 797e5cbe8fdSTeresa Johnson LibFunc Func; 798e5cbe8fdSTeresa Johnson if (!TLI.getLibFunc(*Callee, Func)) 799e5cbe8fdSTeresa Johnson return false; 800e5cbe8fdSTeresa Johnson switch (Func) { 801e5cbe8fdSTeresa Johnson case LibFunc_Znwm: 802e5cbe8fdSTeresa Johnson case LibFunc_ZnwmRKSt9nothrow_t: 803e5cbe8fdSTeresa Johnson case LibFunc_ZnwmSt11align_val_t: 804e5cbe8fdSTeresa Johnson case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: 805e5cbe8fdSTeresa Johnson case LibFunc_Znam: 806e5cbe8fdSTeresa Johnson case LibFunc_ZnamRKSt9nothrow_t: 807e5cbe8fdSTeresa Johnson case LibFunc_ZnamSt11align_val_t: 808e5cbe8fdSTeresa Johnson case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: 80995daf1aeSSnehasish Kumar case LibFunc_size_returning_new: 81095daf1aeSSnehasish Kumar case LibFunc_size_returning_new_aligned: 811e5cbe8fdSTeresa Johnson return true; 812e5cbe8fdSTeresa Johnson case LibFunc_Znwm12__hot_cold_t: 813e5cbe8fdSTeresa Johnson case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t: 814e5cbe8fdSTeresa Johnson case LibFunc_ZnwmSt11align_val_t12__hot_cold_t: 815e5cbe8fdSTeresa Johnson case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t: 816e5cbe8fdSTeresa Johnson case LibFunc_Znam12__hot_cold_t: 817e5cbe8fdSTeresa Johnson case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t: 818e5cbe8fdSTeresa Johnson case LibFunc_ZnamSt11align_val_t12__hot_cold_t: 819e5cbe8fdSTeresa Johnson case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t: 82095daf1aeSSnehasish Kumar case LibFunc_size_returning_new_hot_cold: 82195daf1aeSSnehasish Kumar case LibFunc_size_returning_new_aligned_hot_cold: 822e5cbe8fdSTeresa Johnson return 
ClMemProfMatchHotColdNew; 823e5cbe8fdSTeresa Johnson default: 824e5cbe8fdSTeresa Johnson return false; 825e5cbe8fdSTeresa Johnson } 826e5cbe8fdSTeresa Johnson } 827e5cbe8fdSTeresa Johnson 8289b00ef52SSnehasish Kumar struct AllocMatchInfo { 8299b00ef52SSnehasish Kumar uint64_t TotalSize = 0; 8309b00ef52SSnehasish Kumar AllocationType AllocType = AllocationType::None; 8319b00ef52SSnehasish Kumar bool Matched = false; 8329b00ef52SSnehasish Kumar }; 8339b00ef52SSnehasish Kumar 834e189d619SKazu Hirata DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> 835adf0c817SKazu Hirata memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI, 836adf0c817SKazu Hirata function_ref<bool(uint64_t)> IsPresentInProfile) { 837e189d619SKazu Hirata DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls; 838e189d619SKazu Hirata 839e189d619SKazu Hirata auto GetOffset = [](const DILocation *DIL) { 840e189d619SKazu Hirata return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & 841e189d619SKazu Hirata 0xffff; 842e189d619SKazu Hirata }; 843e189d619SKazu Hirata 844e189d619SKazu Hirata for (Function &F : M) { 845e189d619SKazu Hirata if (F.isDeclaration()) 846e189d619SKazu Hirata continue; 847e189d619SKazu Hirata 848e189d619SKazu Hirata for (auto &BB : F) { 849e189d619SKazu Hirata for (auto &I : BB) { 850e189d619SKazu Hirata if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I)) 851e189d619SKazu Hirata continue; 852e189d619SKazu Hirata 853e189d619SKazu Hirata auto *CB = dyn_cast<CallBase>(&I); 854e189d619SKazu Hirata auto *CalledFunction = CB->getCalledFunction(); 855e189d619SKazu Hirata // Disregard indirect calls and intrinsics. 856e189d619SKazu Hirata if (!CalledFunction || CalledFunction->isIntrinsic()) 857e189d619SKazu Hirata continue; 858e189d619SKazu Hirata 859e189d619SKazu Hirata StringRef CalleeName = CalledFunction->getName(); 860adf0c817SKazu Hirata // True if we are calling a heap allocation function that supports 861adf0c817SKazu Hirata // hot/cold variants. 
86295554cbdSKazu Hirata bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI); 863adf0c817SKazu Hirata // True for the first iteration below, indicating that we are looking at 864adf0c817SKazu Hirata // a leaf node. 865adf0c817SKazu Hirata bool IsLeaf = true; 866c6183244SKazu Hirata for (const DILocation *DIL = I.getDebugLoc(); DIL; 867c6183244SKazu Hirata DIL = DIL->getInlinedAt()) { 868c6183244SKazu Hirata StringRef CallerName = DIL->getSubprogramLinkageName(); 869c6183244SKazu Hirata assert(!CallerName.empty() && 870c6183244SKazu Hirata "Be sure to enable -fdebug-info-for-profiling"); 871c6183244SKazu Hirata uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName); 872e189d619SKazu Hirata uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName); 87395554cbdSKazu Hirata // Pretend that we are calling a function with GUID == 0 if we are 874adf0c817SKazu Hirata // in the inline stack leading to a heap allocation function. 875adf0c817SKazu Hirata if (IsAlloc) { 876adf0c817SKazu Hirata if (IsLeaf) { 877adf0c817SKazu Hirata // For leaf nodes, set CalleeGUID to 0 without consulting 878adf0c817SKazu Hirata // IsPresentInProfile. 87995554cbdSKazu Hirata CalleeGUID = 0; 880adf0c817SKazu Hirata } else if (!IsPresentInProfile(CalleeGUID)) { 881adf0c817SKazu Hirata // In addition to the leaf case above, continue to set CalleeGUID 882adf0c817SKazu Hirata // to 0 as long as we don't see CalleeGUID in the profile. 883adf0c817SKazu Hirata CalleeGUID = 0; 884adf0c817SKazu Hirata } else { 885adf0c817SKazu Hirata // Once we encounter a callee that exists in the profile, stop 886adf0c817SKazu Hirata // setting CalleeGUID to 0. 
887adf0c817SKazu Hirata IsAlloc = false; 888adf0c817SKazu Hirata } 889adf0c817SKazu Hirata } 890adf0c817SKazu Hirata 891e189d619SKazu Hirata LineLocation Loc = {GetOffset(DIL), DIL->getColumn()}; 892e189d619SKazu Hirata Calls[CallerGUID].emplace_back(Loc, CalleeGUID); 893c6183244SKazu Hirata CalleeName = CallerName; 894adf0c817SKazu Hirata IsLeaf = false; 895c6183244SKazu Hirata } 896e189d619SKazu Hirata } 897e189d619SKazu Hirata } 898e189d619SKazu Hirata } 899e189d619SKazu Hirata 900e189d619SKazu Hirata // Sort each call list by the source location. 901e189d619SKazu Hirata for (auto &[CallerGUID, CallList] : Calls) { 902e189d619SKazu Hirata llvm::sort(CallList); 903e189d619SKazu Hirata CallList.erase(llvm::unique(CallList), CallList.end()); 904e189d619SKazu Hirata } 905e189d619SKazu Hirata 906e189d619SKazu Hirata return Calls; 907e189d619SKazu Hirata } 908e189d619SKazu Hirata 909a2e266b3SKazu Hirata DenseMap<uint64_t, LocToLocMap> 910a2e266b3SKazu Hirata memprof::computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader, 911a2e266b3SKazu Hirata const TargetLibraryInfo &TLI) { 912a2e266b3SKazu Hirata DenseMap<uint64_t, LocToLocMap> UndriftMaps; 913a2e266b3SKazu Hirata 914a2e266b3SKazu Hirata DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromProfile = 915a2e266b3SKazu Hirata MemProfReader->getMemProfCallerCalleePairs(); 916a2e266b3SKazu Hirata DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromIR = 917adf0c817SKazu Hirata extractCallsFromIR(M, TLI, [&](uint64_t GUID) { 918adf0c817SKazu Hirata return CallsFromProfile.contains(GUID); 919adf0c817SKazu Hirata }); 920a2e266b3SKazu Hirata 921a2e266b3SKazu Hirata // Compute an undrift map for each CallerGUID. 
922a2e266b3SKazu Hirata for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) { 923a2e266b3SKazu Hirata auto It = CallsFromProfile.find(CallerGUID); 924a2e266b3SKazu Hirata if (It == CallsFromProfile.end()) 925a2e266b3SKazu Hirata continue; 926a2e266b3SKazu Hirata const auto &ProfileAnchors = It->second; 927a2e266b3SKazu Hirata 928a2e266b3SKazu Hirata LocToLocMap Matchings; 929a2e266b3SKazu Hirata longestCommonSequence<LineLocation, GlobalValue::GUID>( 930a2e266b3SKazu Hirata ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(), 931a2e266b3SKazu Hirata [&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); }); 932a2e266b3SKazu Hirata bool Inserted = UndriftMaps.try_emplace(CallerGUID, Matchings).second; 933a2e266b3SKazu Hirata 934a2e266b3SKazu Hirata // The insertion must succeed because we visit each GUID exactly once. 935a2e266b3SKazu Hirata assert(Inserted); 936a2e266b3SKazu Hirata (void)Inserted; 937a2e266b3SKazu Hirata } 938a2e266b3SKazu Hirata 939a2e266b3SKazu Hirata return UndriftMaps; 940a2e266b3SKazu Hirata } 941a2e266b3SKazu Hirata 942ac8a9f8fSKazu Hirata // Given a MemProfRecord, undrift all the source locations present in the 943ac8a9f8fSKazu Hirata // record in place. 944ac8a9f8fSKazu Hirata static void 945ac8a9f8fSKazu Hirata undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps, 946ac8a9f8fSKazu Hirata memprof::MemProfRecord &MemProfRec) { 947ac8a9f8fSKazu Hirata // Undrift a call stack in place. 
948ac8a9f8fSKazu Hirata auto UndriftCallStack = [&](std::vector<Frame> &CallStack) { 949ac8a9f8fSKazu Hirata for (auto &F : CallStack) { 950ac8a9f8fSKazu Hirata auto I = UndriftMaps.find(F.Function); 951ac8a9f8fSKazu Hirata if (I == UndriftMaps.end()) 952ac8a9f8fSKazu Hirata continue; 953ac8a9f8fSKazu Hirata auto J = I->second.find(LineLocation(F.LineOffset, F.Column)); 954ac8a9f8fSKazu Hirata if (J == I->second.end()) 955ac8a9f8fSKazu Hirata continue; 956ac8a9f8fSKazu Hirata auto &NewLoc = J->second; 957ac8a9f8fSKazu Hirata F.LineOffset = NewLoc.LineOffset; 958ac8a9f8fSKazu Hirata F.Column = NewLoc.Column; 959ac8a9f8fSKazu Hirata } 960ac8a9f8fSKazu Hirata }; 961ac8a9f8fSKazu Hirata 962ac8a9f8fSKazu Hirata for (auto &AS : MemProfRec.AllocSites) 963ac8a9f8fSKazu Hirata UndriftCallStack(AS.CallStack); 964ac8a9f8fSKazu Hirata 965ac8a9f8fSKazu Hirata for (auto &CS : MemProfRec.CallSites) 966ac8a9f8fSKazu Hirata UndriftCallStack(CS); 967ac8a9f8fSKazu Hirata } 968ac8a9f8fSKazu Hirata 9699b00ef52SSnehasish Kumar static void 9709b00ef52SSnehasish Kumar readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, 9717536474eSTeresa Johnson const TargetLibraryInfo &TLI, 972ac8a9f8fSKazu Hirata std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo, 973ac8a9f8fSKazu Hirata DenseMap<uint64_t, LocToLocMap> &UndriftMaps) { 9749b00ef52SSnehasish Kumar auto &Ctx = M.getContext(); 975340cb19eSlifengxiang1025 // Previously we used getIRPGOFuncName() here. If F is local linkage, 976340cb19eSlifengxiang1025 // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But 977340cb19eSlifengxiang1025 // llvm-profdata uses FuncName in dwarf to create GUID which doesn't 978340cb19eSlifengxiang1025 // contain FileName's prefix. It caused local linkage function can't 979340cb19eSlifengxiang1025 // find MemProfRecord. So we use getName() now. 
980340cb19eSlifengxiang1025 // 'unique-internal-linkage-names' can make MemProf work better for local 981340cb19eSlifengxiang1025 // linkage function. 982340cb19eSlifengxiang1025 auto FuncName = F.getName(); 983fe051934SEllis Hoag auto FuncGUID = Function::getGUID(FuncName); 984fe051934SEllis Hoag std::optional<memprof::MemProfRecord> MemProfRec; 9859b00ef52SSnehasish Kumar auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec); 986fe051934SEllis Hoag if (Err) { 987fe051934SEllis Hoag handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) { 98895014050STeresa Johnson auto Err = IPE.get(); 98995014050STeresa Johnson bool SkipWarning = false; 99095014050STeresa Johnson LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName 99195014050STeresa Johnson << ": "); 99295014050STeresa Johnson if (Err == instrprof_error::unknown_function) { 99395014050STeresa Johnson NumOfMemProfMissing++; 99495014050STeresa Johnson SkipWarning = !PGOWarnMissing; 99595014050STeresa Johnson LLVM_DEBUG(dbgs() << "unknown function"); 99695014050STeresa Johnson } else if (Err == instrprof_error::hash_mismatch) { 9977536474eSTeresa Johnson NumOfMemProfMismatch++; 99895014050STeresa Johnson SkipWarning = 99995014050STeresa Johnson NoPGOWarnMismatch || 100095014050STeresa Johnson (NoPGOWarnMismatchComdatWeak && 100195014050STeresa Johnson (F.hasComdat() || 100295014050STeresa Johnson F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 100395014050STeresa Johnson LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 100495014050STeresa Johnson } 100595014050STeresa Johnson 100695014050STeresa Johnson if (SkipWarning) 100795014050STeresa Johnson return; 100895014050STeresa Johnson 100995014050STeresa Johnson std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() + 101095014050STeresa Johnson Twine(" Hash = ") + std::to_string(FuncGUID)) 101195014050STeresa Johnson .str(); 101295014050STeresa Johnson 10139b00ef52SSnehasish 
Kumar Ctx.diagnose( 10149b00ef52SSnehasish Kumar DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 101595014050STeresa Johnson }); 101695014050STeresa Johnson return; 101795014050STeresa Johnson } 101895014050STeresa Johnson 10197536474eSTeresa Johnson NumOfMemProfFunc++; 10207536474eSTeresa Johnson 1021ac8a9f8fSKazu Hirata // If requested, undrfit MemProfRecord so that the source locations in it 1022ac8a9f8fSKazu Hirata // match those in the IR. 1023ac8a9f8fSKazu Hirata if (SalvageStaleProfile) 1024ac8a9f8fSKazu Hirata undriftMemProfRecord(UndriftMaps, *MemProfRec); 1025ac8a9f8fSKazu Hirata 10262446439fSTeresa Johnson // Detect if there are non-zero column numbers in the profile. If not, 10272446439fSTeresa Johnson // treat all column numbers as 0 when matching (i.e. ignore any non-zero 10282446439fSTeresa Johnson // columns in the IR). The profiled binary might have been built with 10292446439fSTeresa Johnson // column numbers disabled, for example. 10302446439fSTeresa Johnson bool ProfileHasColumns = false; 10312446439fSTeresa Johnson 103295014050STeresa Johnson // Build maps of the location hash to all profile data with that leaf location 103395014050STeresa Johnson // (allocation info and the callsites). 103495014050STeresa Johnson std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo; 10357c294eb7SKazu Hirata // A hash function for std::unordered_set<ArrayRef<Frame>> to work. 10367c294eb7SKazu Hirata struct CallStackHash { 10377c294eb7SKazu Hirata size_t operator()(ArrayRef<Frame> CS) const { 10387c294eb7SKazu Hirata return computeFullStackId(CS); 10397c294eb7SKazu Hirata } 10407c294eb7SKazu Hirata }; 10417c294eb7SKazu Hirata // For the callsites we need to record slices of the frame array (see comments 10427c294eb7SKazu Hirata // below where the map entries are added). 
10437c294eb7SKazu Hirata std::map<uint64_t, std::unordered_set<ArrayRef<Frame>, CallStackHash>> 104495014050STeresa Johnson LocHashToCallSites; 1045fe051934SEllis Hoag for (auto &AI : MemProfRec->AllocSites) { 10467536474eSTeresa Johnson NumOfMemProfAllocContextProfiles++; 104795014050STeresa Johnson // Associate the allocation info with the leaf frame. The later matching 104895014050STeresa Johnson // code will match any inlined call sequences in the IR with a longer prefix 104995014050STeresa Johnson // of call stack frames. 105095014050STeresa Johnson uint64_t StackId = computeStackId(AI.CallStack[0]); 105195014050STeresa Johnson LocHashToAllocInfo[StackId].insert(&AI); 10522446439fSTeresa Johnson ProfileHasColumns |= AI.CallStack[0].Column; 105395014050STeresa Johnson } 1054fe051934SEllis Hoag for (auto &CS : MemProfRec->CallSites) { 10557536474eSTeresa Johnson NumOfMemProfCallSiteProfiles++; 105695014050STeresa Johnson // Need to record all frames from leaf up to and including this function, 105795014050STeresa Johnson // as any of these may or may not have been inlined at this point. 105895014050STeresa Johnson unsigned Idx = 0; 105995014050STeresa Johnson for (auto &StackFrame : CS) { 106095014050STeresa Johnson uint64_t StackId = computeStackId(StackFrame); 10617c294eb7SKazu Hirata LocHashToCallSites[StackId].insert(ArrayRef<Frame>(CS).drop_front(Idx++)); 10622446439fSTeresa Johnson ProfileHasColumns |= StackFrame.Column; 106395014050STeresa Johnson // Once we find this function, we can stop recording. 
106495014050STeresa Johnson if (StackFrame.Function == FuncGUID) 106595014050STeresa Johnson break; 106695014050STeresa Johnson } 106795014050STeresa Johnson assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID); 106895014050STeresa Johnson } 106995014050STeresa Johnson 107095014050STeresa Johnson auto GetOffset = [](const DILocation *DIL) { 107195014050STeresa Johnson return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & 107295014050STeresa Johnson 0xffff; 107395014050STeresa Johnson }; 107495014050STeresa Johnson 107595014050STeresa Johnson // Now walk the instructions, looking up the associated profile data using 1076e2d539bbSKazu Hirata // debug locations. 107795014050STeresa Johnson for (auto &BB : F) { 107895014050STeresa Johnson for (auto &I : BB) { 107995014050STeresa Johnson if (I.isDebugOrPseudoInst()) 108095014050STeresa Johnson continue; 108195014050STeresa Johnson // We are only interested in calls (allocation or interior call stack 108295014050STeresa Johnson // context calls). 108395014050STeresa Johnson auto *CI = dyn_cast<CallBase>(&I); 108495014050STeresa Johnson if (!CI) 108595014050STeresa Johnson continue; 108695014050STeresa Johnson auto *CalledFunction = CI->getCalledFunction(); 108795014050STeresa Johnson if (CalledFunction && CalledFunction->isIntrinsic()) 108895014050STeresa Johnson continue; 108995014050STeresa Johnson // List of call stack ids computed from the location hashes on debug 109095014050STeresa Johnson // locations (leaf to inlined at root). 1091890c4becSKazu Hirata SmallVector<uint64_t, 8> InlinedCallStack; 109295014050STeresa Johnson // Was the leaf location found in one of the profile maps? 109395014050STeresa Johnson bool LeafFound = false; 109495014050STeresa Johnson // If leaf was found in a map, iterators pointing to its location in both 109595014050STeresa Johnson // of the maps. 
It might exist in neither, one, or both (the latter case 109695014050STeresa Johnson // can happen because we don't currently have discriminators to 109795014050STeresa Johnson // distinguish the case when a single line/col maps to both an allocation 109895014050STeresa Johnson // and another callsite). 109995014050STeresa Johnson std::map<uint64_t, std::set<const AllocationInfo *>>::iterator 110095014050STeresa Johnson AllocInfoIter; 11017c294eb7SKazu Hirata decltype(LocHashToCallSites)::iterator CallSitesIter; 110295014050STeresa Johnson for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr; 110395014050STeresa Johnson DIL = DIL->getInlinedAt()) { 110495014050STeresa Johnson // Use C++ linkage name if possible. Need to compile with 110595014050STeresa Johnson // -fdebug-info-for-profiling to get linkage name. 110695014050STeresa Johnson StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); 110795014050STeresa Johnson if (Name.empty()) 110895014050STeresa Johnson Name = DIL->getScope()->getSubprogram()->getName(); 110995014050STeresa Johnson auto CalleeGUID = Function::getGUID(Name); 11102446439fSTeresa Johnson auto StackId = computeStackId(CalleeGUID, GetOffset(DIL), 11112446439fSTeresa Johnson ProfileHasColumns ? DIL->getColumn() : 0); 111287f5e229STeresa Johnson // Check if we have found the profile's leaf frame. If yes, collect 111387f5e229STeresa Johnson // the rest of the call's inlined context starting here. If not, see if 111487f5e229STeresa Johnson // we find a match further up the inlined context (in case the profile 111587f5e229STeresa Johnson // was missing debug frames at the leaf). 
111695014050STeresa Johnson if (!LeafFound) { 111795014050STeresa Johnson AllocInfoIter = LocHashToAllocInfo.find(StackId); 111895014050STeresa Johnson CallSitesIter = LocHashToCallSites.find(StackId); 111987f5e229STeresa Johnson if (AllocInfoIter != LocHashToAllocInfo.end() || 112087f5e229STeresa Johnson CallSitesIter != LocHashToCallSites.end()) 112195014050STeresa Johnson LeafFound = true; 112295014050STeresa Johnson } 112387f5e229STeresa Johnson if (LeafFound) 112495014050STeresa Johnson InlinedCallStack.push_back(StackId); 112595014050STeresa Johnson } 112695014050STeresa Johnson // If leaf not in either of the maps, skip inst. 112795014050STeresa Johnson if (!LeafFound) 112895014050STeresa Johnson continue; 112995014050STeresa Johnson 113095014050STeresa Johnson // First add !memprof metadata from allocation info, if we found the 113195014050STeresa Johnson // instruction's leaf location in that map, and if the rest of the 113295014050STeresa Johnson // instruction's locations match the prefix Frame locations on an 113395014050STeresa Johnson // allocation context with the same leaf. 113495014050STeresa Johnson if (AllocInfoIter != LocHashToAllocInfo.end()) { 113595daf1aeSSnehasish Kumar // Only consider allocations which support hinting. 113695daf1aeSSnehasish Kumar if (!isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI)) 113795014050STeresa Johnson continue; 113895014050STeresa Johnson // We may match this instruction's location list to multiple MIB 113995014050STeresa Johnson // contexts. Add them to a Trie specialized for trimming the contexts to 114095014050STeresa Johnson // the minimal needed to disambiguate contexts with unique behavior. 
114195014050STeresa Johnson CallStackTrie AllocTrie; 1142a15e7b11STeresa Johnson uint64_t TotalSize = 0; 1143a15e7b11STeresa Johnson uint64_t TotalColdSize = 0; 114495014050STeresa Johnson for (auto *AllocInfo : AllocInfoIter->second) { 114595014050STeresa Johnson // Check the full inlined call stack against this one. 114695014050STeresa Johnson // If we found and thus matched all frames on the call, include 114795014050STeresa Johnson // this MIB. 114895014050STeresa Johnson if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack, 11497536474eSTeresa Johnson InlinedCallStack)) { 11507536474eSTeresa Johnson NumOfMemProfMatchedAllocContexts++; 11519513f2fdSTeresa Johnson uint64_t FullStackId = 0; 1152c7451ffcSTeresa Johnson if (ClPrintMemProfMatchInfo || MemProfReportHintedSizes || 1153c7451ffcSTeresa Johnson MinClonedColdBytePercent < 100) 11549513f2fdSTeresa Johnson FullStackId = computeFullStackId(AllocInfo->CallStack); 11559513f2fdSTeresa Johnson auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId); 1156a15e7b11STeresa Johnson TotalSize += AllocInfo->Info.getTotalSize(); 1157a15e7b11STeresa Johnson if (AllocType == AllocationType::Cold) 1158a15e7b11STeresa Johnson TotalColdSize += AllocInfo->Info.getTotalSize(); 11597536474eSTeresa Johnson // Record information about the allocation if match info printing 11607536474eSTeresa Johnson // was requested. 
11617536474eSTeresa Johnson if (ClPrintMemProfMatchInfo) { 11629513f2fdSTeresa Johnson assert(FullStackId != 0); 11637536474eSTeresa Johnson FullStackIdToAllocMatchInfo[FullStackId] = { 11647536474eSTeresa Johnson AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true}; 11657536474eSTeresa Johnson } 11667536474eSTeresa Johnson } 116795014050STeresa Johnson } 1168a15e7b11STeresa Johnson // If the threshold for the percent of cold bytes is less than 100%, 1169a15e7b11STeresa Johnson // and not all bytes are cold, see if we should still hint this 1170a15e7b11STeresa Johnson // allocation as cold without context sensitivity. 1171a15e7b11STeresa Johnson if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 && 1172a15e7b11STeresa Johnson TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) { 1173a15e7b11STeresa Johnson AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold, 1174a15e7b11STeresa Johnson "dominant"); 1175a15e7b11STeresa Johnson continue; 1176a15e7b11STeresa Johnson } 1177a15e7b11STeresa Johnson 117895014050STeresa Johnson // We might not have matched any to the full inlined call stack. 117995014050STeresa Johnson // But if we did, create and attach metadata, or a function attribute if 118095014050STeresa Johnson // all contexts have identical profiled behavior. 118195014050STeresa Johnson if (!AllocTrie.empty()) { 11827536474eSTeresa Johnson NumOfMemProfMatchedAllocs++; 118395014050STeresa Johnson // MemprofMDAttached will be false if a function attribute was 118495014050STeresa Johnson // attached. 
118595014050STeresa Johnson bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI); 118695014050STeresa Johnson assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof)); 118795014050STeresa Johnson if (MemprofMDAttached) { 118895014050STeresa Johnson // Add callsite metadata for the instruction's location list so that 118995014050STeresa Johnson // it simpler later on to identify which part of the MIB contexts 119095014050STeresa Johnson // are from this particular instruction (including during inlining, 1191e2d539bbSKazu Hirata // when the callsite metadata will be updated appropriately). 119295014050STeresa Johnson // FIXME: can this be changed to strip out the matching stack 119395014050STeresa Johnson // context ids from the MIB contexts and not add any callsite 119495014050STeresa Johnson // metadata here to save space? 119595014050STeresa Johnson addCallsiteMetadata(I, InlinedCallStack, Ctx); 119695014050STeresa Johnson } 119795014050STeresa Johnson } 119895014050STeresa Johnson continue; 119995014050STeresa Johnson } 120095014050STeresa Johnson 120195014050STeresa Johnson // Otherwise, add callsite metadata. If we reach here then we found the 120295014050STeresa Johnson // instruction's leaf location in the callsites map and not the allocation 120395014050STeresa Johnson // map. 120495014050STeresa Johnson assert(CallSitesIter != LocHashToCallSites.end()); 120595014050STeresa Johnson for (auto CallStackIdx : CallSitesIter->second) { 120695014050STeresa Johnson // If we found and thus matched all frames on the call, create and 120795014050STeresa Johnson // attach call stack metadata. 
12087c294eb7SKazu Hirata if (stackFrameIncludesInlinedCallStack(CallStackIdx, 12097c294eb7SKazu Hirata InlinedCallStack)) { 12107536474eSTeresa Johnson NumOfMemProfMatchedCallSites++; 121195014050STeresa Johnson addCallsiteMetadata(I, InlinedCallStack, Ctx); 121295014050STeresa Johnson // Only need to find one with a matching call stack and add a single 121395014050STeresa Johnson // callsite metadata. 121495014050STeresa Johnson break; 121595014050STeresa Johnson } 121695014050STeresa Johnson } 121795014050STeresa Johnson } 121895014050STeresa Johnson } 121995014050STeresa Johnson } 1220546ec641STeresa Johnson 1221546ec641STeresa Johnson MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile, 1222546ec641STeresa Johnson IntrusiveRefCntPtr<vfs::FileSystem> FS) 1223546ec641STeresa Johnson : MemoryProfileFileName(MemoryProfileFile), FS(FS) { 1224546ec641STeresa Johnson if (!FS) 1225546ec641STeresa Johnson this->FS = vfs::getRealFileSystem(); 1226546ec641STeresa Johnson } 1227546ec641STeresa Johnson 1228546ec641STeresa Johnson PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { 122951cdf1f6SKazu Hirata // Return immediately if the module doesn't contain any function. 
123051cdf1f6SKazu Hirata if (M.empty()) 123151cdf1f6SKazu Hirata return PreservedAnalyses::all(); 123251cdf1f6SKazu Hirata 1233546ec641STeresa Johnson LLVM_DEBUG(dbgs() << "Read in memory profile:"); 1234546ec641STeresa Johnson auto &Ctx = M.getContext(); 1235546ec641STeresa Johnson auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS); 1236546ec641STeresa Johnson if (Error E = ReaderOrErr.takeError()) { 1237546ec641STeresa Johnson handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 1238546ec641STeresa Johnson Ctx.diagnose( 1239546ec641STeresa Johnson DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message())); 1240546ec641STeresa Johnson }); 1241546ec641STeresa Johnson return PreservedAnalyses::all(); 1242546ec641STeresa Johnson } 1243546ec641STeresa Johnson 12449b00ef52SSnehasish Kumar std::unique_ptr<IndexedInstrProfReader> MemProfReader = 1245546ec641STeresa Johnson std::move(ReaderOrErr.get()); 12469b00ef52SSnehasish Kumar if (!MemProfReader) { 1247546ec641STeresa Johnson Ctx.diagnose(DiagnosticInfoPGOProfile( 12489b00ef52SSnehasish Kumar MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader"))); 1249546ec641STeresa Johnson return PreservedAnalyses::all(); 1250546ec641STeresa Johnson } 1251546ec641STeresa Johnson 12529b00ef52SSnehasish Kumar if (!MemProfReader->hasMemoryProfile()) { 1253546ec641STeresa Johnson Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), 1254546ec641STeresa Johnson "Not a memory profile")); 1255546ec641STeresa Johnson return PreservedAnalyses::all(); 1256546ec641STeresa Johnson } 1257546ec641STeresa Johnson 1258546ec641STeresa Johnson auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1259546ec641STeresa Johnson 1260ac8a9f8fSKazu Hirata TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin()); 1261ac8a9f8fSKazu Hirata DenseMap<uint64_t, LocToLocMap> UndriftMaps; 1262ac8a9f8fSKazu Hirata if (SalvageStaleProfile) 
1263ac8a9f8fSKazu Hirata UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI); 1264ac8a9f8fSKazu Hirata 12657536474eSTeresa Johnson // Map from the stack has of each allocation context in the function profiles 12667536474eSTeresa Johnson // to the total profiled size (bytes), allocation type, and whether we matched 12677536474eSTeresa Johnson // it to an allocation in the IR. 12687536474eSTeresa Johnson std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo; 12697536474eSTeresa Johnson 1270546ec641STeresa Johnson for (auto &F : M) { 1271546ec641STeresa Johnson if (F.isDeclaration()) 1272546ec641STeresa Johnson continue; 1273546ec641STeresa Johnson 1274546ec641STeresa Johnson const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F); 1275ac8a9f8fSKazu Hirata readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo, 1276ac8a9f8fSKazu Hirata UndriftMaps); 12777536474eSTeresa Johnson } 12787536474eSTeresa Johnson 12797536474eSTeresa Johnson if (ClPrintMemProfMatchInfo) { 12807536474eSTeresa Johnson for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo) 12817536474eSTeresa Johnson errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType) 12827536474eSTeresa Johnson << " context with id " << Id << " has total profiled size " 12837536474eSTeresa Johnson << Info.TotalSize << (Info.Matched ? " is" : " not") 12847536474eSTeresa Johnson << " matched\n"; 1285546ec641STeresa Johnson } 1286546ec641STeresa Johnson 1287546ec641STeresa Johnson return PreservedAnalyses::none(); 1288546ec641STeresa Johnson } 1289