1 //===- MemProfiler.cpp - memory allocation and access profiler ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of MemProfiler. Memory accesses are instrumented
10 // to increment the access count held in a shadow memory location, or
11 // alternatively to call into the runtime. Memory intrinsic calls (memmove,
12 // memcpy, memset) are changed to call the memory profiling runtime version
13 // instead.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Analysis/MemoryBuiltins.h"
22 #include "llvm/Analysis/MemoryProfileInfo.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/Analysis/ValueTracking.h"
25 #include "llvm/IR/Constant.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/GlobalValue.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/IntrinsicInst.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/ProfileData/InstrProf.h"
37 #include "llvm/ProfileData/InstrProfReader.h"
38 #include "llvm/Support/BLAKE3.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/HashBuilder.h"
42 #include "llvm/Support/VirtualFileSystem.h"
43 #include "llvm/TargetParser/Triple.h"
44 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
45 #include "llvm/Transforms/Utils/LongestCommonSequence.h"
46 #include "llvm/Transforms/Utils/ModuleUtils.h"
47 #include <map>
48 #include <set>
49 
50 using namespace llvm;
51 using namespace llvm::memprof;
52 
53 #define DEBUG_TYPE "memprof"
54 
55 namespace llvm {
56 extern cl::opt<bool> PGOWarnMissing;
57 extern cl::opt<bool> NoPGOWarnMismatch;
58 extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
59 } // namespace llvm
60 
61 constexpr int LLVM_MEM_PROFILER_VERSION = 1;
62 
63 // Size of memory mapped to a single shadow location.
64 constexpr uint64_t DefaultMemGranularity = 64;
65 
66 // Size of memory mapped to a single histogram bucket.
67 constexpr uint64_t HistogramGranularity = 8;
68 
69 // Scale from granularity down to shadow size.
70 constexpr uint64_t DefaultShadowScale = 3;
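
// With the defaults above (granularity 64, scale 3), each 64-byte granule of
// application memory maps to one 8-byte (int64) shadow counter; in histogram
// mode (granularity 8), each 8-byte granule maps to one 1-byte bucket.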
71 
72 constexpr char MemProfModuleCtorName[] = "memprof.module_ctor";
73 constexpr uint64_t MemProfCtorAndDtorPriority = 1;
74 // On Emscripten, the system needs more than one priority for constructors.
75 constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50;
76 constexpr char MemProfInitName[] = "__memprof_init";
77 constexpr char MemProfVersionCheckNamePrefix[] =
78     "__memprof_version_mismatch_check_v";
79 
80 constexpr char MemProfShadowMemoryDynamicAddress[] =
81     "__memprof_shadow_memory_dynamic_address";
82 
83 constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";
84 
85 constexpr char MemProfHistogramFlagVar[] = "__memprof_histogram";
86 
87 // Command-line flags.
88 
89 static cl::opt<bool> ClInsertVersionCheck(
90     "memprof-guard-against-version-mismatch",
91     cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,
92     cl::init(true));
93 
94 // This flag may need to be replaced with -f[no-]memprof-reads.
95 static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads",
96                                        cl::desc("instrument read instructions"),
97                                        cl::Hidden, cl::init(true));
98 
99 static cl::opt<bool>
100     ClInstrumentWrites("memprof-instrument-writes",
101                        cl::desc("instrument write instructions"), cl::Hidden,
102                        cl::init(true));
103 
104 static cl::opt<bool> ClInstrumentAtomics(
105     "memprof-instrument-atomics",
106     cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
107     cl::init(true));
108 
109 static cl::opt<bool> ClUseCalls(
110     "memprof-use-callbacks",
111     cl::desc("Use callbacks instead of inline instrumentation sequences."),
112     cl::Hidden, cl::init(false));
113 
114 static cl::opt<std::string>
115     ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix",
116                                  cl::desc("Prefix for memory access callbacks"),
117                                  cl::Hidden, cl::init("__memprof_"));
118 
119 // These flags allow changing the shadow mapping.
120 // The shadow mapping looks like
121 //    Shadow = ((Mem & mask) >> scale) + offset
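// For example, with the default granularity (64) and scale (3), an access to
// address 0x1007 updates the shadow counter at ((0x1007 & ~63) >> 3) + offset
// = 0x200 + offset.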
122 
123 static cl::opt<int> ClMappingScale("memprof-mapping-scale",
124                                    cl::desc("scale of memprof shadow mapping"),
125                                    cl::Hidden, cl::init(DefaultShadowScale));
126 
127 static cl::opt<int>
128     ClMappingGranularity("memprof-mapping-granularity",
129                          cl::desc("granularity of memprof shadow mapping"),
130                          cl::Hidden, cl::init(DefaultMemGranularity));
131 
132 static cl::opt<bool> ClStack("memprof-instrument-stack",
133                              cl::desc("Instrument scalar stack variables"),
134                              cl::Hidden, cl::init(false));
135 
136 // Debug flags.
137 
138 static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,
139                             cl::init(0));
140 
141 static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden,
142                                         cl::desc("Debug func"));
143 
144 static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"),
145                                cl::Hidden, cl::init(-1));
146 
147 static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
148                                cl::Hidden, cl::init(-1));
149 
150 // By default, disable matching of allocation profiles onto operator new
151 // calls that already explicitly pass a hot/cold hint, since we don't
152 // currently override these hints anyway.
153 static cl::opt<bool> ClMemProfMatchHotColdNew(
154     "memprof-match-hot-cold-new",
155     cl::desc(
156         "Match allocation profiles onto existing hot/cold operator new calls"),
157     cl::Hidden, cl::init(false));
158 
159 static cl::opt<bool> ClHistogram("memprof-histogram",
160                                  cl::desc("Collect access count histograms"),
161                                  cl::Hidden, cl::init(false));
162 
163 static cl::opt<bool>
164     ClPrintMemProfMatchInfo("memprof-print-match-info",
165                             cl::desc("Print matching stats for each allocation "
166                                      "context in this module's profiles"),
167                             cl::Hidden, cl::init(false));
168 
169 static cl::opt<std::string>
170     MemprofRuntimeDefaultOptions("memprof-runtime-default-options",
171                                  cl::desc("The default memprof options"),
172                                  cl::Hidden, cl::init(""));
173 
174 static cl::opt<bool>
175     SalvageStaleProfile("memprof-salvage-stale-profile",
176                         cl::desc("Salvage stale MemProf profile"),
177                         cl::init(false), cl::Hidden);
178 
179 cl::opt<unsigned> MinClonedColdBytePercent(
180     "memprof-cloning-cold-threshold", cl::init(100), cl::Hidden,
181     cl::desc("Min percent of cold bytes to hint alloc cold during cloning"));
182 
183 extern cl::opt<bool> MemProfReportHintedSizes;
184 
185 static cl::opt<unsigned> MinMatchedColdBytePercent(
186     "memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
187     cl::desc("Min percent of cold bytes matched to hint allocation cold"));
188 
189 // Instrumentation statistics
190 STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
191 STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
192 STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
193 STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
194 
195 // Matching statistics
196 STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
197 STATISTIC(NumOfMemProfMismatch,
198           "Number of functions having mismatched memory profile hash.");
199 STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
200 STATISTIC(NumOfMemProfAllocContextProfiles,
201           "Number of alloc contexts in memory profile.");
202 STATISTIC(NumOfMemProfCallSiteProfiles,
203           "Number of callsites in memory profile.");
204 STATISTIC(NumOfMemProfMatchedAllocContexts,
205           "Number of matched memory profile alloc contexts.");
206 STATISTIC(NumOfMemProfMatchedAllocs,
207           "Number of matched memory profile allocs.");
208 STATISTIC(NumOfMemProfMatchedCallSites,
209           "Number of matched memory profile callsites.");
210 
211 namespace {
212 
213 /// This struct defines the shadow mapping using the rule:
214 ///   shadow = ((mem & mask) >> Scale) + DynamicShadowOffset.
215 struct ShadowMapping {
216   ShadowMapping() {
217     Scale = ClMappingScale;
218     Granularity = ClHistogram ? HistogramGranularity : ClMappingGranularity;
219     Mask = ~(Granularity - 1);
220   }
221 
222   int Scale;
223   int Granularity;
224   uint64_t Mask; // Computed as ~(Granularity-1)
225 };
226 
227 static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) {
228   return TargetTriple.isOSEmscripten() ? MemProfEmscriptenCtorAndDtorPriority
229                                        : MemProfCtorAndDtorPriority;
230 }
231 
232 struct InterestingMemoryAccess {
233   Value *Addr = nullptr;
234   bool IsWrite;
235   Type *AccessTy;
236   Value *MaybeMask = nullptr;
237 };
238 
239 /// Instrument the code in module to profile memory accesses.
240 class MemProfiler {
241 public:
242   MemProfiler(Module &M) {
243     C = &(M.getContext());
244     LongSize = M.getDataLayout().getPointerSizeInBits();
245     IntptrTy = Type::getIntNTy(*C, LongSize);
246     PtrTy = PointerType::getUnqual(*C);
247   }
248 
249   /// If it is an interesting memory access, populate information
250   /// about the access and return an InterestingMemoryAccess struct.
251   /// Otherwise return std::nullopt.
252   std::optional<InterestingMemoryAccess>
253   isInterestingMemoryAccess(Instruction *I) const;
254 
255   void instrumentMop(Instruction *I, const DataLayout &DL,
256                      InterestingMemoryAccess &Access);
257   void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
258                          Value *Addr, bool IsWrite);
259   void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
260                                    Instruction *I, Value *Addr, Type *AccessTy,
261                                    bool IsWrite);
262   void instrumentMemIntrinsic(MemIntrinsic *MI);
263   Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
264   bool instrumentFunction(Function &F);
265   bool maybeInsertMemProfInitAtFunctionEntry(Function &F);
266   bool insertDynamicShadowAtFunctionEntry(Function &F);
267 
268 private:
269   void initializeCallbacks(Module &M);
270 
271   LLVMContext *C;
272   int LongSize;
273   Type *IntptrTy;
274   PointerType *PtrTy;
275   ShadowMapping Mapping;
276 
277   // This array is indexed by AccessIsWrite.
278   FunctionCallee MemProfMemoryAccessCallback[2];
279 
280   FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset;
281   Value *DynamicShadowOffset = nullptr;
282 };
283 
284 class ModuleMemProfiler {
285 public:
286   ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); }
287 
288   bool instrumentModule(Module &);
289 
290 private:
291   Triple TargetTriple;
292   ShadowMapping Mapping;
293   Function *MemProfCtorFunction = nullptr;
294 };
295 
296 } // end anonymous namespace
297 
298 MemProfilerPass::MemProfilerPass() = default;
299 
300 PreservedAnalyses MemProfilerPass::run(Function &F,
301                                        AnalysisManager<Function> &AM) {
302   assert((!ClHistogram || ClMappingGranularity == DefaultMemGranularity) &&
303          "Memprof with histogram only supports default mapping granularity");
304   Module &M = *F.getParent();
305   MemProfiler Profiler(M);
306   if (Profiler.instrumentFunction(F))
307     return PreservedAnalyses::none();
308   return PreservedAnalyses::all();
309 }
310 
311 ModuleMemProfilerPass::ModuleMemProfilerPass() = default;
312 
313 PreservedAnalyses ModuleMemProfilerPass::run(Module &M,
314                                              AnalysisManager<Module> &AM) {
315 
316   ModuleMemProfiler Profiler(M);
317   if (Profiler.instrumentModule(M))
318     return PreservedAnalyses::none();
319   return PreservedAnalyses::all();
320 }
321 
322 Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
323   // (Shadow & mask) >> scale
324   Shadow = IRB.CreateAnd(Shadow, Mapping.Mask);
325   Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
326   // (Shadow >> scale) + offset
327   assert(DynamicShadowOffset);
328   return IRB.CreateAdd(Shadow, DynamicShadowOffset);
329 }
330 
331 // Instrument memset/memmove/memcpy
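// With the default callback prefix, e.g. memcpy(dst, src, n) becomes a call to
// __memprof_memcpy(dst, src, n), and the original intrinsic is erased.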
332 void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) {
333   IRBuilder<> IRB(MI);
334   if (isa<MemTransferInst>(MI)) {
335     IRB.CreateCall(isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy,
336                    {MI->getOperand(0), MI->getOperand(1),
337                     IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
338   } else if (isa<MemSetInst>(MI)) {
339     IRB.CreateCall(
340         MemProfMemset,
341         {MI->getOperand(0),
342          IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
343          IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
344   }
345   MI->eraseFromParent();
346 }
347 
348 std::optional<InterestingMemoryAccess>
349 MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
350   // Do not instrument the load fetching the dynamic shadow address.
351   if (DynamicShadowOffset == I)
352     return std::nullopt;
353 
354   InterestingMemoryAccess Access;
355 
356   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
357     if (!ClInstrumentReads)
358       return std::nullopt;
359     Access.IsWrite = false;
360     Access.AccessTy = LI->getType();
361     Access.Addr = LI->getPointerOperand();
362   } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
363     if (!ClInstrumentWrites)
364       return std::nullopt;
365     Access.IsWrite = true;
366     Access.AccessTy = SI->getValueOperand()->getType();
367     Access.Addr = SI->getPointerOperand();
368   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
369     if (!ClInstrumentAtomics)
370       return std::nullopt;
371     Access.IsWrite = true;
372     Access.AccessTy = RMW->getValOperand()->getType();
373     Access.Addr = RMW->getPointerOperand();
374   } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
375     if (!ClInstrumentAtomics)
376       return std::nullopt;
377     Access.IsWrite = true;
378     Access.AccessTy = XCHG->getCompareOperand()->getType();
379     Access.Addr = XCHG->getPointerOperand();
380   } else if (auto *CI = dyn_cast<CallInst>(I)) {
381     auto *F = CI->getCalledFunction();
382     if (F && (F->getIntrinsicID() == Intrinsic::masked_load ||
383               F->getIntrinsicID() == Intrinsic::masked_store)) {
384       unsigned OpOffset = 0;
385       if (F->getIntrinsicID() == Intrinsic::masked_store) {
386         if (!ClInstrumentWrites)
387           return std::nullopt;
388         // Masked store has an initial operand for the value.
389         OpOffset = 1;
390         Access.AccessTy = CI->getArgOperand(0)->getType();
391         Access.IsWrite = true;
392       } else {
393         if (!ClInstrumentReads)
394           return std::nullopt;
395         Access.AccessTy = CI->getType();
396         Access.IsWrite = false;
397       }
398 
399       auto *BasePtr = CI->getOperand(0 + OpOffset);
400       Access.MaybeMask = CI->getOperand(2 + OpOffset);
401       Access.Addr = BasePtr;
402     }
403   }
404 
405   if (!Access.Addr)
406     return std::nullopt;
407 
408   // Do not instrument accesses from non-default address spaces; we cannot
409   // deal with them.
410   Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType());
411   if (PtrTy->getPointerAddressSpace() != 0)
412     return std::nullopt;
413 
414   // Ignore swifterror addresses.
415   // swifterror memory addresses are mem2reg promoted by instruction
416   // selection. As such they cannot have regular uses like an instrumentation
417   // function and it makes no sense to track them as memory.
418   if (Access.Addr->isSwiftError())
419     return std::nullopt;
420 
421   // Peel off GEPs and BitCasts.
422   auto *Addr = Access.Addr->stripInBoundsOffsets();
423 
424   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
425     // Do not instrument PGO counter updates.
426     if (GV->hasSection()) {
427       StringRef SectionName = GV->getSection();
428       // Check if the global is in the PGO counters section.
429       auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();
430       if (SectionName.ends_with(
431               getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
432         return std::nullopt;
433     }
434 
435     // Do not instrument accesses to LLVM internal variables.
436     if (GV->getName().starts_with("__llvm"))
437       return std::nullopt;
438   }
439 
440   return Access;
441 }
442 
443 void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
444                                               Instruction *I, Value *Addr,
445                                               Type *AccessTy, bool IsWrite) {
446   auto *VTy = cast<FixedVectorType>(AccessTy);
447   unsigned Num = VTy->getNumElements();
448   auto *Zero = ConstantInt::get(IntptrTy, 0);
449   for (unsigned Idx = 0; Idx < Num; ++Idx) {
450     Value *InstrumentedAddress = nullptr;
451     Instruction *InsertBefore = I;
452     if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
453       // dyn_cast as we might get UndefValue
454       if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
455         if (Masked->isZero())
456           // Mask is constant false, so no instrumentation needed.
457           continue;
458         // If we have a true or undef value, fall through to instrumentAddress
459         // with InsertBefore == I.
460       }
461     } else {
462       IRBuilder<> IRB(I);
463       Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
464       Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
465       InsertBefore = ThenTerm;
466     }
467 
468     IRBuilder<> IRB(InsertBefore);
469     InstrumentedAddress =
470         IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
471     instrumentAddress(I, InsertBefore, InstrumentedAddress, IsWrite);
472   }
473 }
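
// Note: for a masked access with a non-constant mask, the loop above emits one
// conditional block per vector lane, each guarding the shadow update for that
// lane's address. Lanes whose mask element is constant false are skipped.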
474 
475 void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
476                                 InterestingMemoryAccess &Access) {
477   // Skip instrumentation of stack accesses unless requested.
478   if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) {
479     if (Access.IsWrite)
480       ++NumSkippedStackWrites;
481     else
482       ++NumSkippedStackReads;
483     return;
484   }
485 
486   if (Access.IsWrite)
487     NumInstrumentedWrites++;
488   else
489     NumInstrumentedReads++;
490 
491   if (Access.MaybeMask) {
492     instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,
493                                 Access.AccessTy, Access.IsWrite);
494   } else {
495     // Since the access counts will be accumulated across the entire allocation,
496     // we only update the shadow access count for the first location and thus
497     // don't need to worry about alignment and type size.
498     instrumentAddress(I, I, Access.Addr, Access.IsWrite);
499   }
500 }
501 
502 void MemProfiler::instrumentAddress(Instruction *OrigIns,
503                                     Instruction *InsertBefore, Value *Addr,
504                                     bool IsWrite) {
505   IRBuilder<> IRB(InsertBefore);
506   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
507 
508   if (ClUseCalls) {
509     IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong);
510     return;
511   }
512 
513   Type *ShadowTy = ClHistogram ? Type::getInt8Ty(*C) : Type::getInt64Ty(*C);
514   Type *ShadowPtrTy = PointerType::get(*C, 0);
515 
516   Value *ShadowPtr = memToShadow(AddrLong, IRB);
517   Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);
518   Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);
519   // If we are profiling with histograms, add overflow protection at 255.
520   if (ClHistogram) {
521     Value *MaxCount = ConstantInt::get(Type::getInt8Ty(*C), 255);
522     Value *Cmp = IRB.CreateICmpULT(ShadowValue, MaxCount);
523     Instruction *IncBlock =
524         SplitBlockAndInsertIfThen(Cmp, InsertBefore, /*Unreachable=*/false);
525     IRB.SetInsertPoint(IncBlock);
526   }
527   Value *Inc = ConstantInt::get(ShadowTy, 1);
528   ShadowValue = IRB.CreateAdd(ShadowValue, Inc);
529   IRB.CreateStore(ShadowValue, ShadowAddr);
530 }
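
// Schematically, the inline (non-callback, non-histogram) sequence emitted
// above is:
//   %shadow = ((ptrtoint %addr) & Mask) >> Scale, plus DynamicShadowOffset
//   %count  = load i64 from %shadow
//   store (%count + 1) to %shadow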
531 
532 // Create the variable for the profile file name.
533 void createProfileFileNameVar(Module &M) {
534   const MDString *MemProfFilename =
535       dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename"));
536   if (!MemProfFilename)
537     return;
538   assert(!MemProfFilename->getString().empty() &&
539          "Unexpected MemProfProfileFilename metadata with empty string");
540   Constant *ProfileNameConst = ConstantDataArray::getString(
541       M.getContext(), MemProfFilename->getString(), true);
542   GlobalVariable *ProfileNameVar = new GlobalVariable(
543       M, ProfileNameConst->getType(), /*isConstant=*/true,
544       GlobalValue::WeakAnyLinkage, ProfileNameConst, MemProfFilenameVar);
545   Triple TT(M.getTargetTriple());
546   if (TT.supportsCOMDAT()) {
547     ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
548     ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar));
549   }
550 }
551 
552 // Set MemprofHistogramFlag as a global variable in the IR. This makes it
553 // accessible to the runtime, changing shadow count behavior.
554 void createMemprofHistogramFlagVar(Module &M) {
555   const StringRef VarName(MemProfHistogramFlagVar);
556   Type *IntTy1 = Type::getInt1Ty(M.getContext());
557   auto MemprofHistogramFlag = new GlobalVariable(
558       M, IntTy1, true, GlobalValue::WeakAnyLinkage,
559       Constant::getIntegerValue(IntTy1, APInt(1, ClHistogram)), VarName);
560   Triple TT(M.getTargetTriple());
561   if (TT.supportsCOMDAT()) {
562     MemprofHistogramFlag->setLinkage(GlobalValue::ExternalLinkage);
563     MemprofHistogramFlag->setComdat(M.getOrInsertComdat(VarName));
564   }
565   appendToCompilerUsed(M, MemprofHistogramFlag);
566 }
567 
568 void createMemprofDefaultOptionsVar(Module &M) {
569   Constant *OptionsConst = ConstantDataArray::getString(
570       M.getContext(), MemprofRuntimeDefaultOptions, /*AddNull=*/true);
571   GlobalVariable *OptionsVar =
572       new GlobalVariable(M, OptionsConst->getType(), /*isConstant=*/true,
573                          GlobalValue::WeakAnyLinkage, OptionsConst,
574                          "__memprof_default_options_str");
575   Triple TT(M.getTargetTriple());
576   if (TT.supportsCOMDAT()) {
577     OptionsVar->setLinkage(GlobalValue::ExternalLinkage);
578     OptionsVar->setComdat(M.getOrInsertComdat(OptionsVar->getName()));
579   }
580 }
581 
582 bool ModuleMemProfiler::instrumentModule(Module &M) {
583 
584   // Create a module constructor.
585   std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION);
586   std::string VersionCheckName =
587       ClInsertVersionCheck ? (MemProfVersionCheckNamePrefix + MemProfVersion)
588                            : "";
589   std::tie(MemProfCtorFunction, std::ignore) =
590       createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName,
591                                           MemProfInitName, /*InitArgTypes=*/{},
592                                           /*InitArgs=*/{}, VersionCheckName);
593 
594   const uint64_t Priority = getCtorAndDtorPriority(TargetTriple);
595   appendToGlobalCtors(M, MemProfCtorFunction, Priority);
596 
597   createProfileFileNameVar(M);
598 
599   createMemprofHistogramFlagVar(M);
600 
601   createMemprofDefaultOptionsVar(M);
602 
603   return true;
604 }
605 
606 void MemProfiler::initializeCallbacks(Module &M) {
607   IRBuilder<> IRB(*C);
608 
609   for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
610     const std::string TypeStr = AccessIsWrite ? "store" : "load";
611     const std::string HistPrefix = ClHistogram ? "hist_" : "";
612 
613     SmallVector<Type *, 2> Args1{1, IntptrTy};
614     MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction(
615         ClMemoryAccessCallbackPrefix + HistPrefix + TypeStr,
616         FunctionType::get(IRB.getVoidTy(), Args1, false));
617   }
618   MemProfMemmove = M.getOrInsertFunction(
619       ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy);
620   MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy",
621                                         PtrTy, PtrTy, PtrTy, IntptrTy);
622   MemProfMemset =
623       M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", PtrTy,
624                             PtrTy, IRB.getInt32Ty(), IntptrTy);
625 }
626 
627 bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) {
628   // For each NSObject descendant having a +load method, this method is invoked
629   // by the ObjC runtime before any of the static constructors is called.
630   // Therefore we need to instrument such methods with a call to __memprof_init
631   // at the beginning in order to initialize our runtime before any access to
632   // the shadow memory.
633   // We cannot just ignore these methods, because they may call other
634   // instrumented functions.
635   if (F.getName().contains(" load]")) {
636     FunctionCallee MemProfInitFunction =
637         declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {});
638     IRBuilder<> IRB(&F.front(), F.front().begin());
639     IRB.CreateCall(MemProfInitFunction, {});
640     return true;
641   }
642   return false;
643 }
644 
645 bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) {
646   IRBuilder<> IRB(&F.front().front());
647   Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
648       MemProfShadowMemoryDynamicAddress, IntptrTy);
649   if (F.getParent()->getPICLevel() == PICLevel::NotPIC)
650     cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true);
651   DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
652   return true;
653 }
654 
655 bool MemProfiler::instrumentFunction(Function &F) {
656   if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
657     return false;
658   if (ClDebugFunc == F.getName())
659     return false;
660   if (F.getName().starts_with("__memprof_"))
661     return false;
662 
663   bool FunctionModified = false;
664 
665   // If needed, insert __memprof_init.
666   // This function needs to be called even if the function body is not
667   // instrumented.
668   if (maybeInsertMemProfInitAtFunctionEntry(F))
669     FunctionModified = true;
670 
671   LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n");
672 
673   initializeCallbacks(*F.getParent());
674 
675   SmallVector<Instruction *, 16> ToInstrument;
676 
677   // Fill the set of memory operations to instrument.
678   for (auto &BB : F) {
679     for (auto &Inst : BB) {
680       if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst))
681         ToInstrument.push_back(&Inst);
682     }
683   }
684 
685   if (ToInstrument.empty()) {
686     LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified
687                       << " " << F << "\n");
688 
689     return FunctionModified;
690   }
691 
692   FunctionModified |= insertDynamicShadowAtFunctionEntry(F);
693 
694   int NumInstrumented = 0;
695   for (auto *Inst : ToInstrument) {
696     if (ClDebugMin < 0 || ClDebugMax < 0 ||
697         (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
698       std::optional<InterestingMemoryAccess> Access =
699           isInterestingMemoryAccess(Inst);
700       if (Access)
701         instrumentMop(Inst, F.getDataLayout(), *Access);
702       else
703         instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
704     }
705     NumInstrumented++;
706   }
707 
708   if (NumInstrumented > 0)
709     FunctionModified = true;
710 
711   LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " "
712                     << F << "\n");
713 
714   return FunctionModified;
715 }
716 
717 static void addCallsiteMetadata(Instruction &I,
718                                 ArrayRef<uint64_t> InlinedCallStack,
719                                 LLVMContext &Ctx) {
720   I.setMetadata(LLVMContext::MD_callsite,
721                 buildCallstackMetadata(InlinedCallStack, Ctx));
722 }
723 
724 static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
725                                uint32_t Column) {
726   llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
727       HashBuilder;
728   HashBuilder.add(Function, LineOffset, Column);
729   llvm::BLAKE3Result<8> Hash = HashBuilder.final();
730   uint64_t Id;
731   std::memcpy(&Id, Hash.data(), sizeof(Hash));
732   return Id;
733 }
734 
735 static uint64_t computeStackId(const memprof::Frame &Frame) {
736   return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
737 }
738 
739 // Helper to generate a single hash id for a given callstack, used for emitting
740 // matching statistics and useful for uniquing such statistics across modules.
741 static uint64_t computeFullStackId(ArrayRef<Frame> CallStack) {
742   llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
743       HashBuilder;
744   for (auto &F : CallStack)
745     HashBuilder.add(F.Function, F.LineOffset, F.Column);
746   llvm::BLAKE3Result<8> Hash = HashBuilder.final();
747   uint64_t Id;
748   std::memcpy(&Id, Hash.data(), sizeof(Hash));
749   return Id;
750 }
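
// Note: for a single-frame call stack this yields the same id as
// computeStackId above, since both hash the same (Function, LineOffset,
// Column) tuple with the same truncated BLAKE3 hasher.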
751 
752 static AllocationType addCallStack(CallStackTrie &AllocTrie,
753                                    const AllocationInfo *AllocInfo,
754                                    uint64_t FullStackId) {
755   SmallVector<uint64_t> StackIds;
756   for (const auto &StackFrame : AllocInfo->CallStack)
757     StackIds.push_back(computeStackId(StackFrame));
758   auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
759                                 AllocInfo->Info.getAllocCount(),
760                                 AllocInfo->Info.getTotalLifetime());
761   std::vector<ContextTotalSize> ContextSizeInfo;
762   if (MemProfReportHintedSizes || MinClonedColdBytePercent < 100) {
763     auto TotalSize = AllocInfo->Info.getTotalSize();
764     assert(TotalSize);
765     assert(FullStackId != 0);
766     ContextSizeInfo.push_back({FullStackId, TotalSize});
767   }
768   AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo));
769   return AllocType;
770 }
771 
772 // Helper to compare the InlinedCallStack computed from an instruction's debug
773 // info to a list of Frames from profile data (either the allocation data or a
774 // callsite). For callsites, the profile stack passed in may be a slice of
775 // the full Frame array starting at a non-zero offset.
776 static bool
777 stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
778                                    ArrayRef<uint64_t> InlinedCallStack) {
779   auto StackFrame = ProfileCallStack.begin();
780   auto InlCallStackIter = InlinedCallStack.begin();
781   for (; StackFrame != ProfileCallStack.end() &&
782          InlCallStackIter != InlinedCallStack.end();
783        ++StackFrame, ++InlCallStackIter) {
784     uint64_t StackId = computeStackId(*StackFrame);
785     if (StackId != *InlCallStackIter)
786       return false;
787   }
788   // Return true if we found and matched all stack ids from the call
789   // instruction.
790   return InlCallStackIter == InlinedCallStack.end();
791 }
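
// For example, a profile stack [f1, f2, f3, f4] (leaf first) includes the
// inlined call stacks [f1], [f1, f2], and [f1, f2, f3], but not [f2, f3]
// (wrong start) or [f1, f3] (gap).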
792 
793 static bool isAllocationWithHotColdVariant(const Function *Callee,
794                                            const TargetLibraryInfo &TLI) {
795   if (!Callee)
796     return false;
797   LibFunc Func;
798   if (!TLI.getLibFunc(*Callee, Func))
799     return false;
800   switch (Func) {
801   case LibFunc_Znwm:
802   case LibFunc_ZnwmRKSt9nothrow_t:
803   case LibFunc_ZnwmSt11align_val_t:
804   case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
805   case LibFunc_Znam:
806   case LibFunc_ZnamRKSt9nothrow_t:
807   case LibFunc_ZnamSt11align_val_t:
808   case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
809   case LibFunc_size_returning_new:
810   case LibFunc_size_returning_new_aligned:
811     return true;
812   case LibFunc_Znwm12__hot_cold_t:
813   case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
814   case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
815   case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
816   case LibFunc_Znam12__hot_cold_t:
817   case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
818   case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
819   case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
820   case LibFunc_size_returning_new_hot_cold:
821   case LibFunc_size_returning_new_aligned_hot_cold:
822     return ClMemProfMatchHotColdNew;
823   default:
824     return false;
825   }
826 }
827 
828 struct AllocMatchInfo {
829   uint64_t TotalSize = 0;
830   AllocationType AllocType = AllocationType::None;
831   bool Matched = false;
832 };
833 
834 DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
835 memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
836                             function_ref<bool(uint64_t)> IsPresentInProfile) {
837   DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls;
838 
839   auto GetOffset = [](const DILocation *DIL) {
840     return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
841            0xffff;
842   };
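
  // E.g., a call on line 110 of a function whose subprogram begins at line 100
  // gets offset 10 (truncated to 16 bits).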
843 
844   for (Function &F : M) {
845     if (F.isDeclaration())
846       continue;
847 
848     for (auto &BB : F) {
849       for (auto &I : BB) {
850         if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
851           continue;
852 
853         auto *CB = dyn_cast<CallBase>(&I);
854         auto *CalledFunction = CB->getCalledFunction();
855         // Disregard indirect calls and intrinsics.
856         if (!CalledFunction || CalledFunction->isIntrinsic())
857           continue;
858 
859         StringRef CalleeName = CalledFunction->getName();
860         // True if we are calling a heap allocation function that supports
861         // hot/cold variants.
862         bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
863         // True for the first iteration below, indicating that we are looking at
864         // a leaf node.
865         bool IsLeaf = true;
866         for (const DILocation *DIL = I.getDebugLoc(); DIL;
867              DIL = DIL->getInlinedAt()) {
868           StringRef CallerName = DIL->getSubprogramLinkageName();
869           assert(!CallerName.empty() &&
870                  "Be sure to enable -fdebug-info-for-profiling");
871           uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName);
872           uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName);
873           // Pretend that we are calling a function with GUID == 0 if we are
874           // in the inline stack leading to a heap allocation function.
875           if (IsAlloc) {
876             if (IsLeaf) {
877               // For leaf nodes, set CalleeGUID to 0 without consulting
878               // IsPresentInProfile.
879               CalleeGUID = 0;
880             } else if (!IsPresentInProfile(CalleeGUID)) {
881               // In addition to the leaf case above, continue to set CalleeGUID
882               // to 0 as long as we don't see CalleeGUID in the profile.
883               CalleeGUID = 0;
884             } else {
885               // Once we encounter a callee that exists in the profile, stop
886               // setting CalleeGUID to 0.
887               IsAlloc = false;
888             }
889           }
890 
891           LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
892           Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
893           CalleeName = CallerName;
894           IsLeaf = false;
895         }
896       }
897     }
898   }
899 
900   // Sort each call list by source location and remove duplicate entries.
901   for (auto &[CallerGUID, CallList] : Calls) {
902     llvm::sort(CallList);
903     CallList.erase(llvm::unique(CallList), CallList.end());
904   }
905 
906   return Calls;
907 }
908 
909 DenseMap<uint64_t, LocToLocMap>
910 memprof::computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader,
911                            const TargetLibraryInfo &TLI) {
912   DenseMap<uint64_t, LocToLocMap> UndriftMaps;
913 
914   DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromProfile =
915       MemProfReader->getMemProfCallerCalleePairs();
916   DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromIR =
917       extractCallsFromIR(M, TLI, [&](uint64_t GUID) {
918         return CallsFromProfile.contains(GUID);
919       });
920 
921   // Compute an undrift map for each CallerGUID.
922   for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
923     auto It = CallsFromProfile.find(CallerGUID);
924     if (It == CallsFromProfile.end())
925       continue;
926     const auto &ProfileAnchors = It->second;
927 
928     LocToLocMap Matchings;
929     longestCommonSequence<LineLocation, GlobalValue::GUID>(
930         ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),
931         [&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); });
932     bool Inserted = UndriftMaps.try_emplace(CallerGUID, Matchings).second;
933 
934     // The insertion must succeed because we visit each GUID exactly once.
935     assert(Inserted);
936     (void)Inserted;
937   }
938 
939   return UndriftMaps;
940 }
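
// Illustrative example: if the profile recorded a call at line offset 5 but
// subsequent edits moved it so that the IR now has it at offset 7, the
// matching above yields an entry mapping the profile location (offset 5) to
// the IR location (offset 7) for that caller GUID.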
941 
942 // Given a MemProfRecord, undrift all the source locations present in the
943 // record in place.
944 static void
945 undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
946                      memprof::MemProfRecord &MemProfRec) {
947   // Undrift a call stack in place.
948   auto UndriftCallStack = [&](std::vector<Frame> &CallStack) {
949     for (auto &F : CallStack) {
950       auto I = UndriftMaps.find(F.Function);
951       if (I == UndriftMaps.end())
952         continue;
953       auto J = I->second.find(LineLocation(F.LineOffset, F.Column));
954       if (J == I->second.end())
955         continue;
956       auto &NewLoc = J->second;
957       F.LineOffset = NewLoc.LineOffset;
958       F.Column = NewLoc.Column;
959     }
960   };
961 
962   for (auto &AS : MemProfRec.AllocSites)
963     UndriftCallStack(AS.CallStack);
964 
965   for (auto &CS : MemProfRec.CallSites)
966     UndriftCallStack(CS);
967 }
968 
969 static void
970 readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
971             const TargetLibraryInfo &TLI,
972             std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
973             DenseMap<uint64_t, LocToLocMap> &UndriftMaps) {
974   auto &Ctx = M.getContext();
975   // Previously we used getIRPGOFuncName() here. For a function with local
976   // linkage, getIRPGOFuncName() returns the name with a 'FileName;' prefix,
977   // but llvm-profdata uses the FuncName from DWARF to create the GUID, which
978   // lacks that prefix, so local-linkage functions could not find their
979   // MemProfRecord. We therefore use getName() now.
980   // 'unique-internal-linkage-names' can make MemProf work better for local
981   // linkage functions.
982   auto FuncName = F.getName();
983   auto FuncGUID = Function::getGUID(FuncName);
984   std::optional<memprof::MemProfRecord> MemProfRec;
985   auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
986   if (Err) {
987     handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
988       auto Err = IPE.get();
989       bool SkipWarning = false;
990       LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
991                         << ": ");
992       if (Err == instrprof_error::unknown_function) {
993         NumOfMemProfMissing++;
994         SkipWarning = !PGOWarnMissing;
995         LLVM_DEBUG(dbgs() << "unknown function");
996       } else if (Err == instrprof_error::hash_mismatch) {
997         NumOfMemProfMismatch++;
998         SkipWarning =
999             NoPGOWarnMismatch ||
1000             (NoPGOWarnMismatchComdatWeak &&
1001              (F.hasComdat() ||
1002               F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
1003         LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
1004       }
1005 
1006       if (SkipWarning)
1007         return;
1008 
1009       std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
1010                          Twine(" Hash = ") + std::to_string(FuncGUID))
1011                             .str();
1012 
1013       Ctx.diagnose(
1014           DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
1015     });
1016     return;
1017   }
1018 
1019   NumOfMemProfFunc++;
1020 
1021   // If requested, undrift the MemProfRecord so that the source locations in it
1022   // match those in the IR.
1023   if (SalvageStaleProfile)
1024     undriftMemProfRecord(UndriftMaps, *MemProfRec);
1025 
1026   // Detect if there are non-zero column numbers in the profile. If not,
1027   // treat all column numbers as 0 when matching (i.e. ignore any non-zero
1028   // columns in the IR). The profiled binary might have been built with
1029   // column numbers disabled, for example.
1030   bool ProfileHasColumns = false;
1031 
1032   // Build maps of the location hash to all profile data with that leaf location
1033   // (allocation info and the callsites).
1034   std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
1035   // A hash functor so that std::unordered_set<ArrayRef<Frame>> works.
1036   struct CallStackHash {
1037     size_t operator()(ArrayRef<Frame> CS) const {
1038       return computeFullStackId(CS);
1039     }
1040   };
1041   // For the callsites we need to record slices of the frame array (see comments
1042   // below where the map entries are added).
1043   std::map<uint64_t, std::unordered_set<ArrayRef<Frame>, CallStackHash>>
1044       LocHashToCallSites;
1045   for (auto &AI : MemProfRec->AllocSites) {
1046     NumOfMemProfAllocContextProfiles++;
1047     // Associate the allocation info with the leaf frame. The later matching
1048     // code will match any inlined call sequences in the IR with a longer prefix
1049     // of call stack frames.
1050     uint64_t StackId = computeStackId(AI.CallStack[0]);
1051     LocHashToAllocInfo[StackId].insert(&AI);
1052     ProfileHasColumns |= AI.CallStack[0].Column;
1053   }
1054   for (auto &CS : MemProfRec->CallSites) {
1055     NumOfMemProfCallSiteProfiles++;
1056     // Need to record all frames from leaf up to and including this function,
1057     // as any of these may or may not have been inlined at this point.
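    // For example, for a profile call stack [leaf, mid, F] (leaf first, F
    // being this function), we record hash(leaf) -> [leaf, mid, F],
    // hash(mid) -> [mid, F], and hash(F) -> [F].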
1058     unsigned Idx = 0;
1059     for (auto &StackFrame : CS) {
1060       uint64_t StackId = computeStackId(StackFrame);
1061       LocHashToCallSites[StackId].insert(ArrayRef<Frame>(CS).drop_front(Idx++));
1062       ProfileHasColumns |= StackFrame.Column;
1063       // Once we find this function, we can stop recording.
1064       if (StackFrame.Function == FuncGUID)
1065         break;
1066     }
1067     assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
1068   }
1069 
1070   auto GetOffset = [](const DILocation *DIL) {
1071     return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
1072            0xffff;
1073   };
1074 
1075   // Now walk the instructions, looking up the associated profile data using
1076   // debug locations.
1077   for (auto &BB : F) {
1078     for (auto &I : BB) {
1079       if (I.isDebugOrPseudoInst())
1080         continue;
1081       // We are only interested in calls (allocation or interior call stack
1082       // context calls).
1083       auto *CI = dyn_cast<CallBase>(&I);
1084       if (!CI)
1085         continue;
1086       auto *CalledFunction = CI->getCalledFunction();
1087       if (CalledFunction && CalledFunction->isIntrinsic())
1088         continue;
1089       // List of call stack ids computed from the hashes of the debug
1090       // locations (leaf to inlined-at root).
1091       SmallVector<uint64_t, 8> InlinedCallStack;
1092       // Was the leaf location found in one of the profile maps?
1093       bool LeafFound = false;
1094       // If leaf was found in a map, iterators pointing to its location in both
1095       // of the maps. It might exist in neither, one, or both (the latter case
1096       // can happen because we don't currently have discriminators to
1097       // distinguish the case when a single line/col maps to both an allocation
1098       // and another callsite).
1099       std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
1100           AllocInfoIter;
1101       decltype(LocHashToCallSites)::iterator CallSitesIter;
1102       for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
1103            DIL = DIL->getInlinedAt()) {
1104         // Use C++ linkage name if possible. Need to compile with
1105         // -fdebug-info-for-profiling to get linkage name.
1106         StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
1107         if (Name.empty())
1108           Name = DIL->getScope()->getSubprogram()->getName();
1109         auto CalleeGUID = Function::getGUID(Name);
1110         auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
1111                                       ProfileHasColumns ? DIL->getColumn() : 0);
1112         // Check if we have found the profile's leaf frame. If yes, collect
1113         // the rest of the call's inlined context starting here. If not, see if
1114         // we find a match further up the inlined context (in case the profile
1115         // was missing debug frames at the leaf).
1116         if (!LeafFound) {
1117           AllocInfoIter = LocHashToAllocInfo.find(StackId);
1118           CallSitesIter = LocHashToCallSites.find(StackId);
1119           if (AllocInfoIter != LocHashToAllocInfo.end() ||
1120               CallSitesIter != LocHashToCallSites.end())
1121             LeafFound = true;
1122         }
1123         if (LeafFound)
1124           InlinedCallStack.push_back(StackId);
1125       }
1126       // If the leaf is in neither of the maps, skip this instruction.
1127       if (!LeafFound)
1128         continue;
1129 
1130       // First add !memprof metadata from allocation info, if we found the
1131       // instruction's leaf location in that map, and if the rest of the
1132       // instruction's locations match the prefix Frame locations on an
1133       // allocation context with the same leaf.
1134       if (AllocInfoIter != LocHashToAllocInfo.end()) {
1135         // Only consider allocations which support hinting.
1136         if (!isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI))
1137           continue;
1138         // We may match this instruction's location list to multiple MIB
1139         // contexts. Add them to a Trie specialized for trimming the contexts to
1140         // the minimal needed to disambiguate contexts with unique behavior.
1141         CallStackTrie AllocTrie;
1142         uint64_t TotalSize = 0;
1143         uint64_t TotalColdSize = 0;
1144         for (auto *AllocInfo : AllocInfoIter->second) {
1145           // Check the full inlined call stack against this one.
1146           // If we found and thus matched all frames on the call, include
1147           // this MIB.
1148           if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
1149                                                  InlinedCallStack)) {
1150             NumOfMemProfMatchedAllocContexts++;
1151             uint64_t FullStackId = 0;
1152             if (ClPrintMemProfMatchInfo || MemProfReportHintedSizes ||
1153                 MinClonedColdBytePercent < 100)
1154               FullStackId = computeFullStackId(AllocInfo->CallStack);
1155             auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
1156             TotalSize += AllocInfo->Info.getTotalSize();
1157             if (AllocType == AllocationType::Cold)
1158               TotalColdSize += AllocInfo->Info.getTotalSize();
1159             // Record information about the allocation if match info printing
1160             // was requested.
1161             if (ClPrintMemProfMatchInfo) {
1162               assert(FullStackId != 0);
1163               FullStackIdToAllocMatchInfo[FullStackId] = {
1164                   AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
1165             }
1166           }
1167         }
1168         // If the threshold for the percent of cold bytes is less than 100%,
1169         // and not all bytes are cold, see if we should still hint this
1170         // allocation as cold without context sensitivity.
1171         if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 &&
1172             TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) {
1173           AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold,
1174                                                 "dominant");
1175           continue;
1176         }
1177 
1178         // We might not have matched any to the full inlined call stack.
1179         // But if we did, create and attach metadata, or a function attribute if
1180         // all contexts have identical profiled behavior.
1181         if (!AllocTrie.empty()) {
1182           NumOfMemProfMatchedAllocs++;
1183           // MemprofMDAttached will be false if a function attribute was
1184           // attached.
1185           bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
1186           assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
1187           if (MemprofMDAttached) {
1188             // Add callsite metadata for the instruction's location list so that
1189             // it is simpler later on to identify which part of the MIB contexts
1190             // are from this particular instruction (including during inlining,
1191             // when the callsite metadata will be updated appropriately).
1192             // FIXME: can this be changed to strip out the matching stack
1193             // context ids from the MIB contexts and not add any callsite
1194             // metadata here to save space?
1195             addCallsiteMetadata(I, InlinedCallStack, Ctx);
1196           }
1197         }
1198         continue;
1199       }
1200 
1201       // Otherwise, add callsite metadata. If we reach here then we found the
1202       // instruction's leaf location in the callsites map and not the allocation
1203       // map.
1204       assert(CallSitesIter != LocHashToCallSites.end());
1205       for (auto CallStackIdx : CallSitesIter->second) {
1206         // If we found and thus matched all frames on the call, create and
1207         // attach call stack metadata.
1208         if (stackFrameIncludesInlinedCallStack(CallStackIdx,
1209                                                InlinedCallStack)) {
1210           NumOfMemProfMatchedCallSites++;
1211           addCallsiteMetadata(I, InlinedCallStack, Ctx);
1212           // Only need to find one with a matching call stack and add a single
1213           // callsite metadata.
1214           break;
1215         }
1216       }
1217     }
1218   }
1219 }
1220 
1221 MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
1222                                IntrusiveRefCntPtr<vfs::FileSystem> FS)
1223     : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
1224   if (!FS)
1225     this->FS = vfs::getRealFileSystem();
1226 }
1227 
1228 PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
1229   // Return immediately if the module doesn't contain any function.
1230   if (M.empty())
1231     return PreservedAnalyses::all();
1232 
1233   LLVM_DEBUG(dbgs() << "Read in memory profile:");
1234   auto &Ctx = M.getContext();
1235   auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
1236   if (Error E = ReaderOrErr.takeError()) {
1237     handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
1238       Ctx.diagnose(
1239           DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
1240     });
1241     return PreservedAnalyses::all();
1242   }
1243 
1244   std::unique_ptr<IndexedInstrProfReader> MemProfReader =
1245       std::move(ReaderOrErr.get());
1246   if (!MemProfReader) {
1247     Ctx.diagnose(DiagnosticInfoPGOProfile(
1248         MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
1249     return PreservedAnalyses::all();
1250   }
1251 
1252   if (!MemProfReader->hasMemoryProfile()) {
1253     Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
1254                                           "Not a memory profile"));
1255     return PreservedAnalyses::all();
1256   }
1257 
1258   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1259 
1260   TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
1261   DenseMap<uint64_t, LocToLocMap> UndriftMaps;
1262   if (SalvageStaleProfile)
1263     UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
1264 
1265   // Map from the stack hash of each allocation context in the function profiles
1266   // to the total profiled size (bytes), allocation type, and whether we matched
1267   // it to an allocation in the IR.
1268   std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
1269 
1270   for (auto &F : M) {
1271     if (F.isDeclaration())
1272       continue;
1273 
1274     const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
1275     readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
1276                 UndriftMaps);
1277   }
1278 
1279   if (ClPrintMemProfMatchInfo) {
1280     for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
1281       errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
1282              << " context with id " << Id << " has total profiled size "
1283              << Info.TotalSize << (Info.Matched ? " is" : " not")
1284              << " matched\n";
1285   }
1286 
1287   return PreservedAnalyses::none();
1288 }
1289