xref: /llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp (revision 2e33ed9ecc52fcec27eac2efb2615d1efcf6fd32)
1 //===- MemProfiler.cpp - memory allocation and access profiler ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of MemProfiler. Memory accesses are instrumented
10 // to increment the access count held in a shadow memory location, or
11 // alternatively to call into the runtime. Memory intrinsic calls (memmove,
12 // memcpy, memset) are changed to call the memory profiling runtime version
13 // instead.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Analysis/MemoryBuiltins.h"
22 #include "llvm/Analysis/MemoryProfileInfo.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/Analysis/ValueTracking.h"
25 #include "llvm/IR/Constant.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/GlobalValue.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/IntrinsicInst.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/ProfileData/InstrProf.h"
37 #include "llvm/ProfileData/InstrProfReader.h"
38 #include "llvm/Support/BLAKE3.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/HashBuilder.h"
42 #include "llvm/Support/VirtualFileSystem.h"
43 #include "llvm/TargetParser/Triple.h"
44 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
45 #include "llvm/Transforms/Utils/LongestCommonSequence.h"
46 #include "llvm/Transforms/Utils/ModuleUtils.h"
47 #include <map>
48 #include <set>
49 
50 using namespace llvm;
51 using namespace llvm::memprof;
52 
53 #define DEBUG_TYPE "memprof"
54 
namespace llvm {
// PGO warning flags defined in another translation unit and shared with the
// profile-matching code below.
extern cl::opt<bool> PGOWarnMissing;
extern cl::opt<bool> NoPGOWarnMismatch;
extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
} // namespace llvm

// Profiler version embedded in the version-mismatch check symbol name (see
// MemProfVersionCheckNamePrefix below).
constexpr int LLVM_MEM_PROFILER_VERSION = 1;

// Size of memory mapped to a single shadow location.
constexpr uint64_t DefaultMemGranularity = 64;

// Size of memory mapped to a single histogram bucket.
constexpr uint64_t HistogramGranularity = 8;

// Scale from granularity down to shadow size.
constexpr uint64_t DefaultShadowScale = 3;

constexpr char MemProfModuleCtorName[] = "memprof.module_ctor";
constexpr uint64_t MemProfCtorAndDtorPriority = 1;
// On Emscripten, the system needs more than one priority for constructors.
constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50;
constexpr char MemProfInitName[] = "__memprof_init";
constexpr char MemProfVersionCheckNamePrefix[] =
    "__memprof_version_mismatch_check_v";

// Runtime global holding the dynamically-allocated shadow base address.
constexpr char MemProfShadowMemoryDynamicAddress[] =
    "__memprof_shadow_memory_dynamic_address";

constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";

constexpr char MemProfHistogramFlagVar[] = "__memprof_histogram";
86 
87 // Command-line flags.
88 
89 static cl::opt<bool> ClInsertVersionCheck(
90     "memprof-guard-against-version-mismatch",
91     cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,
92     cl::init(true));
93 
94 // This flag may need to be replaced with -f[no-]memprof-reads.
95 static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads",
96                                        cl::desc("instrument read instructions"),
97                                        cl::Hidden, cl::init(true));
98 
99 static cl::opt<bool>
100     ClInstrumentWrites("memprof-instrument-writes",
101                        cl::desc("instrument write instructions"), cl::Hidden,
102                        cl::init(true));
103 
104 static cl::opt<bool> ClInstrumentAtomics(
105     "memprof-instrument-atomics",
106     cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
107     cl::init(true));
108 
109 static cl::opt<bool> ClUseCalls(
110     "memprof-use-callbacks",
111     cl::desc("Use callbacks instead of inline instrumentation sequences."),
112     cl::Hidden, cl::init(false));
113 
114 static cl::opt<std::string>
115     ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix",
116                                  cl::desc("Prefix for memory access callbacks"),
117                                  cl::Hidden, cl::init("__memprof_"));
118 
119 // These flags allow to change the shadow mapping.
120 // The shadow mapping looks like
121 //    Shadow = ((Mem & mask) >> scale) + offset
122 
123 static cl::opt<int> ClMappingScale("memprof-mapping-scale",
124                                    cl::desc("scale of memprof shadow mapping"),
125                                    cl::Hidden, cl::init(DefaultShadowScale));
126 
127 static cl::opt<int>
128     ClMappingGranularity("memprof-mapping-granularity",
129                          cl::desc("granularity of memprof shadow mapping"),
130                          cl::Hidden, cl::init(DefaultMemGranularity));
131 
132 static cl::opt<bool> ClStack("memprof-instrument-stack",
133                              cl::desc("Instrument scalar stack variables"),
134                              cl::Hidden, cl::init(false));
135 
136 // Debug flags.
137 
138 static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,
139                             cl::init(0));
140 
141 static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden,
142                                         cl::desc("Debug func"));
143 
144 static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"),
145                                cl::Hidden, cl::init(-1));
146 
147 static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
148                                cl::Hidden, cl::init(-1));
149 
150 // By default disable matching of allocation profiles onto operator new that
151 // already explicitly pass a hot/cold hint, since we don't currently
152 // override these hints anyway.
153 static cl::opt<bool> ClMemProfMatchHotColdNew(
154     "memprof-match-hot-cold-new",
155  cl::desc(
156         "Match allocation profiles onto existing hot/cold operator new calls"),
157     cl::Hidden, cl::init(false));
158 
159 static cl::opt<bool> ClHistogram("memprof-histogram",
160                                  cl::desc("Collect access count histograms"),
161                                  cl::Hidden, cl::init(false));
162 
163 static cl::opt<bool>
164     ClPrintMemProfMatchInfo("memprof-print-match-info",
165                             cl::desc("Print matching stats for each allocation "
166                                      "context in this module's profiles"),
167                             cl::Hidden, cl::init(false));
168 
169 static cl::opt<std::string>
170     MemprofRuntimeDefaultOptions("memprof-runtime-default-options",
171                                  cl::desc("The default memprof options"),
172                                  cl::Hidden, cl::init(""));
173 
174 extern cl::opt<bool> MemProfReportHintedSizes;
175 
// Instrumentation statistics, incremented while rewriting accesses.
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");

// Matching statistics, incremented while annotating IR from a profile.
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfMemProfMismatch,
          "Number of functions having mismatched memory profile hash.");
STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
STATISTIC(NumOfMemProfAllocContextProfiles,
          "Number of alloc contexts in memory profile.");
STATISTIC(NumOfMemProfCallSiteProfiles,
          "Number of callsites in memory profile.");
STATISTIC(NumOfMemProfMatchedAllocContexts,
          "Number of matched memory profile alloc contexts.");
STATISTIC(NumOfMemProfMatchedAllocs,
          "Number of matched memory profile allocs.");
STATISTIC(NumOfMemProfMatchedCallSites,
          "Number of matched memory profile callsites.");
197 
198 namespace {
199 
200 /// This struct defines the shadow mapping using the rule:
201 ///   shadow = ((mem & mask) >> Scale) ADD DynamicShadowOffset.
202 struct ShadowMapping {
203   ShadowMapping() {
204     Scale = ClMappingScale;
205     Granularity = ClHistogram ? HistogramGranularity : ClMappingGranularity;
206     Mask = ~(Granularity - 1);
207   }
208 
209   int Scale;
210   int Granularity;
211   uint64_t Mask; // Computed as ~(Granularity-1)
212 };
213 
214 static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) {
215   return TargetTriple.isOSEmscripten() ? MemProfEmscriptenCtorAndDtorPriority
216                                        : MemProfCtorAndDtorPriority;
217 }
218 
/// Describes one memory access that should be instrumented.
struct InterestingMemoryAccess {
  // Pointer operand of the access.
  Value *Addr = nullptr;
  // True for stores and atomics, false for loads.
  bool IsWrite;
  // Type of the value loaded or stored.
  Type *AccessTy;
  // Mask operand for masked loads/stores; null for regular accesses.
  Value *MaybeMask = nullptr;
};
225 
/// Instrument the code in module to profile memory accesses.
class MemProfiler {
public:
  MemProfiler(Module &M) {
    C = &(M.getContext());
    LongSize = M.getDataLayout().getPointerSizeInBits();
    IntptrTy = Type::getIntNTy(*C, LongSize);
    PtrTy = PointerType::getUnqual(*C);
  }

  /// If it is an interesting memory access, populate information
  /// about the access and return a InterestingMemoryAccess struct.
  /// Otherwise return std::nullopt.
  std::optional<InterestingMemoryAccess>
  isInterestingMemoryAccess(Instruction *I) const;

  /// Instrument a single load/store/atomic described by \p Access.
  void instrumentMop(Instruction *I, const DataLayout &DL,
                     InterestingMemoryAccess &Access);
  /// Emit the shadow-count update (or runtime callback) for one address.
  void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
                         Value *Addr, bool IsWrite);
  /// Instrument each possibly-active element of a masked load/store.
  void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                   Instruction *I, Value *Addr, Type *AccessTy,
                                   bool IsWrite);
  /// Replace memset/memmove/memcpy with profiling runtime calls.
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  /// Map an application address to its shadow location.
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
  /// Instrument all interesting accesses in \p F; returns true if changed.
  bool instrumentFunction(Function &F);
  bool maybeInsertMemProfInitAtFunctionEntry(Function &F);
  bool insertDynamicShadowAtFunctionEntry(Function &F);

private:
  void initializeCallbacks(Module &M);

  LLVMContext *C;
  int LongSize;
  Type *IntptrTy;
  PointerType *PtrTy;
  ShadowMapping Mapping;

  // This array is indexed by AccessIsWrite.
  FunctionCallee MemProfMemoryAccessCallback[2];

  FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset;
  // Shadow base loaded at function entry; null until then.
  Value *DynamicShadowOffset = nullptr;
};
270 
/// Module-level instrumentation: inserts the module constructor and the
/// runtime-visible globals (filename, histogram flag, default options).
class ModuleMemProfiler {
public:
  ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); }

  bool instrumentModule(Module &);

private:
  Triple TargetTriple;
  ShadowMapping Mapping;
  Function *MemProfCtorFunction = nullptr;
};
282 
283 } // end anonymous namespace
284 
// Defaulted out-of-line to keep the definition in this translation unit.
MemProfilerPass::MemProfilerPass() = default;
286 
287 PreservedAnalyses MemProfilerPass::run(Function &F,
288                                        AnalysisManager<Function> &AM) {
289   assert((!ClHistogram || ClMappingGranularity == DefaultMemGranularity) &&
290          "Memprof with histogram only supports default mapping granularity");
291   Module &M = *F.getParent();
292   MemProfiler Profiler(M);
293   if (Profiler.instrumentFunction(F))
294     return PreservedAnalyses::none();
295   return PreservedAnalyses::all();
296 }
297 
// Defaulted out-of-line to keep the definition in this translation unit.
ModuleMemProfilerPass::ModuleMemProfilerPass() = default;
299 
300 PreservedAnalyses ModuleMemProfilerPass::run(Module &M,
301                                              AnalysisManager<Module> &AM) {
302 
303   ModuleMemProfiler Profiler(M);
304   if (Profiler.instrumentModule(M))
305     return PreservedAnalyses::none();
306   return PreservedAnalyses::all();
307 }
308 
309 Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
310   // (Shadow & mask) >> scale
311   Shadow = IRB.CreateAnd(Shadow, Mapping.Mask);
312   Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
313   // (Shadow >> scale) | offset
314   assert(DynamicShadowOffset);
315   return IRB.CreateAdd(Shadow, DynamicShadowOffset);
316 }
317 
318 // Instrument memset/memmove/memcpy
319 void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) {
320   IRBuilder<> IRB(MI);
321   if (isa<MemTransferInst>(MI)) {
322     IRB.CreateCall(isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy,
323                    {MI->getOperand(0), MI->getOperand(1),
324                     IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
325   } else if (isa<MemSetInst>(MI)) {
326     IRB.CreateCall(
327         MemProfMemset,
328         {MI->getOperand(0),
329          IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
330          IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
331   }
332   MI->eraseFromParent();
333 }
334 
// Classify \p I: if it is a memory access we want to profile, return a
// populated InterestingMemoryAccess; otherwise return std::nullopt.
std::optional<InterestingMemoryAccess>
MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
  // Do not instrument the load fetching the dynamic shadow address.
  if (DynamicShadowOffset == I)
    return std::nullopt;

  InterestingMemoryAccess Access;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads)
      return std::nullopt;
    Access.IsWrite = false;
    Access.AccessTy = LI->getType();
    Access.Addr = LI->getPointerOperand();
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = SI->getValueOperand()->getType();
    Access.Addr = SI->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    // Atomic read-modify-write is treated as a write.
    Access.IsWrite = true;
    Access.AccessTy = RMW->getValOperand()->getType();
    Access.Addr = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    // Compare-exchange is treated as a write as well.
    Access.IsWrite = true;
    Access.AccessTy = XCHG->getCompareOperand()->getType();
    Access.Addr = XCHG->getPointerOperand();
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    // Masked vector loads/stores are also interesting.
    auto *F = CI->getCalledFunction();
    if (F && (F->getIntrinsicID() == Intrinsic::masked_load ||
              F->getIntrinsicID() == Intrinsic::masked_store)) {
      unsigned OpOffset = 0;
      if (F->getIntrinsicID() == Intrinsic::masked_store) {
        if (!ClInstrumentWrites)
          return std::nullopt;
        // Masked store has an initial operand for the value.
        OpOffset = 1;
        Access.AccessTy = CI->getArgOperand(0)->getType();
        Access.IsWrite = true;
      } else {
        if (!ClInstrumentReads)
          return std::nullopt;
        Access.AccessTy = CI->getType();
        Access.IsWrite = false;
      }

      auto *BasePtr = CI->getOperand(0 + OpOffset);
      // The mask is taken from operand 2 (shifted by one for masked stores).
      Access.MaybeMask = CI->getOperand(2 + OpOffset);
      Access.Addr = BasePtr;
    }
  }

  // Not a recognized access kind.
  if (!Access.Addr)
    return std::nullopt;

  // Do not instrument accesses from different address spaces; we cannot deal
  // with them.
  Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType());
  if (PtrTy->getPointerAddressSpace() != 0)
    return std::nullopt;

  // Ignore swifterror addresses.
  // swifterror memory addresses are mem2reg promoted by instruction
  // selection. As such they cannot have regular uses like an instrumentation
  // function and it makes no sense to track them as memory.
  if (Access.Addr->isSwiftError())
    return std::nullopt;

  // Peel off GEPs and BitCasts.
  auto *Addr = Access.Addr->stripInBoundsOffsets();

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
    // Do not instrument PGO counter updates.
    if (GV->hasSection()) {
      StringRef SectionName = GV->getSection();
      // Check if the global is in the PGO counters section.
      auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();
      if (SectionName.ends_with(
              getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
        return std::nullopt;
    }

    // Do not instrument accesses to LLVM internal variables.
    if (GV->getName().starts_with("__llvm"))
      return std::nullopt;
  }

  return Access;
}
429 
// Instrument a masked vector load/store by instrumenting each element whose
// mask lane may be active (constant-false lanes are skipped; non-constant
// lanes are guarded by a runtime check).
void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                              Instruction *I, Value *Addr,
                                              Type *AccessTy, bool IsWrite) {
  auto *VTy = cast<FixedVectorType>(AccessTy);
  unsigned Num = VTy->getNumElements();
  auto *Zero = ConstantInt::get(IntptrTy, 0);
  for (unsigned Idx = 0; Idx < Num; ++Idx) {
    Value *InstrumentedAddress = nullptr;
    Instruction *InsertBefore = I;
    if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
      // dyn_cast as we might get UndefValue
      if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
        if (Masked->isZero())
          // Mask is constant false, so no instrumentation needed.
          continue;
        // If we have a true or undef value, fall through to instrumentAddress.
        // with InsertBefore == I
      }
    } else {
      // Non-constant mask: emit "if (mask[Idx]) { <instrumentation> }" by
      // splitting the block and instrumenting inside the new then-block.
      IRBuilder<> IRB(I);
      Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
      Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
      InsertBefore = ThenTerm;
    }

    IRBuilder<> IRB(InsertBefore);
    // Address of element Idx within the accessed vector.
    InstrumentedAddress =
        IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
    instrumentAddress(I, InsertBefore, InstrumentedAddress, IsWrite);
  }
}
461 
462 void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
463                                 InterestingMemoryAccess &Access) {
464   // Skip instrumentation of stack accesses unless requested.
465   if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) {
466     if (Access.IsWrite)
467       ++NumSkippedStackWrites;
468     else
469       ++NumSkippedStackReads;
470     return;
471   }
472 
473   if (Access.IsWrite)
474     NumInstrumentedWrites++;
475   else
476     NumInstrumentedReads++;
477 
478   if (Access.MaybeMask) {
479     instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,
480                                 Access.AccessTy, Access.IsWrite);
481   } else {
482     // Since the access counts will be accumulated across the entire allocation,
483     // we only update the shadow access count for the first location and thus
484     // don't need to worry about alignment and type size.
485     instrumentAddress(I, I, Access.Addr, Access.IsWrite);
486   }
487 }
488 
// Emit the shadow-count update for a single address: either a runtime
// callback (ClUseCalls) or an inline load/increment/store of the shadow
// counter, with saturation at 255 in histogram mode.
void MemProfiler::instrumentAddress(Instruction *OrigIns,
                                    Instruction *InsertBefore, Value *Addr,
                                    bool IsWrite) {
  IRBuilder<> IRB(InsertBefore);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);

  if (ClUseCalls) {
    // Delegate entirely to the runtime callback.
    IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong);
    return;
  }

  // Histogram buckets are 8 bits wide; plain counters are 64 bits.
  Type *ShadowTy = ClHistogram ? Type::getInt8Ty(*C) : Type::getInt64Ty(*C);
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);

  Value *ShadowPtr = memToShadow(AddrLong, IRB);
  Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);
  Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);
  // If we are profiling with histograms, add overflow protection at 255.
  if (ClHistogram) {
    Value *MaxCount = ConstantInt::get(Type::getInt8Ty(*C), 255);
    Value *Cmp = IRB.CreateICmpULT(ShadowValue, MaxCount);
    // Only increment when the bucket has not yet saturated; the add/store
    // below are emitted inside the new conditional block.
    Instruction *IncBlock =
        SplitBlockAndInsertIfThen(Cmp, InsertBefore, /*Unreachable=*/false);
    IRB.SetInsertPoint(IncBlock);
  }
  Value *Inc = ConstantInt::get(ShadowTy, 1);
  ShadowValue = IRB.CreateAdd(ShadowValue, Inc);
  IRB.CreateStore(ShadowValue, ShadowAddr);
}
518 
519 // Create the variable for the profile file name.
520 void createProfileFileNameVar(Module &M) {
521   const MDString *MemProfFilename =
522       dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename"));
523   if (!MemProfFilename)
524     return;
525   assert(!MemProfFilename->getString().empty() &&
526          "Unexpected MemProfProfileFilename metadata with empty string");
527   Constant *ProfileNameConst = ConstantDataArray::getString(
528       M.getContext(), MemProfFilename->getString(), true);
529   GlobalVariable *ProfileNameVar = new GlobalVariable(
530       M, ProfileNameConst->getType(), /*isConstant=*/true,
531       GlobalValue::WeakAnyLinkage, ProfileNameConst, MemProfFilenameVar);
532   Triple TT(M.getTargetTriple());
533   if (TT.supportsCOMDAT()) {
534     ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
535     ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar));
536   }
537 }
538 
539 // Set MemprofHistogramFlag as a Global veriable in IR. This makes it accessible
540 // to the runtime, changing shadow count behavior.
541 void createMemprofHistogramFlagVar(Module &M) {
542   const StringRef VarName(MemProfHistogramFlagVar);
543   Type *IntTy1 = Type::getInt1Ty(M.getContext());
544   auto MemprofHistogramFlag = new GlobalVariable(
545       M, IntTy1, true, GlobalValue::WeakAnyLinkage,
546       Constant::getIntegerValue(IntTy1, APInt(1, ClHistogram)), VarName);
547   Triple TT(M.getTargetTriple());
548   if (TT.supportsCOMDAT()) {
549     MemprofHistogramFlag->setLinkage(GlobalValue::ExternalLinkage);
550     MemprofHistogramFlag->setComdat(M.getOrInsertComdat(VarName));
551   }
552   appendToCompilerUsed(M, MemprofHistogramFlag);
553 }
554 
555 void createMemprofDefaultOptionsVar(Module &M) {
556   Constant *OptionsConst = ConstantDataArray::getString(
557       M.getContext(), MemprofRuntimeDefaultOptions, /*AddNull=*/true);
558   GlobalVariable *OptionsVar =
559       new GlobalVariable(M, OptionsConst->getType(), /*isConstant=*/true,
560                          GlobalValue::WeakAnyLinkage, OptionsConst,
561                          "__memprof_default_options_str");
562   Triple TT(M.getTargetTriple());
563   if (TT.supportsCOMDAT()) {
564     OptionsVar->setLinkage(GlobalValue::ExternalLinkage);
565     OptionsVar->setComdat(M.getOrInsertComdat(OptionsVar->getName()));
566   }
567 }
568 
569 bool ModuleMemProfiler::instrumentModule(Module &M) {
570 
571   // Create a module constructor.
572   std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION);
573   std::string VersionCheckName =
574       ClInsertVersionCheck ? (MemProfVersionCheckNamePrefix + MemProfVersion)
575                            : "";
576   std::tie(MemProfCtorFunction, std::ignore) =
577       createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName,
578                                           MemProfInitName, /*InitArgTypes=*/{},
579                                           /*InitArgs=*/{}, VersionCheckName);
580 
581   const uint64_t Priority = getCtorAndDtorPriority(TargetTriple);
582   appendToGlobalCtors(M, MemProfCtorFunction, Priority);
583 
584   createProfileFileNameVar(M);
585 
586   createMemprofHistogramFlagVar(M);
587 
588   createMemprofDefaultOptionsVar(M);
589 
590   return true;
591 }
592 
593 void MemProfiler::initializeCallbacks(Module &M) {
594   IRBuilder<> IRB(*C);
595 
596   for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
597     const std::string TypeStr = AccessIsWrite ? "store" : "load";
598     const std::string HistPrefix = ClHistogram ? "hist_" : "";
599 
600     SmallVector<Type *, 2> Args1{1, IntptrTy};
601     MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction(
602         ClMemoryAccessCallbackPrefix + HistPrefix + TypeStr,
603         FunctionType::get(IRB.getVoidTy(), Args1, false));
604   }
605   MemProfMemmove = M.getOrInsertFunction(
606       ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy);
607   MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy",
608                                         PtrTy, PtrTy, PtrTy, IntptrTy);
609   MemProfMemset =
610       M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", PtrTy,
611                             PtrTy, IRB.getInt32Ty(), IntptrTy);
612 }
613 
614 bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) {
615   // For each NSObject descendant having a +load method, this method is invoked
616   // by the ObjC runtime before any of the static constructors is called.
617   // Therefore we need to instrument such methods with a call to __memprof_init
618   // at the beginning in order to initialize our runtime before any access to
619   // the shadow memory.
620   // We cannot just ignore these methods, because they may call other
621   // instrumented functions.
622   if (F.getName().contains(" load]")) {
623     FunctionCallee MemProfInitFunction =
624         declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {});
625     IRBuilder<> IRB(&F.front(), F.front().begin());
626     IRB.CreateCall(MemProfInitFunction, {});
627     return true;
628   }
629   return false;
630 }
631 
632 bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) {
633   IRBuilder<> IRB(&F.front().front());
634   Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
635       MemProfShadowMemoryDynamicAddress, IntptrTy);
636   if (F.getParent()->getPICLevel() == PICLevel::NotPIC)
637     cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true);
638   DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
639   return true;
640 }
641 
642 bool MemProfiler::instrumentFunction(Function &F) {
643   if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
644     return false;
645   if (ClDebugFunc == F.getName())
646     return false;
647   if (F.getName().starts_with("__memprof_"))
648     return false;
649 
650   bool FunctionModified = false;
651 
652   // If needed, insert __memprof_init.
653   // This function needs to be called even if the function body is not
654   // instrumented.
655   if (maybeInsertMemProfInitAtFunctionEntry(F))
656     FunctionModified = true;
657 
658   LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n");
659 
660   initializeCallbacks(*F.getParent());
661 
662   SmallVector<Instruction *, 16> ToInstrument;
663 
664   // Fill the set of memory operations to instrument.
665   for (auto &BB : F) {
666     for (auto &Inst : BB) {
667       if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst))
668         ToInstrument.push_back(&Inst);
669     }
670   }
671 
672   if (ToInstrument.empty()) {
673     LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified
674                       << " " << F << "\n");
675 
676     return FunctionModified;
677   }
678 
679   FunctionModified |= insertDynamicShadowAtFunctionEntry(F);
680 
681   int NumInstrumented = 0;
682   for (auto *Inst : ToInstrument) {
683     if (ClDebugMin < 0 || ClDebugMax < 0 ||
684         (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
685       std::optional<InterestingMemoryAccess> Access =
686           isInterestingMemoryAccess(Inst);
687       if (Access)
688         instrumentMop(Inst, F.getDataLayout(), *Access);
689       else
690         instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
691     }
692     NumInstrumented++;
693   }
694 
695   if (NumInstrumented > 0)
696     FunctionModified = true;
697 
698   LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " "
699                     << F << "\n");
700 
701   return FunctionModified;
702 }
703 
704 static void addCallsiteMetadata(Instruction &I,
705                                 ArrayRef<uint64_t> InlinedCallStack,
706                                 LLVMContext &Ctx) {
707   I.setMetadata(LLVMContext::MD_callsite,
708                 buildCallstackMetadata(InlinedCallStack, Ctx));
709 }
710 
711 static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
712                                uint32_t Column) {
713   llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
714       HashBuilder;
715   HashBuilder.add(Function, LineOffset, Column);
716   llvm::BLAKE3Result<8> Hash = HashBuilder.final();
717   uint64_t Id;
718   std::memcpy(&Id, Hash.data(), sizeof(Hash));
719   return Id;
720 }
721 
// Convenience overload: hash a profile Frame's (GUID, line offset, column).
static uint64_t computeStackId(const memprof::Frame &Frame) {
  return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
}
725 
726 // Helper to generate a single hash id for a given callstack, used for emitting
727 // matching statistics and useful for uniquing such statistics across modules.
728 static uint64_t
729 computeFullStackId(const std::vector<memprof::Frame> &CallStack) {
730   llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
731       HashBuilder;
732   for (auto &F : CallStack)
733     HashBuilder.add(F.Function, F.LineOffset, F.Column);
734   llvm::BLAKE3Result<8> Hash = HashBuilder.final();
735   uint64_t Id;
736   std::memcpy(&Id, Hash.data(), sizeof(Hash));
737   return Id;
738 }
739 
740 static AllocationType addCallStack(CallStackTrie &AllocTrie,
741                                    const AllocationInfo *AllocInfo,
742                                    uint64_t FullStackId) {
743   SmallVector<uint64_t> StackIds;
744   for (const auto &StackFrame : AllocInfo->CallStack)
745     StackIds.push_back(computeStackId(StackFrame));
746   auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
747                                 AllocInfo->Info.getAllocCount(),
748                                 AllocInfo->Info.getTotalLifetime());
749   std::vector<ContextTotalSize> ContextSizeInfo;
750   if (MemProfReportHintedSizes) {
751     auto TotalSize = AllocInfo->Info.getTotalSize();
752     assert(TotalSize);
753     assert(FullStackId != 0);
754     ContextSizeInfo.push_back({FullStackId, TotalSize});
755   }
756   AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo));
757   return AllocType;
758 }
759 
760 // Helper to compare the InlinedCallStack computed from an instruction's debug
761 // info to a list of Frames from profile data (either the allocation data or a
762 // callsite). For callsites, the StartIndex to use in the Frame array may be
763 // non-zero.
764 static bool
765 stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
766                                    ArrayRef<uint64_t> InlinedCallStack,
767                                    unsigned StartIndex = 0) {
768   auto StackFrame = ProfileCallStack.begin() + StartIndex;
769   auto InlCallStackIter = InlinedCallStack.begin();
770   for (; StackFrame != ProfileCallStack.end() &&
771          InlCallStackIter != InlinedCallStack.end();
772        ++StackFrame, ++InlCallStackIter) {
773     uint64_t StackId = computeStackId(*StackFrame);
774     if (StackId != *InlCallStackIter)
775       return false;
776   }
777   // Return true if we found and matched all stack ids from the call
778   // instruction.
779   return InlCallStackIter == InlinedCallStack.end();
780 }
781 
// Returns true if Callee is a known allocation function for which a
// __hot_cold_t hinted variant exists, i.e. a call we are able to rewrite
// with an allocation hint. Calls that already use a __hot_cold_t variant
// are only treated as matchable when -memprof-match-hot-cold-new is set.
static bool isAllocationWithHotColdVariant(const Function *Callee,
                                           const TargetLibraryInfo &TLI) {
  if (!Callee)
    return false;
  LibFunc Func;
  if (!TLI.getLibFunc(*Callee, Func))
    return false;
  switch (Func) {
  // operator new / operator new[] (plain, nothrow, and aligned forms) plus
  // the size-returning new extension: always eligible for hinting.
  case LibFunc_Znwm:
  case LibFunc_ZnwmRKSt9nothrow_t:
  case LibFunc_ZnwmSt11align_val_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
  case LibFunc_Znam:
  case LibFunc_ZnamRKSt9nothrow_t:
  case LibFunc_ZnamSt11align_val_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
  case LibFunc_size_returning_new:
  case LibFunc_size_returning_new_aligned:
    return true;
  // Variants that already carry a __hot_cold_t parameter: rematching them
  // (possibly updating an existing hint) is opt-in via the
  // -memprof-match-hot-cold-new flag.
  case LibFunc_Znwm12__hot_cold_t:
  case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_Znam12__hot_cold_t:
  case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_size_returning_new_hot_cold:
  case LibFunc_size_returning_new_aligned_hot_cold:
    return ClMemProfMatchHotColdNew;
  default:
    return false;
  }
}
816 
// Summary of one allocation context from the profile, collected during
// matching and used when printing match information (see the
// ClPrintMemProfMatchInfo handling in MemProfUsePass::run).
struct AllocMatchInfo {
  // Total profiled size in bytes for this context.
  uint64_t TotalSize = 0;
  // Allocation type (e.g. hot/cold/notcold) deduced for this context.
  AllocationType AllocType = AllocationType::None;
  // Whether this context was matched to an allocation call in the IR.
  bool Matched = false;
};
822 
823 DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
824 memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI) {
825   DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls;
826 
827   auto GetOffset = [](const DILocation *DIL) {
828     return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
829            0xffff;
830   };
831 
832   for (Function &F : M) {
833     if (F.isDeclaration())
834       continue;
835 
836     for (auto &BB : F) {
837       for (auto &I : BB) {
838         if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
839           continue;
840 
841         auto *CB = dyn_cast<CallBase>(&I);
842         auto *CalledFunction = CB->getCalledFunction();
843         // Disregard indirect calls and intrinsics.
844         if (!CalledFunction || CalledFunction->isIntrinsic())
845           continue;
846 
847         StringRef CalleeName = CalledFunction->getName();
848         bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
849         for (const DILocation *DIL = I.getDebugLoc(); DIL;
850              DIL = DIL->getInlinedAt()) {
851           StringRef CallerName = DIL->getSubprogramLinkageName();
852           assert(!CallerName.empty() &&
853                  "Be sure to enable -fdebug-info-for-profiling");
854           uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName);
855           uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName);
856           // Pretend that we are calling a function with GUID == 0 if we are
857           // calling a heap allocation function.
858           if (IsAlloc)
859             CalleeGUID = 0;
860           LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
861           Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
862           CalleeName = CallerName;
863           // FIXME: Recognize other frames that are associated with heap
864           // allocation functions.  It may be too early to reset IsAlloc to
865           // false here.
866           IsAlloc = false;
867         }
868       }
869     }
870   }
871 
872   // Sort each call list by the source location.
873   for (auto &[CallerGUID, CallList] : Calls) {
874     llvm::sort(CallList);
875     CallList.erase(llvm::unique(CallList), CallList.end());
876   }
877 
878   return Calls;
879 }
880 
881 DenseMap<uint64_t, LocToLocMap>
882 memprof::computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader,
883                            const TargetLibraryInfo &TLI) {
884   DenseMap<uint64_t, LocToLocMap> UndriftMaps;
885 
886   DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromProfile =
887       MemProfReader->getMemProfCallerCalleePairs();
888   DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromIR =
889       extractCallsFromIR(M, TLI);
890 
891   // Compute an undrift map for each CallerGUID.
892   for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
893     auto It = CallsFromProfile.find(CallerGUID);
894     if (It == CallsFromProfile.end())
895       continue;
896     const auto &ProfileAnchors = It->second;
897 
898     LocToLocMap Matchings;
899     longestCommonSequence<LineLocation, GlobalValue::GUID>(
900         ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),
901         [&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); });
902     bool Inserted = UndriftMaps.try_emplace(CallerGUID, Matchings).second;
903 
904     // The insertion must succeed because we visit each GUID exactly once.
905     assert(Inserted);
906     (void)Inserted;
907   }
908 
909   return UndriftMaps;
910 }
911 
// Look up the MemProf record for function F in the profile, match its
// allocation and callsite contexts against F's instructions using debug
// location hashes, and attach !memprof metadata (or a function attribute)
// to matched allocation calls and !callsite metadata to matched interior
// calls. Matched allocation contexts are also recorded in
// FullStackIdToAllocMatchInfo for optional reporting. Emits a diagnostic
// (unless suppressed by flags) when F's record is missing or mismatched.
static void
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
            const TargetLibraryInfo &TLI,
            std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
  auto &Ctx = M.getContext();
  // Previously we used getIRPGOFuncName() here. If F is local linkage,
  // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
  // llvm-profdata uses FuncName in dwarf to create GUID which doesn't
  // contain FileName's prefix. It caused local linkage function can't
  // find MemProfRecord. So we use getName() now.
  // 'unique-internal-linkage-names' can make MemProf work better for local
  // linkage function.
  auto FuncName = F.getName();
  auto FuncGUID = Function::getGUID(FuncName);
  std::optional<memprof::MemProfRecord> MemProfRec;
  auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
  if (Err) {
    // No usable record: classify the error, optionally warn, and bail out.
    handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
      auto Err = IPE.get();
      bool SkipWarning = false;
      LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
                        << ": ");
      if (Err == instrprof_error::unknown_function) {
        NumOfMemProfMissing++;
        SkipWarning = !PGOWarnMissing;
        LLVM_DEBUG(dbgs() << "unknown function");
      } else if (Err == instrprof_error::hash_mismatch) {
        NumOfMemProfMismatch++;
        // Mismatch warnings can be suppressed entirely, or just for
        // comdat/available_externally functions (commonly duplicated across
        // TUs and thus prone to benign mismatches).
        SkipWarning =
            NoPGOWarnMismatch ||
            (NoPGOWarnMismatchComdatWeak &&
             (F.hasComdat() ||
              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
      }

      if (SkipWarning)
        return;

      std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
                         Twine(" Hash = ") + std::to_string(FuncGUID))
                            .str();

      Ctx.diagnose(
          DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
    });
    return;
  }

  NumOfMemProfFunc++;

  // Detect if there are non-zero column numbers in the profile. If not,
  // treat all column numbers as 0 when matching (i.e. ignore any non-zero
  // columns in the IR). The profiled binary might have been built with
  // column numbers disabled, for example.
  bool ProfileHasColumns = false;

  // Build maps of the location hash to all profile data with that leaf location
  // (allocation info and the callsites).
  std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
  // For the callsites we need to record the index of the associated frame in
  // the frame array (see comments below where the map entries are added).
  std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *, unsigned>>>
      LocHashToCallSites;
  for (auto &AI : MemProfRec->AllocSites) {
    NumOfMemProfAllocContextProfiles++;
    // Associate the allocation info with the leaf frame. The later matching
    // code will match any inlined call sequences in the IR with a longer prefix
    // of call stack frames.
    uint64_t StackId = computeStackId(AI.CallStack[0]);
    LocHashToAllocInfo[StackId].insert(&AI);
    ProfileHasColumns |= AI.CallStack[0].Column;
  }
  for (auto &CS : MemProfRec->CallSites) {
    NumOfMemProfCallSiteProfiles++;
    // Need to record all frames from leaf up to and including this function,
    // as any of these may or may not have been inlined at this point.
    unsigned Idx = 0;
    for (auto &StackFrame : CS) {
      uint64_t StackId = computeStackId(StackFrame);
      LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
      ProfileHasColumns |= StackFrame.Column;
      // Once we find this function, we can stop recording.
      if (StackFrame.Function == FuncGUID)
        break;
    }
    // We should have stopped on this function's frame (possibly the last
    // frame in the context).
    assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
  }

  // Line offset of DIL from the start of its enclosing subprogram,
  // truncated to 16 bits to match the profile's location encoding.
  auto GetOffset = [](const DILocation *DIL) {
    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
           0xffff;
  };

  // Now walk the instructions, looking up the associated profile data using
  // debug locations.
  for (auto &BB : F) {
    for (auto &I : BB) {
      if (I.isDebugOrPseudoInst())
        continue;
      // We are only interested in calls (allocation or interior call stack
      // context calls).
      auto *CI = dyn_cast<CallBase>(&I);
      if (!CI)
        continue;
      auto *CalledFunction = CI->getCalledFunction();
      if (CalledFunction && CalledFunction->isIntrinsic())
        continue;
      // List of call stack ids computed from the location hashes on debug
      // locations (leaf to inlined at root).
      SmallVector<uint64_t, 8> InlinedCallStack;
      // Was the leaf location found in one of the profile maps?
      bool LeafFound = false;
      // If leaf was found in a map, iterators pointing to its location in both
      // of the maps. It might exist in neither, one, or both (the latter case
      // can happen because we don't currently have discriminators to
      // distinguish the case when a single line/col maps to both an allocation
      // and another callsite). Note the iterators are only meaningful (and
      // only dereferenced below) when LeafFound is true.
      std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
          AllocInfoIter;
      std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *,
                                            unsigned>>>::iterator CallSitesIter;
      for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
           DIL = DIL->getInlinedAt()) {
        // Use C++ linkage name if possible. Need to compile with
        // -fdebug-info-for-profiling to get linkage name.
        StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
        if (Name.empty())
          Name = DIL->getScope()->getSubprogram()->getName();
        auto CalleeGUID = Function::getGUID(Name);
        auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
                                      ProfileHasColumns ? DIL->getColumn() : 0);
        // Check if we have found the profile's leaf frame. If yes, collect
        // the rest of the call's inlined context starting here. If not, see if
        // we find a match further up the inlined context (in case the profile
        // was missing debug frames at the leaf).
        if (!LeafFound) {
          AllocInfoIter = LocHashToAllocInfo.find(StackId);
          CallSitesIter = LocHashToCallSites.find(StackId);
          if (AllocInfoIter != LocHashToAllocInfo.end() ||
              CallSitesIter != LocHashToCallSites.end())
            LeafFound = true;
        }
        if (LeafFound)
          InlinedCallStack.push_back(StackId);
      }
      // If leaf not in either of the maps, skip inst.
      if (!LeafFound)
        continue;

      // First add !memprof metadata from allocation info, if we found the
      // instruction's leaf location in that map, and if the rest of the
      // instruction's locations match the prefix Frame locations on an
      // allocation context with the same leaf.
      if (AllocInfoIter != LocHashToAllocInfo.end()) {
        // Only consider allocations which support hinting.
        if (!isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI))
          continue;
        // We may match this instruction's location list to multiple MIB
        // contexts. Add them to a Trie specialized for trimming the contexts to
        // the minimal needed to disambiguate contexts with unique behavior.
        CallStackTrie AllocTrie;
        for (auto *AllocInfo : AllocInfoIter->second) {
          // Check the full inlined call stack against this one.
          // If we found and thus matched all frames on the call, include
          // this MIB.
          if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
                                                 InlinedCallStack)) {
            NumOfMemProfMatchedAllocContexts++;
            uint64_t FullStackId = 0;
            // The full stack id is only needed for reporting, so only
            // compute it when one of the reporting modes is enabled.
            if (ClPrintMemProfMatchInfo || MemProfReportHintedSizes)
              FullStackId = computeFullStackId(AllocInfo->CallStack);
            auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
            // Record information about the allocation if match info printing
            // was requested.
            if (ClPrintMemProfMatchInfo) {
              assert(FullStackId != 0);
              FullStackIdToAllocMatchInfo[FullStackId] = {
                  AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
            }
          }
        }
        // We might not have matched any to the full inlined call stack.
        // But if we did, create and attach metadata, or a function attribute if
        // all contexts have identical profiled behavior.
        if (!AllocTrie.empty()) {
          NumOfMemProfMatchedAllocs++;
          // MemprofMDAttached will be false if a function attribute was
          // attached.
          bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
          assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
          if (MemprofMDAttached) {
            // Add callsite metadata for the instruction's location list so that
            // it is simpler later on to identify which part of the MIB contexts
            // are from this particular instruction (including during inlining,
            // when the callsite metadata will be updated appropriately).
            // FIXME: can this be changed to strip out the matching stack
            // context ids from the MIB contexts and not add any callsite
            // metadata here to save space?
            addCallsiteMetadata(I, InlinedCallStack, Ctx);
          }
        }
        continue;
      }

      // Otherwise, add callsite metadata. If we reach here then we found the
      // instruction's leaf location in the callsites map and not the allocation
      // map.
      assert(CallSitesIter != LocHashToCallSites.end());
      for (auto CallStackIdx : CallSitesIter->second) {
        // If we found and thus matched all frames on the call, create and
        // attach call stack metadata.
        if (stackFrameIncludesInlinedCallStack(
                *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
          NumOfMemProfMatchedCallSites++;
          addCallsiteMetadata(I, InlinedCallStack, Ctx);
          // Only need to find one with a matching call stack and add a single
          // callsite metadata.
          break;
        }
      }
    }
  }
}
1136 
1137 MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
1138                                IntrusiveRefCntPtr<vfs::FileSystem> FS)
1139     : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
1140   if (!FS)
1141     this->FS = vfs::getRealFileSystem();
1142 }
1143 
// Pass entry point: open and validate the indexed profile named by
// MemoryProfileFileName, then run readMemprof over every defined function
// to attach !memprof / !callsite metadata. Diagnoses (and preserves all
// analyses) on any profile open/validation failure.
PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
  // Return immediately if the module doesn't contain any function.
  if (M.empty())
    return PreservedAnalyses::all();

  LLVM_DEBUG(dbgs() << "Read in memory profile:");
  auto &Ctx = M.getContext();
  auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
  if (Error E = ReaderOrErr.takeError()) {
    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
      Ctx.diagnose(
          DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
    });
    return PreservedAnalyses::all();
  }

  std::unique_ptr<IndexedInstrProfReader> MemProfReader =
      std::move(ReaderOrErr.get());
  if (!MemProfReader) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(
        MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
    return PreservedAnalyses::all();
  }

  // The indexed format can hold several profile kinds; require memprof data.
  if (!MemProfReader->hasMemoryProfile()) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
                                          "Not a memory profile"));
    return PreservedAnalyses::all();
  }

  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  // Map from the stack hash of each allocation context in the function
  // profiles to the total profiled size (bytes), allocation type, and whether
  // we matched it to an allocation in the IR.
  std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;

  for (auto &F : M) {
    if (F.isDeclaration())
      continue;

    const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
    readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo);
  }

  // Optionally report every allocation context seen during matching.
  if (ClPrintMemProfMatchInfo) {
    for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
      errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
             << " context with id " << Id << " has total profiled size "
             << Info.TotalSize << (Info.Matched ? " is" : " not")
             << " matched\n";
  }

  return PreservedAnalyses::none();
}
1199