xref: /llvm-project/llvm/lib/ProfileData/MemProfReader.cpp (revision 1d5154663509b6200038a2f0b0ac958ea556fa9e)
12bede687SKazu Hirata //===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
22bede687SKazu Hirata //
32bede687SKazu Hirata // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42bede687SKazu Hirata // See https://llvm.org/LICENSE.txt for license information.
52bede687SKazu Hirata // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
62bede687SKazu Hirata //
72bede687SKazu Hirata //===----------------------------------------------------------------------===//
82bede687SKazu Hirata //
92bede687SKazu Hirata // This file contains support for reading MemProf profiling data.
102bede687SKazu Hirata //
112bede687SKazu Hirata //===----------------------------------------------------------------------===//
122bede687SKazu Hirata 
#include <cstdint>
#include <memory>
#include <type_traits>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/BuildID.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/MemProfReader.h"
#include "llvm/ProfileData/MemProfYAML.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemAlloc.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
422bede687SKazu Hirata 
432bede687SKazu Hirata #define DEBUG_TYPE "memprof"
44e98396f4SKazu Hirata 
45e98396f4SKazu Hirata namespace llvm {
462bede687SKazu Hirata namespace memprof {
472bede687SKazu Hirata namespace {
// Loads a value of integral type T directly from Ptr. The caller must pass a
// pointer that is suitably aligned for T; this is only verified by assertion.
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_integral_v<T>, "Not an integral type");
  assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  const T *const Typed = reinterpret_cast<const T *>(Ptr);
  return *Typed;
}
532bede687SKazu Hirata 
542bede687SKazu Hirata Error checkBuffer(const MemoryBuffer &Buffer) {
552bede687SKazu Hirata   if (!RawMemProfReader::hasFormat(Buffer))
562bede687SKazu Hirata     return make_error<InstrProfError>(instrprof_error::bad_magic);
572bede687SKazu Hirata 
582bede687SKazu Hirata   if (Buffer.getBufferSize() == 0)
592bede687SKazu Hirata     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
602bede687SKazu Hirata 
612bede687SKazu Hirata   if (Buffer.getBufferSize() < sizeof(Header)) {
622bede687SKazu Hirata     return make_error<InstrProfError>(instrprof_error::truncated);
632bede687SKazu Hirata   }
642bede687SKazu Hirata 
652bede687SKazu Hirata   // The size of the buffer can be > header total size since we allow repeated
662bede687SKazu Hirata   // serialization of memprof profiles to the same file.
672bede687SKazu Hirata   uint64_t TotalSize = 0;
682bede687SKazu Hirata   const char *Next = Buffer.getBufferStart();
692bede687SKazu Hirata   while (Next < Buffer.getBufferEnd()) {
70bfa937a4SKazu Hirata     const auto *H = reinterpret_cast<const Header *>(Next);
7130b93db5SMatthew Weingarten 
7230b93db5SMatthew Weingarten     // Check if the version in header is among the supported versions.
7330b93db5SMatthew Weingarten     bool IsSupported = false;
7430b93db5SMatthew Weingarten     for (auto SupportedVersion : MEMPROF_RAW_SUPPORTED_VERSIONS) {
7530b93db5SMatthew Weingarten       if (H->Version == SupportedVersion)
7630b93db5SMatthew Weingarten         IsSupported = true;
7730b93db5SMatthew Weingarten     }
7830b93db5SMatthew Weingarten     if (!IsSupported) {
792bede687SKazu Hirata       return make_error<InstrProfError>(instrprof_error::unsupported_version);
802bede687SKazu Hirata     }
812bede687SKazu Hirata 
822bede687SKazu Hirata     TotalSize += H->TotalSize;
832bede687SKazu Hirata     Next += H->TotalSize;
842bede687SKazu Hirata   }
852bede687SKazu Hirata 
862bede687SKazu Hirata   if (Buffer.getBufferSize() != TotalSize) {
872bede687SKazu Hirata     return make_error<InstrProfError>(instrprof_error::malformed);
882bede687SKazu Hirata   }
892bede687SKazu Hirata   return Error::success();
902bede687SKazu Hirata }
912bede687SKazu Hirata 
922bede687SKazu Hirata llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
932bede687SKazu Hirata   using namespace support;
942bede687SKazu Hirata 
952bede687SKazu Hirata   const uint64_t NumItemsToRead =
96f430e374SKazu Hirata       endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
972bede687SKazu Hirata   llvm::SmallVector<SegmentEntry> Items;
982bede687SKazu Hirata   for (uint64_t I = 0; I < NumItemsToRead; I++) {
992bede687SKazu Hirata     Items.push_back(*reinterpret_cast<const SegmentEntry *>(
1002bede687SKazu Hirata         Ptr + I * sizeof(SegmentEntry)));
1012bede687SKazu Hirata   }
1022bede687SKazu Hirata   return Items;
1032bede687SKazu Hirata }
1042bede687SKazu Hirata 
1052bede687SKazu Hirata llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
10630b93db5SMatthew Weingarten readMemInfoBlocksV3(const char *Ptr) {
1072bede687SKazu Hirata   using namespace support;
1082bede687SKazu Hirata 
1092bede687SKazu Hirata   const uint64_t NumItemsToRead =
11030b93db5SMatthew Weingarten       endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
11130b93db5SMatthew Weingarten 
1122bede687SKazu Hirata   llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
1132bede687SKazu Hirata   for (uint64_t I = 0; I < NumItemsToRead; I++) {
1142bede687SKazu Hirata     const uint64_t Id =
11530b93db5SMatthew Weingarten         endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
11630b93db5SMatthew Weingarten 
11730b93db5SMatthew Weingarten     // We cheat a bit here and remove the const from cast to set the
11830b93db5SMatthew Weingarten     // Histogram Pointer to newly allocated buffer. We also cheat, since V3 and
11930b93db5SMatthew Weingarten     // V4 do not have the same fields. V3 is missing AccessHistogramSize and
12030b93db5SMatthew Weingarten     // AccessHistogram. This means we read "dirty" data in here, but it should
12130b93db5SMatthew Weingarten     // not segfault, since there will be callstack data placed after this in the
12230b93db5SMatthew Weingarten     // binary format.
12330b93db5SMatthew Weingarten     MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
12430b93db5SMatthew Weingarten     // Overwrite dirty data.
12530b93db5SMatthew Weingarten     MIB.AccessHistogramSize = 0;
12630b93db5SMatthew Weingarten     MIB.AccessHistogram = 0;
12730b93db5SMatthew Weingarten 
1282bede687SKazu Hirata     Items.push_back({Id, MIB});
12930b93db5SMatthew Weingarten     // Only increment by the size of MIB in V3.
13030b93db5SMatthew Weingarten     Ptr += MEMPROF_V3_MIB_SIZE;
13130b93db5SMatthew Weingarten   }
13230b93db5SMatthew Weingarten   return Items;
13330b93db5SMatthew Weingarten }
13430b93db5SMatthew Weingarten 
13530b93db5SMatthew Weingarten llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
13630b93db5SMatthew Weingarten readMemInfoBlocksV4(const char *Ptr) {
13730b93db5SMatthew Weingarten   using namespace support;
13830b93db5SMatthew Weingarten 
13930b93db5SMatthew Weingarten   const uint64_t NumItemsToRead =
14030b93db5SMatthew Weingarten       endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
14130b93db5SMatthew Weingarten 
14230b93db5SMatthew Weingarten   llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
14330b93db5SMatthew Weingarten   for (uint64_t I = 0; I < NumItemsToRead; I++) {
14430b93db5SMatthew Weingarten     const uint64_t Id =
14530b93db5SMatthew Weingarten         endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
14630b93db5SMatthew Weingarten     // We cheat a bit here and remove the const from cast to set the
14730b93db5SMatthew Weingarten     // Histogram Pointer to newly allocated buffer.
14830b93db5SMatthew Weingarten     MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
14930b93db5SMatthew Weingarten 
1502bede687SKazu Hirata     // Only increment by size of MIB since readNext implicitly increments.
1512bede687SKazu Hirata     Ptr += sizeof(MemInfoBlock);
15230b93db5SMatthew Weingarten 
15330b93db5SMatthew Weingarten     if (MIB.AccessHistogramSize > 0) {
15430b93db5SMatthew Weingarten       MIB.AccessHistogram =
15530b93db5SMatthew Weingarten           (uintptr_t)malloc(MIB.AccessHistogramSize * sizeof(uint64_t));
15630b93db5SMatthew Weingarten     }
15730b93db5SMatthew Weingarten 
15830b93db5SMatthew Weingarten     for (uint64_t J = 0; J < MIB.AccessHistogramSize; J++) {
15930b93db5SMatthew Weingarten       ((uint64_t *)MIB.AccessHistogram)[J] =
16030b93db5SMatthew Weingarten           endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
16130b93db5SMatthew Weingarten     }
16230b93db5SMatthew Weingarten     Items.push_back({Id, MIB});
1632bede687SKazu Hirata   }
1642bede687SKazu Hirata   return Items;
1652bede687SKazu Hirata }
1662bede687SKazu Hirata 
1672bede687SKazu Hirata CallStackMap readStackInfo(const char *Ptr) {
1682bede687SKazu Hirata   using namespace support;
1692bede687SKazu Hirata 
1702bede687SKazu Hirata   const uint64_t NumItemsToRead =
171f430e374SKazu Hirata       endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
1722bede687SKazu Hirata   CallStackMap Items;
1732bede687SKazu Hirata 
1742bede687SKazu Hirata   for (uint64_t I = 0; I < NumItemsToRead; I++) {
1752bede687SKazu Hirata     const uint64_t StackId =
176f430e374SKazu Hirata         endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
1772bede687SKazu Hirata     const uint64_t NumPCs =
178f430e374SKazu Hirata         endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
1792bede687SKazu Hirata 
1802bede687SKazu Hirata     SmallVector<uint64_t> CallStack;
1812bede687SKazu Hirata     CallStack.reserve(NumPCs);
1822bede687SKazu Hirata     for (uint64_t J = 0; J < NumPCs; J++) {
1832bede687SKazu Hirata       CallStack.push_back(
184f430e374SKazu Hirata           endian::readNext<uint64_t, llvm::endianness::little>(Ptr));
1852bede687SKazu Hirata     }
1862bede687SKazu Hirata 
1872bede687SKazu Hirata     Items[StackId] = CallStack;
1882bede687SKazu Hirata   }
1892bede687SKazu Hirata   return Items;
1902bede687SKazu Hirata }
1912bede687SKazu Hirata 
1922bede687SKazu Hirata // Merges the contents of stack information in \p From to \p To. Returns true if
1932bede687SKazu Hirata // any stack ids observed previously map to a different set of program counter
1942bede687SKazu Hirata // addresses.
1952bede687SKazu Hirata bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
1963749e0d4SKazu Hirata   for (const auto &[Id, Stack] : From) {
197abaa8247SKazu Hirata     auto [It, Inserted] = To.try_emplace(Id, Stack);
1982bede687SKazu Hirata     // Check that the PCs are the same (in order).
199abaa8247SKazu Hirata     if (!Inserted && Stack != It->second)
2002bede687SKazu Hirata       return true;
2012bede687SKazu Hirata   }
2022bede687SKazu Hirata   return false;
2032bede687SKazu Hirata }
2042bede687SKazu Hirata 
2052bede687SKazu Hirata Error report(Error E, const StringRef Context) {
2062bede687SKazu Hirata   return joinErrors(createStringError(inconvertibleErrorCode(), Context),
2072bede687SKazu Hirata                     std::move(E));
2082bede687SKazu Hirata }
2092bede687SKazu Hirata 
2102bede687SKazu Hirata bool isRuntimePath(const StringRef Path) {
2112bede687SKazu Hirata   const StringRef Filename = llvm::sys::path::filename(Path);
2122bede687SKazu Hirata   // This list should be updated in case new files with additional interceptors
2132bede687SKazu Hirata   // are added to the memprof runtime.
214bb6df080SKazu Hirata   return Filename == "memprof_malloc_linux.cpp" ||
215bb6df080SKazu Hirata          Filename == "memprof_interceptors.cpp" ||
216bb6df080SKazu Hirata          Filename == "memprof_new_delete.cpp";
2172bede687SKazu Hirata }
2182bede687SKazu Hirata 
2192bede687SKazu Hirata std::string getBuildIdString(const SegmentEntry &Entry) {
2202bede687SKazu Hirata   // If the build id is unset print a helpful string instead of all zeros.
2212bede687SKazu Hirata   if (Entry.BuildIdSize == 0)
2222bede687SKazu Hirata     return "<None>";
2232bede687SKazu Hirata 
2242bede687SKazu Hirata   std::string Str;
2252bede687SKazu Hirata   raw_string_ostream OS(Str);
2262bede687SKazu Hirata   for (size_t I = 0; I < Entry.BuildIdSize; I++) {
2272bede687SKazu Hirata     OS << format_hex_no_prefix(Entry.BuildId[I], 2);
2282bede687SKazu Hirata   }
2292bede687SKazu Hirata   return OS.str();
2302bede687SKazu Hirata }
2312bede687SKazu Hirata } // namespace
2322bede687SKazu Hirata 
2332bede687SKazu Hirata Expected<std::unique_ptr<RawMemProfReader>>
2342bede687SKazu Hirata RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
2352bede687SKazu Hirata                          bool KeepName) {
2362bede687SKazu Hirata   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
2372bede687SKazu Hirata   if (std::error_code EC = BufferOr.getError())
2382bede687SKazu Hirata     return report(errorCodeToError(EC), Path.getSingleStringRef());
2392bede687SKazu Hirata 
2402bede687SKazu Hirata   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
2412bede687SKazu Hirata   return create(std::move(Buffer), ProfiledBinary, KeepName);
2422bede687SKazu Hirata }
2432bede687SKazu Hirata 
2442bede687SKazu Hirata Expected<std::unique_ptr<RawMemProfReader>>
2452bede687SKazu Hirata RawMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
2462bede687SKazu Hirata                          const StringRef ProfiledBinary, bool KeepName) {
2472bede687SKazu Hirata   if (Error E = checkBuffer(*Buffer))
2482bede687SKazu Hirata     return report(std::move(E), Buffer->getBufferIdentifier());
2492bede687SKazu Hirata 
2502bede687SKazu Hirata   if (ProfiledBinary.empty()) {
2512bede687SKazu Hirata     // Peek the build ids to print a helpful error message.
2522bede687SKazu Hirata     const std::vector<std::string> BuildIds = peekBuildIds(Buffer.get());
2532bede687SKazu Hirata     std::string ErrorMessage(
2542bede687SKazu Hirata         R"(Path to profiled binary is empty, expected binary with one of the following build ids:
2552bede687SKazu Hirata )");
2562bede687SKazu Hirata     for (const auto &Id : BuildIds) {
2572bede687SKazu Hirata       ErrorMessage += "\n BuildId: ";
2582bede687SKazu Hirata       ErrorMessage += Id;
2592bede687SKazu Hirata     }
2602bede687SKazu Hirata     return report(
2612bede687SKazu Hirata         make_error<StringError>(ErrorMessage, inconvertibleErrorCode()),
2622bede687SKazu Hirata         /*Context=*/"");
2632bede687SKazu Hirata   }
2642bede687SKazu Hirata 
2652bede687SKazu Hirata   auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
2662bede687SKazu Hirata   if (!BinaryOr) {
2672bede687SKazu Hirata     return report(BinaryOr.takeError(), ProfiledBinary);
2682bede687SKazu Hirata   }
2692bede687SKazu Hirata 
2702bede687SKazu Hirata   // Use new here since constructor is private.
2712bede687SKazu Hirata   std::unique_ptr<RawMemProfReader> Reader(
2722bede687SKazu Hirata       new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
2732bede687SKazu Hirata   if (Error E = Reader->initialize(std::move(Buffer))) {
2742bede687SKazu Hirata     return std::move(E);
2752bede687SKazu Hirata   }
2762bede687SKazu Hirata   return std::move(Reader);
2772bede687SKazu Hirata }
2782bede687SKazu Hirata 
27930b93db5SMatthew Weingarten // We need to make sure that all leftover MIB histograms that have not been
28030b93db5SMatthew Weingarten // freed by merge are freed here.
28130b93db5SMatthew Weingarten RawMemProfReader::~RawMemProfReader() {
28230b93db5SMatthew Weingarten   for (auto &[_, MIB] : CallstackProfileData) {
28330b93db5SMatthew Weingarten     if (MemprofRawVersion >= 4ULL && MIB.AccessHistogramSize > 0) {
28430b93db5SMatthew Weingarten       free((void *)MIB.AccessHistogram);
28530b93db5SMatthew Weingarten     }
28630b93db5SMatthew Weingarten   }
28730b93db5SMatthew Weingarten }
28830b93db5SMatthew Weingarten 
2892bede687SKazu Hirata bool RawMemProfReader::hasFormat(const StringRef Path) {
2902bede687SKazu Hirata   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
2912bede687SKazu Hirata   if (!BufferOr)
2922bede687SKazu Hirata     return false;
2932bede687SKazu Hirata 
2942bede687SKazu Hirata   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
2952bede687SKazu Hirata   return hasFormat(*Buffer);
2962bede687SKazu Hirata }
2972bede687SKazu Hirata 
2982bede687SKazu Hirata bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
2992bede687SKazu Hirata   if (Buffer.getBufferSize() < sizeof(uint64_t))
3002bede687SKazu Hirata     return false;
3012bede687SKazu Hirata   // Aligned read to sanity check that the buffer was allocated with at least 8b
3022bede687SKazu Hirata   // alignment.
3032bede687SKazu Hirata   const uint64_t Magic = alignedRead(Buffer.getBufferStart());
3042bede687SKazu Hirata   return Magic == MEMPROF_RAW_MAGIC_64;
3052bede687SKazu Hirata }
3062bede687SKazu Hirata 
3072bede687SKazu Hirata void RawMemProfReader::printYAML(raw_ostream &OS) {
3082bede687SKazu Hirata   uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
3095add295fSKazu Hirata   for (const auto &KV : MemProfData.Records) {
3102bede687SKazu Hirata     const size_t NumAllocSites = KV.second.AllocSites.size();
3112bede687SKazu Hirata     if (NumAllocSites > 0) {
3122bede687SKazu Hirata       NumAllocFunctions++;
3132bede687SKazu Hirata       NumMibInfo += NumAllocSites;
3142bede687SKazu Hirata     }
3152bede687SKazu Hirata   }
3162bede687SKazu Hirata 
3172bede687SKazu Hirata   OS << "MemprofProfile:\n";
3182bede687SKazu Hirata   OS << "  Summary:\n";
31930b93db5SMatthew Weingarten   OS << "    Version: " << MemprofRawVersion << "\n";
3202bede687SKazu Hirata   OS << "    NumSegments: " << SegmentInfo.size() << "\n";
3212bede687SKazu Hirata   OS << "    NumMibInfo: " << NumMibInfo << "\n";
3222bede687SKazu Hirata   OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
3232bede687SKazu Hirata   OS << "    NumStackOffsets: " << StackMap.size() << "\n";
3242bede687SKazu Hirata   // Print out the segment information.
3252bede687SKazu Hirata   OS << "  Segments:\n";
3262bede687SKazu Hirata   for (const auto &Entry : SegmentInfo) {
3272bede687SKazu Hirata     OS << "  -\n";
3282bede687SKazu Hirata     OS << "    BuildId: " << getBuildIdString(Entry) << "\n";
3292bede687SKazu Hirata     OS << "    Start: 0x" << llvm::utohexstr(Entry.Start) << "\n";
3302bede687SKazu Hirata     OS << "    End: 0x" << llvm::utohexstr(Entry.End) << "\n";
3312bede687SKazu Hirata     OS << "    Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n";
3322bede687SKazu Hirata   }
3332bede687SKazu Hirata   // Print out the merged contents of the profiles.
3342bede687SKazu Hirata   OS << "  Records:\n";
3353749e0d4SKazu Hirata   for (const auto &[GUID, Record] : *this) {
3362bede687SKazu Hirata     OS << "  -\n";
3373749e0d4SKazu Hirata     OS << "    FunctionGUID: " << GUID << "\n";
3383749e0d4SKazu Hirata     Record.print(OS);
3392bede687SKazu Hirata   }
3402bede687SKazu Hirata }
3412bede687SKazu Hirata 
3422bede687SKazu Hirata Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
3432bede687SKazu Hirata   const StringRef FileName = Binary.getBinary()->getFileName();
3442bede687SKazu Hirata 
3452bede687SKazu Hirata   auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
3462bede687SKazu Hirata   if (!ElfObject) {
3472bede687SKazu Hirata     return report(make_error<StringError>(Twine("Not an ELF file: "),
3482bede687SKazu Hirata                                           inconvertibleErrorCode()),
3492bede687SKazu Hirata                   FileName);
3502bede687SKazu Hirata   }
3512bede687SKazu Hirata 
3522bede687SKazu Hirata   // Check whether the profiled binary was built with position independent code
3532bede687SKazu Hirata   // (PIC). Perform sanity checks for assumptions we rely on to simplify
3542bede687SKazu Hirata   // symbolization.
3552bede687SKazu Hirata   auto *Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
3562bede687SKazu Hirata   const llvm::object::ELF64LEFile &ElfFile = Elf64LEObject->getELFFile();
3572bede687SKazu Hirata   auto PHdrsOr = ElfFile.program_headers();
3582bede687SKazu Hirata   if (!PHdrsOr)
3592bede687SKazu Hirata     return report(
3602bede687SKazu Hirata         make_error<StringError>(Twine("Could not read program headers: "),
3612bede687SKazu Hirata                                 inconvertibleErrorCode()),
3622bede687SKazu Hirata         FileName);
3632bede687SKazu Hirata 
3642bede687SKazu Hirata   int NumExecutableSegments = 0;
3652bede687SKazu Hirata   for (const auto &Phdr : *PHdrsOr) {
3662bede687SKazu Hirata     if (Phdr.p_type == ELF::PT_LOAD) {
3672bede687SKazu Hirata       if (Phdr.p_flags & ELF::PF_X) {
3682bede687SKazu Hirata         // We assume only one text segment in the main binary for simplicity and
3692bede687SKazu Hirata         // reduce the overhead of checking multiple ranges during symbolization.
3702bede687SKazu Hirata         if (++NumExecutableSegments > 1) {
3712bede687SKazu Hirata           return report(
3722bede687SKazu Hirata               make_error<StringError>(
3732bede687SKazu Hirata                   "Expect only one executable load segment in the binary",
3742bede687SKazu Hirata                   inconvertibleErrorCode()),
3752bede687SKazu Hirata               FileName);
3762bede687SKazu Hirata         }
3772bede687SKazu Hirata         // Segment will always be loaded at a page boundary, expect it to be
3782bede687SKazu Hirata         // aligned already. Assume 4K pagesize for the machine from which the
3792bede687SKazu Hirata         // profile has been collected. This should be fine for now, in case we
3802bede687SKazu Hirata         // want to support other pagesizes it can be recorded in the raw profile
3812bede687SKazu Hirata         // during collection.
3822bede687SKazu Hirata         PreferredTextSegmentAddress = Phdr.p_vaddr;
3832bede687SKazu Hirata         assert(Phdr.p_vaddr == (Phdr.p_vaddr & ~(0x1000 - 1U)) &&
3842bede687SKazu Hirata                "Expect p_vaddr to always be page aligned");
3852bede687SKazu Hirata         assert(Phdr.p_offset == 0 && "Expect p_offset = 0 for symbolization.");
3862bede687SKazu Hirata       }
3872bede687SKazu Hirata     }
3882bede687SKazu Hirata   }
3892bede687SKazu Hirata 
3902bede687SKazu Hirata   auto Triple = ElfObject->makeTriple();
3912bede687SKazu Hirata   if (!Triple.isX86())
3922bede687SKazu Hirata     return report(make_error<StringError>(Twine("Unsupported target: ") +
3932bede687SKazu Hirata                                               Triple.getArchName(),
3942bede687SKazu Hirata                                           inconvertibleErrorCode()),
3952bede687SKazu Hirata                   FileName);
3962bede687SKazu Hirata 
3972bede687SKazu Hirata   // Process the raw profile.
3982bede687SKazu Hirata   if (Error E = readRawProfile(std::move(DataBuffer)))
3992bede687SKazu Hirata     return E;
4002bede687SKazu Hirata 
4012bede687SKazu Hirata   if (Error E = setupForSymbolization())
4022bede687SKazu Hirata     return E;
4032bede687SKazu Hirata 
4042bede687SKazu Hirata   auto *Object = cast<object::ObjectFile>(Binary.getBinary());
4052bede687SKazu Hirata   std::unique_ptr<DIContext> Context = DWARFContext::create(
4062bede687SKazu Hirata       *Object, DWARFContext::ProcessDebugRelocations::Process);
4072bede687SKazu Hirata 
4082bede687SKazu Hirata   auto SOFOr = symbolize::SymbolizableObjectFile::create(
4092bede687SKazu Hirata       Object, std::move(Context), /*UntagAddresses=*/false);
4102bede687SKazu Hirata   if (!SOFOr)
4112bede687SKazu Hirata     return report(SOFOr.takeError(), FileName);
4122bede687SKazu Hirata   auto Symbolizer = std::move(SOFOr.get());
4132bede687SKazu Hirata 
4142bede687SKazu Hirata   // The symbolizer ownership is moved into symbolizeAndFilterStackFrames so
4152bede687SKazu Hirata   // that it is freed automatically at the end, when it is no longer used. This
4162bede687SKazu Hirata   // reduces peak memory since it won't be live while also mapping the raw
4172bede687SKazu Hirata   // profile into records afterwards.
4182bede687SKazu Hirata   if (Error E = symbolizeAndFilterStackFrames(std::move(Symbolizer)))
4192bede687SKazu Hirata     return E;
4202bede687SKazu Hirata 
4212bede687SKazu Hirata   return mapRawProfileToRecords();
4222bede687SKazu Hirata }
4232bede687SKazu Hirata 
4242bede687SKazu Hirata Error RawMemProfReader::setupForSymbolization() {
4252bede687SKazu Hirata   auto *Object = cast<object::ObjectFile>(Binary.getBinary());
4262bede687SKazu Hirata   object::BuildIDRef BinaryId = object::getBuildID(Object);
4272bede687SKazu Hirata   if (BinaryId.empty())
4282bede687SKazu Hirata     return make_error<StringError>(Twine("No build id found in binary ") +
4292bede687SKazu Hirata                                        Binary.getBinary()->getFileName(),
4302bede687SKazu Hirata                                    inconvertibleErrorCode());
4312bede687SKazu Hirata 
4322bede687SKazu Hirata   int NumMatched = 0;
4332bede687SKazu Hirata   for (const auto &Entry : SegmentInfo) {
4342bede687SKazu Hirata     llvm::ArrayRef<uint8_t> SegmentId(Entry.BuildId, Entry.BuildIdSize);
4352bede687SKazu Hirata     if (BinaryId == SegmentId) {
4362bede687SKazu Hirata       // We assume only one text segment in the main binary for simplicity and
4372bede687SKazu Hirata       // reduce the overhead of checking multiple ranges during symbolization.
4382bede687SKazu Hirata       if (++NumMatched > 1) {
4392bede687SKazu Hirata         return make_error<StringError>(
4402bede687SKazu Hirata             "We expect only one executable segment in the profiled binary",
4412bede687SKazu Hirata             inconvertibleErrorCode());
4422bede687SKazu Hirata       }
4432bede687SKazu Hirata       ProfiledTextSegmentStart = Entry.Start;
4442bede687SKazu Hirata       ProfiledTextSegmentEnd = Entry.End;
4452bede687SKazu Hirata     }
4462bede687SKazu Hirata   }
4472bede687SKazu Hirata   assert(NumMatched != 0 && "No matching executable segments in segment info.");
4482bede687SKazu Hirata   assert((PreferredTextSegmentAddress == 0 ||
4492bede687SKazu Hirata           (PreferredTextSegmentAddress == ProfiledTextSegmentStart)) &&
4502bede687SKazu Hirata          "Expect text segment address to be 0 or equal to profiled text "
4512bede687SKazu Hirata          "segment start.");
4522bede687SKazu Hirata   return Error::success();
4532bede687SKazu Hirata }
4542bede687SKazu Hirata 
// Converts the per-callstack data in CallstackProfileData into
// IndexedMemProfRecords stored in MemProfData.Records, using the symbolized
// frames cached in SymbolizedFrame.
//
// Returns a malformed InstrProfError if a stack id in CallstackProfileData has
// no entry in StackMap; otherwise returns success.
Error RawMemProfReader::mapRawProfileToRecords() {
  // Hold a mapping from function to each callsite location we encounter within
  // it that is part of some dynamic allocation context. The location is stored
  // as a pointer to a symbolized list of inline frames.
  using LocationPtr = const llvm::SmallVector<FrameId> *;
  llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
      PerFunctionCallSites;

  // Convert the raw profile callstack data into memprof records. While doing so
  // keep track of related contexts so that we can fill these in later.
  for (const auto &[StackId, MIB] : CallstackProfileData) {
    auto It = StackMap.find(StackId);
    if (It == StackMap.end())
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof callstack record does not contain id: " + Twine(StackId));

    // Construct the symbolized callstack.
    llvm::SmallVector<FrameId> Callstack;
    Callstack.reserve(It->getSecond().size());

    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
    for (size_t I = 0; I < Addresses.size(); I++) {
      const uint64_t Address = Addresses[I];
      // Every address is expected to have been symbolized already; the lookup
      // below uses operator[] and relies on the entry being present.
      assert(SymbolizedFrame.count(Address) > 0 &&
             "Address not found in SymbolizedFrame map");
      const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];

      assert(!idToFrame(Frames.back()).IsInlineFrame &&
             "The last frame should not be inlined");

      // Record the callsites for each function. Skip the first frame of the
      // first address since it is the allocation site itself that is recorded
      // as an alloc site.
      for (size_t J = 0; J < Frames.size(); J++) {
        if (I == 0 && J == 0)
          continue;
        // We attach the entire bottom-up frame here for the callsite even
        // though we only need the frames up to and including the frame for
        // Frames[J].Function. This will enable better deduplication for
        // compression in the future.
        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
        // The stored pointer refers to the vector held inside SymbolizedFrame;
        // the SetVector deduplicates repeated locations per function.
        PerFunctionCallSites[Guid].insert(&Frames);
      }

      // Add all the frames to the current allocation callstack.
      Callstack.append(Frames.begin(), Frames.end());
    }

    CallStackId CSId = MemProfData.addCallStack(Callstack);

    // We attach the memprof record to each function bottom-up including the
    // first non-inline frame.
    // NOTE(review): the loop has no bound on I and indexes Callstack directly,
    // so it presumably relies on Callstack containing a non-inline frame --
    // confirm that an empty or all-inline Callstack cannot reach this point.
    for (size_t I = 0; /*Break out using the condition below*/; I++) {
      const Frame &F = idToFrame(Callstack[I]);
      IndexedMemProfRecord &Record = MemProfData.Records[F.Function];
      Record.AllocSites.emplace_back(CSId, MIB);

      if (!F.IsInlineFrame)
        break;
    }
  }

  // Fill in the related callsites per function.
  for (const auto &[Id, Locs] : PerFunctionCallSites) {
    // Some functions may have only callsite data and no allocation data. Here
    // we insert a new entry for callsite data if we need to.
    IndexedMemProfRecord &Record = MemProfData.Records[Id];
    for (LocationPtr Loc : Locs)
      Record.CallSiteIds.push_back(MemProfData.addCallStack(*Loc));
  }

  return Error::success();
}
5292bede687SKazu Hirata 
5302bede687SKazu Hirata Error RawMemProfReader::symbolizeAndFilterStackFrames(
5312bede687SKazu Hirata     std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer) {
5322bede687SKazu Hirata   // The specifier to use when symbolization is requested.
5332bede687SKazu Hirata   const DILineInfoSpecifier Specifier(
5342bede687SKazu Hirata       DILineInfoSpecifier::FileLineInfoKind::RawValue,
5352bede687SKazu Hirata       DILineInfoSpecifier::FunctionNameKind::LinkageName);
5362bede687SKazu Hirata 
5372bede687SKazu Hirata   // For entries where all PCs in the callstack are discarded, we erase the
5382bede687SKazu Hirata   // entry from the stack map.
5392bede687SKazu Hirata   llvm::SmallVector<uint64_t> EntriesToErase;
5402bede687SKazu Hirata   // We keep track of all prior discarded entries so that we can avoid invoking
5412bede687SKazu Hirata   // the symbolizer for such entries.
5422bede687SKazu Hirata   llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
5432bede687SKazu Hirata   for (auto &Entry : StackMap) {
5442bede687SKazu Hirata     for (const uint64_t VAddr : Entry.getSecond()) {
5452bede687SKazu Hirata       // Check if we have already symbolized and cached the result or if we
5462bede687SKazu Hirata       // don't want to attempt symbolization since we know this address is bad.
5472bede687SKazu Hirata       // In this case the address is also removed from the current callstack.
5482bede687SKazu Hirata       if (SymbolizedFrame.count(VAddr) > 0 ||
5492bede687SKazu Hirata           AllVAddrsToDiscard.contains(VAddr))
5502bede687SKazu Hirata         continue;
5512bede687SKazu Hirata 
5522bede687SKazu Hirata       Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
5532bede687SKazu Hirata           getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
5542bede687SKazu Hirata       if (!DIOr)
5552bede687SKazu Hirata         return DIOr.takeError();
5562bede687SKazu Hirata       DIInliningInfo DI = DIOr.get();
5572bede687SKazu Hirata 
5582bede687SKazu Hirata       // Drop frames which we can't symbolize or if they belong to the runtime.
5592bede687SKazu Hirata       if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
5602bede687SKazu Hirata           isRuntimePath(DI.getFrame(0).FileName)) {
5612bede687SKazu Hirata         AllVAddrsToDiscard.insert(VAddr);
5622bede687SKazu Hirata         continue;
5632bede687SKazu Hirata       }
5642bede687SKazu Hirata 
5652bede687SKazu Hirata       for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
5662bede687SKazu Hirata            I++) {
5672bede687SKazu Hirata         const auto &DIFrame = DI.getFrame(I);
5682bede687SKazu Hirata         const uint64_t Guid =
5692bede687SKazu Hirata             IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
5702bede687SKazu Hirata         const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
5712bede687SKazu Hirata                       // Only the last entry is not an inlined location.
5722bede687SKazu Hirata                       I != NumFrames - 1);
5732bede687SKazu Hirata         // Here we retain a mapping from the GUID to canonical symbol name
5742bede687SKazu Hirata         // instead of adding it to the frame object directly to reduce memory
5752bede687SKazu Hirata         // overhead. This is because there can be many unique frames,
5762bede687SKazu Hirata         // particularly for callsite frames.
5772bede687SKazu Hirata         if (KeepSymbolName) {
5782bede687SKazu Hirata           StringRef CanonicalName =
5792bede687SKazu Hirata               sampleprof::FunctionSamples::getCanonicalFnName(
5802bede687SKazu Hirata                   DIFrame.FunctionName);
5812bede687SKazu Hirata           GuidToSymbolName.insert({Guid, CanonicalName.str()});
5822bede687SKazu Hirata         }
5832bede687SKazu Hirata 
58450f8580eSKazu Hirata         SymbolizedFrame[VAddr].push_back(MemProfData.addFrame(F));
5852bede687SKazu Hirata       }
5862bede687SKazu Hirata     }
5872bede687SKazu Hirata 
5882bede687SKazu Hirata     auto &CallStack = Entry.getSecond();
5892bede687SKazu Hirata     llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
5902bede687SKazu Hirata       return AllVAddrsToDiscard.contains(A);
5912bede687SKazu Hirata     });
5922bede687SKazu Hirata     if (CallStack.empty())
5932bede687SKazu Hirata       EntriesToErase.push_back(Entry.getFirst());
5942bede687SKazu Hirata   }
5952bede687SKazu Hirata 
5962bede687SKazu Hirata   // Drop the entries where the callstack is empty.
5972bede687SKazu Hirata   for (const uint64_t Id : EntriesToErase) {
5982bede687SKazu Hirata     StackMap.erase(Id);
599ca4e5a8dSMatthew Weingarten     if (CallstackProfileData[Id].AccessHistogramSize > 0)
600ca4e5a8dSMatthew Weingarten       free((void *)CallstackProfileData[Id].AccessHistogram);
6012bede687SKazu Hirata     CallstackProfileData.erase(Id);
6022bede687SKazu Hirata   }
6032bede687SKazu Hirata 
6042bede687SKazu Hirata   if (StackMap.empty())
6052bede687SKazu Hirata     return make_error<InstrProfError>(
6062bede687SKazu Hirata         instrprof_error::malformed,
6072bede687SKazu Hirata         "no entries in callstack map after symbolization");
6082bede687SKazu Hirata 
6092bede687SKazu Hirata   return Error::success();
6102bede687SKazu Hirata }
6112bede687SKazu Hirata 
6122bede687SKazu Hirata std::vector<std::string>
6132bede687SKazu Hirata RawMemProfReader::peekBuildIds(MemoryBuffer *DataBuffer) {
6142bede687SKazu Hirata   const char *Next = DataBuffer->getBufferStart();
61515135afaSKazu Hirata   // Use a SetVector since a profile file may contain multiple raw profile
6162bede687SKazu Hirata   // dumps, each with segment information. We want them unique and in order they
6172bede687SKazu Hirata   // were stored in the profile; the profiled binary should be the first entry.
6182bede687SKazu Hirata   // The runtime uses dl_iterate_phdr and the "... first object visited by
6192bede687SKazu Hirata   // callback is the main program."
6202bede687SKazu Hirata   // https://man7.org/linux/man-pages/man3/dl_iterate_phdr.3.html
62115135afaSKazu Hirata   llvm::SetVector<std::string, std::vector<std::string>,
62215135afaSKazu Hirata                   llvm::SmallSet<std::string, 10>>
62315135afaSKazu Hirata       BuildIds;
6242bede687SKazu Hirata   while (Next < DataBuffer->getBufferEnd()) {
625bfa937a4SKazu Hirata     const auto *Header = reinterpret_cast<const memprof::Header *>(Next);
6262bede687SKazu Hirata 
6272bede687SKazu Hirata     const llvm::SmallVector<SegmentEntry> Entries =
6282bede687SKazu Hirata         readSegmentEntries(Next + Header->SegmentOffset);
6292bede687SKazu Hirata 
63015135afaSKazu Hirata     for (const auto &Entry : Entries)
63115135afaSKazu Hirata       BuildIds.insert(getBuildIdString(Entry));
6322bede687SKazu Hirata 
6332bede687SKazu Hirata     Next += Header->TotalSize;
6342bede687SKazu Hirata   }
63515135afaSKazu Hirata   return BuildIds.takeVector();
6362bede687SKazu Hirata }
6372bede687SKazu Hirata 
63830b93db5SMatthew Weingarten // FIXME: Add a schema for serializing similiar to IndexedMemprofReader. This
63930b93db5SMatthew Weingarten // will help being able to deserialize different versions raw memprof versions
64030b93db5SMatthew Weingarten // more easily.
64130b93db5SMatthew Weingarten llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
64230b93db5SMatthew Weingarten RawMemProfReader::readMemInfoBlocks(const char *Ptr) {
64330b93db5SMatthew Weingarten   if (MemprofRawVersion == 3ULL)
64430b93db5SMatthew Weingarten     return readMemInfoBlocksV3(Ptr);
64522b36bfaSKazu Hirata   if (MemprofRawVersion == 4ULL)
64630b93db5SMatthew Weingarten     return readMemInfoBlocksV4(Ptr);
64722b36bfaSKazu Hirata   llvm_unreachable(
64830b93db5SMatthew Weingarten       "Panic: Unsupported version number when reading MemInfoBlocks");
64930b93db5SMatthew Weingarten }
65030b93db5SMatthew Weingarten 
6512bede687SKazu Hirata Error RawMemProfReader::readRawProfile(
6522bede687SKazu Hirata     std::unique_ptr<MemoryBuffer> DataBuffer) {
6532bede687SKazu Hirata   const char *Next = DataBuffer->getBufferStart();
6542bede687SKazu Hirata 
6552bede687SKazu Hirata   while (Next < DataBuffer->getBufferEnd()) {
656bfa937a4SKazu Hirata     const auto *Header = reinterpret_cast<const memprof::Header *>(Next);
6572bede687SKazu Hirata 
65830b93db5SMatthew Weingarten     // Set Reader version to memprof raw version of profile. Checking if version
65930b93db5SMatthew Weingarten     // is supported is checked before creating the reader.
66030b93db5SMatthew Weingarten     MemprofRawVersion = Header->Version;
66130b93db5SMatthew Weingarten 
6622bede687SKazu Hirata     // Read in the segment information, check whether its the same across all
6632bede687SKazu Hirata     // profiles in this binary file.
6642bede687SKazu Hirata     const llvm::SmallVector<SegmentEntry> Entries =
6652bede687SKazu Hirata         readSegmentEntries(Next + Header->SegmentOffset);
6662bede687SKazu Hirata     if (!SegmentInfo.empty() && SegmentInfo != Entries) {
6672bede687SKazu Hirata       // We do not expect segment information to change when deserializing from
6682bede687SKazu Hirata       // the same binary profile file. This can happen if dynamic libraries are
6692bede687SKazu Hirata       // loaded/unloaded between profile dumping.
6702bede687SKazu Hirata       return make_error<InstrProfError>(
6712bede687SKazu Hirata           instrprof_error::malformed,
6722bede687SKazu Hirata           "memprof raw profile has different segment information");
6732bede687SKazu Hirata     }
6742bede687SKazu Hirata     SegmentInfo.assign(Entries.begin(), Entries.end());
6752bede687SKazu Hirata 
6762bede687SKazu Hirata     // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
6772bede687SKazu Hirata     // raw profiles in the same binary file are from the same process so the
6782bede687SKazu Hirata     // stackdepot ids are the same.
6792bede687SKazu Hirata     for (const auto &[Id, MIB] : readMemInfoBlocks(Next + Header->MIBOffset)) {
6802bede687SKazu Hirata       if (CallstackProfileData.count(Id)) {
68130b93db5SMatthew Weingarten 
68230b93db5SMatthew Weingarten         if (MemprofRawVersion >= 4ULL &&
68330b93db5SMatthew Weingarten             (CallstackProfileData[Id].AccessHistogramSize > 0 ||
68430b93db5SMatthew Weingarten              MIB.AccessHistogramSize > 0)) {
68530b93db5SMatthew Weingarten           uintptr_t ShorterHistogram;
68630b93db5SMatthew Weingarten           if (CallstackProfileData[Id].AccessHistogramSize >
68730b93db5SMatthew Weingarten               MIB.AccessHistogramSize)
68830b93db5SMatthew Weingarten             ShorterHistogram = MIB.AccessHistogram;
68930b93db5SMatthew Weingarten           else
69030b93db5SMatthew Weingarten             ShorterHistogram = CallstackProfileData[Id].AccessHistogram;
6912bede687SKazu Hirata           CallstackProfileData[Id].Merge(MIB);
69230b93db5SMatthew Weingarten           free((void *)ShorterHistogram);
69330b93db5SMatthew Weingarten         } else {
69430b93db5SMatthew Weingarten           CallstackProfileData[Id].Merge(MIB);
69530b93db5SMatthew Weingarten         }
6962bede687SKazu Hirata       } else {
6972bede687SKazu Hirata         CallstackProfileData[Id] = MIB;
6982bede687SKazu Hirata       }
6992bede687SKazu Hirata     }
7002bede687SKazu Hirata 
7012bede687SKazu Hirata     // Read in the callstack for each ids. For multiple raw profiles in the same
7022bede687SKazu Hirata     // file, we expect that the callstack is the same for a unique id.
7032bede687SKazu Hirata     const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
7042bede687SKazu Hirata     if (StackMap.empty()) {
7052bede687SKazu Hirata       StackMap = CSM;
7062bede687SKazu Hirata     } else {
7072bede687SKazu Hirata       if (mergeStackMap(CSM, StackMap))
7082bede687SKazu Hirata         return make_error<InstrProfError>(
7092bede687SKazu Hirata             instrprof_error::malformed,
7102bede687SKazu Hirata             "memprof raw profile got different call stack for same id");
7112bede687SKazu Hirata     }
7122bede687SKazu Hirata 
7132bede687SKazu Hirata     Next += Header->TotalSize;
7142bede687SKazu Hirata   }
7152bede687SKazu Hirata 
7162bede687SKazu Hirata   return Error::success();
7172bede687SKazu Hirata }
7182bede687SKazu Hirata 
7192bede687SKazu Hirata object::SectionedAddress
7202bede687SKazu Hirata RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
7212bede687SKazu Hirata   if (VirtualAddress > ProfiledTextSegmentStart &&
7222bede687SKazu Hirata       VirtualAddress <= ProfiledTextSegmentEnd) {
7232bede687SKazu Hirata     // For PIE binaries, the preferred address is zero and we adjust the virtual
7242bede687SKazu Hirata     // address by start of the profiled segment assuming that the offset of the
7252bede687SKazu Hirata     // segment in the binary is zero. For non-PIE binaries the preferred and
7262bede687SKazu Hirata     // profiled segment addresses should be equal and this is a no-op.
7272bede687SKazu Hirata     const uint64_t AdjustedAddress =
7282bede687SKazu Hirata         VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart;
7292bede687SKazu Hirata     return object::SectionedAddress{AdjustedAddress};
7302bede687SKazu Hirata   }
7312bede687SKazu Hirata   // Addresses which do not originate from the profiled text segment in the
7322bede687SKazu Hirata   // binary are not adjusted. These will fail symbolization and be filtered out
7332bede687SKazu Hirata   // during processing.
7342bede687SKazu Hirata   return object::SectionedAddress{VirtualAddress};
7352bede687SKazu Hirata }
7362bede687SKazu Hirata 
7372bede687SKazu Hirata Error RawMemProfReader::readNextRecord(
7382bede687SKazu Hirata     GuidMemProfRecordPair &GuidRecord,
7392bede687SKazu Hirata     std::function<const Frame(const FrameId)> Callback) {
7402bede687SKazu Hirata   // Create a new callback for the RawMemProfRecord iterator so that we can
7412bede687SKazu Hirata   // provide the symbol name if the reader was initialized with KeepSymbolName =
7422bede687SKazu Hirata   // true. This is useful for debugging and testing.
7432bede687SKazu Hirata   auto IdToFrameCallback = [this](const FrameId Id) {
7442bede687SKazu Hirata     Frame F = this->idToFrame(Id);
7452bede687SKazu Hirata     if (!this->KeepSymbolName)
7462bede687SKazu Hirata       return F;
7472bede687SKazu Hirata     auto Iter = this->GuidToSymbolName.find(F.Function);
7482bede687SKazu Hirata     assert(Iter != this->GuidToSymbolName.end());
749d55e235bSKazu Hirata     F.SymbolName = std::make_unique<std::string>(Iter->getSecond());
7502bede687SKazu Hirata     return F;
7512bede687SKazu Hirata   };
7522bede687SKazu Hirata   return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
7532bede687SKazu Hirata }
754e98396f4SKazu Hirata 
755684e79f2SKazu Hirata Expected<std::unique_ptr<YAMLMemProfReader>>
756684e79f2SKazu Hirata YAMLMemProfReader::create(const Twine &Path) {
757*1d515466SZibi Sarbinowski   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
758684e79f2SKazu Hirata   if (std::error_code EC = BufferOr.getError())
759684e79f2SKazu Hirata     return report(errorCodeToError(EC), Path.getSingleStringRef());
760684e79f2SKazu Hirata 
761684e79f2SKazu Hirata   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
762684e79f2SKazu Hirata   return create(std::move(Buffer));
763684e79f2SKazu Hirata }
764684e79f2SKazu Hirata 
765684e79f2SKazu Hirata Expected<std::unique_ptr<YAMLMemProfReader>>
766684e79f2SKazu Hirata YAMLMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
767684e79f2SKazu Hirata   auto Reader = std::make_unique<YAMLMemProfReader>();
768684e79f2SKazu Hirata   Reader->parse(Buffer->getBuffer());
769684e79f2SKazu Hirata   return std::move(Reader);
770684e79f2SKazu Hirata }
771684e79f2SKazu Hirata 
772684e79f2SKazu Hirata bool YAMLMemProfReader::hasFormat(const StringRef Path) {
773*1d515466SZibi Sarbinowski   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
774684e79f2SKazu Hirata   if (!BufferOr)
775684e79f2SKazu Hirata     return false;
776684e79f2SKazu Hirata 
777684e79f2SKazu Hirata   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
778684e79f2SKazu Hirata   return hasFormat(*Buffer);
779684e79f2SKazu Hirata }
780684e79f2SKazu Hirata 
781684e79f2SKazu Hirata bool YAMLMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
782684e79f2SKazu Hirata   return Buffer.getBuffer().starts_with("---");
783684e79f2SKazu Hirata }
784684e79f2SKazu Hirata 
785e98396f4SKazu Hirata void YAMLMemProfReader::parse(StringRef YAMLData) {
786e98396f4SKazu Hirata   memprof::AllMemProfData Doc;
787e98396f4SKazu Hirata   yaml::Input Yin(YAMLData);
788e98396f4SKazu Hirata 
789e98396f4SKazu Hirata   Yin >> Doc;
790e98396f4SKazu Hirata   if (Yin.error())
791e98396f4SKazu Hirata     return;
792e98396f4SKazu Hirata 
793e98396f4SKazu Hirata   // Add a call stack to MemProfData.CallStacks and return its CallStackId.
794e98396f4SKazu Hirata   auto AddCallStack = [&](ArrayRef<Frame> CallStack) -> CallStackId {
795e98396f4SKazu Hirata     SmallVector<FrameId> IndexedCallStack;
796e98396f4SKazu Hirata     IndexedCallStack.reserve(CallStack.size());
79750f8580eSKazu Hirata     for (const Frame &F : CallStack)
79850f8580eSKazu Hirata       IndexedCallStack.push_back(MemProfData.addFrame(F));
799c5e4e8f8SKazu Hirata     return MemProfData.addCallStack(std::move(IndexedCallStack));
800e98396f4SKazu Hirata   };
801e98396f4SKazu Hirata 
802e98396f4SKazu Hirata   for (const auto &[GUID, Record] : Doc.HeapProfileRecords) {
803e98396f4SKazu Hirata     IndexedMemProfRecord IndexedRecord;
804e98396f4SKazu Hirata 
805e98396f4SKazu Hirata     // Convert AllocationInfo to IndexedAllocationInfo.
806e98396f4SKazu Hirata     for (const AllocationInfo &AI : Record.AllocSites) {
807e98396f4SKazu Hirata       CallStackId CSId = AddCallStack(AI.CallStack);
808e98396f4SKazu Hirata       IndexedRecord.AllocSites.emplace_back(CSId, AI.Info);
809e98396f4SKazu Hirata     }
810e98396f4SKazu Hirata 
811e98396f4SKazu Hirata     // Populate CallSiteIds.
812e98396f4SKazu Hirata     for (const auto &CallSite : Record.CallSites) {
813e98396f4SKazu Hirata       CallStackId CSId = AddCallStack(CallSite);
814e98396f4SKazu Hirata       IndexedRecord.CallSiteIds.push_back(CSId);
815e98396f4SKazu Hirata     }
816e98396f4SKazu Hirata 
817e98396f4SKazu Hirata     MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord));
818e98396f4SKazu Hirata   }
819e98396f4SKazu Hirata }
8202bede687SKazu Hirata } // namespace memprof
8212bede687SKazu Hirata } // namespace llvm
822