12bede687SKazu Hirata //===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===// 22bede687SKazu Hirata // 32bede687SKazu Hirata // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42bede687SKazu Hirata // See https://llvm.org/LICENSE.txt for license information. 52bede687SKazu Hirata // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 62bede687SKazu Hirata // 72bede687SKazu Hirata //===----------------------------------------------------------------------===// 82bede687SKazu Hirata // 92bede687SKazu Hirata // This file contains support for reading MemProf profiling data. 102bede687SKazu Hirata // 112bede687SKazu Hirata //===----------------------------------------------------------------------===// 122bede687SKazu Hirata 132bede687SKazu Hirata #include <cstdint> 142bede687SKazu Hirata #include <memory> 152bede687SKazu Hirata #include <type_traits> 162bede687SKazu Hirata 172bede687SKazu Hirata #include "llvm/ADT/ArrayRef.h" 182bede687SKazu Hirata #include "llvm/ADT/DenseMap.h" 192bede687SKazu Hirata #include "llvm/ADT/SetVector.h" 202bede687SKazu Hirata #include "llvm/ADT/SmallSet.h" 212bede687SKazu Hirata #include "llvm/ADT/SmallVector.h" 222bede687SKazu Hirata #include "llvm/ADT/StringExtras.h" 232bede687SKazu Hirata #include "llvm/ADT/Twine.h" 242bede687SKazu Hirata #include "llvm/DebugInfo/DWARF/DWARFContext.h" 252bede687SKazu Hirata #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 262bede687SKazu Hirata #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" 272bede687SKazu Hirata #include "llvm/Object/Binary.h" 282bede687SKazu Hirata #include "llvm/Object/BuildID.h" 292bede687SKazu Hirata #include "llvm/Object/ELFObjectFile.h" 302bede687SKazu Hirata #include "llvm/Object/ObjectFile.h" 312bede687SKazu Hirata #include "llvm/ProfileData/InstrProf.h" 322bede687SKazu Hirata #include "llvm/ProfileData/MemProf.h" 332bede687SKazu Hirata #include "llvm/ProfileData/MemProfData.inc" 342bede687SKazu Hirata #include "llvm/ProfileData/MemProfReader.h" 3566edefaeSKazu Hirata #include "llvm/ProfileData/MemProfYAML.h" 362bede687SKazu Hirata #include "llvm/ProfileData/SampleProf.h" 372bede687SKazu Hirata #include "llvm/Support/Debug.h" 382bede687SKazu Hirata #include "llvm/Support/Endian.h" 392bede687SKazu Hirata #include "llvm/Support/Error.h" 402bede687SKazu Hirata #include "llvm/Support/MemoryBuffer.h" 412bede687SKazu Hirata #include "llvm/Support/Path.h" 422bede687SKazu Hirata 432bede687SKazu Hirata #define DEBUG_TYPE "memprof" 44e98396f4SKazu Hirata 45e98396f4SKazu Hirata namespace llvm { 462bede687SKazu Hirata namespace memprof { 472bede687SKazu Hirata namespace { 482bede687SKazu Hirata template <class T = uint64_t> inline T alignedRead(const char *Ptr) { 494153c2dcSFangrui Song static_assert(std::is_integral_v<T>, "Not an integral type"); 502bede687SKazu Hirata assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read"); 512bede687SKazu Hirata return *reinterpret_cast<const T *>(Ptr); 522bede687SKazu Hirata } 532bede687SKazu Hirata 542bede687SKazu Hirata Error checkBuffer(const MemoryBuffer &Buffer) { 552bede687SKazu Hirata if (!RawMemProfReader::hasFormat(Buffer)) 562bede687SKazu Hirata return make_error<InstrProfError>(instrprof_error::bad_magic); 572bede687SKazu Hirata 582bede687SKazu Hirata if (Buffer.getBufferSize() == 0) 592bede687SKazu Hirata return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 602bede687SKazu Hirata 612bede687SKazu Hirata if (Buffer.getBufferSize() < sizeof(Header)) { 622bede687SKazu Hirata return make_error<InstrProfError>(instrprof_error::truncated); 632bede687SKazu Hirata } 642bede687SKazu Hirata 652bede687SKazu Hirata // The size of the buffer can be > header total size since we allow repeated 662bede687SKazu Hirata // serialization of memprof profiles to the same file. 672bede687SKazu Hirata uint64_t TotalSize = 0; 682bede687SKazu Hirata const char *Next = Buffer.getBufferStart(); 692bede687SKazu Hirata while (Next < Buffer.getBufferEnd()) { 70bfa937a4SKazu Hirata const auto *H = reinterpret_cast<const Header *>(Next); 7130b93db5SMatthew Weingarten 7230b93db5SMatthew Weingarten // Check if the version in header is among the supported versions. 7330b93db5SMatthew Weingarten bool IsSupported = false; 7430b93db5SMatthew Weingarten for (auto SupportedVersion : MEMPROF_RAW_SUPPORTED_VERSIONS) { 7530b93db5SMatthew Weingarten if (H->Version == SupportedVersion) 7630b93db5SMatthew Weingarten IsSupported = true; 7730b93db5SMatthew Weingarten } 7830b93db5SMatthew Weingarten if (!IsSupported) { 792bede687SKazu Hirata return make_error<InstrProfError>(instrprof_error::unsupported_version); 802bede687SKazu Hirata } 812bede687SKazu Hirata 822bede687SKazu Hirata TotalSize += H->TotalSize; 832bede687SKazu Hirata Next += H->TotalSize; 842bede687SKazu Hirata } 852bede687SKazu Hirata 862bede687SKazu Hirata if (Buffer.getBufferSize() != TotalSize) { 872bede687SKazu Hirata return make_error<InstrProfError>(instrprof_error::malformed); 882bede687SKazu Hirata } 892bede687SKazu Hirata return Error::success(); 902bede687SKazu Hirata } 912bede687SKazu Hirata 922bede687SKazu Hirata llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) { 932bede687SKazu Hirata using namespace support; 942bede687SKazu Hirata 952bede687SKazu Hirata const uint64_t NumItemsToRead = 96f430e374SKazu Hirata endian::readNext<uint64_t, llvm::endianness::little>(Ptr); 972bede687SKazu Hirata llvm::SmallVector<SegmentEntry> Items; 982bede687SKazu Hirata for (uint64_t I = 0; I < NumItemsToRead; I++) { 992bede687SKazu Hirata Items.push_back(*reinterpret_cast<const SegmentEntry *>( 1002bede687SKazu Hirata Ptr + I * sizeof(SegmentEntry))); 1012bede687SKazu Hirata } 1022bede687SKazu Hirata return Items; 1032bede687SKazu Hirata } 1042bede687SKazu Hirata 1052bede687SKazu Hirata llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> 10630b93db5SMatthew Weingarten readMemInfoBlocksV3(const char *Ptr) { 1072bede687SKazu Hirata using namespace support; 1082bede687SKazu Hirata 1092bede687SKazu Hirata const uint64_t NumItemsToRead = 11030b93db5SMatthew Weingarten endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr); 11130b93db5SMatthew Weingarten 1122bede687SKazu Hirata llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items; 1132bede687SKazu Hirata for (uint64_t I = 0; I < NumItemsToRead; I++) { 1142bede687SKazu Hirata const uint64_t Id = 11530b93db5SMatthew Weingarten endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr); 11630b93db5SMatthew Weingarten 11730b93db5SMatthew Weingarten // We cheat a bit here and remove the const from cast to set the 11830b93db5SMatthew Weingarten // Histogram Pointer to newly allocated buffer. We also cheat, since V3 and 11930b93db5SMatthew Weingarten // V4 do not have the same fields. V3 is missing AccessHistogramSize and 12030b93db5SMatthew Weingarten // AccessHistogram. This means we read "dirty" data in here, but it should 12130b93db5SMatthew Weingarten // not segfault, since there will be callstack data placed after this in the 12230b93db5SMatthew Weingarten // binary format. 12330b93db5SMatthew Weingarten MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr); 12430b93db5SMatthew Weingarten // Overwrite dirty data. 12530b93db5SMatthew Weingarten MIB.AccessHistogramSize = 0; 12630b93db5SMatthew Weingarten MIB.AccessHistogram = 0; 12730b93db5SMatthew Weingarten 1282bede687SKazu Hirata Items.push_back({Id, MIB}); 12930b93db5SMatthew Weingarten // Only increment by the size of MIB in V3. 13030b93db5SMatthew Weingarten Ptr += MEMPROF_V3_MIB_SIZE; 13130b93db5SMatthew Weingarten } 13230b93db5SMatthew Weingarten return Items; 13330b93db5SMatthew Weingarten } 13430b93db5SMatthew Weingarten 13530b93db5SMatthew Weingarten llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> 13630b93db5SMatthew Weingarten readMemInfoBlocksV4(const char *Ptr) { 13730b93db5SMatthew Weingarten using namespace support; 13830b93db5SMatthew Weingarten 13930b93db5SMatthew Weingarten const uint64_t NumItemsToRead = 14030b93db5SMatthew Weingarten endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr); 14130b93db5SMatthew Weingarten 14230b93db5SMatthew Weingarten llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items; 14330b93db5SMatthew Weingarten for (uint64_t I = 0; I < NumItemsToRead; I++) { 14430b93db5SMatthew Weingarten const uint64_t Id = 14530b93db5SMatthew Weingarten endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr); 14630b93db5SMatthew Weingarten // We cheat a bit here and remove the const from cast to set the 14730b93db5SMatthew Weingarten // Histogram Pointer to newly allocated buffer. 14830b93db5SMatthew Weingarten MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr); 14930b93db5SMatthew Weingarten 1502bede687SKazu Hirata // Only increment by size of MIB since readNext implicitly increments. 1512bede687SKazu Hirata Ptr += sizeof(MemInfoBlock); 15230b93db5SMatthew Weingarten 15330b93db5SMatthew Weingarten if (MIB.AccessHistogramSize > 0) { 15430b93db5SMatthew Weingarten MIB.AccessHistogram = 15530b93db5SMatthew Weingarten (uintptr_t)malloc(MIB.AccessHistogramSize * sizeof(uint64_t)); 15630b93db5SMatthew Weingarten } 15730b93db5SMatthew Weingarten 15830b93db5SMatthew Weingarten for (uint64_t J = 0; J < MIB.AccessHistogramSize; J++) { 15930b93db5SMatthew Weingarten ((uint64_t *)MIB.AccessHistogram)[J] = 16030b93db5SMatthew Weingarten endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr); 16130b93db5SMatthew Weingarten } 16230b93db5SMatthew Weingarten Items.push_back({Id, MIB}); 1632bede687SKazu Hirata } 1642bede687SKazu Hirata return Items; 1652bede687SKazu Hirata } 1662bede687SKazu Hirata 1672bede687SKazu Hirata CallStackMap readStackInfo(const char *Ptr) { 1682bede687SKazu Hirata using namespace support; 1692bede687SKazu Hirata 1702bede687SKazu Hirata const uint64_t NumItemsToRead = 171f430e374SKazu Hirata endian::readNext<uint64_t, llvm::endianness::little>(Ptr); 1722bede687SKazu Hirata CallStackMap Items; 1732bede687SKazu Hirata 1742bede687SKazu Hirata for (uint64_t I = 0; I < NumItemsToRead; I++) { 1752bede687SKazu Hirata const uint64_t StackId = 176f430e374SKazu Hirata endian::readNext<uint64_t, llvm::endianness::little>(Ptr); 1772bede687SKazu Hirata const uint64_t NumPCs = 178f430e374SKazu Hirata endian::readNext<uint64_t, llvm::endianness::little>(Ptr); 1792bede687SKazu Hirata 1802bede687SKazu Hirata SmallVector<uint64_t> CallStack; 1812bede687SKazu Hirata CallStack.reserve(NumPCs); 1822bede687SKazu Hirata for (uint64_t J = 0; J < NumPCs; J++) { 1832bede687SKazu Hirata CallStack.push_back( 184f430e374SKazu Hirata endian::readNext<uint64_t, llvm::endianness::little>(Ptr)); 1852bede687SKazu Hirata } 1862bede687SKazu Hirata 1872bede687SKazu Hirata Items[StackId] = CallStack; 1882bede687SKazu Hirata } 1892bede687SKazu Hirata return Items; 1902bede687SKazu Hirata } 1912bede687SKazu Hirata 1922bede687SKazu Hirata // Merges the contents of stack information in \p From to \p To. Returns true if 1932bede687SKazu Hirata // any stack ids observed previously map to a different set of program counter 1942bede687SKazu Hirata // addresses. 1952bede687SKazu Hirata bool mergeStackMap(const CallStackMap &From, CallStackMap &To) { 1963749e0d4SKazu Hirata for (const auto &[Id, Stack] : From) { 197abaa8247SKazu Hirata auto [It, Inserted] = To.try_emplace(Id, Stack); 1982bede687SKazu Hirata // Check that the PCs are the same (in order). 199abaa8247SKazu Hirata if (!Inserted && Stack != It->second) 2002bede687SKazu Hirata return true; 2012bede687SKazu Hirata } 2022bede687SKazu Hirata return false; 2032bede687SKazu Hirata } 2042bede687SKazu Hirata 2052bede687SKazu Hirata Error report(Error E, const StringRef Context) { 2062bede687SKazu Hirata return joinErrors(createStringError(inconvertibleErrorCode(), Context), 2072bede687SKazu Hirata std::move(E)); 2082bede687SKazu Hirata } 2092bede687SKazu Hirata 2102bede687SKazu Hirata bool isRuntimePath(const StringRef Path) { 2112bede687SKazu Hirata const StringRef Filename = llvm::sys::path::filename(Path); 2122bede687SKazu Hirata // This list should be updated in case new files with additional interceptors 2132bede687SKazu Hirata // are added to the memprof runtime. 214bb6df080SKazu Hirata return Filename == "memprof_malloc_linux.cpp" || 215bb6df080SKazu Hirata Filename == "memprof_interceptors.cpp" || 216bb6df080SKazu Hirata Filename == "memprof_new_delete.cpp"; 2172bede687SKazu Hirata } 2182bede687SKazu Hirata 2192bede687SKazu Hirata std::string getBuildIdString(const SegmentEntry &Entry) { 2202bede687SKazu Hirata // If the build id is unset print a helpful string instead of all zeros. 2212bede687SKazu Hirata if (Entry.BuildIdSize == 0) 2222bede687SKazu Hirata return "<None>"; 2232bede687SKazu Hirata 2242bede687SKazu Hirata std::string Str; 2252bede687SKazu Hirata raw_string_ostream OS(Str); 2262bede687SKazu Hirata for (size_t I = 0; I < Entry.BuildIdSize; I++) { 2272bede687SKazu Hirata OS << format_hex_no_prefix(Entry.BuildId[I], 2); 2282bede687SKazu Hirata } 2292bede687SKazu Hirata return OS.str(); 2302bede687SKazu Hirata } 2312bede687SKazu Hirata } // namespace 2322bede687SKazu Hirata 2332bede687SKazu Hirata Expected<std::unique_ptr<RawMemProfReader>> 2342bede687SKazu Hirata RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary, 2352bede687SKazu Hirata bool KeepName) { 2362bede687SKazu Hirata auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); 2372bede687SKazu Hirata if (std::error_code EC = BufferOr.getError()) 2382bede687SKazu Hirata return report(errorCodeToError(EC), Path.getSingleStringRef()); 2392bede687SKazu Hirata 2402bede687SKazu Hirata std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); 2412bede687SKazu Hirata return create(std::move(Buffer), ProfiledBinary, KeepName); 2422bede687SKazu Hirata } 2432bede687SKazu Hirata 2442bede687SKazu Hirata Expected<std::unique_ptr<RawMemProfReader>> 2452bede687SKazu Hirata RawMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 2462bede687SKazu Hirata const StringRef ProfiledBinary, bool KeepName) { 2472bede687SKazu Hirata if (Error E = checkBuffer(*Buffer)) 2482bede687SKazu Hirata return report(std::move(E), Buffer->getBufferIdentifier()); 2492bede687SKazu Hirata 2502bede687SKazu Hirata if (ProfiledBinary.empty()) { 2512bede687SKazu Hirata // Peek the build ids to print a helpful error message. 2522bede687SKazu Hirata const std::vector<std::string> BuildIds = peekBuildIds(Buffer.get()); 2532bede687SKazu Hirata std::string ErrorMessage( 2542bede687SKazu Hirata R"(Path to profiled binary is empty, expected binary with one of the following build ids: 2552bede687SKazu Hirata )"); 2562bede687SKazu Hirata for (const auto &Id : BuildIds) { 2572bede687SKazu Hirata ErrorMessage += "\n BuildId: "; 2582bede687SKazu Hirata ErrorMessage += Id; 2592bede687SKazu Hirata } 2602bede687SKazu Hirata return report( 2612bede687SKazu Hirata make_error<StringError>(ErrorMessage, inconvertibleErrorCode()), 2622bede687SKazu Hirata /*Context=*/""); 2632bede687SKazu Hirata } 2642bede687SKazu Hirata 2652bede687SKazu Hirata auto BinaryOr = llvm::object::createBinary(ProfiledBinary); 2662bede687SKazu Hirata if (!BinaryOr) { 2672bede687SKazu Hirata return report(BinaryOr.takeError(), ProfiledBinary); 2682bede687SKazu Hirata } 2692bede687SKazu Hirata 2702bede687SKazu Hirata // Use new here since constructor is private. 2712bede687SKazu Hirata std::unique_ptr<RawMemProfReader> Reader( 2722bede687SKazu Hirata new RawMemProfReader(std::move(BinaryOr.get()), KeepName)); 2732bede687SKazu Hirata if (Error E = Reader->initialize(std::move(Buffer))) { 2742bede687SKazu Hirata return std::move(E); 2752bede687SKazu Hirata } 2762bede687SKazu Hirata return std::move(Reader); 2772bede687SKazu Hirata } 2782bede687SKazu Hirata 27930b93db5SMatthew Weingarten // We need to make sure that all leftover MIB histograms that have not been 28030b93db5SMatthew Weingarten // freed by merge are freed here. 28130b93db5SMatthew Weingarten RawMemProfReader::~RawMemProfReader() { 28230b93db5SMatthew Weingarten for (auto &[_, MIB] : CallstackProfileData) { 28330b93db5SMatthew Weingarten if (MemprofRawVersion >= 4ULL && MIB.AccessHistogramSize > 0) { 28430b93db5SMatthew Weingarten free((void *)MIB.AccessHistogram); 28530b93db5SMatthew Weingarten } 28630b93db5SMatthew Weingarten } 28730b93db5SMatthew Weingarten } 28830b93db5SMatthew Weingarten 2892bede687SKazu Hirata bool RawMemProfReader::hasFormat(const StringRef Path) { 2902bede687SKazu Hirata auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); 2912bede687SKazu Hirata if (!BufferOr) 2922bede687SKazu Hirata return false; 2932bede687SKazu Hirata 2942bede687SKazu Hirata std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); 2952bede687SKazu Hirata return hasFormat(*Buffer); 2962bede687SKazu Hirata } 2972bede687SKazu Hirata 2982bede687SKazu Hirata bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) { 2992bede687SKazu Hirata if (Buffer.getBufferSize() < sizeof(uint64_t)) 3002bede687SKazu Hirata return false; 3012bede687SKazu Hirata // Aligned read to sanity check that the buffer was allocated with at least 8b 3022bede687SKazu Hirata // alignment. 3032bede687SKazu Hirata const uint64_t Magic = alignedRead(Buffer.getBufferStart()); 3042bede687SKazu Hirata return Magic == MEMPROF_RAW_MAGIC_64; 3052bede687SKazu Hirata } 3062bede687SKazu Hirata 3072bede687SKazu Hirata void RawMemProfReader::printYAML(raw_ostream &OS) { 3082bede687SKazu Hirata uint64_t NumAllocFunctions = 0, NumMibInfo = 0; 3095add295fSKazu Hirata for (const auto &KV : MemProfData.Records) { 3102bede687SKazu Hirata const size_t NumAllocSites = KV.second.AllocSites.size(); 3112bede687SKazu Hirata if (NumAllocSites > 0) { 3122bede687SKazu Hirata NumAllocFunctions++; 3132bede687SKazu Hirata NumMibInfo += NumAllocSites; 3142bede687SKazu Hirata } 3152bede687SKazu Hirata } 3162bede687SKazu Hirata 3172bede687SKazu Hirata OS << "MemprofProfile:\n"; 3182bede687SKazu Hirata OS << " Summary:\n"; 31930b93db5SMatthew Weingarten OS << " Version: " << MemprofRawVersion << "\n"; 3202bede687SKazu Hirata OS << " NumSegments: " << SegmentInfo.size() << "\n"; 3212bede687SKazu Hirata OS << " NumMibInfo: " << NumMibInfo << "\n"; 3222bede687SKazu Hirata OS << " NumAllocFunctions: " << NumAllocFunctions << "\n"; 3232bede687SKazu Hirata OS << " NumStackOffsets: " << StackMap.size() << "\n"; 3242bede687SKazu Hirata // Print out the segment information. 3252bede687SKazu Hirata OS << " Segments:\n"; 3262bede687SKazu Hirata for (const auto &Entry : SegmentInfo) { 3272bede687SKazu Hirata OS << " -\n"; 3282bede687SKazu Hirata OS << " BuildId: " << getBuildIdString(Entry) << "\n"; 3292bede687SKazu Hirata OS << " Start: 0x" << llvm::utohexstr(Entry.Start) << "\n"; 3302bede687SKazu Hirata OS << " End: 0x" << llvm::utohexstr(Entry.End) << "\n"; 3312bede687SKazu Hirata OS << " Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n"; 3322bede687SKazu Hirata } 3332bede687SKazu Hirata // Print out the merged contents of the profiles. 3342bede687SKazu Hirata OS << " Records:\n"; 3353749e0d4SKazu Hirata for (const auto &[GUID, Record] : *this) { 3362bede687SKazu Hirata OS << " -\n"; 3373749e0d4SKazu Hirata OS << " FunctionGUID: " << GUID << "\n"; 3383749e0d4SKazu Hirata Record.print(OS); 3392bede687SKazu Hirata } 3402bede687SKazu Hirata } 3412bede687SKazu Hirata 3422bede687SKazu Hirata Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) { 3432bede687SKazu Hirata const StringRef FileName = Binary.getBinary()->getFileName(); 3442bede687SKazu Hirata 3452bede687SKazu Hirata auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary()); 3462bede687SKazu Hirata if (!ElfObject) { 3472bede687SKazu Hirata return report(make_error<StringError>(Twine("Not an ELF file: "), 3482bede687SKazu Hirata inconvertibleErrorCode()), 3492bede687SKazu Hirata FileName); 3502bede687SKazu Hirata } 3512bede687SKazu Hirata 3522bede687SKazu Hirata // Check whether the profiled binary was built with position independent code 3532bede687SKazu Hirata // (PIC). Perform sanity checks for assumptions we rely on to simplify 3542bede687SKazu Hirata // symbolization. 3552bede687SKazu Hirata auto *Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject); 3562bede687SKazu Hirata const llvm::object::ELF64LEFile &ElfFile = Elf64LEObject->getELFFile(); 3572bede687SKazu Hirata auto PHdrsOr = ElfFile.program_headers(); 3582bede687SKazu Hirata if (!PHdrsOr) 3592bede687SKazu Hirata return report( 3602bede687SKazu Hirata make_error<StringError>(Twine("Could not read program headers: "), 3612bede687SKazu Hirata inconvertibleErrorCode()), 3622bede687SKazu Hirata FileName); 3632bede687SKazu Hirata 3642bede687SKazu Hirata int NumExecutableSegments = 0; 3652bede687SKazu Hirata for (const auto &Phdr : *PHdrsOr) { 3662bede687SKazu Hirata if (Phdr.p_type == ELF::PT_LOAD) { 3672bede687SKazu Hirata if (Phdr.p_flags & ELF::PF_X) { 3682bede687SKazu Hirata // We assume only one text segment in the main binary for simplicity and 3692bede687SKazu Hirata // reduce the overhead of checking multiple ranges during symbolization. 3702bede687SKazu Hirata if (++NumExecutableSegments > 1) { 3712bede687SKazu Hirata return report( 3722bede687SKazu Hirata make_error<StringError>( 3732bede687SKazu Hirata "Expect only one executable load segment in the binary", 3742bede687SKazu Hirata inconvertibleErrorCode()), 3752bede687SKazu Hirata FileName); 3762bede687SKazu Hirata } 3772bede687SKazu Hirata // Segment will always be loaded at a page boundary, expect it to be 3782bede687SKazu Hirata // aligned already. Assume 4K pagesize for the machine from which the 3792bede687SKazu Hirata // profile has been collected. This should be fine for now, in case we 3802bede687SKazu Hirata // want to support other pagesizes it can be recorded in the raw profile 3812bede687SKazu Hirata // during collection. 3822bede687SKazu Hirata PreferredTextSegmentAddress = Phdr.p_vaddr; 3832bede687SKazu Hirata assert(Phdr.p_vaddr == (Phdr.p_vaddr & ~(0x1000 - 1U)) && 3842bede687SKazu Hirata "Expect p_vaddr to always be page aligned"); 3852bede687SKazu Hirata assert(Phdr.p_offset == 0 && "Expect p_offset = 0 for symbolization."); 3862bede687SKazu Hirata } 3872bede687SKazu Hirata } 3882bede687SKazu Hirata } 3892bede687SKazu Hirata 3902bede687SKazu Hirata auto Triple = ElfObject->makeTriple(); 3912bede687SKazu Hirata if (!Triple.isX86()) 3922bede687SKazu Hirata return report(make_error<StringError>(Twine("Unsupported target: ") + 3932bede687SKazu Hirata Triple.getArchName(), 3942bede687SKazu Hirata inconvertibleErrorCode()), 3952bede687SKazu Hirata FileName); 3962bede687SKazu Hirata 3972bede687SKazu Hirata // Process the raw profile. 3982bede687SKazu Hirata if (Error E = readRawProfile(std::move(DataBuffer))) 3992bede687SKazu Hirata return E; 4002bede687SKazu Hirata 4012bede687SKazu Hirata if (Error E = setupForSymbolization()) 4022bede687SKazu Hirata return E; 4032bede687SKazu Hirata 4042bede687SKazu Hirata auto *Object = cast<object::ObjectFile>(Binary.getBinary()); 4052bede687SKazu Hirata std::unique_ptr<DIContext> Context = DWARFContext::create( 4062bede687SKazu Hirata *Object, DWARFContext::ProcessDebugRelocations::Process); 4072bede687SKazu Hirata 4082bede687SKazu Hirata auto SOFOr = symbolize::SymbolizableObjectFile::create( 4092bede687SKazu Hirata Object, std::move(Context), /*UntagAddresses=*/false); 4102bede687SKazu Hirata if (!SOFOr) 4112bede687SKazu Hirata return report(SOFOr.takeError(), FileName); 4122bede687SKazu Hirata auto Symbolizer = std::move(SOFOr.get()); 4132bede687SKazu Hirata 4142bede687SKazu Hirata // The symbolizer ownership is moved into symbolizeAndFilterStackFrames so 4152bede687SKazu Hirata // that it is freed automatically at the end, when it is no longer used. This 4162bede687SKazu Hirata // reduces peak memory since it won't be live while also mapping the raw 4172bede687SKazu Hirata // profile into records afterwards. 4182bede687SKazu Hirata if (Error E = symbolizeAndFilterStackFrames(std::move(Symbolizer))) 4192bede687SKazu Hirata return E; 4202bede687SKazu Hirata 4212bede687SKazu Hirata return mapRawProfileToRecords(); 4222bede687SKazu Hirata } 4232bede687SKazu Hirata 4242bede687SKazu Hirata Error RawMemProfReader::setupForSymbolization() { 4252bede687SKazu Hirata auto *Object = cast<object::ObjectFile>(Binary.getBinary()); 4262bede687SKazu Hirata object::BuildIDRef BinaryId = object::getBuildID(Object); 4272bede687SKazu Hirata if (BinaryId.empty()) 4282bede687SKazu Hirata return make_error<StringError>(Twine("No build id found in binary ") + 4292bede687SKazu Hirata Binary.getBinary()->getFileName(), 4302bede687SKazu Hirata inconvertibleErrorCode()); 4312bede687SKazu Hirata 4322bede687SKazu Hirata int NumMatched = 0; 4332bede687SKazu Hirata for (const auto &Entry : SegmentInfo) { 4342bede687SKazu Hirata llvm::ArrayRef<uint8_t> SegmentId(Entry.BuildId, Entry.BuildIdSize); 4352bede687SKazu Hirata if (BinaryId == SegmentId) { 4362bede687SKazu Hirata // We assume only one text segment in the main binary for simplicity and 4372bede687SKazu Hirata // reduce the overhead of checking multiple ranges during symbolization. 4382bede687SKazu Hirata if (++NumMatched > 1) { 4392bede687SKazu Hirata return make_error<StringError>( 4402bede687SKazu Hirata "We expect only one executable segment in the profiled binary", 4412bede687SKazu Hirata inconvertibleErrorCode()); 4422bede687SKazu Hirata } 4432bede687SKazu Hirata ProfiledTextSegmentStart = Entry.Start; 4442bede687SKazu Hirata ProfiledTextSegmentEnd = Entry.End; 4452bede687SKazu Hirata } 4462bede687SKazu Hirata } 4472bede687SKazu Hirata assert(NumMatched != 0 && "No matching executable segments in segment info."); 4482bede687SKazu Hirata assert((PreferredTextSegmentAddress == 0 || 4492bede687SKazu Hirata (PreferredTextSegmentAddress == ProfiledTextSegmentStart)) && 4502bede687SKazu Hirata "Expect text segment address to be 0 or equal to profiled text " 4512bede687SKazu Hirata "segment start."); 4522bede687SKazu Hirata return Error::success(); 4532bede687SKazu Hirata } 4542bede687SKazu Hirata 4552bede687SKazu Hirata Error RawMemProfReader::mapRawProfileToRecords() { 4562bede687SKazu Hirata // Hold a mapping from function to each callsite location we encounter within 4572bede687SKazu Hirata // it that is part of some dynamic allocation context. The location is stored 4582bede687SKazu Hirata // as a pointer to a symbolized list of inline frames. 4592bede687SKazu Hirata using LocationPtr = const llvm::SmallVector<FrameId> *; 4602bede687SKazu Hirata llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>> 4612bede687SKazu Hirata PerFunctionCallSites; 4622bede687SKazu Hirata 4632bede687SKazu Hirata // Convert the raw profile callstack data into memprof records. While doing so 4642bede687SKazu Hirata // keep track of related contexts so that we can fill these in later. 4653749e0d4SKazu Hirata for (const auto &[StackId, MIB] : CallstackProfileData) { 4662bede687SKazu Hirata auto It = StackMap.find(StackId); 4672bede687SKazu Hirata if (It == StackMap.end()) 4682bede687SKazu Hirata return make_error<InstrProfError>( 4692bede687SKazu Hirata instrprof_error::malformed, 4702bede687SKazu Hirata "memprof callstack record does not contain id: " + Twine(StackId)); 4712bede687SKazu Hirata 4722bede687SKazu Hirata // Construct the symbolized callstack. 4732bede687SKazu Hirata llvm::SmallVector<FrameId> Callstack; 4742bede687SKazu Hirata Callstack.reserve(It->getSecond().size()); 4752bede687SKazu Hirata 4762bede687SKazu Hirata llvm::ArrayRef<uint64_t> Addresses = It->getSecond(); 4772bede687SKazu Hirata for (size_t I = 0; I < Addresses.size(); I++) { 4782bede687SKazu Hirata const uint64_t Address = Addresses[I]; 4792bede687SKazu Hirata assert(SymbolizedFrame.count(Address) > 0 && 4802bede687SKazu Hirata "Address not found in SymbolizedFrame map"); 4812bede687SKazu Hirata const SmallVector<FrameId> &Frames = SymbolizedFrame[Address]; 4822bede687SKazu Hirata 4832bede687SKazu Hirata assert(!idToFrame(Frames.back()).IsInlineFrame && 4842bede687SKazu Hirata "The last frame should not be inlined"); 4852bede687SKazu Hirata 4862bede687SKazu Hirata // Record the callsites for each function. Skip the first frame of the 4872bede687SKazu Hirata // first address since it is the allocation site itself that is recorded 4882bede687SKazu Hirata // as an alloc site. 4892bede687SKazu Hirata for (size_t J = 0; J < Frames.size(); J++) { 4902bede687SKazu Hirata if (I == 0 && J == 0) 4912bede687SKazu Hirata continue; 4922bede687SKazu Hirata // We attach the entire bottom-up frame here for the callsite even 4932bede687SKazu Hirata // though we only need the frames up to and including the frame for 4942bede687SKazu Hirata // Frames[J].Function. This will enable better deduplication for 4952bede687SKazu Hirata // compression in the future. 4962bede687SKazu Hirata const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function; 4972bede687SKazu Hirata PerFunctionCallSites[Guid].insert(&Frames); 4982bede687SKazu Hirata } 4992bede687SKazu Hirata 5002bede687SKazu Hirata // Add all the frames to the current allocation callstack. 5012bede687SKazu Hirata Callstack.append(Frames.begin(), Frames.end()); 5022bede687SKazu Hirata } 5032bede687SKazu Hirata 504c5e4e8f8SKazu Hirata CallStackId CSId = MemProfData.addCallStack(Callstack); 5052bede687SKazu Hirata 5062bede687SKazu Hirata // We attach the memprof record to each function bottom-up including the 5072bede687SKazu Hirata // first non-inline frame. 5082bede687SKazu Hirata for (size_t I = 0; /*Break out using the condition below*/; I++) { 5092bede687SKazu Hirata const Frame &F = idToFrame(Callstack[I]); 5105add295fSKazu Hirata IndexedMemProfRecord &Record = MemProfData.Records[F.Function]; 5113ce8b7d2SKazu Hirata Record.AllocSites.emplace_back(CSId, MIB); 5122bede687SKazu Hirata 5132bede687SKazu Hirata if (!F.IsInlineFrame) 5142bede687SKazu Hirata break; 5152bede687SKazu Hirata } 5162bede687SKazu Hirata } 5172bede687SKazu Hirata 5182bede687SKazu Hirata // Fill in the related callsites per function. 5192bede687SKazu Hirata for (const auto &[Id, Locs] : PerFunctionCallSites) { 5202bede687SKazu Hirata // Some functions may have only callsite data and no allocation data. Here 5212bede687SKazu Hirata // we insert a new entry for callsite data if we need to. 5225add295fSKazu Hirata IndexedMemProfRecord &Record = MemProfData.Records[Id]; 523c5e4e8f8SKazu Hirata for (LocationPtr Loc : Locs) 524c5e4e8f8SKazu Hirata Record.CallSiteIds.push_back(MemProfData.addCallStack(*Loc)); 5252bede687SKazu Hirata } 5262bede687SKazu Hirata 5272bede687SKazu Hirata return Error::success(); 5282bede687SKazu Hirata } 5292bede687SKazu Hirata 5302bede687SKazu Hirata Error RawMemProfReader::symbolizeAndFilterStackFrames( 5312bede687SKazu Hirata std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer) { 5322bede687SKazu Hirata // The specifier to use when symbolization is requested. 5332bede687SKazu Hirata const DILineInfoSpecifier Specifier( 5342bede687SKazu Hirata DILineInfoSpecifier::FileLineInfoKind::RawValue, 5352bede687SKazu Hirata DILineInfoSpecifier::FunctionNameKind::LinkageName); 5362bede687SKazu Hirata 5372bede687SKazu Hirata // For entries where all PCs in the callstack are discarded, we erase the 5382bede687SKazu Hirata // entry from the stack map. 5392bede687SKazu Hirata llvm::SmallVector<uint64_t> EntriesToErase; 5402bede687SKazu Hirata // We keep track of all prior discarded entries so that we can avoid invoking 5412bede687SKazu Hirata // the symbolizer for such entries. 5422bede687SKazu Hirata llvm::DenseSet<uint64_t> AllVAddrsToDiscard; 5432bede687SKazu Hirata for (auto &Entry : StackMap) { 5442bede687SKazu Hirata for (const uint64_t VAddr : Entry.getSecond()) { 5452bede687SKazu Hirata // Check if we have already symbolized and cached the result or if we 5462bede687SKazu Hirata // don't want to attempt symbolization since we know this address is bad. 5472bede687SKazu Hirata // In this case the address is also removed from the current callstack. 5482bede687SKazu Hirata if (SymbolizedFrame.count(VAddr) > 0 || 5492bede687SKazu Hirata AllVAddrsToDiscard.contains(VAddr)) 5502bede687SKazu Hirata continue; 5512bede687SKazu Hirata 5522bede687SKazu Hirata Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode( 5532bede687SKazu Hirata getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false); 5542bede687SKazu Hirata if (!DIOr) 5552bede687SKazu Hirata return DIOr.takeError(); 5562bede687SKazu Hirata DIInliningInfo DI = DIOr.get(); 5572bede687SKazu Hirata 5582bede687SKazu Hirata // Drop frames which we can't symbolize or if they belong to the runtime. 5592bede687SKazu Hirata if (DI.getFrame(0).FunctionName == DILineInfo::BadString || 5602bede687SKazu Hirata isRuntimePath(DI.getFrame(0).FileName)) { 5612bede687SKazu Hirata AllVAddrsToDiscard.insert(VAddr); 5622bede687SKazu Hirata continue; 5632bede687SKazu Hirata } 5642bede687SKazu Hirata 5652bede687SKazu Hirata for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames; 5662bede687SKazu Hirata I++) { 5672bede687SKazu Hirata const auto &DIFrame = DI.getFrame(I); 5682bede687SKazu Hirata const uint64_t Guid = 5692bede687SKazu Hirata IndexedMemProfRecord::getGUID(DIFrame.FunctionName); 5702bede687SKazu Hirata const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column, 5712bede687SKazu Hirata // Only the last entry is not an inlined location. 5722bede687SKazu Hirata I != NumFrames - 1); 5732bede687SKazu Hirata // Here we retain a mapping from the GUID to canonical symbol name 5742bede687SKazu Hirata // instead of adding it to the frame object directly to reduce memory 5752bede687SKazu Hirata // overhead. This is because there can be many unique frames, 5762bede687SKazu Hirata // particularly for callsite frames. 5772bede687SKazu Hirata if (KeepSymbolName) { 5782bede687SKazu Hirata StringRef CanonicalName = 5792bede687SKazu Hirata sampleprof::FunctionSamples::getCanonicalFnName( 5802bede687SKazu Hirata DIFrame.FunctionName); 5812bede687SKazu Hirata GuidToSymbolName.insert({Guid, CanonicalName.str()}); 5822bede687SKazu Hirata } 5832bede687SKazu Hirata 58450f8580eSKazu Hirata SymbolizedFrame[VAddr].push_back(MemProfData.addFrame(F)); 5852bede687SKazu Hirata } 5862bede687SKazu Hirata } 5872bede687SKazu Hirata 5882bede687SKazu Hirata auto &CallStack = Entry.getSecond(); 5892bede687SKazu Hirata llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) { 5902bede687SKazu Hirata return AllVAddrsToDiscard.contains(A); 5912bede687SKazu Hirata }); 5922bede687SKazu Hirata if (CallStack.empty()) 5932bede687SKazu Hirata EntriesToErase.push_back(Entry.getFirst()); 5942bede687SKazu Hirata } 5952bede687SKazu Hirata 5962bede687SKazu Hirata // Drop the entries where the callstack is empty. 5972bede687SKazu Hirata for (const uint64_t Id : EntriesToErase) { 5982bede687SKazu Hirata StackMap.erase(Id); 599ca4e5a8dSMatthew Weingarten if (CallstackProfileData[Id].AccessHistogramSize > 0) 600ca4e5a8dSMatthew Weingarten free((void *)CallstackProfileData[Id].AccessHistogram); 6012bede687SKazu Hirata CallstackProfileData.erase(Id); 6022bede687SKazu Hirata } 6032bede687SKazu Hirata 6042bede687SKazu Hirata if (StackMap.empty()) 6052bede687SKazu Hirata return make_error<InstrProfError>( 6062bede687SKazu Hirata instrprof_error::malformed, 6072bede687SKazu Hirata "no entries in callstack map after symbolization"); 6082bede687SKazu Hirata 6092bede687SKazu Hirata return Error::success(); 6102bede687SKazu Hirata } 6112bede687SKazu Hirata 6122bede687SKazu Hirata std::vector<std::string> 6132bede687SKazu Hirata RawMemProfReader::peekBuildIds(MemoryBuffer *DataBuffer) { 6142bede687SKazu Hirata const char *Next = DataBuffer->getBufferStart(); 61515135afaSKazu Hirata // Use a SetVector since a profile file may contain multiple raw profile 6162bede687SKazu Hirata // dumps, each with segment information. We want them unique and in order they 6172bede687SKazu Hirata // were stored in the profile; the profiled binary should be the first entry. 6182bede687SKazu Hirata // The runtime uses dl_iterate_phdr and the "... first object visited by 6192bede687SKazu Hirata // callback is the main program." 6202bede687SKazu Hirata // https://man7.org/linux/man-pages/man3/dl_iterate_phdr.3.html 62115135afaSKazu Hirata llvm::SetVector<std::string, std::vector<std::string>, 62215135afaSKazu Hirata llvm::SmallSet<std::string, 10>> 62315135afaSKazu Hirata BuildIds; 6242bede687SKazu Hirata while (Next < DataBuffer->getBufferEnd()) { 625bfa937a4SKazu Hirata const auto *Header = reinterpret_cast<const memprof::Header *>(Next); 6262bede687SKazu Hirata 6272bede687SKazu Hirata const llvm::SmallVector<SegmentEntry> Entries = 6282bede687SKazu Hirata readSegmentEntries(Next + Header->SegmentOffset); 6292bede687SKazu Hirata 63015135afaSKazu Hirata for (const auto &Entry : Entries) 63115135afaSKazu Hirata BuildIds.insert(getBuildIdString(Entry)); 6322bede687SKazu Hirata 6332bede687SKazu Hirata Next += Header->TotalSize; 6342bede687SKazu Hirata } 63515135afaSKazu Hirata return BuildIds.takeVector(); 6362bede687SKazu Hirata } 6372bede687SKazu Hirata 63830b93db5SMatthew Weingarten // FIXME: Add a schema for serializing similiar to IndexedMemprofReader. This 63930b93db5SMatthew Weingarten // will help being able to deserialize different versions raw memprof versions 64030b93db5SMatthew Weingarten // more easily. 64130b93db5SMatthew Weingarten llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> 64230b93db5SMatthew Weingarten RawMemProfReader::readMemInfoBlocks(const char *Ptr) { 64330b93db5SMatthew Weingarten if (MemprofRawVersion == 3ULL) 64430b93db5SMatthew Weingarten return readMemInfoBlocksV3(Ptr); 64522b36bfaSKazu Hirata if (MemprofRawVersion == 4ULL) 64630b93db5SMatthew Weingarten return readMemInfoBlocksV4(Ptr); 64722b36bfaSKazu Hirata llvm_unreachable( 64830b93db5SMatthew Weingarten "Panic: Unsupported version number when reading MemInfoBlocks"); 64930b93db5SMatthew Weingarten } 65030b93db5SMatthew Weingarten 6512bede687SKazu Hirata Error RawMemProfReader::readRawProfile( 6522bede687SKazu Hirata std::unique_ptr<MemoryBuffer> DataBuffer) { 6532bede687SKazu Hirata const char *Next = DataBuffer->getBufferStart(); 6542bede687SKazu Hirata 6552bede687SKazu Hirata while (Next < DataBuffer->getBufferEnd()) { 656bfa937a4SKazu Hirata const auto *Header = reinterpret_cast<const memprof::Header *>(Next); 6572bede687SKazu Hirata 65830b93db5SMatthew Weingarten // Set Reader version to memprof raw version of profile. Checking if version 65930b93db5SMatthew Weingarten // is supported is checked before creating the reader. 66030b93db5SMatthew Weingarten MemprofRawVersion = Header->Version; 66130b93db5SMatthew Weingarten 6622bede687SKazu Hirata // Read in the segment information, check whether its the same across all 6632bede687SKazu Hirata // profiles in this binary file. 6642bede687SKazu Hirata const llvm::SmallVector<SegmentEntry> Entries = 6652bede687SKazu Hirata readSegmentEntries(Next + Header->SegmentOffset); 6662bede687SKazu Hirata if (!SegmentInfo.empty() && SegmentInfo != Entries) { 6672bede687SKazu Hirata // We do not expect segment information to change when deserializing from 6682bede687SKazu Hirata // the same binary profile file. This can happen if dynamic libraries are 6692bede687SKazu Hirata // loaded/unloaded between profile dumping. 6702bede687SKazu Hirata return make_error<InstrProfError>( 6712bede687SKazu Hirata instrprof_error::malformed, 6722bede687SKazu Hirata "memprof raw profile has different segment information"); 6732bede687SKazu Hirata } 6742bede687SKazu Hirata SegmentInfo.assign(Entries.begin(), Entries.end()); 6752bede687SKazu Hirata 6762bede687SKazu Hirata // Read in the MemInfoBlocks. Merge them based on stack id - we assume that 6772bede687SKazu Hirata // raw profiles in the same binary file are from the same process so the 6782bede687SKazu Hirata // stackdepot ids are the same. 6792bede687SKazu Hirata for (const auto &[Id, MIB] : readMemInfoBlocks(Next + Header->MIBOffset)) { 6802bede687SKazu Hirata if (CallstackProfileData.count(Id)) { 68130b93db5SMatthew Weingarten 68230b93db5SMatthew Weingarten if (MemprofRawVersion >= 4ULL && 68330b93db5SMatthew Weingarten (CallstackProfileData[Id].AccessHistogramSize > 0 || 68430b93db5SMatthew Weingarten MIB.AccessHistogramSize > 0)) { 68530b93db5SMatthew Weingarten uintptr_t ShorterHistogram; 68630b93db5SMatthew Weingarten if (CallstackProfileData[Id].AccessHistogramSize > 68730b93db5SMatthew Weingarten MIB.AccessHistogramSize) 68830b93db5SMatthew Weingarten ShorterHistogram = MIB.AccessHistogram; 68930b93db5SMatthew Weingarten else 69030b93db5SMatthew Weingarten ShorterHistogram = CallstackProfileData[Id].AccessHistogram; 6912bede687SKazu Hirata CallstackProfileData[Id].Merge(MIB); 69230b93db5SMatthew Weingarten free((void *)ShorterHistogram); 69330b93db5SMatthew Weingarten } else { 69430b93db5SMatthew Weingarten CallstackProfileData[Id].Merge(MIB); 69530b93db5SMatthew Weingarten } 6962bede687SKazu Hirata } else { 6972bede687SKazu Hirata CallstackProfileData[Id] = MIB; 6982bede687SKazu Hirata } 6992bede687SKazu Hirata } 7002bede687SKazu Hirata 7012bede687SKazu Hirata // Read in the callstack for each ids. For multiple raw profiles in the same 7022bede687SKazu Hirata // file, we expect that the callstack is the same for a unique id. 7032bede687SKazu Hirata const CallStackMap CSM = readStackInfo(Next + Header->StackOffset); 7042bede687SKazu Hirata if (StackMap.empty()) { 7052bede687SKazu Hirata StackMap = CSM; 7062bede687SKazu Hirata } else { 7072bede687SKazu Hirata if (mergeStackMap(CSM, StackMap)) 7082bede687SKazu Hirata return make_error<InstrProfError>( 7092bede687SKazu Hirata instrprof_error::malformed, 7102bede687SKazu Hirata "memprof raw profile got different call stack for same id"); 7112bede687SKazu Hirata } 7122bede687SKazu Hirata 7132bede687SKazu Hirata Next += Header->TotalSize; 7142bede687SKazu Hirata } 7152bede687SKazu Hirata 7162bede687SKazu Hirata return Error::success(); 7172bede687SKazu Hirata } 7182bede687SKazu Hirata 7192bede687SKazu Hirata object::SectionedAddress 7202bede687SKazu Hirata RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) { 7212bede687SKazu Hirata if (VirtualAddress > ProfiledTextSegmentStart && 7222bede687SKazu Hirata VirtualAddress <= ProfiledTextSegmentEnd) { 7232bede687SKazu Hirata // For PIE binaries, the preferred address is zero and we adjust the virtual 7242bede687SKazu Hirata // address by start of the profiled segment assuming that the offset of the 7252bede687SKazu Hirata // segment in the binary is zero. For non-PIE binaries the preferred and 7262bede687SKazu Hirata // profiled segment addresses should be equal and this is a no-op. 7272bede687SKazu Hirata const uint64_t AdjustedAddress = 7282bede687SKazu Hirata VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart; 7292bede687SKazu Hirata return object::SectionedAddress{AdjustedAddress}; 7302bede687SKazu Hirata } 7312bede687SKazu Hirata // Addresses which do not originate from the profiled text segment in the 7322bede687SKazu Hirata // binary are not adjusted. These will fail symbolization and be filtered out 7332bede687SKazu Hirata // during processing. 7342bede687SKazu Hirata return object::SectionedAddress{VirtualAddress}; 7352bede687SKazu Hirata } 7362bede687SKazu Hirata 7372bede687SKazu Hirata Error RawMemProfReader::readNextRecord( 7382bede687SKazu Hirata GuidMemProfRecordPair &GuidRecord, 7392bede687SKazu Hirata std::function<const Frame(const FrameId)> Callback) { 7402bede687SKazu Hirata // Create a new callback for the RawMemProfRecord iterator so that we can 7412bede687SKazu Hirata // provide the symbol name if the reader was initialized with KeepSymbolName = 7422bede687SKazu Hirata // true. This is useful for debugging and testing. 7432bede687SKazu Hirata auto IdToFrameCallback = [this](const FrameId Id) { 7442bede687SKazu Hirata Frame F = this->idToFrame(Id); 7452bede687SKazu Hirata if (!this->KeepSymbolName) 7462bede687SKazu Hirata return F; 7472bede687SKazu Hirata auto Iter = this->GuidToSymbolName.find(F.Function); 7482bede687SKazu Hirata assert(Iter != this->GuidToSymbolName.end()); 749d55e235bSKazu Hirata F.SymbolName = std::make_unique<std::string>(Iter->getSecond()); 7502bede687SKazu Hirata return F; 7512bede687SKazu Hirata }; 7522bede687SKazu Hirata return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback); 7532bede687SKazu Hirata } 754e98396f4SKazu Hirata 755684e79f2SKazu Hirata Expected<std::unique_ptr<YAMLMemProfReader>> 756684e79f2SKazu Hirata YAMLMemProfReader::create(const Twine &Path) { 757*1d515466SZibi Sarbinowski auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true); 758684e79f2SKazu Hirata if (std::error_code EC = BufferOr.getError()) 759684e79f2SKazu Hirata return report(errorCodeToError(EC), Path.getSingleStringRef()); 760684e79f2SKazu Hirata 761684e79f2SKazu Hirata std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); 762684e79f2SKazu Hirata return create(std::move(Buffer)); 763684e79f2SKazu Hirata } 764684e79f2SKazu Hirata 765684e79f2SKazu Hirata Expected<std::unique_ptr<YAMLMemProfReader>> 766684e79f2SKazu Hirata YAMLMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) { 767684e79f2SKazu Hirata auto Reader = std::make_unique<YAMLMemProfReader>(); 768684e79f2SKazu Hirata Reader->parse(Buffer->getBuffer()); 769684e79f2SKazu Hirata return std::move(Reader); 770684e79f2SKazu Hirata } 771684e79f2SKazu Hirata 772684e79f2SKazu Hirata bool YAMLMemProfReader::hasFormat(const StringRef Path) { 773*1d515466SZibi Sarbinowski auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true); 774684e79f2SKazu Hirata if (!BufferOr) 775684e79f2SKazu Hirata return false; 776684e79f2SKazu Hirata 777684e79f2SKazu Hirata std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); 778684e79f2SKazu Hirata return hasFormat(*Buffer); 779684e79f2SKazu Hirata } 780684e79f2SKazu Hirata 781684e79f2SKazu Hirata bool YAMLMemProfReader::hasFormat(const MemoryBuffer &Buffer) { 782684e79f2SKazu Hirata return Buffer.getBuffer().starts_with("---"); 783684e79f2SKazu Hirata } 784684e79f2SKazu Hirata 785e98396f4SKazu Hirata void YAMLMemProfReader::parse(StringRef YAMLData) { 786e98396f4SKazu Hirata memprof::AllMemProfData Doc; 787e98396f4SKazu Hirata yaml::Input Yin(YAMLData); 788e98396f4SKazu Hirata 789e98396f4SKazu Hirata Yin >> Doc; 790e98396f4SKazu Hirata if (Yin.error()) 791e98396f4SKazu Hirata return; 792e98396f4SKazu Hirata 793e98396f4SKazu Hirata // Add a call stack to MemProfData.CallStacks and return its CallStackId. 794e98396f4SKazu Hirata auto AddCallStack = [&](ArrayRef<Frame> CallStack) -> CallStackId { 795e98396f4SKazu Hirata SmallVector<FrameId> IndexedCallStack; 796e98396f4SKazu Hirata IndexedCallStack.reserve(CallStack.size()); 79750f8580eSKazu Hirata for (const Frame &F : CallStack) 79850f8580eSKazu Hirata IndexedCallStack.push_back(MemProfData.addFrame(F)); 799c5e4e8f8SKazu Hirata return MemProfData.addCallStack(std::move(IndexedCallStack)); 800e98396f4SKazu Hirata }; 801e98396f4SKazu Hirata 802e98396f4SKazu Hirata for (const auto &[GUID, Record] : Doc.HeapProfileRecords) { 803e98396f4SKazu Hirata IndexedMemProfRecord IndexedRecord; 804e98396f4SKazu Hirata 805e98396f4SKazu Hirata // Convert AllocationInfo to IndexedAllocationInfo. 806e98396f4SKazu Hirata for (const AllocationInfo &AI : Record.AllocSites) { 807e98396f4SKazu Hirata CallStackId CSId = AddCallStack(AI.CallStack); 808e98396f4SKazu Hirata IndexedRecord.AllocSites.emplace_back(CSId, AI.Info); 809e98396f4SKazu Hirata } 810e98396f4SKazu Hirata 811e98396f4SKazu Hirata // Populate CallSiteIds. 812e98396f4SKazu Hirata for (const auto &CallSite : Record.CallSites) { 813e98396f4SKazu Hirata CallStackId CSId = AddCallStack(CallSite); 814e98396f4SKazu Hirata IndexedRecord.CallSiteIds.push_back(CSId); 815e98396f4SKazu Hirata } 816e98396f4SKazu Hirata 817e98396f4SKazu Hirata MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord)); 818e98396f4SKazu Hirata } 819e98396f4SKazu Hirata } 8202bede687SKazu Hirata } // namespace memprof 8212bede687SKazu Hirata } // namespace llvm 822