1 //===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading MemProf profiling data. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_PROFILEDATA_MEMPROFREADER_H_ 14 #define LLVM_PROFILEDATA_MEMPROFREADER_H_ 15 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/MapVector.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 20 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 21 #include "llvm/IR/GlobalValue.h" 22 #include "llvm/Object/Binary.h" 23 #include "llvm/Object/ObjectFile.h" 24 #include "llvm/ProfileData/InstrProfReader.h" 25 #include "llvm/ProfileData/MemProf.h" 26 #include "llvm/ProfileData/MemProfData.inc" 27 #include "llvm/Support/Error.h" 28 #include "llvm/Support/MemoryBuffer.h" 29 30 #include <functional> 31 32 namespace llvm { 33 namespace memprof { 34 // A class for memprof profile data populated directly from external 35 // sources. 36 class MemProfReader { 37 public: 38 // The MemProfReader only holds memory profile information. 39 InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; } 40 41 using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>; 42 using Iterator = InstrProfIterator<GuidMemProfRecordPair, MemProfReader>; 43 Iterator end() { return Iterator(); } 44 Iterator begin() { 45 Iter = MemProfData.Records.begin(); 46 return Iterator(this); 47 } 48 49 // Take the complete profile data. Once this function is invoked, 50 // MemProfReader no longer owns the MemProf profile. 51 IndexedMemProfData takeMemProfData() { return std::move(MemProfData); } 52 53 virtual Error 54 readNextRecord(GuidMemProfRecordPair &GuidRecord, 55 std::function<const Frame(const FrameId)> Callback = nullptr) { 56 if (MemProfData.Records.empty()) 57 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 58 59 if (Iter == MemProfData.Records.end()) 60 return make_error<InstrProfError>(instrprof_error::eof); 61 62 if (Callback == nullptr) 63 Callback = 64 std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1); 65 66 CallStackIdConverter<decltype(MemProfData.CallStacks)> CSIdConv( 67 MemProfData.CallStacks, Callback); 68 69 const IndexedMemProfRecord &IndexedRecord = Iter->second; 70 GuidRecord = { 71 Iter->first, 72 IndexedRecord.toMemProfRecord(CSIdConv), 73 }; 74 if (CSIdConv.LastUnmappedId) 75 return make_error<InstrProfError>(instrprof_error::hash_mismatch); 76 Iter++; 77 return Error::success(); 78 } 79 80 // Allow default construction for derived classes which can populate the 81 // contents after construction. 82 MemProfReader() = default; 83 virtual ~MemProfReader() = default; 84 85 // Initialize the MemProfReader with the given MemProf profile. 86 MemProfReader(IndexedMemProfData &&MemProfData) 87 : MemProfData(std::move(MemProfData)) {} 88 89 protected: 90 // A helper method to extract the frame from the IdToFrame map. 91 const Frame &idToFrame(const FrameId Id) const { 92 auto It = MemProfData.Frames.find(Id); 93 assert(It != MemProfData.Frames.end() && "Id not found in map."); 94 return It->second; 95 } 96 // A complete pacakge of the MemProf profile. 97 IndexedMemProfData MemProfData; 98 // An iterator to the internal function profile data structure. 99 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter; 100 }; 101 102 // Map from id (recorded from sanitizer stack depot) to virtual addresses for 103 // each program counter address in the callstack. 104 using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>; 105 106 // Specializes the MemProfReader class to populate the contents from raw binary 107 // memprof profiles from instrumentation based profiling. 108 class RawMemProfReader final : public MemProfReader { 109 public: 110 RawMemProfReader(const RawMemProfReader &) = delete; 111 RawMemProfReader &operator=(const RawMemProfReader &) = delete; 112 virtual ~RawMemProfReader() override; 113 114 // Prints the contents of the profile in YAML format. 115 void printYAML(raw_ostream &OS); 116 117 // Return true if the \p DataBuffer starts with magic bytes indicating it is 118 // a raw binary memprof profile. 119 static bool hasFormat(const MemoryBuffer &DataBuffer); 120 // Return true if the file at \p Path starts with magic bytes indicating it is 121 // a raw binary memprof profile. 122 static bool hasFormat(const StringRef Path); 123 124 // Create a RawMemProfReader after sanity checking the contents of the file at 125 // \p Path or the \p Buffer. The binary from which the profile has been 126 // collected is specified via a path in \p ProfiledBinary. 127 static Expected<std::unique_ptr<RawMemProfReader>> 128 create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false); 129 static Expected<std::unique_ptr<RawMemProfReader>> 130 create(std::unique_ptr<MemoryBuffer> Buffer, StringRef ProfiledBinary, 131 bool KeepName = false); 132 133 // Returns a list of build ids recorded in the segment information. 134 static std::vector<std::string> peekBuildIds(MemoryBuffer *DataBuffer); 135 136 Error 137 readNextRecord(GuidMemProfRecordPair &GuidRecord, 138 std::function<const Frame(const FrameId)> Callback) override; 139 140 // Constructor for unittests only. 141 RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym, 142 llvm::SmallVectorImpl<SegmentEntry> &Seg, 143 llvm::MapVector<uint64_t, MemInfoBlock> &Prof, 144 CallStackMap &SM, bool KeepName = false) 145 : SegmentInfo(Seg.begin(), Seg.end()), CallstackProfileData(Prof), 146 StackMap(SM), KeepSymbolName(KeepName) { 147 // We don't call initialize here since there is no raw profile to read. The 148 // test should pass in the raw profile as structured data. 149 150 // If there is an error here then the mock symbolizer has not been 151 // initialized properly. 152 if (Error E = symbolizeAndFilterStackFrames(std::move(Sym))) 153 report_fatal_error(std::move(E)); 154 if (Error E = mapRawProfileToRecords()) 155 report_fatal_error(std::move(E)); 156 } 157 158 private: 159 RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName) 160 : Binary(std::move(Bin)), KeepSymbolName(KeepName) {} 161 // Initializes the RawMemProfReader with the contents in `DataBuffer`. 162 Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer); 163 // Read and parse the contents of the `DataBuffer` as a binary format profile. 164 Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer); 165 // Initialize the segment mapping information for symbolization. 166 Error setupForSymbolization(); 167 // Symbolize and cache all the virtual addresses we encounter in the 168 // callstacks from the raw profile. Also prune callstack frames which we can't 169 // symbolize or those that belong to the runtime. For profile entries where 170 // the entire callstack is pruned, we drop the entry from the profile. 171 Error symbolizeAndFilterStackFrames( 172 std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer); 173 // Construct memprof records for each function and store it in the 174 // `FunctionProfileData` map. A function may have allocation profile data or 175 // callsite data or both. 176 Error mapRawProfileToRecords(); 177 178 object::SectionedAddress getModuleOffset(uint64_t VirtualAddress); 179 180 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> 181 readMemInfoBlocks(const char *Ptr); 182 183 // The profiled binary. 184 object::OwningBinary<object::Binary> Binary; 185 // Version of raw memprof binary currently being read. Defaults to most up 186 // to date version. 187 uint64_t MemprofRawVersion = MEMPROF_RAW_VERSION; 188 // The preferred load address of the executable segment. 189 uint64_t PreferredTextSegmentAddress = 0; 190 // The base address of the text segment in the process during profiling. 191 uint64_t ProfiledTextSegmentStart = 0; 192 // The limit address of the text segment in the process during profiling. 193 uint64_t ProfiledTextSegmentEnd = 0; 194 195 // The memory mapped segment information for all executable segments in the 196 // profiled binary (filtered from the raw profile using the build id). 197 llvm::SmallVector<SegmentEntry, 2> SegmentInfo; 198 199 // A map from callstack id (same as key in CallStackMap below) to the heap 200 // information recorded for that allocation context. 201 llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData; 202 CallStackMap StackMap; 203 204 // Cached symbolization from PC to Frame. 205 llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame; 206 207 // Whether to keep the symbol name for each frame after hashing. 208 bool KeepSymbolName = false; 209 // A mapping of the hash to symbol name, only used if KeepSymbolName is true. 210 llvm::DenseMap<uint64_t, std::string> GuidToSymbolName; 211 }; 212 213 class YAMLMemProfReader final : public MemProfReader { 214 public: 215 YAMLMemProfReader() = default; 216 217 // Return true if the \p DataBuffer starts with "---" indicating it is a YAML 218 // file. 219 static bool hasFormat(const MemoryBuffer &DataBuffer); 220 // Wrapper around hasFormat above, reading the file instead of the memory 221 // buffer. 222 static bool hasFormat(const StringRef Path); 223 224 // Create a YAMLMemProfReader after sanity checking the contents of the file 225 // at \p Path or the \p Buffer. 226 static Expected<std::unique_ptr<YAMLMemProfReader>> create(const Twine &Path); 227 static Expected<std::unique_ptr<YAMLMemProfReader>> 228 create(std::unique_ptr<MemoryBuffer> Buffer); 229 230 void parse(StringRef YAMLData); 231 }; 232 } // namespace memprof 233 } // namespace llvm 234 235 #endif // LLVM_PROFILEDATA_MEMPROFREADER_H_ 236