xref: /llvm-project/llvm/include/llvm/ProfileData/MemProfReader.h (revision 684e79f25415250afa51bfcd294d793720aa4bae)
1 //===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading MemProf profiling data.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_PROFILEDATA_MEMPROFREADER_H_
14 #define LLVM_PROFILEDATA_MEMPROFREADER_H_
15 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
20 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
21 #include "llvm/IR/GlobalValue.h"
22 #include "llvm/Object/Binary.h"
23 #include "llvm/Object/ObjectFile.h"
24 #include "llvm/ProfileData/InstrProfReader.h"
25 #include "llvm/ProfileData/MemProf.h"
26 #include "llvm/ProfileData/MemProfData.inc"
27 #include "llvm/Support/Error.h"
28 #include "llvm/Support/MemoryBuffer.h"
29 
30 #include <functional>
31 
32 namespace llvm {
33 namespace memprof {
34 // A class for memprof profile data populated directly from external
35 // sources.
36 class MemProfReader {
37 public:
38   // The MemProfReader only holds memory profile information.
39   InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; }
40 
41   using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>;
42   using Iterator = InstrProfIterator<GuidMemProfRecordPair, MemProfReader>;
43   Iterator end() { return Iterator(); }
44   Iterator begin() {
45     Iter = MemProfData.Records.begin();
46     return Iterator(this);
47   }
48 
49   // Take the complete profile data.  Once this function is invoked,
50   // MemProfReader no longer owns the MemProf profile.
51   IndexedMemProfData takeMemProfData() { return std::move(MemProfData); }
52 
53   virtual Error
54   readNextRecord(GuidMemProfRecordPair &GuidRecord,
55                  std::function<const Frame(const FrameId)> Callback = nullptr) {
56     if (MemProfData.Records.empty())
57       return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
58 
59     if (Iter == MemProfData.Records.end())
60       return make_error<InstrProfError>(instrprof_error::eof);
61 
62     if (Callback == nullptr)
63       Callback =
64           std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1);
65 
66     CallStackIdConverter<decltype(MemProfData.CallStacks)> CSIdConv(
67         MemProfData.CallStacks, Callback);
68 
69     const IndexedMemProfRecord &IndexedRecord = Iter->second;
70     GuidRecord = {
71         Iter->first,
72         IndexedRecord.toMemProfRecord(CSIdConv),
73     };
74     if (CSIdConv.LastUnmappedId)
75       return make_error<InstrProfError>(instrprof_error::hash_mismatch);
76     Iter++;
77     return Error::success();
78   }
79 
80   // Allow default construction for derived classes which can populate the
81   // contents after construction.
82   MemProfReader() = default;
83   virtual ~MemProfReader() = default;
84 
85   // Initialize the MemProfReader with the given MemProf profile.
86   MemProfReader(IndexedMemProfData &&MemProfData)
87       : MemProfData(std::move(MemProfData)) {}
88 
89 protected:
90   // A helper method to extract the frame from the IdToFrame map.
91   const Frame &idToFrame(const FrameId Id) const {
92     auto It = MemProfData.Frames.find(Id);
93     assert(It != MemProfData.Frames.end() && "Id not found in map.");
94     return It->second;
95   }
96   // A complete pacakge of the MemProf profile.
97   IndexedMemProfData MemProfData;
98   // An iterator to the internal function profile data structure.
99   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter;
100 };
101 
102 // Map from id (recorded from sanitizer stack depot) to virtual addresses for
103 // each program counter address in the callstack.
104 using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>;
105 
106 // Specializes the MemProfReader class to populate the contents from raw binary
107 // memprof profiles from instrumentation based profiling.
108 class RawMemProfReader final : public MemProfReader {
109 public:
110   RawMemProfReader(const RawMemProfReader &) = delete;
111   RawMemProfReader &operator=(const RawMemProfReader &) = delete;
112   virtual ~RawMemProfReader() override;
113 
114   // Prints the contents of the profile in YAML format.
115   void printYAML(raw_ostream &OS);
116 
117   // Return true if the \p DataBuffer starts with magic bytes indicating it is
118   // a raw binary memprof profile.
119   static bool hasFormat(const MemoryBuffer &DataBuffer);
120   // Return true if the file at \p Path starts with magic bytes indicating it is
121   // a raw binary memprof profile.
122   static bool hasFormat(const StringRef Path);
123 
124   // Create a RawMemProfReader after sanity checking the contents of the file at
125   // \p Path or the \p Buffer. The binary from which the profile has been
126   // collected is specified via a path in \p ProfiledBinary.
127   static Expected<std::unique_ptr<RawMemProfReader>>
128   create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false);
129   static Expected<std::unique_ptr<RawMemProfReader>>
130   create(std::unique_ptr<MemoryBuffer> Buffer, StringRef ProfiledBinary,
131          bool KeepName = false);
132 
133   // Returns a list of build ids recorded in the segment information.
134   static std::vector<std::string> peekBuildIds(MemoryBuffer *DataBuffer);
135 
136   Error
137   readNextRecord(GuidMemProfRecordPair &GuidRecord,
138                  std::function<const Frame(const FrameId)> Callback) override;
139 
140   // Constructor for unittests only.
141   RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym,
142                    llvm::SmallVectorImpl<SegmentEntry> &Seg,
143                    llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
144                    CallStackMap &SM, bool KeepName = false)
145       : SegmentInfo(Seg.begin(), Seg.end()), CallstackProfileData(Prof),
146         StackMap(SM), KeepSymbolName(KeepName) {
147     // We don't call initialize here since there is no raw profile to read. The
148     // test should pass in the raw profile as structured data.
149 
150     // If there is an error here then the mock symbolizer has not been
151     // initialized properly.
152     if (Error E = symbolizeAndFilterStackFrames(std::move(Sym)))
153       report_fatal_error(std::move(E));
154     if (Error E = mapRawProfileToRecords())
155       report_fatal_error(std::move(E));
156   }
157 
158 private:
159   RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName)
160       : Binary(std::move(Bin)), KeepSymbolName(KeepName) {}
161   // Initializes the RawMemProfReader with the contents in `DataBuffer`.
162   Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer);
163   // Read and parse the contents of the `DataBuffer` as a binary format profile.
164   Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer);
165   // Initialize the segment mapping information for symbolization.
166   Error setupForSymbolization();
167   // Symbolize and cache all the virtual addresses we encounter in the
168   // callstacks from the raw profile. Also prune callstack frames which we can't
169   // symbolize or those that belong to the runtime. For profile entries where
170   // the entire callstack is pruned, we drop the entry from the profile.
171   Error symbolizeAndFilterStackFrames(
172       std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer);
173   // Construct memprof records for each function and store it in the
174   // `FunctionProfileData` map. A function may have allocation profile data or
175   // callsite data or both.
176   Error mapRawProfileToRecords();
177 
178   object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
179 
180   llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
181   readMemInfoBlocks(const char *Ptr);
182 
183   // The profiled binary.
184   object::OwningBinary<object::Binary> Binary;
185   // Version of raw memprof binary currently being read. Defaults to most up
186   // to date version.
187   uint64_t MemprofRawVersion = MEMPROF_RAW_VERSION;
188   // The preferred load address of the executable segment.
189   uint64_t PreferredTextSegmentAddress = 0;
190   // The base address of the text segment in the process during profiling.
191   uint64_t ProfiledTextSegmentStart = 0;
192   // The limit address of the text segment in the process during profiling.
193   uint64_t ProfiledTextSegmentEnd = 0;
194 
195   // The memory mapped segment information for all executable segments in the
196   // profiled binary (filtered from the raw profile using the build id).
197   llvm::SmallVector<SegmentEntry, 2> SegmentInfo;
198 
199   // A map from callstack id (same as key in CallStackMap below) to the heap
200   // information recorded for that allocation context.
201   llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData;
202   CallStackMap StackMap;
203 
204   // Cached symbolization from PC to Frame.
205   llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame;
206 
207   // Whether to keep the symbol name for each frame after hashing.
208   bool KeepSymbolName = false;
209   // A mapping of the hash to symbol name, only used if KeepSymbolName is true.
210   llvm::DenseMap<uint64_t, std::string> GuidToSymbolName;
211 };
212 
213 class YAMLMemProfReader final : public MemProfReader {
214 public:
215   YAMLMemProfReader() = default;
216 
217   // Return true if the \p DataBuffer starts with "---" indicating it is a YAML
218   // file.
219   static bool hasFormat(const MemoryBuffer &DataBuffer);
220   // Wrapper around hasFormat above, reading the file instead of the memory
221   // buffer.
222   static bool hasFormat(const StringRef Path);
223 
224   // Create a YAMLMemProfReader after sanity checking the contents of the file
225   // at \p Path or the \p Buffer.
226   static Expected<std::unique_ptr<YAMLMemProfReader>> create(const Twine &Path);
227   static Expected<std::unique_ptr<YAMLMemProfReader>>
228   create(std::unique_ptr<MemoryBuffer> Buffer);
229 
230   void parse(StringRef YAMLData);
231 };
232 } // namespace memprof
233 } // namespace llvm
234 
235 #endif // LLVM_PROFILEDATA_MEMPROFREADER_H_
236