xref: /freebsd-src/contrib/llvm-project/llvm/include/llvm/ProfileData/MemProf.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
181ad6265SDimitry Andric #ifndef LLVM_PROFILEDATA_MEMPROF_H_
281ad6265SDimitry Andric #define LLVM_PROFILEDATA_MEMPROF_H_
381ad6265SDimitry Andric 
4*0fca6ea1SDimitry Andric #include "llvm/ADT/MapVector.h"
5*0fca6ea1SDimitry Andric #include "llvm/ADT/STLForwardCompat.h"
681ad6265SDimitry Andric #include "llvm/ADT/STLFunctionalExtras.h"
781ad6265SDimitry Andric #include "llvm/ADT/SmallVector.h"
881ad6265SDimitry Andric #include "llvm/IR/GlobalValue.h"
981ad6265SDimitry Andric #include "llvm/ProfileData/MemProfData.inc"
1081ad6265SDimitry Andric #include "llvm/Support/Endian.h"
1181ad6265SDimitry Andric #include "llvm/Support/EndianStream.h"
1281ad6265SDimitry Andric #include "llvm/Support/raw_ostream.h"
1381ad6265SDimitry Andric 
14*0fca6ea1SDimitry Andric #include <bitset>
1581ad6265SDimitry Andric #include <cstdint>
16bdd1243dSDimitry Andric #include <optional>
1781ad6265SDimitry Andric 
1881ad6265SDimitry Andric namespace llvm {
1981ad6265SDimitry Andric namespace memprof {
2081ad6265SDimitry Andric 
21*0fca6ea1SDimitry Andric struct MemProfRecord;
22*0fca6ea1SDimitry Andric 
23*0fca6ea1SDimitry Andric // The versions of the indexed MemProf format
24*0fca6ea1SDimitry Andric enum IndexedVersion : uint64_t {
25*0fca6ea1SDimitry Andric   // Version 0: This version didn't have a version field.
26*0fca6ea1SDimitry Andric   Version0 = 0,
27*0fca6ea1SDimitry Andric   // Version 1: Added a version field to the header.
28*0fca6ea1SDimitry Andric   Version1 = 1,
29*0fca6ea1SDimitry Andric   // Version 2: Added a call stack table.
30*0fca6ea1SDimitry Andric   Version2 = 2,
31*0fca6ea1SDimitry Andric   // Version 3: Added a radix tree for call stacks.  Switched to linear IDs for
32*0fca6ea1SDimitry Andric   // frames and call stacks.
33*0fca6ea1SDimitry Andric   Version3 = 3,
34*0fca6ea1SDimitry Andric };
35*0fca6ea1SDimitry Andric 
36*0fca6ea1SDimitry Andric constexpr uint64_t MinimumSupportedVersion = Version0;
37*0fca6ea1SDimitry Andric constexpr uint64_t MaximumSupportedVersion = Version3;
38*0fca6ea1SDimitry Andric 
39*0fca6ea1SDimitry Andric // Verify that the minimum and maximum satisfy the obvious constraint.
40*0fca6ea1SDimitry Andric static_assert(MinimumSupportedVersion <= MaximumSupportedVersion);
41*0fca6ea1SDimitry Andric 
4281ad6265SDimitry Andric enum class Meta : uint64_t {
4381ad6265SDimitry Andric   Start = 0,
4481ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type) NameTag,
4581ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc"
4681ad6265SDimitry Andric #undef MIBEntryDef
4781ad6265SDimitry Andric   Size
4881ad6265SDimitry Andric };
4981ad6265SDimitry Andric 
5081ad6265SDimitry Andric using MemProfSchema = llvm::SmallVector<Meta, static_cast<int>(Meta::Size)>;
5181ad6265SDimitry Andric 
52*0fca6ea1SDimitry Andric // Returns the full schema currently in use.
53*0fca6ea1SDimitry Andric MemProfSchema getFullSchema();
54*0fca6ea1SDimitry Andric 
55*0fca6ea1SDimitry Andric // Returns the schema consisting of the fields used for hot cold memory hinting.
56*0fca6ea1SDimitry Andric MemProfSchema getHotColdSchema();
57*0fca6ea1SDimitry Andric 
5881ad6265SDimitry Andric // Holds the actual MemInfoBlock data with all fields. Contents may be read or
5981ad6265SDimitry Andric // written partially by providing an appropriate schema to the serialize and
6081ad6265SDimitry Andric // deserialize methods.
6181ad6265SDimitry Andric struct PortableMemInfoBlock {
6281ad6265SDimitry Andric   PortableMemInfoBlock() = default;
63*0fca6ea1SDimitry Andric   explicit PortableMemInfoBlock(const MemInfoBlock &Block,
64*0fca6ea1SDimitry Andric                                 const MemProfSchema &IncomingSchema) {
65*0fca6ea1SDimitry Andric     for (const Meta Id : IncomingSchema)
66*0fca6ea1SDimitry Andric       Schema.set(llvm::to_underlying(Id));
6781ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type) Name = Block.Name;
6881ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc"
6981ad6265SDimitry Andric #undef MIBEntryDef
7081ad6265SDimitry Andric   }
7181ad6265SDimitry Andric 
7281ad6265SDimitry Andric   PortableMemInfoBlock(const MemProfSchema &Schema, const unsigned char *Ptr) {
7381ad6265SDimitry Andric     deserialize(Schema, Ptr);
7481ad6265SDimitry Andric   }
7581ad6265SDimitry Andric 
7681ad6265SDimitry Andric   // Read the contents of \p Ptr based on the \p Schema to populate the
7781ad6265SDimitry Andric   // MemInfoBlock member.
78*0fca6ea1SDimitry Andric   void deserialize(const MemProfSchema &IncomingSchema,
79*0fca6ea1SDimitry Andric                    const unsigned char *Ptr) {
8081ad6265SDimitry Andric     using namespace support;
8181ad6265SDimitry Andric 
82*0fca6ea1SDimitry Andric     Schema.reset();
83*0fca6ea1SDimitry Andric     for (const Meta Id : IncomingSchema) {
8481ad6265SDimitry Andric       switch (Id) {
8581ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type)                                       \
8681ad6265SDimitry Andric   case Meta::Name: {                                                           \
87*0fca6ea1SDimitry Andric     Name = endian::readNext<Type, llvm::endianness::little>(Ptr);              \
8881ad6265SDimitry Andric   } break;
8981ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc"
9081ad6265SDimitry Andric #undef MIBEntryDef
9181ad6265SDimitry Andric       default:
9281ad6265SDimitry Andric         llvm_unreachable("Unknown meta type id, is the profile collected from "
9381ad6265SDimitry Andric                          "a newer version of the runtime?");
9481ad6265SDimitry Andric       }
95*0fca6ea1SDimitry Andric 
96*0fca6ea1SDimitry Andric       Schema.set(llvm::to_underlying(Id));
9781ad6265SDimitry Andric     }
9881ad6265SDimitry Andric   }
9981ad6265SDimitry Andric 
10081ad6265SDimitry Andric   // Write the contents of the MemInfoBlock based on the \p Schema provided to
10181ad6265SDimitry Andric   // the raw_ostream \p OS.
10281ad6265SDimitry Andric   void serialize(const MemProfSchema &Schema, raw_ostream &OS) const {
10381ad6265SDimitry Andric     using namespace support;
10481ad6265SDimitry Andric 
1055f757f3fSDimitry Andric     endian::Writer LE(OS, llvm::endianness::little);
10681ad6265SDimitry Andric     for (const Meta Id : Schema) {
10781ad6265SDimitry Andric       switch (Id) {
10881ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type)                                       \
10981ad6265SDimitry Andric   case Meta::Name: {                                                           \
11081ad6265SDimitry Andric     LE.write<Type>(Name);                                                      \
11181ad6265SDimitry Andric   } break;
11281ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc"
11381ad6265SDimitry Andric #undef MIBEntryDef
11481ad6265SDimitry Andric       default:
11581ad6265SDimitry Andric         llvm_unreachable("Unknown meta type id, invalid input?");
11681ad6265SDimitry Andric       }
11781ad6265SDimitry Andric     }
11881ad6265SDimitry Andric   }
11981ad6265SDimitry Andric 
12081ad6265SDimitry Andric   // Print out the contents of the MemInfoBlock in YAML format.
12181ad6265SDimitry Andric   void printYAML(raw_ostream &OS) const {
12281ad6265SDimitry Andric     OS << "      MemInfoBlock:\n";
12381ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type)                                       \
12481ad6265SDimitry Andric   OS << "        " << #Name << ": " << Name << "\n";
12581ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc"
12681ad6265SDimitry Andric #undef MIBEntryDef
127*0fca6ea1SDimitry Andric     if (AccessHistogramSize > 0) {
128*0fca6ea1SDimitry Andric       OS << "        " << "AccessHistogramValues" << ":";
129*0fca6ea1SDimitry Andric       for (uint32_t I = 0; I < AccessHistogramSize; ++I) {
130*0fca6ea1SDimitry Andric         OS << " " << ((uint64_t *)AccessHistogram)[I];
131*0fca6ea1SDimitry Andric       }
132*0fca6ea1SDimitry Andric       OS << "\n";
133*0fca6ea1SDimitry Andric     }
134*0fca6ea1SDimitry Andric   }
135*0fca6ea1SDimitry Andric 
136*0fca6ea1SDimitry Andric   // Return the schema, only for unit tests.
137*0fca6ea1SDimitry Andric   std::bitset<llvm::to_underlying(Meta::Size)> getSchema() const {
138*0fca6ea1SDimitry Andric     return Schema;
13981ad6265SDimitry Andric   }
14081ad6265SDimitry Andric 
14181ad6265SDimitry Andric   // Define getters for each type which can be called by analyses.
14281ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type)                                       \
143*0fca6ea1SDimitry Andric   Type get##Name() const {                                                     \
144*0fca6ea1SDimitry Andric     assert(Schema[llvm::to_underlying(Meta::Name)]);                           \
145*0fca6ea1SDimitry Andric     return Name;                                                               \
146*0fca6ea1SDimitry Andric   }
14781ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc"
14881ad6265SDimitry Andric #undef MIBEntryDef
14981ad6265SDimitry Andric 
15081ad6265SDimitry Andric   void clear() { *this = PortableMemInfoBlock(); }
15181ad6265SDimitry Andric 
15281ad6265SDimitry Andric   bool operator==(const PortableMemInfoBlock &Other) const {
153*0fca6ea1SDimitry Andric     if (Other.Schema != Schema)
154*0fca6ea1SDimitry Andric       return false;
155*0fca6ea1SDimitry Andric 
15681ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type)                                       \
157*0fca6ea1SDimitry Andric   if (Schema[llvm::to_underlying(Meta::Name)] &&                               \
158*0fca6ea1SDimitry Andric       Other.get##Name() != get##Name())                                        \
15981ad6265SDimitry Andric     return false;
16081ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc"
16181ad6265SDimitry Andric #undef MIBEntryDef
16281ad6265SDimitry Andric     return true;
16381ad6265SDimitry Andric   }
16481ad6265SDimitry Andric 
16581ad6265SDimitry Andric   bool operator!=(const PortableMemInfoBlock &Other) const {
16681ad6265SDimitry Andric     return !operator==(Other);
16781ad6265SDimitry Andric   }
16881ad6265SDimitry Andric 
169*0fca6ea1SDimitry Andric   static size_t serializedSize(const MemProfSchema &Schema) {
17081ad6265SDimitry Andric     size_t Result = 0;
171*0fca6ea1SDimitry Andric 
172*0fca6ea1SDimitry Andric     for (const Meta Id : Schema) {
173*0fca6ea1SDimitry Andric       switch (Id) {
174*0fca6ea1SDimitry Andric #define MIBEntryDef(NameTag, Name, Type)                                       \
175*0fca6ea1SDimitry Andric   case Meta::Name: {                                                           \
176*0fca6ea1SDimitry Andric     Result += sizeof(Type);                                                    \
177*0fca6ea1SDimitry Andric   } break;
17881ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc"
17981ad6265SDimitry Andric #undef MIBEntryDef
180*0fca6ea1SDimitry Andric       default:
181*0fca6ea1SDimitry Andric         llvm_unreachable("Unknown meta type id, invalid input?");
182*0fca6ea1SDimitry Andric       }
183*0fca6ea1SDimitry Andric     }
184*0fca6ea1SDimitry Andric 
18581ad6265SDimitry Andric     return Result;
18681ad6265SDimitry Andric   }
18781ad6265SDimitry Andric 
18881ad6265SDimitry Andric private:
189*0fca6ea1SDimitry Andric   // The set of available fields, indexed by Meta::Name.
190*0fca6ea1SDimitry Andric   std::bitset<llvm::to_underlying(Meta::Size)> Schema;
191*0fca6ea1SDimitry Andric 
19281ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type) Type Name = Type();
19381ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc"
19481ad6265SDimitry Andric #undef MIBEntryDef
19581ad6265SDimitry Andric };
19681ad6265SDimitry Andric 
19781ad6265SDimitry Andric // A type representing the id generated by hashing the contents of the Frame.
19881ad6265SDimitry Andric using FrameId = uint64_t;
199*0fca6ea1SDimitry Andric // A type representing the id to index into the frame array.
200*0fca6ea1SDimitry Andric using LinearFrameId = uint32_t;
20181ad6265SDimitry Andric // Describes a call frame for a dynamic allocation context. The contents of
20281ad6265SDimitry Andric // the frame are populated by symbolizing the stack depot call frame from the
20381ad6265SDimitry Andric // compiler runtime.
20481ad6265SDimitry Andric struct Frame {
20581ad6265SDimitry Andric   // A uuid (uint64_t) identifying the function. It is obtained by
20681ad6265SDimitry Andric   // llvm::md5(FunctionName) which returns the lower 64 bits.
20781ad6265SDimitry Andric   GlobalValue::GUID Function;
20881ad6265SDimitry Andric   // The symbol name for the function. Only populated in the Frame by the reader
20981ad6265SDimitry Andric   // if requested during initialization. This field should not be serialized.
210*0fca6ea1SDimitry Andric   std::unique_ptr<std::string> SymbolName;
21181ad6265SDimitry Andric   // The source line offset of the call from the beginning of parent function.
21281ad6265SDimitry Andric   uint32_t LineOffset;
21381ad6265SDimitry Andric   // The source column number of the call to help distinguish multiple calls
21481ad6265SDimitry Andric   // on the same line.
21581ad6265SDimitry Andric   uint32_t Column;
21681ad6265SDimitry Andric   // Whether the current frame is inlined.
21781ad6265SDimitry Andric   bool IsInlineFrame;
21881ad6265SDimitry Andric 
21981ad6265SDimitry Andric   Frame(const Frame &Other) {
22081ad6265SDimitry Andric     Function = Other.Function;
221*0fca6ea1SDimitry Andric     SymbolName = Other.SymbolName
222*0fca6ea1SDimitry Andric                      ? std::make_unique<std::string>(*Other.SymbolName)
223*0fca6ea1SDimitry Andric                      : nullptr;
22481ad6265SDimitry Andric     LineOffset = Other.LineOffset;
22581ad6265SDimitry Andric     Column = Other.Column;
22681ad6265SDimitry Andric     IsInlineFrame = Other.IsInlineFrame;
22781ad6265SDimitry Andric   }
22881ad6265SDimitry Andric 
229*0fca6ea1SDimitry Andric   Frame(GlobalValue::GUID Hash, uint32_t Off, uint32_t Col, bool Inline)
23081ad6265SDimitry Andric       : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {}
23181ad6265SDimitry Andric 
23281ad6265SDimitry Andric   bool operator==(const Frame &Other) const {
23381ad6265SDimitry Andric     // Ignore the SymbolName field to avoid a string compare. Comparing the
23481ad6265SDimitry Andric     // function hash serves the same purpose.
23581ad6265SDimitry Andric     return Other.Function == Function && Other.LineOffset == LineOffset &&
23681ad6265SDimitry Andric            Other.Column == Column && Other.IsInlineFrame == IsInlineFrame;
23781ad6265SDimitry Andric   }
23881ad6265SDimitry Andric 
23981ad6265SDimitry Andric   Frame &operator=(const Frame &Other) {
24081ad6265SDimitry Andric     Function = Other.Function;
241*0fca6ea1SDimitry Andric     SymbolName = Other.SymbolName
242*0fca6ea1SDimitry Andric                      ? std::make_unique<std::string>(*Other.SymbolName)
243*0fca6ea1SDimitry Andric                      : nullptr;
24481ad6265SDimitry Andric     LineOffset = Other.LineOffset;
24581ad6265SDimitry Andric     Column = Other.Column;
24681ad6265SDimitry Andric     IsInlineFrame = Other.IsInlineFrame;
24781ad6265SDimitry Andric     return *this;
24881ad6265SDimitry Andric   }
24981ad6265SDimitry Andric 
25081ad6265SDimitry Andric   bool operator!=(const Frame &Other) const { return !operator==(Other); }
25181ad6265SDimitry Andric 
252*0fca6ea1SDimitry Andric   bool hasSymbolName() const { return !!SymbolName; }
253*0fca6ea1SDimitry Andric 
254*0fca6ea1SDimitry Andric   StringRef getSymbolName() const {
255*0fca6ea1SDimitry Andric     assert(hasSymbolName());
256*0fca6ea1SDimitry Andric     return *SymbolName;
257*0fca6ea1SDimitry Andric   }
258*0fca6ea1SDimitry Andric 
259*0fca6ea1SDimitry Andric   std::string getSymbolNameOr(StringRef Alt) const {
260*0fca6ea1SDimitry Andric     return std::string(hasSymbolName() ? getSymbolName() : Alt);
261*0fca6ea1SDimitry Andric   }
262*0fca6ea1SDimitry Andric 
26381ad6265SDimitry Andric   // Write the contents of the frame to the ostream \p OS.
26481ad6265SDimitry Andric   void serialize(raw_ostream &OS) const {
26581ad6265SDimitry Andric     using namespace support;
26681ad6265SDimitry Andric 
2675f757f3fSDimitry Andric     endian::Writer LE(OS, llvm::endianness::little);
26881ad6265SDimitry Andric 
26981ad6265SDimitry Andric     // If the type of the GlobalValue::GUID changes, then we need to update
27081ad6265SDimitry Andric     // the reader and the writer.
27181ad6265SDimitry Andric     static_assert(std::is_same<GlobalValue::GUID, uint64_t>::value,
27281ad6265SDimitry Andric                   "Expect GUID to be uint64_t.");
27381ad6265SDimitry Andric     LE.write<uint64_t>(Function);
27481ad6265SDimitry Andric 
27581ad6265SDimitry Andric     LE.write<uint32_t>(LineOffset);
27681ad6265SDimitry Andric     LE.write<uint32_t>(Column);
27781ad6265SDimitry Andric     LE.write<bool>(IsInlineFrame);
27881ad6265SDimitry Andric   }
27981ad6265SDimitry Andric 
28081ad6265SDimitry Andric   // Read a frame from char data which has been serialized as little endian.
28181ad6265SDimitry Andric   static Frame deserialize(const unsigned char *Ptr) {
28281ad6265SDimitry Andric     using namespace support;
28381ad6265SDimitry Andric 
2845f757f3fSDimitry Andric     const uint64_t F =
285*0fca6ea1SDimitry Andric         endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
2865f757f3fSDimitry Andric     const uint32_t L =
287*0fca6ea1SDimitry Andric         endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
2885f757f3fSDimitry Andric     const uint32_t C =
289*0fca6ea1SDimitry Andric         endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
290*0fca6ea1SDimitry Andric     const bool I = endian::readNext<bool, llvm::endianness::little>(Ptr);
29181ad6265SDimitry Andric     return Frame(/*Function=*/F, /*LineOffset=*/L, /*Column=*/C,
29281ad6265SDimitry Andric                  /*IsInlineFrame=*/I);
29381ad6265SDimitry Andric   }
29481ad6265SDimitry Andric 
29581ad6265SDimitry Andric   // Returns the size of the frame information.
29681ad6265SDimitry Andric   static constexpr size_t serializedSize() {
29781ad6265SDimitry Andric     return sizeof(Frame::Function) + sizeof(Frame::LineOffset) +
29881ad6265SDimitry Andric            sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame);
29981ad6265SDimitry Andric   }
30081ad6265SDimitry Andric 
30181ad6265SDimitry Andric   // Print the frame information in YAML format.
30281ad6265SDimitry Andric   void printYAML(raw_ostream &OS) const {
30381ad6265SDimitry Andric     OS << "      -\n"
30481ad6265SDimitry Andric        << "        Function: " << Function << "\n"
305*0fca6ea1SDimitry Andric        << "        SymbolName: " << getSymbolNameOr("<None>") << "\n"
30681ad6265SDimitry Andric        << "        LineOffset: " << LineOffset << "\n"
30781ad6265SDimitry Andric        << "        Column: " << Column << "\n"
30881ad6265SDimitry Andric        << "        Inline: " << IsInlineFrame << "\n";
30981ad6265SDimitry Andric   }
31081ad6265SDimitry Andric 
31181ad6265SDimitry Andric   // Return a hash value based on the contents of the frame. Here we don't use
31281ad6265SDimitry Andric   // hashing from llvm ADT since we are going to persist the hash id, the hash
31381ad6265SDimitry Andric   // combine algorithm in ADT uses a new randomized seed each time.
31481ad6265SDimitry Andric   inline FrameId hash() const {
31581ad6265SDimitry Andric     auto HashCombine = [](auto Value, size_t Seed) {
31681ad6265SDimitry Andric       std::hash<decltype(Value)> Hasher;
31781ad6265SDimitry Andric       // The constant used below is the 64 bit representation of the fractional
31881ad6265SDimitry Andric       // part of the golden ratio. Used here for the randomness in their bit
31981ad6265SDimitry Andric       // pattern.
32081ad6265SDimitry Andric       return Hasher(Value) + 0x9e3779b97f4a7c15 + (Seed << 6) + (Seed >> 2);
32181ad6265SDimitry Andric     };
32281ad6265SDimitry Andric 
32381ad6265SDimitry Andric     size_t Result = 0;
32481ad6265SDimitry Andric     Result ^= HashCombine(Function, Result);
32581ad6265SDimitry Andric     Result ^= HashCombine(LineOffset, Result);
32681ad6265SDimitry Andric     Result ^= HashCombine(Column, Result);
32781ad6265SDimitry Andric     Result ^= HashCombine(IsInlineFrame, Result);
32881ad6265SDimitry Andric     return static_cast<FrameId>(Result);
32981ad6265SDimitry Andric   }
33081ad6265SDimitry Andric };
33181ad6265SDimitry Andric 
332*0fca6ea1SDimitry Andric // A type representing the index into the table of call stacks.
333*0fca6ea1SDimitry Andric using CallStackId = uint64_t;
334*0fca6ea1SDimitry Andric 
335*0fca6ea1SDimitry Andric // A type representing the index into the call stack array.
336*0fca6ea1SDimitry Andric using LinearCallStackId = uint32_t;
337*0fca6ea1SDimitry Andric 
33881ad6265SDimitry Andric // Holds allocation information in a space efficient format where frames are
33981ad6265SDimitry Andric // represented using unique identifiers.
34081ad6265SDimitry Andric struct IndexedAllocationInfo {
34181ad6265SDimitry Andric   // The dynamic calling context for the allocation in bottom-up (leaf-to-root)
34281ad6265SDimitry Andric   // order. Frame contents are stored out-of-line.
343*0fca6ea1SDimitry Andric   // TODO: Remove once we fully transition to CSId.
34481ad6265SDimitry Andric   llvm::SmallVector<FrameId> CallStack;
345*0fca6ea1SDimitry Andric   // Conceptually the same as above.  We are going to keep both CallStack and
346*0fca6ea1SDimitry Andric   // CallStackId while we are transitioning from CallStack to CallStackId.
347*0fca6ea1SDimitry Andric   CallStackId CSId = 0;
34881ad6265SDimitry Andric   // The statistics obtained from the runtime for the allocation.
34981ad6265SDimitry Andric   PortableMemInfoBlock Info;
35081ad6265SDimitry Andric 
35181ad6265SDimitry Andric   IndexedAllocationInfo() = default;
352*0fca6ea1SDimitry Andric   IndexedAllocationInfo(ArrayRef<FrameId> CS, CallStackId CSId,
353*0fca6ea1SDimitry Andric                         const MemInfoBlock &MB,
354*0fca6ea1SDimitry Andric                         const MemProfSchema &Schema = getFullSchema())
355*0fca6ea1SDimitry Andric       : CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB, Schema) {}
35681ad6265SDimitry Andric 
35781ad6265SDimitry Andric   // Returns the size in bytes when this allocation info struct is serialized.
358*0fca6ea1SDimitry Andric   size_t serializedSize(const MemProfSchema &Schema,
359*0fca6ea1SDimitry Andric                         IndexedVersion Version) const;
36081ad6265SDimitry Andric 
36181ad6265SDimitry Andric   bool operator==(const IndexedAllocationInfo &Other) const {
36281ad6265SDimitry Andric     if (Other.Info != Info)
36381ad6265SDimitry Andric       return false;
36481ad6265SDimitry Andric 
365*0fca6ea1SDimitry Andric     if (Other.CSId != CSId)
36681ad6265SDimitry Andric       return false;
36781ad6265SDimitry Andric     return true;
36881ad6265SDimitry Andric   }
36981ad6265SDimitry Andric 
37081ad6265SDimitry Andric   bool operator!=(const IndexedAllocationInfo &Other) const {
37181ad6265SDimitry Andric     return !operator==(Other);
37281ad6265SDimitry Andric   }
37381ad6265SDimitry Andric };
37481ad6265SDimitry Andric 
37581ad6265SDimitry Andric // Holds allocation information with frame contents inline. The type should
37681ad6265SDimitry Andric // be used for temporary in-memory instances.
37781ad6265SDimitry Andric struct AllocationInfo {
37881ad6265SDimitry Andric   // Same as IndexedAllocationInfo::CallStack with the frame contents inline.
379*0fca6ea1SDimitry Andric   std::vector<Frame> CallStack;
38081ad6265SDimitry Andric   // Same as IndexedAllocationInfo::Info;
38181ad6265SDimitry Andric   PortableMemInfoBlock Info;
38281ad6265SDimitry Andric 
38381ad6265SDimitry Andric   AllocationInfo() = default;
38481ad6265SDimitry Andric   AllocationInfo(
38581ad6265SDimitry Andric       const IndexedAllocationInfo &IndexedAI,
38681ad6265SDimitry Andric       llvm::function_ref<const Frame(const FrameId)> IdToFrameCallback) {
38781ad6265SDimitry Andric     for (const FrameId &Id : IndexedAI.CallStack) {
38881ad6265SDimitry Andric       CallStack.push_back(IdToFrameCallback(Id));
38981ad6265SDimitry Andric     }
39081ad6265SDimitry Andric     Info = IndexedAI.Info;
39181ad6265SDimitry Andric   }
39281ad6265SDimitry Andric 
39381ad6265SDimitry Andric   void printYAML(raw_ostream &OS) const {
39481ad6265SDimitry Andric     OS << "    -\n";
39581ad6265SDimitry Andric     OS << "      Callstack:\n";
39681ad6265SDimitry Andric     // TODO: Print out the frame on one line with to make it easier for deep
39781ad6265SDimitry Andric     // callstacks once we have a test to check valid YAML is generated.
39881ad6265SDimitry Andric     for (const Frame &F : CallStack) {
39981ad6265SDimitry Andric       F.printYAML(OS);
40081ad6265SDimitry Andric     }
40181ad6265SDimitry Andric     Info.printYAML(OS);
40281ad6265SDimitry Andric   }
40381ad6265SDimitry Andric };
40481ad6265SDimitry Andric 
40581ad6265SDimitry Andric // Holds the memprof profile information for a function. The internal
40681ad6265SDimitry Andric // representation stores frame ids for efficiency. This representation should
40781ad6265SDimitry Andric // be used in the profile conversion and manipulation tools.
40881ad6265SDimitry Andric struct IndexedMemProfRecord {
40981ad6265SDimitry Andric   // Memory allocation sites in this function for which we have memory
41081ad6265SDimitry Andric   // profiling data.
41181ad6265SDimitry Andric   llvm::SmallVector<IndexedAllocationInfo> AllocSites;
41281ad6265SDimitry Andric   // Holds call sites in this function which are part of some memory
41381ad6265SDimitry Andric   // allocation context. We store this as a list of locations, each with its
41481ad6265SDimitry Andric   // list of inline locations in bottom-up order i.e. from leaf to root. The
41581ad6265SDimitry Andric   // inline location list may include additional entries, users should pick
41681ad6265SDimitry Andric   // the last entry in the list with the same function GUID.
41781ad6265SDimitry Andric   llvm::SmallVector<llvm::SmallVector<FrameId>> CallSites;
418*0fca6ea1SDimitry Andric   // Conceptually the same as above.  We are going to keep both CallSites and
419*0fca6ea1SDimitry Andric   // CallSiteIds while we are transitioning from CallSites to CallSiteIds.
420*0fca6ea1SDimitry Andric   llvm::SmallVector<CallStackId> CallSiteIds;
42181ad6265SDimitry Andric 
42281ad6265SDimitry Andric   void clear() {
42381ad6265SDimitry Andric     AllocSites.clear();
42481ad6265SDimitry Andric     CallSites.clear();
42581ad6265SDimitry Andric   }
42681ad6265SDimitry Andric 
42781ad6265SDimitry Andric   void merge(const IndexedMemProfRecord &Other) {
42881ad6265SDimitry Andric     // TODO: Filter out duplicates which may occur if multiple memprof
42981ad6265SDimitry Andric     // profiles are merged together using llvm-profdata.
43081ad6265SDimitry Andric     AllocSites.append(Other.AllocSites);
43181ad6265SDimitry Andric     CallSites.append(Other.CallSites);
43281ad6265SDimitry Andric   }
43381ad6265SDimitry Andric 
434*0fca6ea1SDimitry Andric   size_t serializedSize(const MemProfSchema &Schema,
435*0fca6ea1SDimitry Andric                         IndexedVersion Version) const;
43681ad6265SDimitry Andric 
43781ad6265SDimitry Andric   bool operator==(const IndexedMemProfRecord &Other) const {
438*0fca6ea1SDimitry Andric     if (Other.AllocSites != AllocSites)
43981ad6265SDimitry Andric       return false;
44081ad6265SDimitry Andric 
441*0fca6ea1SDimitry Andric     if (Other.CallSiteIds != CallSiteIds)
44281ad6265SDimitry Andric       return false;
44381ad6265SDimitry Andric     return true;
44481ad6265SDimitry Andric   }
44581ad6265SDimitry Andric 
44681ad6265SDimitry Andric   // Serializes the memprof records in \p Records to the ostream \p OS based
44781ad6265SDimitry Andric   // on the schema provided in \p Schema.
448*0fca6ea1SDimitry Andric   void serialize(const MemProfSchema &Schema, raw_ostream &OS,
449*0fca6ea1SDimitry Andric                  IndexedVersion Version,
450*0fca6ea1SDimitry Andric                  llvm::DenseMap<CallStackId, LinearCallStackId>
451*0fca6ea1SDimitry Andric                      *MemProfCallStackIndexes = nullptr) const;
45281ad6265SDimitry Andric 
45381ad6265SDimitry Andric   // Deserializes memprof records from the Buffer.
45481ad6265SDimitry Andric   static IndexedMemProfRecord deserialize(const MemProfSchema &Schema,
455*0fca6ea1SDimitry Andric                                           const unsigned char *Buffer,
456*0fca6ea1SDimitry Andric                                           IndexedVersion Version);
457*0fca6ea1SDimitry Andric 
458*0fca6ea1SDimitry Andric   // Convert IndexedMemProfRecord to MemProfRecord.  Callback is used to
459*0fca6ea1SDimitry Andric   // translate CallStackId to call stacks with frames inline.
460*0fca6ea1SDimitry Andric   MemProfRecord toMemProfRecord(
461*0fca6ea1SDimitry Andric       llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const;
46281ad6265SDimitry Andric 
46381ad6265SDimitry Andric   // Returns the GUID for the function name after canonicalization. For
46481ad6265SDimitry Andric   // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
46581ad6265SDimitry Andric   // mapped to functions using this GUID.
46681ad6265SDimitry Andric   static GlobalValue::GUID getGUID(const StringRef FunctionName);
46781ad6265SDimitry Andric };
46881ad6265SDimitry Andric 
46981ad6265SDimitry Andric // Holds the memprof profile information for a function. The internal
47081ad6265SDimitry Andric // representation stores frame contents inline. This representation should
47181ad6265SDimitry Andric // be used for small amount of temporary, in memory instances.
47281ad6265SDimitry Andric struct MemProfRecord {
47381ad6265SDimitry Andric   // Same as IndexedMemProfRecord::AllocSites with frame contents inline.
47481ad6265SDimitry Andric   llvm::SmallVector<AllocationInfo> AllocSites;
47581ad6265SDimitry Andric   // Same as IndexedMemProfRecord::CallSites with frame contents inline.
476*0fca6ea1SDimitry Andric   llvm::SmallVector<std::vector<Frame>> CallSites;
47781ad6265SDimitry Andric 
47881ad6265SDimitry Andric   MemProfRecord() = default;
47981ad6265SDimitry Andric   MemProfRecord(
48081ad6265SDimitry Andric       const IndexedMemProfRecord &Record,
48181ad6265SDimitry Andric       llvm::function_ref<const Frame(const FrameId Id)> IdToFrameCallback) {
48281ad6265SDimitry Andric     for (const IndexedAllocationInfo &IndexedAI : Record.AllocSites) {
48381ad6265SDimitry Andric       AllocSites.emplace_back(IndexedAI, IdToFrameCallback);
48481ad6265SDimitry Andric     }
48581ad6265SDimitry Andric     for (const ArrayRef<FrameId> Site : Record.CallSites) {
486*0fca6ea1SDimitry Andric       std::vector<Frame> Frames;
48781ad6265SDimitry Andric       for (const FrameId Id : Site) {
48881ad6265SDimitry Andric         Frames.push_back(IdToFrameCallback(Id));
48981ad6265SDimitry Andric       }
49081ad6265SDimitry Andric       CallSites.push_back(Frames);
49181ad6265SDimitry Andric     }
49281ad6265SDimitry Andric   }
49381ad6265SDimitry Andric 
49481ad6265SDimitry Andric   // Prints out the contents of the memprof record in YAML.
49581ad6265SDimitry Andric   void print(llvm::raw_ostream &OS) const {
49681ad6265SDimitry Andric     if (!AllocSites.empty()) {
49781ad6265SDimitry Andric       OS << "    AllocSites:\n";
49881ad6265SDimitry Andric       for (const AllocationInfo &N : AllocSites)
49981ad6265SDimitry Andric         N.printYAML(OS);
50081ad6265SDimitry Andric     }
50181ad6265SDimitry Andric 
50281ad6265SDimitry Andric     if (!CallSites.empty()) {
50381ad6265SDimitry Andric       OS << "    CallSites:\n";
504*0fca6ea1SDimitry Andric       for (const std::vector<Frame> &Frames : CallSites) {
50581ad6265SDimitry Andric         for (const Frame &F : Frames) {
50681ad6265SDimitry Andric           OS << "    -\n";
50781ad6265SDimitry Andric           F.printYAML(OS);
50881ad6265SDimitry Andric         }
50981ad6265SDimitry Andric       }
51081ad6265SDimitry Andric     }
51181ad6265SDimitry Andric   }
51281ad6265SDimitry Andric };
51381ad6265SDimitry Andric 
51481ad6265SDimitry Andric // Reads a memprof schema from a buffer. All entries in the buffer are
51581ad6265SDimitry Andric // interpreted as uint64_t. The first entry in the buffer denotes the number of
51681ad6265SDimitry Andric // ids in the schema. Subsequent entries are integers which map to memprof::Meta
51781ad6265SDimitry Andric // enum class entries. After successfully reading the schema, the pointer is one
51881ad6265SDimitry Andric // byte past the schema contents.
51981ad6265SDimitry Andric Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer);
52081ad6265SDimitry Andric 
52181ad6265SDimitry Andric // Trait for reading IndexedMemProfRecord data from the on-disk hash table.
52281ad6265SDimitry Andric class RecordLookupTrait {
52381ad6265SDimitry Andric public:
52481ad6265SDimitry Andric   using data_type = const IndexedMemProfRecord &;
52581ad6265SDimitry Andric   using internal_key_type = uint64_t;
52681ad6265SDimitry Andric   using external_key_type = uint64_t;
52781ad6265SDimitry Andric   using hash_value_type = uint64_t;
52881ad6265SDimitry Andric   using offset_type = uint64_t;
52981ad6265SDimitry Andric 
53081ad6265SDimitry Andric   RecordLookupTrait() = delete;
531*0fca6ea1SDimitry Andric   RecordLookupTrait(IndexedVersion V, const MemProfSchema &S)
532*0fca6ea1SDimitry Andric       : Version(V), Schema(S) {}
53381ad6265SDimitry Andric 
53481ad6265SDimitry Andric   static bool EqualKey(uint64_t A, uint64_t B) { return A == B; }
53581ad6265SDimitry Andric   static uint64_t GetInternalKey(uint64_t K) { return K; }
53681ad6265SDimitry Andric   static uint64_t GetExternalKey(uint64_t K) { return K; }
53781ad6265SDimitry Andric 
53881ad6265SDimitry Andric   hash_value_type ComputeHash(uint64_t K) { return K; }
53981ad6265SDimitry Andric 
54081ad6265SDimitry Andric   static std::pair<offset_type, offset_type>
54181ad6265SDimitry Andric   ReadKeyDataLength(const unsigned char *&D) {
54281ad6265SDimitry Andric     using namespace support;
54381ad6265SDimitry Andric 
5445f757f3fSDimitry Andric     offset_type KeyLen =
545*0fca6ea1SDimitry Andric         endian::readNext<offset_type, llvm::endianness::little>(D);
5465f757f3fSDimitry Andric     offset_type DataLen =
547*0fca6ea1SDimitry Andric         endian::readNext<offset_type, llvm::endianness::little>(D);
54881ad6265SDimitry Andric     return std::make_pair(KeyLen, DataLen);
54981ad6265SDimitry Andric   }
55081ad6265SDimitry Andric 
55181ad6265SDimitry Andric   uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
55281ad6265SDimitry Andric     using namespace support;
553*0fca6ea1SDimitry Andric     return endian::readNext<external_key_type, llvm::endianness::little>(D);
55481ad6265SDimitry Andric   }
55581ad6265SDimitry Andric 
55681ad6265SDimitry Andric   data_type ReadData(uint64_t K, const unsigned char *D,
55781ad6265SDimitry Andric                      offset_type /*Unused*/) {
558*0fca6ea1SDimitry Andric     Record = IndexedMemProfRecord::deserialize(Schema, D, Version);
55981ad6265SDimitry Andric     return Record;
56081ad6265SDimitry Andric   }
56181ad6265SDimitry Andric 
56281ad6265SDimitry Andric private:
563*0fca6ea1SDimitry Andric   // Holds the MemProf version.
564*0fca6ea1SDimitry Andric   IndexedVersion Version;
56581ad6265SDimitry Andric   // Holds the memprof schema used to deserialize records.
56681ad6265SDimitry Andric   MemProfSchema Schema;
56781ad6265SDimitry Andric   // Holds the records from one function deserialized from the indexed format.
56881ad6265SDimitry Andric   IndexedMemProfRecord Record;
56981ad6265SDimitry Andric };
57081ad6265SDimitry Andric 
57181ad6265SDimitry Andric // Trait for writing IndexedMemProfRecord data to the on-disk hash table.
57281ad6265SDimitry Andric class RecordWriterTrait {
57381ad6265SDimitry Andric public:
57481ad6265SDimitry Andric   using key_type = uint64_t;
57581ad6265SDimitry Andric   using key_type_ref = uint64_t;
57681ad6265SDimitry Andric 
57781ad6265SDimitry Andric   using data_type = IndexedMemProfRecord;
57881ad6265SDimitry Andric   using data_type_ref = IndexedMemProfRecord &;
57981ad6265SDimitry Andric 
58081ad6265SDimitry Andric   using hash_value_type = uint64_t;
58181ad6265SDimitry Andric   using offset_type = uint64_t;
58281ad6265SDimitry Andric 
583*0fca6ea1SDimitry Andric private:
584*0fca6ea1SDimitry Andric   // Pointer to the memprof schema to use for the generator.
585*0fca6ea1SDimitry Andric   const MemProfSchema *Schema;
586*0fca6ea1SDimitry Andric   // The MemProf version to use for the serialization.
587*0fca6ea1SDimitry Andric   IndexedVersion Version;
58881ad6265SDimitry Andric 
589*0fca6ea1SDimitry Andric   // Mappings from CallStackId to the indexes into the call stack array.
590*0fca6ea1SDimitry Andric   llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes;
591*0fca6ea1SDimitry Andric 
592*0fca6ea1SDimitry Andric public:
593*0fca6ea1SDimitry Andric   // We do not support the default constructor, which does not set Version.
594*0fca6ea1SDimitry Andric   RecordWriterTrait() = delete;
595*0fca6ea1SDimitry Andric   RecordWriterTrait(
596*0fca6ea1SDimitry Andric       const MemProfSchema *Schema, IndexedVersion V,
597*0fca6ea1SDimitry Andric       llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
598*0fca6ea1SDimitry Andric       : Schema(Schema), Version(V),
599*0fca6ea1SDimitry Andric         MemProfCallStackIndexes(MemProfCallStackIndexes) {}
60081ad6265SDimitry Andric 
60181ad6265SDimitry Andric   static hash_value_type ComputeHash(key_type_ref K) { return K; }
60281ad6265SDimitry Andric 
603*0fca6ea1SDimitry Andric   std::pair<offset_type, offset_type>
60481ad6265SDimitry Andric   EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
60581ad6265SDimitry Andric     using namespace support;
60681ad6265SDimitry Andric 
6075f757f3fSDimitry Andric     endian::Writer LE(Out, llvm::endianness::little);
60881ad6265SDimitry Andric     offset_type N = sizeof(K);
60981ad6265SDimitry Andric     LE.write<offset_type>(N);
610*0fca6ea1SDimitry Andric     offset_type M = V.serializedSize(*Schema, Version);
61181ad6265SDimitry Andric     LE.write<offset_type>(M);
61281ad6265SDimitry Andric     return std::make_pair(N, M);
61381ad6265SDimitry Andric   }
61481ad6265SDimitry Andric 
61581ad6265SDimitry Andric   void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
61681ad6265SDimitry Andric     using namespace support;
6175f757f3fSDimitry Andric     endian::Writer LE(Out, llvm::endianness::little);
61881ad6265SDimitry Andric     LE.write<uint64_t>(K);
61981ad6265SDimitry Andric   }
62081ad6265SDimitry Andric 
62181ad6265SDimitry Andric   void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
62281ad6265SDimitry Andric                 offset_type /*Unused*/) {
62381ad6265SDimitry Andric     assert(Schema != nullptr && "MemProf schema is not initialized!");
624*0fca6ea1SDimitry Andric     V.serialize(*Schema, Out, Version, MemProfCallStackIndexes);
6255f757f3fSDimitry Andric     // Clear the IndexedMemProfRecord which results in clearing/freeing its
6265f757f3fSDimitry Andric     // vectors of allocs and callsites. This is owned by the associated on-disk
6275f757f3fSDimitry Andric     // hash table, but unused after this point. See also the comment added to
6285f757f3fSDimitry Andric     // the client which constructs the on-disk hash table for this trait.
6295f757f3fSDimitry Andric     V.clear();
63081ad6265SDimitry Andric   }
63181ad6265SDimitry Andric };
63281ad6265SDimitry Andric 
63381ad6265SDimitry Andric // Trait for writing frame mappings to the on-disk hash table.
63481ad6265SDimitry Andric class FrameWriterTrait {
63581ad6265SDimitry Andric public:
63681ad6265SDimitry Andric   using key_type = FrameId;
63781ad6265SDimitry Andric   using key_type_ref = FrameId;
63881ad6265SDimitry Andric 
63981ad6265SDimitry Andric   using data_type = Frame;
64081ad6265SDimitry Andric   using data_type_ref = Frame &;
64181ad6265SDimitry Andric 
64281ad6265SDimitry Andric   using hash_value_type = FrameId;
64381ad6265SDimitry Andric   using offset_type = uint64_t;
64481ad6265SDimitry Andric 
64581ad6265SDimitry Andric   static hash_value_type ComputeHash(key_type_ref K) { return K; }
64681ad6265SDimitry Andric 
64781ad6265SDimitry Andric   static std::pair<offset_type, offset_type>
64881ad6265SDimitry Andric   EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
64981ad6265SDimitry Andric     using namespace support;
6505f757f3fSDimitry Andric     endian::Writer LE(Out, llvm::endianness::little);
65181ad6265SDimitry Andric     offset_type N = sizeof(K);
65281ad6265SDimitry Andric     LE.write<offset_type>(N);
65381ad6265SDimitry Andric     offset_type M = V.serializedSize();
65481ad6265SDimitry Andric     LE.write<offset_type>(M);
65581ad6265SDimitry Andric     return std::make_pair(N, M);
65681ad6265SDimitry Andric   }
65781ad6265SDimitry Andric 
65881ad6265SDimitry Andric   void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
65981ad6265SDimitry Andric     using namespace support;
6605f757f3fSDimitry Andric     endian::Writer LE(Out, llvm::endianness::little);
66181ad6265SDimitry Andric     LE.write<key_type>(K);
66281ad6265SDimitry Andric   }
66381ad6265SDimitry Andric 
66481ad6265SDimitry Andric   void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
66581ad6265SDimitry Andric                 offset_type /*Unused*/) {
66681ad6265SDimitry Andric     V.serialize(Out);
66781ad6265SDimitry Andric   }
66881ad6265SDimitry Andric };
66981ad6265SDimitry Andric 
67081ad6265SDimitry Andric // Trait for reading frame mappings from the on-disk hash table.
67181ad6265SDimitry Andric class FrameLookupTrait {
67281ad6265SDimitry Andric public:
67381ad6265SDimitry Andric   using data_type = const Frame;
67481ad6265SDimitry Andric   using internal_key_type = FrameId;
67581ad6265SDimitry Andric   using external_key_type = FrameId;
67681ad6265SDimitry Andric   using hash_value_type = FrameId;
67781ad6265SDimitry Andric   using offset_type = uint64_t;
67881ad6265SDimitry Andric 
67981ad6265SDimitry Andric   static bool EqualKey(internal_key_type A, internal_key_type B) {
68081ad6265SDimitry Andric     return A == B;
68181ad6265SDimitry Andric   }
68281ad6265SDimitry Andric   static uint64_t GetInternalKey(internal_key_type K) { return K; }
68381ad6265SDimitry Andric   static uint64_t GetExternalKey(external_key_type K) { return K; }
68481ad6265SDimitry Andric 
68581ad6265SDimitry Andric   hash_value_type ComputeHash(internal_key_type K) { return K; }
68681ad6265SDimitry Andric 
68781ad6265SDimitry Andric   static std::pair<offset_type, offset_type>
68881ad6265SDimitry Andric   ReadKeyDataLength(const unsigned char *&D) {
68981ad6265SDimitry Andric     using namespace support;
69081ad6265SDimitry Andric 
6915f757f3fSDimitry Andric     offset_type KeyLen =
692*0fca6ea1SDimitry Andric         endian::readNext<offset_type, llvm::endianness::little>(D);
6935f757f3fSDimitry Andric     offset_type DataLen =
694*0fca6ea1SDimitry Andric         endian::readNext<offset_type, llvm::endianness::little>(D);
69581ad6265SDimitry Andric     return std::make_pair(KeyLen, DataLen);
69681ad6265SDimitry Andric   }
69781ad6265SDimitry Andric 
69881ad6265SDimitry Andric   uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
69981ad6265SDimitry Andric     using namespace support;
700*0fca6ea1SDimitry Andric     return endian::readNext<external_key_type, llvm::endianness::little>(D);
70181ad6265SDimitry Andric   }
70281ad6265SDimitry Andric 
70381ad6265SDimitry Andric   data_type ReadData(uint64_t K, const unsigned char *D,
70481ad6265SDimitry Andric                      offset_type /*Unused*/) {
70581ad6265SDimitry Andric     return Frame::deserialize(D);
70681ad6265SDimitry Andric   }
70781ad6265SDimitry Andric };
708*0fca6ea1SDimitry Andric 
709*0fca6ea1SDimitry Andric // Trait for writing call stacks to the on-disk hash table.
710*0fca6ea1SDimitry Andric class CallStackWriterTrait {
711*0fca6ea1SDimitry Andric public:
712*0fca6ea1SDimitry Andric   using key_type = CallStackId;
713*0fca6ea1SDimitry Andric   using key_type_ref = CallStackId;
714*0fca6ea1SDimitry Andric 
715*0fca6ea1SDimitry Andric   using data_type = llvm::SmallVector<FrameId>;
716*0fca6ea1SDimitry Andric   using data_type_ref = llvm::SmallVector<FrameId> &;
717*0fca6ea1SDimitry Andric 
718*0fca6ea1SDimitry Andric   using hash_value_type = CallStackId;
719*0fca6ea1SDimitry Andric   using offset_type = uint64_t;
720*0fca6ea1SDimitry Andric 
721*0fca6ea1SDimitry Andric   static hash_value_type ComputeHash(key_type_ref K) { return K; }
722*0fca6ea1SDimitry Andric 
723*0fca6ea1SDimitry Andric   static std::pair<offset_type, offset_type>
724*0fca6ea1SDimitry Andric   EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
725*0fca6ea1SDimitry Andric     using namespace support;
726*0fca6ea1SDimitry Andric     endian::Writer LE(Out, llvm::endianness::little);
727*0fca6ea1SDimitry Andric     // We do not explicitly emit the key length because it is a constant.
728*0fca6ea1SDimitry Andric     offset_type N = sizeof(K);
729*0fca6ea1SDimitry Andric     offset_type M = sizeof(FrameId) * V.size();
730*0fca6ea1SDimitry Andric     LE.write<offset_type>(M);
731*0fca6ea1SDimitry Andric     return std::make_pair(N, M);
732*0fca6ea1SDimitry Andric   }
733*0fca6ea1SDimitry Andric 
734*0fca6ea1SDimitry Andric   void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
735*0fca6ea1SDimitry Andric     using namespace support;
736*0fca6ea1SDimitry Andric     endian::Writer LE(Out, llvm::endianness::little);
737*0fca6ea1SDimitry Andric     LE.write<key_type>(K);
738*0fca6ea1SDimitry Andric   }
739*0fca6ea1SDimitry Andric 
740*0fca6ea1SDimitry Andric   void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
741*0fca6ea1SDimitry Andric                 offset_type /*Unused*/) {
742*0fca6ea1SDimitry Andric     using namespace support;
743*0fca6ea1SDimitry Andric     endian::Writer LE(Out, llvm::endianness::little);
744*0fca6ea1SDimitry Andric     // Emit the frames.  We do not explicitly emit the length of the vector
745*0fca6ea1SDimitry Andric     // because it can be inferred from the data length.
746*0fca6ea1SDimitry Andric     for (FrameId F : V)
747*0fca6ea1SDimitry Andric       LE.write<FrameId>(F);
748*0fca6ea1SDimitry Andric   }
749*0fca6ea1SDimitry Andric };
750*0fca6ea1SDimitry Andric 
751*0fca6ea1SDimitry Andric // Trait for reading call stack mappings from the on-disk hash table.
752*0fca6ea1SDimitry Andric class CallStackLookupTrait {
753*0fca6ea1SDimitry Andric public:
754*0fca6ea1SDimitry Andric   using data_type = const llvm::SmallVector<FrameId>;
755*0fca6ea1SDimitry Andric   using internal_key_type = CallStackId;
756*0fca6ea1SDimitry Andric   using external_key_type = CallStackId;
757*0fca6ea1SDimitry Andric   using hash_value_type = CallStackId;
758*0fca6ea1SDimitry Andric   using offset_type = uint64_t;
759*0fca6ea1SDimitry Andric 
760*0fca6ea1SDimitry Andric   static bool EqualKey(internal_key_type A, internal_key_type B) {
761*0fca6ea1SDimitry Andric     return A == B;
762*0fca6ea1SDimitry Andric   }
763*0fca6ea1SDimitry Andric   static uint64_t GetInternalKey(internal_key_type K) { return K; }
764*0fca6ea1SDimitry Andric   static uint64_t GetExternalKey(external_key_type K) { return K; }
765*0fca6ea1SDimitry Andric 
766*0fca6ea1SDimitry Andric   hash_value_type ComputeHash(internal_key_type K) { return K; }
767*0fca6ea1SDimitry Andric 
768*0fca6ea1SDimitry Andric   static std::pair<offset_type, offset_type>
769*0fca6ea1SDimitry Andric   ReadKeyDataLength(const unsigned char *&D) {
770*0fca6ea1SDimitry Andric     using namespace support;
771*0fca6ea1SDimitry Andric 
772*0fca6ea1SDimitry Andric     // We do not explicitly read the key length because it is a constant.
773*0fca6ea1SDimitry Andric     offset_type KeyLen = sizeof(external_key_type);
774*0fca6ea1SDimitry Andric     offset_type DataLen =
775*0fca6ea1SDimitry Andric         endian::readNext<offset_type, llvm::endianness::little>(D);
776*0fca6ea1SDimitry Andric     return std::make_pair(KeyLen, DataLen);
777*0fca6ea1SDimitry Andric   }
778*0fca6ea1SDimitry Andric 
779*0fca6ea1SDimitry Andric   uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
780*0fca6ea1SDimitry Andric     using namespace support;
781*0fca6ea1SDimitry Andric     return endian::readNext<external_key_type, llvm::endianness::little>(D);
782*0fca6ea1SDimitry Andric   }
783*0fca6ea1SDimitry Andric 
784*0fca6ea1SDimitry Andric   data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) {
785*0fca6ea1SDimitry Andric     using namespace support;
786*0fca6ea1SDimitry Andric     llvm::SmallVector<FrameId> CS;
787*0fca6ea1SDimitry Andric     // Derive the number of frames from the data length.
788*0fca6ea1SDimitry Andric     uint64_t NumFrames = Length / sizeof(FrameId);
789*0fca6ea1SDimitry Andric     assert(Length % sizeof(FrameId) == 0);
790*0fca6ea1SDimitry Andric     CS.reserve(NumFrames);
791*0fca6ea1SDimitry Andric     for (size_t I = 0; I != NumFrames; ++I) {
792*0fca6ea1SDimitry Andric       FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D);
793*0fca6ea1SDimitry Andric       CS.push_back(F);
794*0fca6ea1SDimitry Andric     }
795*0fca6ea1SDimitry Andric     return CS;
796*0fca6ea1SDimitry Andric   }
797*0fca6ea1SDimitry Andric };
798*0fca6ea1SDimitry Andric 
799*0fca6ea1SDimitry Andric // Compute a CallStackId for a given call stack.
800*0fca6ea1SDimitry Andric CallStackId hashCallStack(ArrayRef<FrameId> CS);
801*0fca6ea1SDimitry Andric 
802*0fca6ea1SDimitry Andric namespace detail {
803*0fca6ea1SDimitry Andric // "Dereference" the iterator from DenseMap or OnDiskChainedHashTable.  We have
804*0fca6ea1SDimitry Andric // to do so in one of two different ways depending on the type of the hash
805*0fca6ea1SDimitry Andric // table.
806*0fca6ea1SDimitry Andric template <typename value_type, typename IterTy>
807*0fca6ea1SDimitry Andric value_type DerefIterator(IterTy Iter) {
808*0fca6ea1SDimitry Andric   using deref_type = llvm::remove_cvref_t<decltype(*Iter)>;
809*0fca6ea1SDimitry Andric   if constexpr (std::is_same_v<deref_type, value_type>)
810*0fca6ea1SDimitry Andric     return *Iter;
811*0fca6ea1SDimitry Andric   else
812*0fca6ea1SDimitry Andric     return Iter->second;
813*0fca6ea1SDimitry Andric }
814*0fca6ea1SDimitry Andric } // namespace detail
815*0fca6ea1SDimitry Andric 
816*0fca6ea1SDimitry Andric // A function object that returns a frame for a given FrameId.
817*0fca6ea1SDimitry Andric template <typename MapTy> struct FrameIdConverter {
818*0fca6ea1SDimitry Andric   std::optional<FrameId> LastUnmappedId;
819*0fca6ea1SDimitry Andric   MapTy &Map;
820*0fca6ea1SDimitry Andric 
821*0fca6ea1SDimitry Andric   FrameIdConverter() = delete;
822*0fca6ea1SDimitry Andric   FrameIdConverter(MapTy &Map) : Map(Map) {}
823*0fca6ea1SDimitry Andric 
824*0fca6ea1SDimitry Andric   // Delete the copy constructor and copy assignment operator to avoid a
825*0fca6ea1SDimitry Andric   // situation where a copy of FrameIdConverter gets an error in LastUnmappedId
826*0fca6ea1SDimitry Andric   // while the original instance doesn't.
827*0fca6ea1SDimitry Andric   FrameIdConverter(const FrameIdConverter &) = delete;
828*0fca6ea1SDimitry Andric   FrameIdConverter &operator=(const FrameIdConverter &) = delete;
829*0fca6ea1SDimitry Andric 
830*0fca6ea1SDimitry Andric   Frame operator()(FrameId Id) {
831*0fca6ea1SDimitry Andric     auto Iter = Map.find(Id);
832*0fca6ea1SDimitry Andric     if (Iter == Map.end()) {
833*0fca6ea1SDimitry Andric       LastUnmappedId = Id;
834*0fca6ea1SDimitry Andric       return Frame(0, 0, 0, false);
835*0fca6ea1SDimitry Andric     }
836*0fca6ea1SDimitry Andric     return detail::DerefIterator<Frame>(Iter);
837*0fca6ea1SDimitry Andric   }
838*0fca6ea1SDimitry Andric };
839*0fca6ea1SDimitry Andric 
840*0fca6ea1SDimitry Andric // A function object that returns a call stack for a given CallStackId.
841*0fca6ea1SDimitry Andric template <typename MapTy> struct CallStackIdConverter {
842*0fca6ea1SDimitry Andric   std::optional<CallStackId> LastUnmappedId;
843*0fca6ea1SDimitry Andric   MapTy &Map;
844*0fca6ea1SDimitry Andric   llvm::function_ref<Frame(FrameId)> FrameIdToFrame;
845*0fca6ea1SDimitry Andric 
846*0fca6ea1SDimitry Andric   CallStackIdConverter() = delete;
847*0fca6ea1SDimitry Andric   CallStackIdConverter(MapTy &Map,
848*0fca6ea1SDimitry Andric                        llvm::function_ref<Frame(FrameId)> FrameIdToFrame)
849*0fca6ea1SDimitry Andric       : Map(Map), FrameIdToFrame(FrameIdToFrame) {}
850*0fca6ea1SDimitry Andric 
851*0fca6ea1SDimitry Andric   // Delete the copy constructor and copy assignment operator to avoid a
852*0fca6ea1SDimitry Andric   // situation where a copy of CallStackIdConverter gets an error in
853*0fca6ea1SDimitry Andric   // LastUnmappedId while the original instance doesn't.
854*0fca6ea1SDimitry Andric   CallStackIdConverter(const CallStackIdConverter &) = delete;
855*0fca6ea1SDimitry Andric   CallStackIdConverter &operator=(const CallStackIdConverter &) = delete;
856*0fca6ea1SDimitry Andric 
857*0fca6ea1SDimitry Andric   std::vector<Frame> operator()(CallStackId CSId) {
858*0fca6ea1SDimitry Andric     std::vector<Frame> Frames;
859*0fca6ea1SDimitry Andric     auto CSIter = Map.find(CSId);
860*0fca6ea1SDimitry Andric     if (CSIter == Map.end()) {
861*0fca6ea1SDimitry Andric       LastUnmappedId = CSId;
862*0fca6ea1SDimitry Andric     } else {
863*0fca6ea1SDimitry Andric       llvm::SmallVector<FrameId> CS =
864*0fca6ea1SDimitry Andric           detail::DerefIterator<llvm::SmallVector<FrameId>>(CSIter);
865*0fca6ea1SDimitry Andric       Frames.reserve(CS.size());
866*0fca6ea1SDimitry Andric       for (FrameId Id : CS)
867*0fca6ea1SDimitry Andric         Frames.push_back(FrameIdToFrame(Id));
868*0fca6ea1SDimitry Andric     }
869*0fca6ea1SDimitry Andric     return Frames;
870*0fca6ea1SDimitry Andric   }
871*0fca6ea1SDimitry Andric };
872*0fca6ea1SDimitry Andric 
873*0fca6ea1SDimitry Andric // A function object that returns a Frame stored at a given index into the Frame
874*0fca6ea1SDimitry Andric // array in the profile.
875*0fca6ea1SDimitry Andric struct LinearFrameIdConverter {
876*0fca6ea1SDimitry Andric   const unsigned char *FrameBase;
877*0fca6ea1SDimitry Andric 
878*0fca6ea1SDimitry Andric   LinearFrameIdConverter() = delete;
879*0fca6ea1SDimitry Andric   LinearFrameIdConverter(const unsigned char *FrameBase)
880*0fca6ea1SDimitry Andric       : FrameBase(FrameBase) {}
881*0fca6ea1SDimitry Andric 
882*0fca6ea1SDimitry Andric   Frame operator()(LinearFrameId LinearId) {
883*0fca6ea1SDimitry Andric     uint64_t Offset = static_cast<uint64_t>(LinearId) * Frame::serializedSize();
884*0fca6ea1SDimitry Andric     return Frame::deserialize(FrameBase + Offset);
885*0fca6ea1SDimitry Andric   }
886*0fca6ea1SDimitry Andric };
887*0fca6ea1SDimitry Andric 
888*0fca6ea1SDimitry Andric // A function object that returns a call stack stored at a given index into the
889*0fca6ea1SDimitry Andric // call stack array in the profile.
890*0fca6ea1SDimitry Andric struct LinearCallStackIdConverter {
891*0fca6ea1SDimitry Andric   const unsigned char *CallStackBase;
892*0fca6ea1SDimitry Andric   std::function<Frame(LinearFrameId)> FrameIdToFrame;
893*0fca6ea1SDimitry Andric 
894*0fca6ea1SDimitry Andric   LinearCallStackIdConverter() = delete;
895*0fca6ea1SDimitry Andric   LinearCallStackIdConverter(const unsigned char *CallStackBase,
896*0fca6ea1SDimitry Andric                              std::function<Frame(LinearFrameId)> FrameIdToFrame)
897*0fca6ea1SDimitry Andric       : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {}
898*0fca6ea1SDimitry Andric 
899*0fca6ea1SDimitry Andric   std::vector<Frame> operator()(LinearCallStackId LinearCSId) {
900*0fca6ea1SDimitry Andric     std::vector<Frame> Frames;
901*0fca6ea1SDimitry Andric 
902*0fca6ea1SDimitry Andric     const unsigned char *Ptr =
903*0fca6ea1SDimitry Andric         CallStackBase +
904*0fca6ea1SDimitry Andric         static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId);
905*0fca6ea1SDimitry Andric     uint32_t NumFrames =
906*0fca6ea1SDimitry Andric         support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
907*0fca6ea1SDimitry Andric     Frames.reserve(NumFrames);
908*0fca6ea1SDimitry Andric     for (; NumFrames; --NumFrames) {
909*0fca6ea1SDimitry Andric       LinearFrameId Elem =
910*0fca6ea1SDimitry Andric           support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
911*0fca6ea1SDimitry Andric       // Follow a pointer to the parent, if any.  See comments below on
912*0fca6ea1SDimitry Andric       // CallStackRadixTreeBuilder for the description of the radix tree format.
913*0fca6ea1SDimitry Andric       if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) {
914*0fca6ea1SDimitry Andric         Ptr += (-Elem) * sizeof(LinearFrameId);
915*0fca6ea1SDimitry Andric         Elem =
916*0fca6ea1SDimitry Andric             support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
917*0fca6ea1SDimitry Andric       }
918*0fca6ea1SDimitry Andric       // We shouldn't encounter another pointer.
919*0fca6ea1SDimitry Andric       assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0);
920*0fca6ea1SDimitry Andric       Frames.push_back(FrameIdToFrame(Elem));
921*0fca6ea1SDimitry Andric       Ptr += sizeof(LinearFrameId);
922*0fca6ea1SDimitry Andric     }
923*0fca6ea1SDimitry Andric 
924*0fca6ea1SDimitry Andric     return Frames;
925*0fca6ea1SDimitry Andric   }
926*0fca6ea1SDimitry Andric };
927*0fca6ea1SDimitry Andric 
928*0fca6ea1SDimitry Andric struct IndexedMemProfData {
929*0fca6ea1SDimitry Andric   // A map to hold memprof data per function. The lower 64 bits obtained from
930*0fca6ea1SDimitry Andric   // the md5 hash of the function name is used to index into the map.
931*0fca6ea1SDimitry Andric   llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> Records;
932*0fca6ea1SDimitry Andric 
933*0fca6ea1SDimitry Andric   // A map to hold frame id to frame mappings. The mappings are used to
934*0fca6ea1SDimitry Andric   // convert IndexedMemProfRecord to MemProfRecords with frame information
935*0fca6ea1SDimitry Andric   // inline.
936*0fca6ea1SDimitry Andric   llvm::MapVector<FrameId, Frame> Frames;
937*0fca6ea1SDimitry Andric 
938*0fca6ea1SDimitry Andric   // A map to hold call stack id to call stacks.
939*0fca6ea1SDimitry Andric   llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> CallStacks;
940*0fca6ea1SDimitry Andric };
941*0fca6ea1SDimitry Andric 
942*0fca6ea1SDimitry Andric struct FrameStat {
943*0fca6ea1SDimitry Andric   // The number of occurrences of a given FrameId.
944*0fca6ea1SDimitry Andric   uint64_t Count = 0;
945*0fca6ea1SDimitry Andric   // The sum of indexes where a given FrameId shows up.
946*0fca6ea1SDimitry Andric   uint64_t PositionSum = 0;
947*0fca6ea1SDimitry Andric };
948*0fca6ea1SDimitry Andric 
949*0fca6ea1SDimitry Andric // Compute a histogram of Frames in call stacks.
950*0fca6ea1SDimitry Andric llvm::DenseMap<FrameId, FrameStat>
951*0fca6ea1SDimitry Andric computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
952*0fca6ea1SDimitry Andric                           &MemProfCallStackData);
953*0fca6ea1SDimitry Andric 
954*0fca6ea1SDimitry Andric // Construct a radix tree of call stacks.
955*0fca6ea1SDimitry Andric //
956*0fca6ea1SDimitry Andric // A set of call stacks might look like:
957*0fca6ea1SDimitry Andric //
958*0fca6ea1SDimitry Andric // CallStackId 1:  f1 -> f2 -> f3
959*0fca6ea1SDimitry Andric // CallStackId 2:  f1 -> f2 -> f4 -> f5
960*0fca6ea1SDimitry Andric // CallStackId 3:  f1 -> f2 -> f4 -> f6
961*0fca6ea1SDimitry Andric // CallStackId 4:  f7 -> f8 -> f9
962*0fca6ea1SDimitry Andric //
963*0fca6ea1SDimitry Andric // where each fn refers to a stack frame.
964*0fca6ea1SDimitry Andric //
965*0fca6ea1SDimitry Andric // Since we expect a lot of common prefixes, we can compress the call stacks
966*0fca6ea1SDimitry Andric // into a radix tree like:
967*0fca6ea1SDimitry Andric //
968*0fca6ea1SDimitry Andric // CallStackId 1:  f1 -> f2 -> f3
969*0fca6ea1SDimitry Andric //                       |
970*0fca6ea1SDimitry Andric // CallStackId 2:        +---> f4 -> f5
971*0fca6ea1SDimitry Andric //                             |
972*0fca6ea1SDimitry Andric // CallStackId 3:              +---> f6
973*0fca6ea1SDimitry Andric //
974*0fca6ea1SDimitry Andric // CallStackId 4:  f7 -> f8 -> f9
975*0fca6ea1SDimitry Andric //
976*0fca6ea1SDimitry Andric // Now, we are interested in retrieving call stacks for a given CallStackId, so
977*0fca6ea1SDimitry Andric // we just need a pointer from a given call stack to its parent.  For example,
978*0fca6ea1SDimitry Andric // CallStackId 2 would point to CallStackId 1 as a parent.
979*0fca6ea1SDimitry Andric //
980*0fca6ea1SDimitry Andric // We serialize the radix tree above into a single array along with the length
981*0fca6ea1SDimitry Andric // of each call stack and pointers to the parent call stacks.
982*0fca6ea1SDimitry Andric //
983*0fca6ea1SDimitry Andric // Index:              0  1  2  3  4  5  6  7  8  9 10 11 12 13 14
984*0fca6ea1SDimitry Andric // Array:             L3 f9 f8 f7 L4 f6 J3 L4 f5 f4 J3 L3 f3 f2 f1
985*0fca6ea1SDimitry Andric //                     ^           ^        ^           ^
986*0fca6ea1SDimitry Andric //                     |           |        |           |
987*0fca6ea1SDimitry Andric // CallStackId 4:  0 --+           |        |           |
988*0fca6ea1SDimitry Andric // CallStackId 3:  4 --------------+        |           |
989*0fca6ea1SDimitry Andric // CallStackId 2:  7 -----------------------+           |
990*0fca6ea1SDimitry Andric // CallStackId 1: 11 -----------------------------------+
991*0fca6ea1SDimitry Andric //
992*0fca6ea1SDimitry Andric // - LN indicates the length of a call stack, encoded as ordinary integer N.
993*0fca6ea1SDimitry Andric //
994*0fca6ea1SDimitry Andric // - JN indicates a pointer to the parent, encoded as -N.
995*0fca6ea1SDimitry Andric //
996*0fca6ea1SDimitry Andric // The radix tree allows us to reconstruct call stacks in the leaf-to-root
997*0fca6ea1SDimitry Andric // order as we scan the array from left ro right while following pointers to
998*0fca6ea1SDimitry Andric // parents along the way.
999*0fca6ea1SDimitry Andric //
1000*0fca6ea1SDimitry Andric // For example, if we are decoding CallStackId 2, we start a forward traversal
1001*0fca6ea1SDimitry Andric // at Index 7, noting the call stack length of 4 and obtaining f5 and f4.  When
1002*0fca6ea1SDimitry Andric // we see J3 at Index 10, we resume a forward traversal at Index 13 = 10 + 3,
1003*0fca6ea1SDimitry Andric // picking up f2 and f1.  We are done after collecting 4 frames as indicated at
1004*0fca6ea1SDimitry Andric // the beginning of the traversal.
1005*0fca6ea1SDimitry Andric //
1006*0fca6ea1SDimitry Andric // On-disk IndexedMemProfRecord will refer to call stacks by their indexes into
1007*0fca6ea1SDimitry Andric // the radix tree array, so we do not explicitly encode mappings like:
1008*0fca6ea1SDimitry Andric // "CallStackId 1 -> 11".
1009*0fca6ea1SDimitry Andric class CallStackRadixTreeBuilder {
1010*0fca6ea1SDimitry Andric   // The radix tree array.
1011*0fca6ea1SDimitry Andric   std::vector<LinearFrameId> RadixArray;
1012*0fca6ea1SDimitry Andric 
1013*0fca6ea1SDimitry Andric   // Mapping from CallStackIds to indexes into RadixArray.
1014*0fca6ea1SDimitry Andric   llvm::DenseMap<CallStackId, LinearCallStackId> CallStackPos;
1015*0fca6ea1SDimitry Andric 
1016*0fca6ea1SDimitry Andric   // In build, we partition a given call stack into two parts -- the prefix
1017*0fca6ea1SDimitry Andric   // that's common with the previously encoded call stack and the frames beyond
1018*0fca6ea1SDimitry Andric   // the common prefix -- the unique portion.  Then we want to find out where
1019*0fca6ea1SDimitry Andric   // the common prefix is stored in RadixArray so that we can link the unique
1020*0fca6ea1SDimitry Andric   // portion to the common prefix.  Indexes, declared below, helps with our
1021*0fca6ea1SDimitry Andric   // needs.  Intuitively, Indexes tells us where each of the previously encoded
1022*0fca6ea1SDimitry Andric   // call stack is stored in RadixArray.  More formally, Indexes satisfies:
1023*0fca6ea1SDimitry Andric   //
1024*0fca6ea1SDimitry Andric   //   RadixArray[Indexes[I]] == Prev[I]
1025*0fca6ea1SDimitry Andric   //
1026*0fca6ea1SDimitry Andric   // for every I, where Prev is the the call stack in the root-to-leaf order
1027*0fca6ea1SDimitry Andric   // previously encoded by build.  (Note that Prev, as passed to
1028*0fca6ea1SDimitry Andric   // encodeCallStack, is in the leaf-to-root order.)
1029*0fca6ea1SDimitry Andric   //
1030*0fca6ea1SDimitry Andric   // For example, if the call stack being encoded shares 5 frames at the root of
1031*0fca6ea1SDimitry Andric   // the call stack with the previously encoded call stack,
1032*0fca6ea1SDimitry Andric   // RadixArray[Indexes[0]] is the root frame of the common prefix.
1033*0fca6ea1SDimitry Andric   // RadixArray[Indexes[5 - 1]] is the last frame of the common prefix.
1034*0fca6ea1SDimitry Andric   std::vector<LinearCallStackId> Indexes;
1035*0fca6ea1SDimitry Andric 
1036*0fca6ea1SDimitry Andric   using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameId>>;
1037*0fca6ea1SDimitry Andric 
1038*0fca6ea1SDimitry Andric   // Encode a call stack into RadixArray.  Return the starting index within
1039*0fca6ea1SDimitry Andric   // RadixArray.
1040*0fca6ea1SDimitry Andric   LinearCallStackId encodeCallStack(
1041*0fca6ea1SDimitry Andric       const llvm::SmallVector<FrameId> *CallStack,
1042*0fca6ea1SDimitry Andric       const llvm::SmallVector<FrameId> *Prev,
1043*0fca6ea1SDimitry Andric       const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes);
1044*0fca6ea1SDimitry Andric 
1045*0fca6ea1SDimitry Andric public:
1046*0fca6ea1SDimitry Andric   CallStackRadixTreeBuilder() = default;
1047*0fca6ea1SDimitry Andric 
1048*0fca6ea1SDimitry Andric   // Build a radix tree array.
1049*0fca6ea1SDimitry Andric   void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
1050*0fca6ea1SDimitry Andric                  &&MemProfCallStackData,
1051*0fca6ea1SDimitry Andric              const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
1052*0fca6ea1SDimitry Andric              llvm::DenseMap<FrameId, FrameStat> &FrameHistogram);
1053*0fca6ea1SDimitry Andric 
1054*0fca6ea1SDimitry Andric   const std::vector<LinearFrameId> &getRadixArray() const { return RadixArray; }
1055*0fca6ea1SDimitry Andric 
1056*0fca6ea1SDimitry Andric   llvm::DenseMap<CallStackId, LinearCallStackId> takeCallStackPos() {
1057*0fca6ea1SDimitry Andric     return std::move(CallStackPos);
1058*0fca6ea1SDimitry Andric   }
1059*0fca6ea1SDimitry Andric };
1060*0fca6ea1SDimitry Andric 
1061*0fca6ea1SDimitry Andric // Verify that each CallStackId is computed with hashCallStack.  This function
1062*0fca6ea1SDimitry Andric // is intended to help transition from CallStack to CSId in
1063*0fca6ea1SDimitry Andric // IndexedAllocationInfo.
1064*0fca6ea1SDimitry Andric void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record);
1065*0fca6ea1SDimitry Andric 
1066*0fca6ea1SDimitry Andric // Verify that each CallStackId is computed with hashCallStack.  This function
1067*0fca6ea1SDimitry Andric // is intended to help transition from CallStack to CSId in
1068*0fca6ea1SDimitry Andric // IndexedAllocationInfo.
1069*0fca6ea1SDimitry Andric void verifyFunctionProfileData(
1070*0fca6ea1SDimitry Andric     const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
1071*0fca6ea1SDimitry Andric         &FunctionProfileData);
107281ad6265SDimitry Andric } // namespace memprof
107381ad6265SDimitry Andric } // namespace llvm
107481ad6265SDimitry Andric 
107581ad6265SDimitry Andric #endif // LLVM_PROFILEDATA_MEMPROF_H_
1076